"""
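Very early HBase records may be missing the `type` and `kind` fields, so
defaults are applied when those fields are read:

item_mongo['type'] = item.get('type', "deep")  # some early records have no value
item_mongo['kind'] = item.get('kind', "news")  # some early records have no value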
"""
# import datetime
# import json
# import threading
# import time
# import traceback
#
# from base.utils import config, kafka_helper_copy
# from base.utils.kafka_helper import consumer_myself
# from base.utils import mongo_helper, redis_helper
# from base.utils.log import logger
#
#
# def run_mongo_timer():
#     total = 0
#     while True:
#         try:
#             item = consumer_myself("aic.spider", "database_mongodb")
#             item_mongo = {}
#             if item is None:
#                 continue
#             if type(item) == bytes:
#                 item = item.decode()
#                 item = json.loads(item)
#             if type({}) != type(item):
#                 item = json.loads(item)
#             try:
#                 item = json.loads(item)
#             except:
#                 pass
#             if item.get('type') == "url连接采集":
#                 item['type'] = "deep"
#             item_mongo['type'] = item.get('type', "deep")  # 早期有数据没有值
#             item_mongo['kind'] = item.get('kind', "news")  # 早期有数据没有值
#             # mongoDB相关信息
#             item_mongo['source'] = item['source']
#             item_mongo['url'] = item['url']
#             item_mongo['title'] = item.get('title_h5', "title")
#
#             item_mongo['spider_time'] = item['spider_time']
#             if "spider_time" in item and item['spider_time'] < int(time.time()) - 6 * 86400:
#                 logger.warning("本条数据不是最近采集的,不予入库")
#                 return
#             item_mongo['html5'] = item['html5']
#
#             mongo_helper.insert_one(config.DB_NEWS_DETAIL, item_mongo)
#             print(f"aic_news_detail存储成功 {item_mongo['title']}")
#             item_mongo.pop("html5")
#             if item['spider_time'] > int(time.time()) - 60 * 20:
#                 mongo_helper.insert_one(config.DB_NEWS_LIST, item_mongo)
#                 total += 1
#                 print(f"aic_news_list存储成功 {item_mongo['title']} {total}")
#             # mongo_helper.update_one(table="aic.spider".replace('.','_'), filter={'url': item['url']}, update={'$set': {'state.database_mongodb': 1}})
#             # id = mongo_helper.query_one(table=config.DB_NEWS_LIST, filter={"url": item_mongo["url"]})["_id"]
#             # redis_helper.handle_redis.cache_list_lpush(config.KEY_ES, json.dumps({"id": id}, ensure_ascii=False))
#         except:
#             traceback.print_exc()
#
#
# if __name__ == '__main__':
#     run_mongo_timer()
