"""
Elasticsearch traversal, walking the data by time window.
Wildcards are no longer needed.
"""

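# Traverse all ES data.
# Documented spider_time range: [1599494400, now].
# NOTE(review): the loop below scans a fixed, hard-coded range instead - confirm which is intended.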
import datetime
import sys
import time

sys.path.append("/")  # 解决潜在的路径依赖问题
sys.path.append("../../")  # 解决潜在的路径依赖问题
sys.path.append("../../../")  # 解决潜在的路径依赖问题
sys.path.append("../../../../")  # 解决潜在的路径依赖问题
from base.utils.es_helper import es_query
from base.utils.redis_helper import handle_redis

# Epoch-second bounds of the scan; the range is half-open: [SCAN_START, SCAN_END).
SCAN_START = 1609344000
SCAN_END = 1612022400
# Width of each query window in seconds (30 minutes).
WINDOW_SECONDS = 1800
# Largest page requested per window. Values above 10000 are not honoured
# (original author's note); 10000 is also Elasticsearch's default
# index.max_result_window.
MAX_HITS = 10000


def build_window_query(start, end, size=MAX_HITS):
    """Return the ES search body for documents with start <= spider_time < end.

    The window is half-open (``gte``/``lt``) so that consecutive windows
    neither skip nor double-count documents whose ``spider_time`` falls
    exactly on a window boundary.

    Args:
        start: inclusive lower bound of ``spider_time``.
        end: exclusive upper bound of ``spider_time``.
        size: maximum number of hits to request.

    Returns:
        A dict usable as the search request body; only the ``url`` and
        ``version`` source fields are requested.
    """
    return {
        "_source": {
            "includes": ["url", "version"],
            "excludes": [],
        },
        "query": {
            "bool": {
                "must": [
                    {
                        "range": {
                            "spider_time": {
                                "gte": start,
                                "lt": end,
                            }
                        }
                    }
                ]
            }
        },
        "size": size,
    }


def redis_key_for(version, index_name):
    """Build the Redis key ``news_set_es:VERSION:YYYYMM`` for one hit.

    Args:
        version: value of the hit's ``version`` source field.
        index_name: the hit's ``_index``; characters 7-10 and 12-13 are
            taken as year and month (e.g. ``aic_ik-2021.01`` -> ``202101``).

    Returns:
        The key string.
    """
    return "news_set_es:" + str(version) + ":" + index_name[7:11] + index_name[12:14]


def main():
    """Walk the scan range window by window and record every hit id in Redis."""
    for window_start in range(SCAN_START, SCAN_END, WINDOW_SECONDS):
        window_end = window_start + WINDOW_SECONDS
        print(window_start, window_end)
        print(datetime.datetime.fromtimestamp(window_start))

        query = build_window_query(window_start, window_end)
        items = es_query(database="aic_ik-2021.01", table="news", query=query)['hits']['hits']

        i = 0
        for i, item in enumerate(items):
            if i % 100 == 0:
                # Lightweight progress indicator, kept on one line.
                print(i, end=" ")
            data = item["_source"]
            # After a successful read, record the id under news_set_es:VERSION:YYYYMM.
            key_name = redis_key_for(data['version'], item["_index"])
            handle_redis.cache_set(key_name, item["_id"])
        # Last processed index (0 when the window is empty); also ends the progress line.
        print(i)

        # A full page means the window may hold more documents than one
        # request can return, so some were probably not processed.
        if len(items) >= MAX_HITS:
            print("数据总量超过10000,请手工处理")


if __name__ == '__main__':
    main()
