#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import jieba
from collections import Counter

from base.utils.es_helper import es_query


def dict2list(dic: dict) -> list:
    """Convert a dictionary into a list of ``(key, value)`` tuples.

    Args:
        dic: The dictionary to convert.

    Returns:
        A new list with one ``(key, value)`` tuple per entry, in the
        dictionary's own iteration order. An empty dictionary yields ``[]``.
    """
    # dict.items() already yields the pairs, so zipping keys() with values()
    # by hand is unnecessary.
    return list(dic.items())


# Search body: documents whose "kind" term equals "news", starting at
# offset 0 and limited to 10 hits, with no sorting or aggregations.
query = {
    "query": {
        "bool": {
            "must": [{"term": {"kind": "news"}}],
            "must_not": [],
            "should": [],
        }
    },
    "from": 0,
    "size": 10,
    "sort": [],
    "aggs": {},
}

# NOTE(review): "aic_ik-2021.01" is presumably the index name and None an
# optional doc-type argument -- confirm against es_query's signature.
response = es_query("aic_ik-2021.01", None, query)
docs = response["hits"]["hits"]

# Collect every segmented token that is at least two characters long.
dic = []
for hit in docs:
    tokens = jieba.cut(hit["_source"]["nlp_text"])
    dic.extend(token for token in tokens if len(token) >= 2)

# Report the 200 most frequent tokens as (token, count) pairs.
c = Counter(dic).most_common(200)
print(c)
