import os


def rename(path):
    """
    Upper-case the name of every regular file directly inside *path*.

    The name is split into stem and final extension with
    ``os.path.splitext``. A file whose stem is exactly six characters long
    is renamed to the upper-cased stem plus its unchanged extension. Any
    other file gets the same treatment and is additionally prefixed with
    "问题" ("problem") so that it stands out. Sub-directories are skipped.

    Names without any dot are handled (they simply have an empty
    extension), and for names with several dots only the last component is
    treated as the extension, so the real file type is never dropped.

    Note: ``os.rename`` replaces an existing destination file silently on
    POSIX and raises ``FileExistsError`` on Windows.

    :param path: directory whose files are renamed in place.
    """
    for entry in os.listdir(path):
        olddir = os.path.join(path, entry)
        if os.path.isdir(olddir):
            continue
        # ext keeps its leading dot (or is empty), so it can be appended as-is.
        stem, ext = os.path.splitext(entry)
        prefix = "" if len(stem) == 6 else "问题"
        name_new = prefix + stem.upper() + ext
        if name_new == entry:
            # Already in the desired form; nothing to do.
            continue
        os.rename(olddir, os.path.join(path, name_new))


def isAllZh(s):
    """
    Return True if *s* contains at least one character in U+4E00..U+9FA5.

    Despite the name, a single character in that range is enough; an
    empty string yields False.
    """
    return any('\u4e00' <= ch <= '\u9fa5' for ch in s)


def isSuccess(name_head):
    """
    Decide whether *name_head* is an acceptable sample label.

    The label must be exactly six characters long, must not make
    ``isAllZh`` return True, and must not contain any character from the
    rejected groups listed below. The first failing rule prints its reason
    and the function returns False; if every rule passes it returns True.
    """
    if len(name_head) != 6:
        print(f"名字长度不为6:{name_head}")
        return False
    if isAllZh(name_head):
        print(f"包含中文:{name_head}")
        return False

    # (rejected characters, reason printed on a hit), checked in this order.
    rejected_groups = (
        ("0O", "存在0或者O"),
        ("1IL", "存在1、I或者L"),
        ("Z", "存在Z"),
        ("UN", "存在U或者N"),
        ("9Q", "存在9或者Q"),
        ("GW", "存在G或者W"),
    )
    for chars, reason in rejected_groups:
        if any(ch in name_head for ch in chars):
            print(f"{reason}:{name_head}")
            return False
    return True


def cheak(path):
    """
    Delete every file in *path* whose label fails ``isSuccess``.

    The label is the part of the file name before the first ".".
    Sub-directories are skipped, as ``rename`` does, because ``os.remove``
    cannot delete a directory and would raise.

    :param path: directory to clean up in place.
    """
    for name in os.listdir(path):
        target = os.path.join(path, name)
        if os.path.isdir(target):
            continue
        name_head = name.split(".")[0]
        if not isSuccess(name_head):
            print(f"[delete]{name_head}不符合要求")
            os.remove(target)


def report(path):
    """
    Print a character-frequency report for the sample files in *path*.

    Each regular file counts as one sample; its label is the part of the
    file name before the first ".". Sub-directories are ignored. The
    report lists the number of samples, the total number of label
    characters, the distinct characters, the characters that occur less
    than one tenth as often as the per-character average, and finally the
    count and percentage of every character.

    Both character listings are printed in sorted order so that the output
    is identical from run to run.

    :param path: directory containing the sample files.
    """
    total = 0
    zifu_total = 0
    zifu_msg = {}  # character -> number of occurrences across all labels
    for name in os.listdir(path):
        if os.path.isdir(os.path.join(path, name)):
            continue
        name_head = name.split(".")[0]
        total += 1
        zifu_total += len(name_head)
        for zifu in name_head:
            zifu_msg[zifu] = zifu_msg.get(zifu, 0) + 1

    zifu_list = sorted(zifu_msg)

    def describe(zifu):
        # "<char> (<count>,<share of all characters in percent>%)"
        count = zifu_msg[zifu]
        return f"{zifu} ({count},{round(100 * count / zifu_total, 4)}%)"

    print("-------样本报告-------")
    print(f"总计样本数: {total}")
    print(f"总计字符数: {zifu_total}")
    print(f"字符种类数: {len(zifu_list)}")
    print(f"字符枚举: {zifu_list}")
    print("异常字符（数量明显偏低）:")
    if zifu_list:
        # A character is flagged when it is rarer than 10% of the average count.
        threshold = (zifu_total / len(zifu_list)) / 10
        for zifu in zifu_list:
            if zifu_msg[zifu] < threshold:
                print(describe(zifu))
    print("-------字符报告-------")
    for zifu in zifu_list:
        print(describe(zifu))


if __name__ == '__main__':
    path = r"yzm"  # directory holding the sample files
    # Run the pipeline in order: upper-case the file names, delete the
    # files that fail validation, then print the sample report.
    for step in (rename, cheak, report):
        step(path)
