"""
beautifulSoup删除标签的例子。踩坑问刘言，应该是select的不能用此方法删除
"""
import requests
from bs4 import BeautifulSoup


# Filter
def guolv(html, father_id, tag_name, black_word):
    """Remove blacklisted child elements from an HTML document.

    Parses ``html`` with the ``lxml`` parser and detaches every element whose
    direct parent has ``id == father_id`` and whose serialized markup (the
    tag itself, its attributes and all of its descendants) contains
    ``black_word``.

    Args:
        html: HTML markup to filter.
        father_id: Value of the ``id`` attribute of the parent element whose
            direct children are candidates for removal.
        tag_name: Currently not consulted -- elements of every tag name are
            considered. Retained so existing callers keep working.
        black_word: Substring searched for in each candidate's serialized
            markup. An empty string is contained in every string, so passing
            ``""`` removes all direct children of the matching parent(s).

    Returns:
        The filtered document, re-serialized with ``prettify()``.
    """
    soup = BeautifulSoup(html, "lxml")
    # The result of select() is iterated while elements are detached from the
    # tree; the tree is modified, not the result sequence being looped over.
    for element in soup.select("*"):
        parent = element.parent
        # Check the cheap parent-id condition first so that only real
        # candidates are serialized with str(); serializing every element in
        # the document (including <html> itself) is needlessly expensive.
        # An element without a parent cannot be a child of ``father_id``.
        if parent is None or parent.get("id") != father_id:
            continue
        if black_word in str(element):
            element.extract()
    return soup.prettify()


if __name__ == '__main__':
    # Browser-like request headers (the User-Agent identifies as Chrome 84 on
    # Windows 10).
    # NOTE(review): the Cookie value below embeds what looks like a Baidu
    # session credential (BDUSS). Credentials should not live in source
    # control -- consider loading it from the environment and rotating it.
    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
        # "br" is intentionally not advertised: requests/urllib3 only decode
        # Brotli when an optional brotli package is installed, otherwise a
        # br-compressed body would end up in res.text as undecoded binary.
        "Accept-Encoding": "gzip, deflate",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Cache-Control": "max-age=0",
        "Connection": "keep-alive",
        "Cookie": "BIDUPSID=694C700828ADECCFF490ED732EB14A93; PSTM=1598929943; BAIDUID=694C700828ADECCF629ECF87C0AB8636:FG=1; BD_UPN=12314753; BDUSS=zhQUVBsdTRJR284YWVLMkFNZUEyQkdoME13eVJ1RjZCTm9JVklsN3dYbGxBSlJmRVFBQUFBJCQAAAAAAAAAAAEAAADcZtWZyXOsSqxKAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAGVzbF9lc2xfN; BDUSS_BFESS=zhQUVBsdTRJR284YWVLMkFNZUEyQkdoME13eVJ1RjZCTm9JVklsN3dYbGxBSlJmRVFBQUFBJCQAAAAAAAAAAAEAAADcZtWZyXOsSqxKAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAGVzbF9lc2xfN; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; H_PS_PSSID=1433_33102_33059_31254_33098_33100_32938_32845; yjs_js_security_passport=83b9e3a935d13e6473b9f0ca8e816bfa995e09f8_1606383539_js; BD_HOME=1; delPer=0; BD_CK_SAM=1; PSINO=1; H_PS_645EC=5af4O7lN2o4sDI64jS5iQWkNfszuN%2FW5ZpZAIj9ntS4p2xMrmPBTxs73uNk; BA_HECTOR=a4858k2021840180bv1fs0r680q",
        "Host": "www.baidu.com",
        "Referer": "https://www.baidu.com/",
        "Sec-Fetch-Dest": "document",
        "Sec-Fetch-Mode": "navigate",
        "Sec-Fetch-Site": "same-origin",
        "Sec-Fetch-User": "?1",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36",
    }
    # Baidu search for the URL-encoded query in ``wd`` ("微信小程序").
    # Without a timeout requests waits indefinitely on an unresponsive server.
    res = requests.get(
        "https://www.baidu.com/s?wd=%E5%BE%AE%E4%BF%A1%E5%B0%8F%E7%A8%8B%E5%BA%8F&rsv_spt=1&rsv_iqid=0x8615fc790004e113&issp=1&f=8&rsv_bp=1&rsv_idx=2&ie=utf-8&tn=baiduhome_pg&rsv_enter=0&rsv_dl=tb&rsv_sug3=2&rsv_sug1=1&rsv_sug7=100&rsv_btype=i&inputT=1590&rsv_sug4=1591",
        headers=headers,
        timeout=10)
    # Fail loudly on an HTTP error instead of filtering and printing an
    # error page as if it were search results.
    res.raise_for_status()
    html = res.text
    # Drop children of the element with id "content_left" whose markup
    # contains "广告" (advertisement).
    html2 = guolv(html, "content_left", "div", "广告")
    print(html2)
