import requests
from bs4 import BeautifulSoup
import sys
import re
def find_sub_domain(site, pages):
    """Enumerate subdomains of *site* by scraping Bing search result pages.

    Args:
        site: target domain to look for, e.g. "example.com".
        pages: number of Bing result pages to scan (int or numeric string).

    Returns:
        List of unique hostnames ending in *site*, in discovery order.
        Each new hostname is also printed as it is found.

    Note:
        Relies on Bing's current HTML layout (result URLs inside <cite>
        tags); a markup change will silently yield no results.
    """
    subdomains = []
    # Request headers: a browser User-Agent plus a captured session cookie,
    # so Bing serves a normal results page instead of a bot challenge.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.61 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'Referer': "/search?q=&sp=-1&pq=&sc=8-9&qs=n&sk=&cvid=4ADFD86C6FEF49279A09DF578A997EC0&first=8&FORM=PORE",
        'Cookie': "MUID=09F689A7A8796FBB2A449815A9936E45; SUID=M; MUIDB=09F689A7A8796FBB2A449815A9936E45; _EDGE_S=SID=06EB8F7C11DC6122102E9EC810BA6056; SRCHD=AF=NOFORM; SRCHUID=V=2&GUID=243B793FFB114F0DAF6B5E13EC7C7D66&dmnchg=1; _SS=SID=06EB8F7C11DC6122102E9EC810BA6056; _UR=QS=0&TQS=0; _HPVN=CS=eyJQbiI6eyJDbiI6MSwiU3QiOjAsIlFzIjowLCJQcm9kIjoiUCJ9LCJTYyI6eyJDbiI6MSwiU3QiOjAsIlFzIjowLCJQcm9kIjoiSCJ9LCJReiI6eyJDbiI6MSwiU3QiOjAsIlFzIjowLCJQcm9kIjoiVCJ9LCJBcCI6dHJ1ZSwiTXV0ZSI6dHJ1ZSwiTGFkIjoiMjAyMi0wNS0zMFQwMDowMDowMFoiLCJJb3RkIjowLCJHd2IiOjAsIkRmdCI6bnVsbCwiTXZzIjowLCJGbHQiOjAsIkltcCI6Mn0=; ipv6=hit=1653877828725&t=4; ZHCHATSTRONGATTRACT=TRUE; SRCHUSR=DOB=20220530&T=1653874238000&TPC=1653874280000; ZHCHATWEAKATTRACT=TRUE; SNRHOP=TS=637894711574924019&I=1; SRCHHPGUSR=SRCHLANG=zh-Hans&BRW=NOTP&BRH=M&CW=767&CH=722&SW=1536&SH=864&DPR=1.25&UTC=480&DM=1&PV=10.0.0&HV=1653874333&WTS=63789471038&BZA=0"
    }
    # Compile once, outside the loops: any hostname that ends in the
    # target site (dots in *site* are escaped so they match literally).
    pattern = re.compile(r"[\w-]+(?:\.[\w-]+)*\." + re.escape(site))
    # `pages` is the number of result pages to request.
    for page in range(1, int(pages) + 1):
        # Build the search URL; Bing paginates via the `first` parameter.
        # NOTE(review): the original source had a host-less relative URL —
        # assumed to be Bing (the cookies/referer are Bing's); confirm.
        url = (
            "https://cn.bing.com/search?q=" + site
            + "&sp=-1&pq=" + site
            + "&sc=8-9&qs=n&sk=&cvid=1B54F90605E44CF58E0CAB6DB948D0D0"
            + "&first=" + str((page - 1) * 10 + 8)
            + "&FORM=PERE" + str(page)
        )
        # Fetch the results page; timeout so a stalled request can't hang.
        html = requests.get(url, headers=headers, timeout=10)
        # Parse the response body into a navigable tree.
        soup = BeautifulSoup(html.content, 'html.parser')
        # Result URLs appear inside <cite> tags on Bing result pages.
        for cite in soup.find_all('cite'):
            match = pattern.search(str(cite))
            if match is None:
                # <cite> did not contain a hostname under *site* — skip
                # instead of indexing an empty match list (IndexError).
                continue
            found = match.group(0)
            if found not in subdomains:
                subdomains.append(found)
                print(found)
    return subdomains
if __name__ == '__main__':
    # Usage: <script> <domain> <pages> — exactly two arguments required.
    if len(sys.argv) == 3:
        site = sys.argv[1]
        page = sys.argv[2]
    else:
        # Print usage and exit non-zero so shells can detect the failure.
        print("py -3 E:/python脚本/子域名爆破脚本.py domain pages")
        sys.exit(-1)
    Subdomain = find_sub_domain(site, page)