Python subdomain scanning script

Time: 2025-05-13 08:32:22

The script below pages through a search engine's results (Bing, judging by the query parameters and session cookies), pulls the <cite> tag out of every hit, and collects the unique domains it finds for the target site.

import requests
from bs4 import BeautifulSoup
import sys
import re


def find_sub_domain(site, pages):
    Subdomain = []
    # Request headers: spoof a regular browser so the search engine returns full results.
    # Note: the original post dropped the scheme/host from its URLs; https://cn.bing.com is
    # assumed below, based on the query parameters and the Bing session cookies.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.61 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'Referer': "https://cn.bing.com/search?q=&sp=-1&pq=&sc=8-9&qs=n&sk=&cvid=4ADFD86C6FEF49279A09DF578A997EC0&first=8&FORM=PORE",
        # Session cookie captured from a browser; it goes stale quickly, so replace it
        # with your own if requests start failing.
        'Cookie': "MUID=09F689A7A8796FBB2A449815A9936E45; SUID=M; MUIDB=09F689A7A8796FBB2A449815A9936E45; _EDGE_S=SID=06EB8F7C11DC6122102E9EC810BA6056; SRCHD=AF=NOFORM; SRCHUID=V=2&GUID=243B793FFB114F0DAF6B5E13EC7C7D66&dmnchg=1; _SS=SID=06EB8F7C11DC6122102E9EC810BA6056; _UR=QS=0&TQS=0; _HPVN=CS=eyJQbiI6eyJDbiI6MSwiU3QiOjAsIlFzIjowLCJQcm9kIjoiUCJ9LCJTYyI6eyJDbiI6MSwiU3QiOjAsIlFzIjowLCJQcm9kIjoiSCJ9LCJReiI6eyJDbiI6MSwiU3QiOjAsIlFzIjowLCJQcm9kIjoiVCJ9LCJBcCI6dHJ1ZSwiTXV0ZSI6dHJ1ZSwiTGFkIjoiMjAyMi0wNS0zMFQwMDowMDowMFoiLCJJb3RkIjowLCJHd2IiOjAsIkRmdCI6bnVsbCwiTXZzIjowLCJGbHQiOjAsIkltcCI6Mn0=; ipv6=hit=1653877828725&t=4; ZHCHATSTRONGATTRACT=TRUE; SRCHUSR=DOB=20220530&T=1653874238000&TPC=1653874280000; ZHCHATWEAKATTRACT=TRUE; SNRHOP=TS=637894711574924019&I=1; SRCHHPGUSR=SRCHLANG=zh-Hans&BRW=NOTP&BRH=M&CW=767&CH=722&SW=1536&SH=864&DPR=1.25&UTC=480&DM=1&PV=10.0.0&HV=1653874333&WTS=63789471038&BZA=0"
    }
    # pages is the number of result pages to walk through.
    for i in range(1, int(pages) + 1):
        # Build the search URL for result page i.
        url = ("https://cn.bing.com/search?q=" + site + "&sp=-1&pq=" + site
               + "&sc=8-9&qs=n&sk=&cvid=1B54F90605E44CF58E0CAB6DB948D0D0&first="
               + str(((int(i) - 1) * 10) + 8) + "&FORM=PERE" + str(i))
        # Send the GET request and keep the response.
        html = requests.get(url, headers=headers)
        # Parse the response body as HTML.
        soup = BeautifulSoup(html.content, 'html.parser')
        # Pull out every <cite> tag; the result's URL lives inside it.
        job_bt = soup.find_all('cite')
        # Pre-compile the pattern that picks the domain out of each <cite> tag.
        # (The original r"\w+\." only matched a single label such as "www.", so it is
        # widened here to capture the whole domain.)
        pattern = re.compile(r"[\w.-]+\.\w+")
        for each in job_bt:
            # Extract the domain via the regex; skip tags with no match and duplicates.
            match = pattern.findall(each.get_text())
            if match and match[0] not in Subdomain:
                Subdomain.append(match[0])
                print(match[0])
    return Subdomain


if __name__ == '__main__':
    if len(sys.argv) == 3:
        site = sys.argv[1]
        page = sys.argv[2]
    else:
        print("Usage: py -3 E:/python脚本/子域名爆破脚本.py domain pages")
        sys.exit(-1)
    Subdomain = find_sub_domain(site, page)
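
To sanity-check the parsing step on its own, here is a minimal sketch that runs the same BeautifulSoup-plus-regex pipeline over a hand-written HTML fragment; the fragment and the example.com hosts in it are made up for illustration:

import re
from bs4 import BeautifulSoup

# Hand-written stand-in for one page of search results (illustrative only).
sample_html = """
<ol>
  <li><cite>www.example.com/about</cite></li>
  <li><cite>blog.example.com</cite></li>
  <li><cite>www.example.com/news</cite></li>
</ol>
"""

soup = BeautifulSoup(sample_html, 'html.parser')
pattern = re.compile(r"[\w.-]+\.\w+")
found = []
for cite in soup.find_all('cite'):
    match = pattern.findall(cite.get_text())
    if match and match[0] not in found:
        found.append(match[0])
        print(match[0])
# Prints www.example.com and blog.example.com, each once.

Since the pattern never changes between pages, hoisting pattern = re.compile(...) above the page loop in the full script would be a further small cleanup. An invocation then looks like: py -3 子域名爆破脚本.py example.com 3 (the domain and page count here are illustrative).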