Python检测URL状态

时间:2022-09-22 17:00:47

Python检测URL状态,并将返回状态码为200的URL追加保存到文件:

1.Requests

#! /usr/bin/env python
#coding=utf-8
import sys
import requests
def getHttpStatusCode(url, timeout=10):
    """Send a GET request to *url* and return its HTTP status code.

    Args:
        url: Absolute URL to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a single unresponsive host can block the script
            indefinitely.

    Returns:
        The integer status code of the response, or the caught
        ``requests.exceptions.HTTPError`` instance if one is raised.

    Other ``requests`` exceptions (connection failures, timeouts, invalid
    URLs) are not caught here and propagate to the caller.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.exceptions.HTTPError as e:
        # Kept for backward compatibility: the caller receives the exception
        # object instead of a status code.
        # NOTE(review): requests documents HTTPError as raised by
        # raise_for_status(); this handler may never fire -- confirm.
        return e
    return response.status_code

if __name__ == "__main__":
    # Check every URL listed in 1.txt (one per line) and append the ones
    # that answer with HTTP 200 to 200.txt.
    with open('1.txt', 'r') as url_file:
        for line in url_file:
            # Strip the line terminator (and stray whitespace such as '\r')
            # once, so the same clean URL is requested, written and printed.
            url = line.strip()
            if not url:
                continue  # blank line: nothing to check
            try:
                status = getHttpStatusCode(url)
                if status == 200:
                    # Distinct name for the output handle so it does not
                    # shadow the input file being iterated.
                    with open('200.txt', 'a') as out_file:
                        # Exactly one newline per URL; the raw input line
                        # already ends with one and would double it.
                        out_file.write(url + '\n')
                    print(url)
                else:
                    print('no 200 code')
            except Exception as e:
                # Best effort: report the failure and move on to the next URL.
                print(e)

  

 #! /usr/bin/env python
# -*- coding: utf-8 -*-

import requests


def request_status(line, timeout=10):
    """Request *line* and record it in url_200.txt when it answers HTTP 200.

    Args:
        line: URL to request, without surrounding whitespace.
        timeout: Seconds to wait for the server before giving up, so an
            unresponsive host cannot stall the whole run.

    Returns:
        *line* when the response status code is 200, otherwise ``None``.

    Exceptions raised by ``requests.get`` propagate to the caller.
    """
    conn = requests.get(line, timeout=timeout)
    if conn.status_code == 200:
        with open('url_200.txt', 'a') as f:
            f.write(line + '\n')
        return line
    else:
        return None


if __name__ == '__main__':
    # NOTE(review): '/1.txt' is an absolute path at the filesystem root --
    # confirm this is intended rather than a file in the working directory.
    # Text mode so each line is a str that can be stripped and used as a URL.
    with open('/1.txt', 'r') as f:
        for line in f:
            url = line.strip()
            if not url:
                continue  # blank line: nothing to check
            try:
                request_status(url)
            except Exception as e:
                # Report the failure instead of silently discarding it, then
                # continue with the next URL.
                print(e)

2.Urllib

#! /usr/bin/env python
#coding:utf-8
import linecache
import os
import sys
import urllib

try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    from urllib import urlopen  # Python 2

# Check every URL listed in 1.txt (one per line) and append the ones that
# answer with HTTP 200 to 200urllib.txt.
with open('1.txt', 'r') as url_file:
    for x in url_file:
        # Remove the trailing newline and surrounding whitespace so the URL
        # is requested and written exactly once per line.
        url = x.strip()
        if not url:
            continue  # blank line: nothing to check
        try:
            response = urlopen(url)
            try:
                a = response.getcode()
            finally:
                response.close()
        except Exception as e:
            print(e)
            # Skip this URL: no status code was obtained for it, so the
            # comparison below must not run.
            continue
        if a == 200:
            # File modes: 'r' read-only, 'w' write (truncate), 'a' append.
            with open('200urllib.txt', 'a') as f:
                f.write(url + '\n')
        else:
            print('error')