Python 3 Beginner Tutorial

Date: 2023-03-08 22:36:29

python : 3.5

jdk : 1.7

eclipse : 4.5.2 (a bit old; the matching Neon 4.6 release is needed, otherwise a prompt dialog keeps popping up)

Should you learn the latest Python 3 or the older Python 2.7?

The official MySQLdb only supports up to Python 3.4, so here we use the third-party library PyMySQL to connect to the MySQL database from Python 3.5.

http://dev.mysql.com/downloads/connector/python/2.0.html
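
PyMySQL is designed as a drop-in replacement for MySQLdb, so code written against MySQLdb can usually keep working once PyMySQL registers itself under that name. A minimal sketch using PyMySQL's install_as_MySQLdb() helper:

import pymysql
pymysql.install_as_MySQLdb()  # after this call, "import MySQLdb" loads PyMySQL instead

import MySQLdb  # this is now PyMySQL under the hood
print(MySQLdb.connect is pymysql.connect)  # expected: True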


PyMySQL download address:

https://pypi.python.org/pypi/PyMySQL#downloads

Installation on Windows:

After downloading and extracting the archive, enter the PyMySql-0.6.7 directory and run python setup.py install.
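
If downloading the source package is inconvenient, the same library can also be installed with pip (pip install PyMySQL). Either way, a quick import check confirms the installation worked; a minimal sketch:

import pymysql

# Print the library's version tuple to confirm the package is importable
print(pymysql.VERSION)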


test1.py

import urllib.request as request

def baidu_tieba(url, begin_page, end_page):
    # Fetch each page of the thread and save it as an .html file
    for i in range(begin_page, end_page + 1):
        sName = 'D:/360Downloads/test/' + str(i).zfill(5) + '.html'
        print('Downloading page ' + str(i) + ', saving it as ' + sName)
        m = request.urlopen(url + str(i)).read()
        with open(sName, 'wb') as file:
            file.write(m)

if __name__ == "__main__":
    url = "http://tieba.baidu.com/p/"
    begin_page = 1
    end_page = 3
    baidu_tieba(url, begin_page, end_page)

test2.py

import urllib.request as request
import urllib.error as error
import re
import os

def baidu_tieba(url, begin_page, end_page):
    count = 1
    for i in range(begin_page, end_page + 1):
        sName = 'D:/360Downloads/test/' + str(i).zfill(5) + '.html'
        print('Downloading page ' + str(i) + ', saving it as ' + sName)
        m = request.urlopen(url + str(i)).read()
        # Create a directory for the images found on this page
        dirpath = 'D:/360Downloads/test/'
        dirname = str(i)
        new_path = os.path.join(dirpath, dirname)
        if not os.path.isdir(new_path):
            os.makedirs(new_path)
        page_data = m.decode('gbk', 'ignore')
        page_image = re.compile('<img src="(.+?)"')
        for image in page_image.findall(page_data):
            # Only download images whose URL ends in .png
            pattern = re.compile(r'^http://.*\.png$')
            if pattern.match(image):
                try:
                    image_data = request.urlopen(image).read()
                    image_path = dirpath + dirname + '/' + str(count) + '.png'
                    count += 1
                    print(image_path)
                    with open(image_path, 'wb') as image_file:
                        image_file.write(image_data)
                except error.URLError:
                    print('Download failed')
        with open(sName, 'wb') as file:
            file.write(m)

if __name__ == "__main__":
    url = "http://tieba.baidu.com/p/"
    begin_page = 1
    end_page = 3
    baidu_tieba(url, begin_page, end_page)

test3.py

# Python 3.4 crawler tutorial
# Download the images found on a web page
# 林炳文Evankaka (blog: http://blog.****.net/evankaka/)
import urllib.request
import socket
import re
import sys
import os

targetDir = r"D:\PythonWorkPlace\load"  # directory the downloaded files are saved to

def destFile(path):
    # Build the local file name for a downloaded image
    if not os.path.isdir(targetDir):
        os.makedirs(targetDir)
    pos = path.rindex('/')
    t = os.path.join(targetDir, path[pos + 1:])
    print(t)
    return t

if __name__ == "__main__":  # program entry point
    weburl = "http://www.douban.com/"
    webheaders = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firefox/23.0'}
    req = urllib.request.Request(url=weburl, headers=webheaders)  # build the request with headers
    webpage = urllib.request.urlopen(req)  # send the request
    contentBytes = webpage.read()
    # Use a regular expression to find all image links in the page
    for link, t in set(re.findall(r'(https:[^\s]*?(jpg|png|gif))', str(contentBytes))):
        print(link)
        try:
            urllib.request.urlretrieve(link, destFile(link))  # download the image
        except Exception:
            print('Download failed')  # report the failure and continue

test4.py

'''
First example: a simple crawler that fetches a single web page
'''
import urllib.request

# Target URL
url = "http://bj.58.com/caishui/28707491160259x.shtml?adtype=1&entinfo=28707491160259_0&adact=3&psid=156713756196890928513274724"
# Build the request
request = urllib.request.Request(url)
# Fetch the page
response = urllib.request.urlopen(request)
data = response.read()
# Decode the response
data = data.decode('utf-8')
# Print the result
print(data)
# Print various details about the response
# print(type(response))
# print(response.geturl())
# print(response.info())
# print(response.getcode())

test5.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import re
import urllib.request as request
from bs4 import BeautifulSoup as bs
import csv
import os

def GetAllLink():
    num = int(input("How many pages to crawl:> "))
    if not os.path.exists('./data/'):
        os.mkdir('./data/')
    for i in range(num):
        if i + 1 == 1:
            url = 'http://nj.58.com/piao/'
            GetPage(url, i)
        else:
            url = 'http://nj.58.com/piao/pn%s/' % (i + 1)
            GetPage(url, i)

def GetPage(url, num):
    Url = url
    user_agent = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:32.0) Gecko/20100101 Firefox/32.0'
    headers = {'User-Agent': user_agent}
    req = request.Request(Url, headers=headers)
    page = request.urlopen(req).read().decode('utf-8')
    soup = bs(page, "html.parser")
    table = soup.table
    tag = table.find_all('tr')
    # Pull out just the part of the page we need
    soup2 = bs(str(tag), "html.parser")
    title = soup2.find_all('a', 't')        # title and URL
    price = soup2.find_all('b', 'pri')      # sale price
    fixedprice = soup2.find_all('del')      # original price
    date = soup2.find_all('span', 'pr25')   # show time
    atitle = []
    ahref = []
    aprice = []
    afixedprice = []
    adate = []
    for i in title:
        atitle.append(i.get_text())
        ahref.append(i.get('href'))
    for i in price:
        aprice.append(i.get_text())
    for i in fixedprice:
        afixedprice.append(i.get_text())
    for i in date:
        adate.append(i.get_text())
    # newline='' keeps the csv module from writing blank rows on Windows
    csvfile = open('./data/ticket_%s.csv' % num, 'w', newline='')
    writer = csv.writer(csvfile)
    writer.writerow(['title', 'url', 'price', 'original price', 'show time'])
    '''
    Every record has a title, but not necessarily a date;
    missing fields are filled with '---'.
    '''
    if len(atitle) > len(adate):
        for i in range(len(atitle) - len(adate)):
            adate.append('---')
    for i in range(len(atitle) - len(afixedprice)):
        afixedprice.append('---')
    for i in range(len(atitle) - len(aprice)):
        aprice.append('---')
    for i in range(len(atitle)):
        message = atitle[i] + '|' + ahref[i] + '|' + aprice[i] + '|' + afixedprice[i] + '|' + adate[i]
        writer.writerow(message.split('|'))
    print("[Result]:> Page %s saved!" % (num + 1))
    csvfile.close()

if __name__ == '__main__':
    GetAllLink()

test6.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import urllib.request as request
from bs4 import BeautifulSoup as bs

def GetAllLink():
    num = int(input("How many pages to crawl:> "))
    for i in range(num):
        if i + 1 == 1:
            url = 'http://bj.58.com/caishui/?key=%E4%BB%A3%E7%90%86%E8%AE%B0%E8%B4%A6%E5%85%AC%E5%8F%B8&cmcskey=%E4%BB%A3%E7%90%86%E8%AE%B0%E8%B4%A6%E5%85%AC%E5%8F%B8&final=1&jump=1&specialtype=gls'
            GetPage(url, i)
        else:
            url = 'http://bj.58.com/caishui/pn%s/' % (i + 1) + '?key=%E4%BB%A3%E7%90%86%E8%AE%B0%E8%B4%A6%E5%85%AC%E5%8F%B8&cmcskey=%E4%BB%A3%E7%90%86%E8%AE%B0%E8%B4%A6%E5%85%AC%E5%8F%B8&final=1&specialtype=gls&PGTID=0d30215f-0000-1941-5161-367b7a641048&ClickID=4'
            GetPage(url, i)

def GetPage(url, num):
    Url = url
    user_agent = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:32.0) Gecko/20100101 Firefox/32.0'
    headers = {'User-Agent': user_agent}
    req = request.Request(Url, headers=headers)
    page = request.urlopen(req).read().decode('utf-8')
    soup = bs(page, "html.parser")
    table = soup.table
    tag = table.find_all('tr')
    # Pull out just the part of the page we need
    soup2 = bs(str(tag), "html.parser")
    title = soup2.find_all('a', 't')                 # title and URL
    companyName = soup2.find_all('a', 'sellername')  # company name
    atitle = []
    ahref = []
    acompanyName = []
    for i in title:
        atitle.append(i.get_text())
        ahref.append(i.get('href'))
    for i in companyName:
        acompanyName.append(i.get_text())
    for i in range(len(ahref)):
        getSonPage(str(ahref[i]))

def getSonPage(url):
    Url = url
    user_agent = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:32.0) Gecko/20100101 Firefox/32.0'
    headers = {'User-Agent': user_agent}
    req = request.Request(Url, headers=headers)
    page = request.urlopen(req).read().decode('utf-8')
    soup = bs(page, "html.parser")
    print("=========================")
    # Category
    print(soup.find('div', 'su_con').get_text())
    # Service area
    print(soup.find('div', 'su_con quyuline').get_text())
    # Contact person
    print(soup.find_all('ul', 'suUl')[0].find_all('li')[2].find_all('a')[0].get_text())
    # Business address
    print(soup.find_all('ul', 'suUl')[0].find_all('li')[3].find('div', 'su_con').get_text().replace("\n", '').replace("\r", '').replace('\t', '').replace('&nbsp;', ''))
    # Service items
    print(soup.find('article', 'description_con').get_text().replace("_____________________________________", "\n\r").replace("___________________________________", "\n\r").replace("(以下为公司北京区域分布图)", ""))
    print("=========================")

if __name__ == '__main__':
    GetAllLink()

test7.py

import pymysql

# Connect to the MySQL server; host, user, password and database are environment-specific
conn = pymysql.connect(host='192.168.1.102', port=3306, user='root', passwd='', db='test', charset='utf8')
cur = conn.cursor()
cur.execute("select version()")
for i in cur:
    print(i)
cur.close()
conn.close()