Python复习笔记(十)Http协议--Web服务器-并发服务器

时间:2021-06-03 22:45:37

1. HTTP协议(超文本传输协议)

Python复习笔记(十)Http协议--Web服务器-并发服务器

Python复习笔记(十)Http协议--Web服务器-并发服务器

Python复习笔记(十)Http协议--Web服务器-并发服务器

浏览器===>服务器发送的请求格式如下:(浏览器告诉服务器,浏览器的信息)

GET / HTTP/1.1
Host: www.baidu.com
Connection: keep-alive
Cache-Control: max-age=0
Upgrade-Insecure-Requests: 1
User-Agent: Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
Accept-Encoding: gzip, deflate, br
Accept-Language: zh-CN,zh;q=0.9

服务器==>浏览器回送的数据格式如下:(告诉浏览器,服务器的版本,按照什么格式解析)

header:告诉浏览器特殊信息(必须有HTTP/1.1 200 OK)

HTTP/1.1 200 OK
Bdpagetype: 2
Bdqid: 0xb49ba00a00010431
Cache-Control: private
Connection: Keep-Alive
Content-Encoding: gzip
Content-Type: text/html;charset=utf-8
Date: Sat, 09 Mar 2019 14:40:59 GMT
Expires: Sat, 09 Mar 2019 14:40:59 GMT
Server: BWS/1.1
Set-Cookie: BDSVRTM=70; path=/
Set-Cookie: BD_HOME=1; path=/
Set-Cookie: H_PS_PSSID=28648_1455_28395_21114_28608_28584_28557_28604_28625_28605; path=/; domain=.baidu.com
Strict-Transport-Security: max-age=172800
X-Ua-Compatible: IE=Edge,chrome=1
Transfer-Encoding: chunked
Cookie: BAIDUID=969EF83E73AFFBF96897E645871A1957:FG=1; BIDUPSID=969EF83E73AFFBF96897E645871A1957; PSTM=1549905544; BD_UPN=12314353; delPer=0; BD_CK_SAM=1; PSINO=1; ___rl__test__cookies=1552141627935; OUTFOX_SEARCH_USER_ID_NCOO=1556761245.5429947; BDRCVFR[QxxZVyx49rf]=I67x6TjHwwYf0; H_WISE_SIDS=125704_114553_129323_106370_128146_128229_120193_123018_129449_118893_118871_118854_118832_118787_107312_129945_129387_129088_129558_117336_129751_117432_128791_128402_129655_128246_124639_129620_129008_128967_129641_129293_128805_129692_129838_129981_129808_127764_129482_129643_129508_124030_130091_110085_129844_123289_128842_127417_128808_129049; FEED_SIDS=231735_0309_22; plus_lsv=393c3756be30db54; BDORZ=AE84CDB3A529C0F8A2B9DCDD1D18B695; plus_cv=1::m:49a3f4a6; Hm_lvt_12423ecbc0e2ca965d84259063d35238=1552141644; SE_LAUNCH=5%3A25869027_0%3A25869027; rsv_i=caa1rmCs0PpQpYzAbKe5ZOe7IPqcdsJjz9yFp5uzkt9iporuXUkXb39N0K1sIreyWXdiYvSq2TEnLzJMu1rSJdPaAoRZgSo; Hm_lpvt_12423ecbc0e2ca965d84259063d35238=1552141679; BDRCVFR[Usf3Hj-5366]=mk3SLVN4HKm; BDUSS=RJTzhiLTA2fkFSRmxiOGZYRVZEbVVMRU1FQmNsbHJDT2xRSHlPT1ZaV2NYS3RjQUFBQUFBJCQAAAAAAAAAAAEAAABEIlRw0LC2uTkwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAJzPg1ycz4NcY2; BD_HOME=1; BDRCVFR[4r8LXJfwh-6]=8QV4RScte5tfjRLnjbdnHRsg17xUvNV; H_PS_645EC=1e9fAcA0iG5RIf%2Bi4FlV0onp3XbZL2oEMPgGRb1L2abD%2BGzuKThL6MgB%2Be%2FwIJ6wYVm0C2fUHAUL; H_PS_PSSID=28648_1455_28395_21114_28608_28584_28557_28604_28625_28605; sug=3; sugstore=1; ORIGIN=2; bdime=0

body

<!DOCTYPE html>
<!--STATUS OK-->

Python复习笔记(十)Http协议--Web服务器-并发服务器

2. Python模拟返回固定页面的http服务器

import socket

def service_client(new_socket):
    """Answer one browser request with a fixed page, then close the socket.

    Args:
        new_socket: Connected client socket returned by ``accept()``.
    """
    try:
        # 1. Receive the browser's HTTP request, e.g. "GET / HTTP/1.1 ...".
        request = new_socket.recv(1024)
        print(request)
        # 2. Build the HTTP reply: status line, blank line, then the body.
        response = "HTTP/1.1 200 OK\r\n"
        response += "\r\n"
        response += "haaaaaaaaaaa"
        # sendall() keeps writing until everything is sent; send() may stop early.
        new_socket.sendall(response.encode("utf-8"))
    except ConnectionError as err:
        # A client that vanished mid-request must not take the server down.
        print("connection error:", err)
    finally:
        new_socket.close()


def main():
    """Run the server: accept clients on port 7890 and serve them one by one."""
    # 1. Create the socket; the ``with`` block closes it on any exit path.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as tcp_server_socket:
        # 2. Bind to every local interface on port 7890.
        tcp_server_socket.bind(("", 7890))
        # 3. Turn it into a listening socket.
        tcp_server_socket.listen(128)
        while True:
            # 4. Wait for a new client to connect.
            new_socket, client_addr = tcp_server_socket.accept()
            # 5. Serve this client.
            service_client(new_socket)


if __name__ == '__main__':
    main()

Python复习笔记(十)Http协议--Web服务器-并发服务器

 返回指定html页面

import socket
import re

# Directory the static pages are read from; the request path is appended to it.
TEMPLATE_ROOT = "./templates"


def _requested_path(request):
    """Return the path from the HTTP request line, or None if it is malformed."""
    request_lines = request.splitlines()
    print(request_lines)
    if not request_lines:
        return None
    # The request line looks like: GET /page.html HTTP/1.1
    ret = re.match(r"[^/]+(/[^ ]*)", request_lines[0])
    return ret.group(1) if ret else None


def _build_response(file_name):
    """Return the complete response bytes (header + body) for ``file_name``."""
    if file_name == "/":
        file_name = "/index.html"
    # Refuse ".." segments so a request cannot escape TEMPLATE_ROOT.
    if ".." not in file_name.split("/"):
        try:
            with open(TEMPLATE_ROOT + file_name, "rb") as f:
                return b"HTTP/1.1 200 OK\r\n\r\n" + f.read()
        except OSError:
            pass  # missing or unreadable file: answer with the 404 below
    return b"HTTP/1.1 404 NOT FOUND\r\n\r\n-----------File not found----------"


def service_client(new_socket):
    """Serve one HTTP request on ``new_socket`` and then close it.

    The requested path is looked up under ``TEMPLATE_ROOT``. A missing file
    yields a 404 and an unparseable request line yields a 400.

    Args:
        new_socket: Connected client socket returned by ``accept()``.
    """
    try:
        # 1. Receive the browser's HTTP request.
        request = new_socket.recv(1024).decode("utf-8", errors="replace")
        if not request:
            return  # the client closed the connection without sending anything
        file_name = _requested_path(request)
        if file_name is None:
            new_socket.sendall(b"HTTP/1.1 400 BAD REQUEST\r\n\r\n")
            return
        print("*" * 50, file_name)
        # 2. Send header and body in one go.
        new_socket.sendall(_build_response(file_name))
    except ConnectionError as err:
        # A client that vanished mid-request must not take the server down.
        print("connection error:", err)
    finally:
        new_socket.close()


def main():
    """Run the server: accept clients on port 7890 and serve them one by one."""
    # 1. Create the socket; the ``with`` block closes it on any exit path.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as tcp_server_socket:
        # Let the port be re-bound right after the server closed first, so the
        # program can be restarted immediately.
        tcp_server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        # 2. Bind to every local interface on port 7890.
        tcp_server_socket.bind(("", 7890))
        # 3. Turn it into a listening socket.
        tcp_server_socket.listen(128)
        while True:
            # 4. Wait for a new client to connect.
            new_socket, client_addr = tcp_server_socket.accept()
            # 5. Serve this client.
            service_client(new_socket)


if __name__ == '__main__':
    main()

Python复习笔记(十)Http协议--Web服务器-并发服务器

3. 多进程/线程实现http服务器

3.1 多进程: 需要在主进程调用new_socket.close()

import socket
import re
import multiprocessing

# Directory the static pages are read from; the request path is appended to it.
TEMPLATE_ROOT = "./templates"


def _requested_path(request):
    """Return the path from the HTTP request line, or None if it is malformed."""
    request_lines = request.splitlines()
    print(request_lines)
    if not request_lines:
        return None
    # The request line looks like: GET /page.html HTTP/1.1
    ret = re.match(r"[^/]+(/[^ ]*)", request_lines[0])
    return ret.group(1) if ret else None


def _build_response(file_name):
    """Return the complete response bytes (header + body) for ``file_name``."""
    if file_name == "/":
        file_name = "/index.html"
    # Refuse ".." segments so a request cannot escape TEMPLATE_ROOT.
    if ".." not in file_name.split("/"):
        try:
            with open(TEMPLATE_ROOT + file_name, "rb") as f:
                return b"HTTP/1.1 200 OK\r\n\r\n" + f.read()
        except OSError:
            pass  # missing or unreadable file: answer with the 404 below
    return b"HTTP/1.1 404 NOT FOUND\r\n\r\n-----------File not found----------"


def service_client(new_socket):
    """Serve one HTTP request on ``new_socket`` and then close it.

    Runs in a child process. The requested path is looked up under
    ``TEMPLATE_ROOT``. A missing file yields a 404 and an unparseable request
    line yields a 400.

    Args:
        new_socket: Connected client socket returned by ``accept()``.
    """
    try:
        # 1. Receive the browser's HTTP request.
        request = new_socket.recv(1024).decode("utf-8", errors="replace")
        if not request:
            return  # the client closed the connection without sending anything
        file_name = _requested_path(request)
        if file_name is None:
            new_socket.sendall(b"HTTP/1.1 400 BAD REQUEST\r\n\r\n")
            return
        print("*" * 50, file_name)
        # 2. Send header and body in one go.
        new_socket.sendall(_build_response(file_name))
    except ConnectionError as err:
        print("connection error:", err)
    finally:
        new_socket.close()


def main():
    """Run the server: hand every accepted client to its own child process."""
    # 1. Create the socket; the ``with`` block closes it on any exit path.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as tcp_server_socket:
        # Let the port be re-bound right after the server closed first, so the
        # program can be restarted immediately.
        tcp_server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        # 2. Bind to every local interface on port 7890.
        tcp_server_socket.bind(("", 7890))
        # 3. Turn it into a listening socket.
        tcp_server_socket.listen(128)
        while True:
            # 4. Wait for a new client to connect.
            new_socket, client_addr = tcp_server_socket.accept()
            # 5. Serve this client in a child process.
            p = multiprocessing.Process(target=service_client, args=(new_socket, ))
            p.start()
            # The child holds its own copy of the socket. The parent must close
            # its copy too, otherwise the connection never finishes closing and
            # the browser keeps waiting.
            new_socket.close()
            # Has the side effect of joining children that already finished,
            # so they do not pile up as zombies.
            multiprocessing.active_children()


if __name__ == '__main__':
    main()
主进程里的: new_socket.close()作用
# fd: 文件描述符, 就是一个数字, 对应一个特殊的文件, 例如网络接口
# 到子进程时候, new_socket会被复制一份, 所以要在主进程里 调用 new_socket.close()
# 主进程不调用close时, 浏览器会一直的等待, 四次挥手就不会开始!

3.2 多线程: 无需在主线程调用new_socket.close()--否则报错

import socket
import re
import threading

# Directory the static pages are read from; the request path is appended to it.
TEMPLATE_ROOT = "./templates"


def _requested_path(request):
    """Return the path from the HTTP request line, or None if it is malformed."""
    request_lines = request.splitlines()
    print("request_lines:", "*"*50, request_lines)
    if not request_lines:
        return None
    # The request line looks like: GET /page.html HTTP/1.1
    ret = re.match(r"[^/]+(/[^ ]*)", request_lines[0])
    return ret.group(1) if ret else None


def _build_response(file_name):
    """Return the complete response bytes (header + body) for ``file_name``."""
    if file_name == "/":
        file_name = "/index.html"
    # Refuse ".." segments so a request cannot escape TEMPLATE_ROOT.
    if ".." not in file_name.split("/"):
        try:
            with open(TEMPLATE_ROOT + file_name, "rb") as f:
                return b"HTTP/1.1 200 OK\r\n\r\n" + f.read()
        except OSError:
            pass  # missing or unreadable file: answer with the 404 below
    return b"HTTP/1.1 404 NOT FOUND\r\n\r\n-----------File not found----------"


def service_client(new_socket):
    """Serve one HTTP request on ``new_socket`` and then close it.

    Runs in a worker thread. The requested path is looked up under
    ``TEMPLATE_ROOT``. A missing file yields a 404 and an unparseable request
    line yields a 400.

    Args:
        new_socket: Connected client socket returned by ``accept()``.
    """
    try:
        # 1. Receive the browser's HTTP request.
        request = new_socket.recv(1024).decode("utf-8", errors="replace")
        if not request:
            return  # the client closed the connection without sending anything
        file_name = _requested_path(request)
        if file_name is None:
            new_socket.sendall(b"HTTP/1.1 400 BAD REQUEST\r\n\r\n")
            return
        print("*" * 50, file_name)
        # 2. Send header and body in one go.
        new_socket.sendall(_build_response(file_name))
    except ConnectionError as err:
        print("connection error:", err)
    finally:
        new_socket.close()


def main():
    """Run the server: hand every accepted client to its own thread."""
    # 1. Create the socket; the ``with`` block closes it on any exit path.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as tcp_server_socket:
        # Let the port be re-bound right after the server closed first, so the
        # program can be restarted immediately.
        tcp_server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        # 2. Bind to every local interface on port 7890.
        tcp_server_socket.bind(("", 7890))
        # 3. Turn it into a listening socket.
        tcp_server_socket.listen(128)
        while True:
            # 4. Wait for a new client to connect.
            new_socket, client_addr = tcp_server_socket.accept()
            # 5. Serve this client in a thread. Threads share the very same
            # socket object, so the main thread must NOT close new_socket here.
            p = threading.Thread(target=service_client, args=(new_socket, ))
            p.start()


if __name__ == '__main__':
    main()

区别: 线程比进程耗费的资源小, 以下用协程实现, 会更方便

3.3 协程: 使用gevent实现http服务器

效率最高

import socket
import re
import gevent
from gevent import monkey

# Patch the blocking standard-library calls so they cooperate with greenlets.
monkey.patch_all()

# Directory the static pages are read from; the request path is appended to it.
TEMPLATE_ROOT = "./templates"


def _requested_path(request):
    """Return the path from the HTTP request line, or None if it is malformed."""
    request_lines = request.splitlines()
    print("request_lines:", "*"*50, request_lines)
    if not request_lines:
        return None
    # The request line looks like: GET /page.html HTTP/1.1
    ret = re.match(r"[^/]+(/[^ ]*)", request_lines[0])
    return ret.group(1) if ret else None


def _build_response(file_name):
    """Return the complete response bytes (header + body) for ``file_name``."""
    if file_name == "/":
        file_name = "/index.html"
    # Refuse ".." segments so a request cannot escape TEMPLATE_ROOT.
    if ".." not in file_name.split("/"):
        try:
            with open(TEMPLATE_ROOT + file_name, "rb") as f:
                return b"HTTP/1.1 200 OK\r\n\r\n" + f.read()
        except OSError:
            pass  # missing or unreadable file: answer with the 404 below
    return b"HTTP/1.1 404 NOT FOUND\r\n\r\n-----------File not found----------"


def service_client(new_socket):
    """Serve one HTTP request on ``new_socket`` and then close it.

    Runs inside a greenlet. The requested path is looked up under
    ``TEMPLATE_ROOT``. A missing file yields a 404 and an unparseable request
    line yields a 400.

    Args:
        new_socket: Connected client socket returned by ``accept()``.
    """
    try:
        # 1. Receive the browser's HTTP request.
        request = new_socket.recv(1024).decode("utf-8", errors="replace")
        if not request:
            return  # the client closed the connection without sending anything
        file_name = _requested_path(request)
        if file_name is None:
            new_socket.sendall(b"HTTP/1.1 400 BAD REQUEST\r\n\r\n")
            return
        print("*" * 50, file_name)
        # 2. Send header and body in one go.
        new_socket.sendall(_build_response(file_name))
    except ConnectionError as err:
        print("connection error:", err)
    finally:
        new_socket.close()


def main():
    """Run the server: hand every accepted client to its own greenlet."""
    # 1. Create the socket; the ``with`` block closes it on any exit path.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as tcp_server_socket:
        # Let the port be re-bound right after the server closed first, so the
        # program can be restarted immediately.
        tcp_server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        # 2. Bind to every local interface on port 7890.
        tcp_server_socket.bind(("", 7890))
        # 3. Turn it into a listening socket.
        tcp_server_socket.listen(128)
        while True:
            # 4. Wait for a new client to connect.
            new_socket, client_addr = tcp_server_socket.accept()
            # 5. Serve this client in a greenlet. The greenlet uses the very
            # same socket object, so new_socket must NOT be closed here.
            gevent.spawn(service_client, new_socket)


if __name__ == '__main__':
    main()

4. Web静态服务器--单进程/线程/非堵塞模式

4.1 长连接和短连接

  • HTTP/1.1 长连接:  三次握手一次, 不断开的情况下, 通过一个Socket, 可以连续获取数据
  • HTTP/1.0 短连接

Python复习笔记(十)Http协议--Web服务器-并发服务器

短连接

import socket
import re
import gevent
from gevent import monkey
import time

# Every connected client socket that is still open.
client_socker_list = list()


def main():
    """Serve many clients from one thread by polling non-blocking sockets.

    Each pass of the loop first tries to accept a new client, then tries to
    read from every known client, and finally sleeps for one second so the
    loop does not spin the CPU.
    """
    # 1. Create the socket; the ``with`` block closes it on any exit path.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as tcp_server_socket:
        # Let the port be re-bound right after the server closed first, so the
        # program can be restarted immediately.
        tcp_server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        # 2. Bind to every local interface on port 7890.
        tcp_server_socket.bind(("", 7890))
        # 3. Turn it into a listening socket.
        tcp_server_socket.listen(128)
        # Non-blocking: accept() raises instead of waiting when nobody connects.
        tcp_server_socket.setblocking(False)

        try:
            while True:
                try:
                    new_socket, new_addr = tcp_server_socket.accept()
                except BlockingIOError:
                    print("没有新的客户端到来")
                else:
                    print("只要没产生一次, 那么也就意味着 来了一个新的客户端")
                    new_socket.setblocking(False)
                    client_socker_list.append(new_socket)

                # Iterate over a copy: closed clients are removed inside the loop.
                for client_socket in list(client_socker_list):
                    try:
                        # Read from THIS client, not from the most recently
                        # accepted socket.
                        recv_data = client_socket.recv(1024)
                    except BlockingIOError as ret:
                        print(ret)
                        print("这个客户端没有发送过来数据")
                        continue
                    except ConnectionError as ret:
                        # A reset connection is handled like a normal close.
                        print(ret)
                        recv_data = b""

                    if recv_data:
                        # The peer sent some data.
                        print("客户端发送过来了数据")
                    else:
                        # recv() returned empty because the peer called close().
                        client_socker_list.remove(client_socket)
                        client_socket.close()
                        print("客户端已经关闭")

                time.sleep(1)
        finally:
            # Release every client that is still connected on shutdown.
            for client_socket in client_socker_list:
                client_socket.close()
            client_socker_list.clear()


if __name__ == '__main__':
    main()
  • 核心: 用多线程/进程原因, 这个套接字必定堵塞,  堵塞到数据收到为止
  • 只要设置为非堵塞, 则可以实现 单进程单线程单任务, 还能做到多个客户端一起服务.

4.2 长连接来实现单进程/单线程--非堵塞模式(Content-Length)

上述实现都是基于短连接, 请求一次之后就断开连接了

import socket
import re
import gevent
from gevent import monkey
import select
import time

# Every connected client socket that is still open.
client_socker_list = list()

# Directory the static pages are read from; the request path is appended to it.
TEMPLATE_ROOT = "./templates"


def _load_page(file_name):
    """Return ``(status, body_bytes)`` for the requested ``file_name``."""
    if file_name == "/":
        file_name = "/index.html"
    # Refuse ".." segments so a request cannot escape TEMPLATE_ROOT.
    if ".." not in file_name.split("/"):
        try:
            with open(TEMPLATE_ROOT + file_name, "rb") as f:
                return "200 OK", f.read()
        except OSError:
            pass  # missing or unreadable file: answer with the 404 below
    return "404 NOT FOUND", b"-----------File not found----------"


def _send_all(sock, data):
    """Write all of ``data`` to ``sock``, waiting whenever its buffer is full."""
    view = memoryview(data)
    while view:
        try:
            sent = sock.send(view)
        except BlockingIOError:
            # Non-blocking socket with a full send buffer: wait until writable.
            select.select([], [sock], [])
            continue
        view = view[sent:]


def service_client(new_socket, request):
    """Answer one already-received HTTP request without closing the socket.

    Every response carries a Content-Length header. That is what lets the
    browser detect the end of the body and reuse the connection for its next
    request, so the server does not have to close the socket.

    Args:
        new_socket: Connected client socket the response is written to.
        request: The decoded text of the HTTP request.
    """
    request_lines = request.splitlines()
    print("request_lines:", "*"*50, request_lines)
    # The request line looks like: GET /page.html HTTP/1.1
    ret = re.match(r"[^/]+(/[^ ]*)", request_lines[0]) if request_lines else None
    if ret is None:
        status, response_body = "400 BAD REQUEST", b""
    else:
        file_name = ret.group(1)
        print("*"*50, file_name)
        status, response_body = _load_page(file_name)

    response_header = "HTTP/1.1 %s\r\n" % status
    response_header += "Content-Length:%d\r\n" % len(response_body)
    response_header += "\r\n"
    # Header and body go out exactly once; a second copy of the body would be
    # read by the browser as the start of the next response.
    _send_all(new_socket, response_header.encode("utf-8") + response_body)


def main():
    """Serve keep-alive clients from one thread by polling non-blocking sockets."""
    # 1. Create the socket; the ``with`` block closes it on any exit path.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as tcp_server_socket:
        # Let the port be re-bound right after the server closed first, so the
        # program can be restarted immediately.
        tcp_server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        # 2. Bind to every local interface on port 7890.
        tcp_server_socket.bind(("", 7890))
        # 3. Turn it into a listening socket.
        tcp_server_socket.listen(128)
        # Non-blocking: accept() raises instead of waiting when nobody connects.
        tcp_server_socket.setblocking(False)

        try:
            while True:
                try:
                    new_socket, new_addr = tcp_server_socket.accept()
                except BlockingIOError:
                    print("没有新的客户端到来")
                else:
                    print("只要没产生一次, 那么也就意味着 来了一个新的客户端")
                    new_socket.setblocking(False)
                    client_socker_list.append(new_socket)

                # Iterate over a copy: closed clients are removed inside the loop.
                for client_socket in list(client_socker_list):
                    try:
                        # Read from THIS client, not from the most recently
                        # accepted socket.
                        recv_data = client_socket.recv(1024).decode(
                            "utf-8", errors="replace")
                        if recv_data:
                            print("客户端发送过来了数据")
                            service_client(client_socket, recv_data)
                    except BlockingIOError as ret:
                        print(ret)
                        print("这个客户端没有发送过来数据")
                        continue
                    except ConnectionError as ret:
                        # A reset connection is handled like a normal close.
                        print(ret)
                        recv_data = ""

                    if not recv_data:
                        client_socket.close()
                        client_socker_list.remove(client_socket)
                        print("客户端已经关闭")

                # Short pause so the polling loop does not spin the CPU.
                time.sleep(0.1)
        finally:
            # Release every client that is still connected on shutdown.
            for client_socket in client_socker_list:
                client_socket.close()
            client_socker_list.clear()


if __name__ == '__main__':
    main()

5. Web静态服务器--epoll

5.1 IO多路复用

select/epoll的好处: 在于单个process就可以同时处理多个网络连接的IO

它的基本原理就是 select, poll, epoll 这类function会持续监视所负责的所有Socket, 当某个Socket有数据到达了, 就通知用户进程 (其中 select/poll 采用轮询的方式, epoll 采用事件通知的方式)

5.2 epoll简单模型

1. 减少了复制的过程

2. 以事件通知的方式(高效率, 注: 轮询的方式效率很低)

3. 有一个特殊的内存, 是应用程序和Kernel共享的, 在这个内存里要添加的,监听的,判断到来的套接字对应的文件描述符, 检测时不是轮询而是事件通知。

4. 使用了内存映射技术(mmap)技术

5. 采用基于事件的就绪通知方式

Python复习笔记(十)Http协议--Web服务器-并发服务器

5.3 epoll版的http服务器

#!/bin/python3
# -*- encoding=utf-8 -*-
import socket
import re
import time
import select

# Directory the static pages are read from; the request path is appended to it.
TEMPLATE_ROOT = "./templates"


def _load_page(file_name):
    """Return ``(status, body_bytes)`` for the requested ``file_name``."""
    if file_name == "/":
        file_name = "/index.html"
    # Refuse ".." segments so a request cannot escape TEMPLATE_ROOT.
    if ".." not in file_name.split("/"):
        try:
            with open(TEMPLATE_ROOT + file_name, "rb") as f:
                return "200 OK", f.read()
        except OSError:
            pass  # missing or unreadable file: answer with the 404 below
    return "404 NOT FOUND", b"-----------File not found----------"


def service_client(new_socket, request):
    """Answer one already-received HTTP request without closing the socket.

    Every response carries a Content-Length header. That is what lets the
    browser detect the end of the body and reuse the connection for its next
    request, so the server does not have to close the socket.

    Args:
        new_socket: Connected client socket the response is written to.
        request: The decoded text of the HTTP request.
    """
    request_lines = request.splitlines()
    print("request_lines:", "*"*50, request_lines)
    # The request line looks like: GET /page.html HTTP/1.1
    ret = re.match(r"[^/]+(/[^ ]*)", request_lines[0]) if request_lines else None
    if ret is None:
        status, response_body = "400 BAD REQUEST", b""
    else:
        file_name = ret.group(1)
        print("*"*50, file_name)
        status, response_body = _load_page(file_name)

    response_header = "HTTP/1.1 %s\r\n" % status
    response_header += "Content-Length:%d\r\n" % len(response_body)
    response_header += "\r\n"
    # Header and body go out exactly once; a second copy of the body would be
    # read by the browser as the start of the next response.
    new_socket.sendall(response_header.encode("utf-8") + response_body)


def main():
    """Serve keep-alive clients from one thread, driven by epoll events."""
    # 1. Create the socket; the ``with`` block closes it on any exit path.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as tcp_server_socket:
        # Let the port be re-bound right after the server closed first, so the
        # program can be restarted immediately.
        tcp_server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        # 2. Bind to every local interface on port 7890.
        tcp_server_socket.bind(("", 7890))
        # 3. Turn it into a listening socket.
        tcp_server_socket.listen(128)
        tcp_server_socket.setblocking(False)

        # Create the epoll object and register the listening socket's file
        # descriptor; EPOLLIN asks to be told when there is input.
        epl = select.epoll()
        listen_fd = tcp_server_socket.fileno()
        epl.register(listen_fd, select.EPOLLIN)

        # Maps fd -> socket, because poll() only reports file descriptors.
        fd_event_dict = dict()
        # Event bits that mean "go and read this client now". Hang-up and
        # error are reported by epoll even though only EPOLLIN was requested.
        readable = select.EPOLLIN | select.EPOLLHUP | select.EPOLLERR

        try:
            while True:
                # poll() blocks until the OS reports activity and returns a
                # list of (fd, event_mask) pairs.
                for fd, event in epl.poll():
                    if fd == listen_fd:
                        # 4. A new client is waiting to be accepted.
                        try:
                            new_socket, client_addr = tcp_server_socket.accept()
                        except BlockingIOError:
                            continue  # the client gave up before accept()
                        epl.register(new_socket.fileno(), select.EPOLLIN)
                        fd_event_dict[new_socket.fileno()] = new_socket
                    elif event & readable:
                        # An already connected client sent data or went away.
                        client_socket = fd_event_dict[fd]
                        try:
                            recv_data = client_socket.recv(1024).decode(
                                "utf-8", errors="replace")
                            if recv_data:
                                service_client(client_socket, recv_data)
                        except ConnectionError as err:
                            # A reset connection is handled like a normal close.
                            print(err)
                            recv_data = ""
                        if not recv_data:
                            # Unregister BEFORE closing: once the socket is
                            # closed the fd is invalid and unregister() fails.
                            epl.unregister(fd)
                            client_socket.close()
                            del fd_event_dict[fd]
        finally:
            # Release every client that is still connected on shutdown.
            for client_socket in fd_event_dict.values():
                client_socket.close()
            epl.close()


if __name__ == '__main__':
    main()