# 一個簡單的client

import socket
import ssl

def parsed_url(url):
    """
    Split a URL into its (protocol, host, port, path) components.

    Args:
        url: An ``http://`` or ``https://`` URL. A URL without one of these
            prefixes is treated as plain HTTP.

    Returns:
        A tuple ``(protocol, host, port, path)``. ``protocol`` is ``'http'``
        or ``'https'``; ``port`` is an int (80 for http, 443 for https when
        the URL names no port); ``path`` is everything from the first ``/``
        after the host, or ``'/'`` when there is none.

    Raises:
        ValueError: If the text after the host's ``:`` is not an integer.
    """
    # Detect the protocol. Only the leading scheme prefix is stripped, so a
    # later '://' (for example inside a query string) stays part of the path.
    protocol = 'http'
    rest = url
    for scheme in ('http', 'https'):
        prefix = scheme + '://'
        if url.startswith(prefix):
            protocol = scheme
            rest = url[len(prefix):]
            break

    # Example: https://g.cn:1234/hello  ->  g.cn:1234/hello

    # Everything from the first '/' onwards is the path; default to '/'.
    slash = rest.find('/')
    if slash == -1:
        host, path = rest, '/'
    else:
        host, path = rest[:slash], rest[slash:]

    # Start from the protocol's default port, then honour an explicit one.
    default_ports = {
        'http': 80,
        'https': 443,
    }
    port = default_ports[protocol]
    if ':' in host:
        host, _, port_text = host.partition(':')
        # 'g.cn:' names no port at all, so keep the protocol default.
        if port_text:
            port = int(port_text)

    return protocol, host, port, path

def socket_by_protocol(protocol, host=None):
    """
    Return an unconnected socket suitable for the given protocol.

    Args:
        protocol: ``'http'`` yields a plain TCP socket; any other value
            yields a TLS-wrapped socket.
        host: Optional server host name. When given, it is sent as SNI and
            the server certificate is verified against it. When omitted,
            certificate verification is disabled.

    Returns:
        A ``socket.socket`` for http, otherwise an ``ssl.SSLSocket``.
    """
    raw = socket.socket()
    if protocol == 'http':
        return raw

    # The module-level ssl.wrap_socket() is deprecated and was removed in
    # Python 3.12; an SSLContext is the supported way to wrap a socket.
    context = ssl.create_default_context()
    if host is None:
        # NOTE(security): without a host name there is nothing to verify the
        # certificate against. Verification is switched off here so that
        # existing one-argument callers keep working; pass `host` to get a
        # verified connection. check_hostname must be cleared before
        # verify_mode can be set to CERT_NONE.
        context.check_hostname = False
        context.verify_mode = ssl.CERT_NONE
    return context.wrap_socket(raw, server_hostname=host)

def response_by_socket(s):
    """
    Read from a socket until the peer closes the connection.

    Args:
        s: A connected socket (anything with a ``recv(size)`` method that
            returns ``bytes`` and an empty value at end of stream).

    Returns:
        All bytes received, concatenated in arrival order.
    """
    buffer_size = 1024
    # Collect the pieces and join once, instead of growing a bytes object
    # with += on every iteration.
    chunks = []
    while True:
        chunk = s.recv(buffer_size)
        # An empty read means the other side has closed the connection.
        if not chunk:
            break
        chunks.append(chunk)
    return b''.join(chunks)

def parsed_response(r):
    """
    Parse a raw HTTP response string into status code, headers and body.

    Args:
        r: The full response text, using ``\\r\\n`` line endings.

    Returns:
        A tuple ``(status_code, headers, body)``: ``status_code`` is an int,
        ``headers`` is a dict mapping header names (as sent) to their values,
        and ``body`` is the text after the first blank line (``''`` when the
        response has no blank line).

    Raises:
        IndexError: If the status line has no second field.
        ValueError: If the status field is not an integer.
    """
    # partition() never fails: a response without a blank line simply has an
    # empty body.
    head, _, body = r.partition('\r\n\r\n')
    lines = head.split('\r\n')
    # Status line looks like: HTTP/1.1 301 Moved Permanently
    status_code = int(lines[0].split()[1])

    headers = {}
    for line in lines[1:]:
        # Split on the first ':' only, so values that themselves contain
        # ': ' and headers written without a space after the colon both parse.
        name, sep, value = line.partition(':')
        if not sep:
            # Not a "name: value" line (e.g. blank); nothing to record.
            continue
        headers[name.strip()] = value.strip()
    return status_code, headers, body

# 復雜的邏輯全部封裝成函數

def get(url):
    """
    Send a GET request to ``url`` and return the parsed response.

    A 301 or 302 response that carries a Location header is followed by
    calling ``get`` again on the redirect target.

    Args:
        url: The URL to fetch, in any form ``parsed_url`` accepts.

    Returns:
        The ``(status_code, headers, body)`` tuple from ``parsed_response``
        for the final response.
    """
    from urllib.parse import urljoin

    protocol, host, port, path = parsed_url(url)
    request = 'GET {} HTTP/1.1\r\nHost: {}\r\nConnection: close\r\n\r\n'.format(path, host)
    encoding = 'utf-8'

    # Say what is wanted here, not how it is built.
    s = socket_by_protocol(protocol)
    # The with-block closes the socket even if connecting or reading fails.
    with s:
        s.connect((host, port))
        # sendall() keeps writing until the whole request is out; send() may
        # stop after a partial write.
        s.sendall(request.encode(encoding))
        response = response_by_socket(s)
    print('get response, ', response)
    # Bodies are not guaranteed to be UTF-8; replace undecodable bytes rather
    # than failing the whole request.
    r = response.decode(encoding, errors='replace')

    status_code, headers, body = parsed_response(r)
    if status_code in (301, 302):
        # Header names are case-insensitive, so do not rely on the exact
        # spelling 'Location'.
        location = None
        for name, value in headers.items():
            if name.lower() == 'location':
                location = value
                break
        if location:
            # Resolve relative targets such as '/login' against the URL that
            # was just requested; absolute targets pass through unchanged.
            base = '{}://{}:{}{}'.format(protocol, host, port, path)
            return get(urljoin(base, location))

    return status_code, headers, body

def main():
    """Fetch a sample page with get() and print the resulting status code."""
    url = 'http://movie.douban.com/top250'
    status_code, headers, body = get(url)
    print('main', status_code)
    # print('main headers ({})'.format(headers))
    # print('main body', body)

# 以下 test 開頭的函數是單元測試

def test_parsed_url():
    """
    Check parsed_url against a table of URLs and expected results.

    parsed_url is easy to get wrong, so each case is asserted explicitly.

    Raises:
        AssertionError: If any URL parses to something other than expected.
    """
    http = 'http'
    https = 'https'
    # Must match the host used in the URLs below.
    host = 'chong.cn'
    path = '/'
    test_items = [
        ('http://chong.cn', (http, host, 80, path)),
        ('http://chong.cn/', (http, host, 80, path)),
        ('http://chong.cn:90', (http, host, 90, path)),
        ('http://chong.cn:90/', (http, host, 90, path)),
        ('https://chong.cn', (https, host, 443, path)),
        ('https://chong.cn:233/', (https, host, 233, path)),
    ]
    for t in test_items:
        url, expected = t
        u = parsed_url(url)
        # The test passes when the assertion holds; otherwise the program
        # stops with the message below.
        e = "parsed_url ERROR, ({}) ({}) ({})".format(url, u, expected)
        assert u == expected, e

def test_parsed_response():
    """
    Check that parsed_response splits a sample response correctly.

    Raises:
        AssertionError: If the status code, header count or body is wrong.
    """
    # NOTE: adjacent string literals inside the parentheses are joined into
    # one string, so the whole response is a single value.
    response = (
        'HTTP/1.1 301 Moved Permanently\r\n'
        'Content-Type: text/html\r\n'
        'Location: https://movie.douban.com/top250\r\n'
        'Content-Length: 178\r\n\r\n'
        'test body'
    )
    status_code, header, body = parsed_response(response)
    assert status_code == 301
    assert len(list(header.keys())) == 3
    assert body == 'test body'

def test_get():
    """
    Check that get() can handle both an HTTP and an HTTPS URL.

    This calls get() on real URLs, so it opens network connections.
    """
    urls = [
        'http://movie.douban.com/top250',
        'https://movie.douban.com/top250',
    ]
    # A deliberately simple test: just call get() and let any exception
    # it raises fail the run.
    for u in urls:
        get(u)

def test():
    """Run the enabled test functions."""
    test_parsed_url()
    # test_get()
    # test_parsed_response()

# Run only when executed as a script, not when imported as a module.
if __name__ == '__main__':
    # test()
    main()

©著作權歸作者所有,轉載或內容合作請聯系作者
【社區內容提示】社區部分內容疑似由AI輔助生成,瀏覽時請結合常識與多方信息審慎甄別。
平臺聲明:文章內容(如有圖片或視頻亦包括在內)由作者上傳并發布,文章內容僅代表作者本人觀點,簡書系信息發布平臺,僅提供信息存儲服務。

相關閱讀更多精彩內容

友情鏈接更多精彩內容