一、文档
1.1 官方文档
https://requests.readthedocs.io/projects/cn/zh_CN/latest/
1.2 快速上手
https://requests.readthedocs.io/projects/cn/zh_CN/latest/user/quickstart.html
二、安装
pip install requests
三、属性及类型
- 类型:<class 'requests.models.Response'>
- r.text : 获取网站源码(按 r.encoding 解码后的字符串)
- r.encoding : 访问或定制编码方式
- r.url : 获取请求的url
- r.content : 响应内容的字节(bytes)形式
- r.status_code : 响应的状态码
- r.headers : 响应的头信息
案例
import requests

# Fetch the Baidu home page; requests.get returns a Response object.
page_url = 'http://www.baidu.com'
resp = requests.get(page_url)

# Show the concrete type of the returned object.
print(type(resp))

# Choose the text encoding before reading .text, which decodes with it.
resp.encoding = 'utf-8'

# Walk through the commonly used Response attributes, one per line.
print(resp.text)         # body decoded to str
print(resp.url)          # URL of the response
print(resp.content)      # body as raw bytes
print(resp.status_code)  # HTTP status code
print(resp.headers)      # response headers
|
四、get请求
案例分析
import os

import requests

# Baidu search endpoint; the query string itself is supplied through `params`.
url = 'http://www.baidu.com/s?'

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36',
}

# A logged-in Baidu cookie carries the BDUSS session token, i.e. it is a
# credential. It is therefore read from the environment instead of being
# pasted into the notes: copy the Cookie request header from the browser's
# developer tools and export it as BAIDU_COOKIE before running.
cookie = os.environ.get('BAIDU_COOKIE')
if cookie:
    headers['Cookie'] = cookie

# Query parameters are passed as a plain dict; requests URL-encodes them.
data = {
    'wd': '北京',
}

# timeout: without it requests waits indefinitely on a stalled server.
response = requests.get(url=url, params=data, headers=headers, timeout=10)

# Decode the body as UTF-8 and print the page source.
response.encoding = 'utf-8'
content = response.text
print(content)
|
- 直接调用requests.get(url=url, params=data, headers=headers)获取响应对象
- 参数使用params传递,url中的'?'可以省略
- 参数无需urlencode编码
- 不需要对请求对象进行定制
五、post请求
案例分析
import json
import os

import requests

# Baidu Translate suggestion endpoint.
url = 'https://fanyi.baidu.com/sug'

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36',
}

# A logged-in Baidu cookie carries the BDUSS session token, i.e. it is a
# credential. It is therefore read from the environment instead of being
# pasted into the notes: copy the Cookie request header from the browser's
# developer tools and export it as BAIDU_COOKIE before running.
cookie = os.environ.get('BAIDU_COOKIE')
if cookie:
    headers['Cookie'] = cookie

# Form fields are passed through `data` as a plain dict; requests
# form-encodes them, so no manual urlencode/encode step is needed.
data = {
    'kw': 'eye',
}

# timeout: without it requests waits indefinitely on a stalled server.
response = requests.post(url=url, data=data, headers=headers, timeout=10)
response.encoding = 'utf-8'
content = response.text

# The response body is parsed as JSON and printed as a Python object.
obj = json.loads(content)
print(obj)
|
- post请求不需要编码
- post请求参数是data
- 不需要请求对象的定制
六、代理
案例
import os

import requests

# Searching Baidu for "ip" makes the result page show the caller's IP,
# which is how the saved page reveals whether the proxy was used.
url = 'http://www.baidu.com/s'

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36',
}

# A logged-in Baidu cookie carries the BDUSS session token, i.e. it is a
# credential. It is therefore read from the environment instead of being
# pasted into the notes: copy the Cookie request header from the browser's
# developer tools and export it as BAIDU_COOKIE before running.
cookie = os.environ.get('BAIDU_COOKIE')
if cookie:
    headers['Cookie'] = cookie

data = {
    'wd': 'ip',
}

# Maps the URL scheme of the request to the proxy that should carry it.
# The proxy URL is written with its scheme, as the requests docs require.
proxy = {
    'http': 'http://101.43.93.67:7890',
}

# timeout matters most here: a dead or slow proxy would otherwise hang
# the script forever.
response = requests.get(
    url=url,
    params=data,
    headers=headers,
    proxies=proxy,
    timeout=10,
)
content = response.text

# Save the result page so it can be opened in a browser.
with open('ip_proxy.html', 'w', encoding='utf-8') as fp:
    fp.write(content)
|
七、cookie验证码登录
7.1 手动版
import os

import requests
from bs4 import BeautifulSoup

# Account credentials must never be written into the notes; export
# GUSHIWEN_EMAIL and GUSHIWEN_PWD before running. A missing variable
# raises KeyError here, before any request is sent.
email = os.environ['GUSHIWEN_EMAIL']
password = os.environ['GUSHIWEN_PWD']

# Login page (GET) and the address the login form is submitted to (POST).
url = 'https://so.gushiwen.cn/user/login.aspx?from=http://so.gushiwen.cn/user/collect.aspx'
url_post = 'https://so.gushiwen.cn/user/login.aspx?from=http%3a%2f%2fso.gushiwen.cn%2fuser%2fcollect.aspx'

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36',
}

# One Session carries the cookies across all three requests (login page,
# captcha image, form submission), so the captcha that is downloaded is
# the one the server checks the submitted code against.
session = requests.Session()
session.headers.update(headers)

# Step 1: load the login page and pull the hidden form fields out of it.
response = session.get(url=url, timeout=10)
content = response.text

soup = BeautifulSoup(content, 'lxml')

viewstate = soup.select_one('#__VIEWSTATE').attrs.get('value')
print(viewstate)

viewstategenerator = soup.select_one('#__VIEWSTATEGENERATOR').attrs.get('value')
print(viewstategenerator)

# Step 2: download the captcha image (its src is site-relative) so that
# it can be read by a human.
code = soup.select_one('#imgCode').attrs.get('src')
code_url = 'https://so.gushiwen.cn' + code

response_code = session.get(code_url, timeout=10)
content_code = response_code.content

with open('code.jpg', 'wb') as fp:
    fp.write(content_code)

# Open code.jpg, read the characters, and type them in.
code_name = input('请输入你的验证码:')

# Step 3: submit the login form through the same session.
data_post = {
    '__VIEWSTATE': viewstate,
    '__VIEWSTATEGENERATOR': viewstategenerator,
    'from': 'http://so.gushiwen.cn/user/collect.aspx',
    'email': email,
    'pwd': password,
    'code': code_name,
    'denglu': '登录',
}

response_post = session.post(url=url_post, data=data_post, timeout=10)
content_post = response_post.text

# Save the page returned after login for inspection in a browser.
with open('gushiwen.html', 'w', encoding='utf-8') as fp:
    fp.write(content_post)
|
7.2 自动版
超级鹰:付费