一、文档

1.1 官方文档

https://requests.readthedocs.io/projects/cn/zh_CN/latest/

1.2 快速上手

https://requests.readthedocs.io/projects/cn/zh_CN/latest/user/quickstart.html

二、安装

pip install requests

三、属性及类型

  1. 类型:<class 'requests.models.Response'>
  2. r.text : 获取网站源码
  3. r.encoding : 访问或定制编码方式
  4. r.url : 获取请求的url
  5. r.content : 响应的字节类型
  6. r.status_code : 响应的状态码
  7. r.headers : 响应的头信息

案例

# _*_ coding : utf-8 _*_
# @Time : 2023/7/3 0:45
# @Author : bamboo
# @File : requests_base_use
# @Project : py-pro

import requests

# Fetch the Baidu home page with a plain GET request and show the response
# type plus six commonly used attributes.
url = 'http://www.baidu.com'

# requests applies no timeout by default, so an unresponsive server would make
# the script wait forever; fail after 10 seconds instead.
response = requests.get(url, timeout=10)

# One type and six attributes.
print(type(response))
# <class 'requests.models.Response'>

# Set the encoding used to decode the body when reading response.text.
response.encoding = 'utf-8'

# Page source as a decoded string.
print(response.text)

# URL of the response.
print(response.url)

# Page source as raw bytes.
print(response.content)

# HTTP status code.
print(response.status_code)

# Response headers.
print(response.headers)

四、get请求

案例分析

# _*_ coding : utf-8 _*_
# @Time : 2023/7/3 7:31
# @Author : bamboo
# @File : requests_get
# @Project : py-pro

import os

import requests

# Baidu search endpoint.  The query string is supplied through `params`, so
# no trailing '?' is needed on the URL.
url = 'http://www.baidu.com/s'

headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/116.0.0.0 Safari/537.36'
    ),
}

# A cookie copied from a logged-in browser acts as a credential, so it must
# not be committed with the source.  Supply it through the BAIDU_COOKIE
# environment variable; the header is only sent when the variable is set.
# NOTE(review): Baidu may answer with a verification page when no cookie is
# sent - confirm before relying on the cookie-less path.
cookie = os.environ.get('BAIDU_COOKIE')
if cookie:
    headers['Cookie'] = cookie

# Search keyword; requests URL-encodes the parameters itself.
data = {
    'wd': '北京',
}

# Send the GET request.  The timeout keeps the script from hanging forever
# when the server does not answer.
response = requests.get(url=url, params=data, headers=headers, timeout=10)
# Decode the body as UTF-8.
response.encoding = 'utf-8'
# Page source as text.
content = response.text

print(content)
  1. 直接调用requests.get(url=url, params=data, headers=headers)获取响应对象
  2. 参数使用params传递,url中‘?’可以省略
  3. 参数无需urlencode编码
  4. 不需要对请求对象进行定制

五、post请求

案例分析

# _*_ coding : utf-8 _*_
# @Time : 2023/7/3 7:57
# @Author : bamboo
# @File : requests_post
# @Project : py-pro

import json
import os

import requests

# Baidu Translate suggestion endpoint.
url = 'https://fanyi.baidu.com/sug'

headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/116.0.0.0 Safari/537.36'
    ),
}

# A cookie copied from a logged-in browser acts as a credential, so it must
# not be committed with the source.  Supply it through the BAIDU_COOKIE
# environment variable; the header is only sent when the variable is set.
cookie = os.environ.get('BAIDU_COOKIE')
if cookie:
    headers['Cookie'] = cookie

# Form fields of the POST body; requests form-encodes the dict itself.
data = {
    'kw': 'eye',
}

# Send the POST request.  The timeout keeps the script from hanging forever
# when the server does not answer.
response = requests.post(url=url, data=data, headers=headers, timeout=10)
response.encoding = 'utf-8'
content = response.text

# The endpoint answers with JSON; parse it into Python objects.
obj = json.loads(content)

print(obj)
  1. post请求的参数不需要手动urlencode编码,也不需要再encode为字节
  2. post请求参数是data
  3. 不需要请求对象的定制

六、代理

案例

# _*_ coding : utf-8 _*_
# @Time : 2023/7/3 8:35
# @Author : bamboo
# @File : requests_proxy
# @Project : py-pro

import os

import requests

# Search Baidu for "ip" through a proxy and save the result page.
url = 'http://www.baidu.com/s'

headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/116.0.0.0 Safari/537.36'
    ),
}

# A cookie copied from a logged-in browser acts as a credential, so it must
# not be committed with the source.  Supply it through the BAIDU_COOKIE
# environment variable; the header is only sent when the variable is set.
cookie = os.environ.get('BAIDU_COOKIE')
if cookie:
    headers['Cookie'] = cookie

data = {
    'wd': 'ip',
}

# Proxy used for http:// URLs.  The requests documentation requires proxy
# URLs to include the scheme, so it is spelled out here.
proxy = {
    'http': 'http://101.43.93.67:7890',
}

# The timeout matters even more with a proxy: a dead proxy would otherwise
# block the script indefinitely.
response = requests.get(
    url=url,
    params=data,
    headers=headers,
    proxies=proxy,
    timeout=10,
)

content = response.text

# Save the page so the result can be inspected in a browser.
with open('ip_proxy.html', 'w', encoding='utf-8') as fp:
    fp.write(content)

七、cookie验证码登录

7.1 手动版

# _*_ coding : utf-8 _*_
# @Time : 2023/7/3 12:36
# @Author : bamboo
# @File : requests_cookie_gushiwenwang
# @Project : py-pro

import getpass
import os

import requests
from bs4 import BeautifulSoup

# Log in to so.gushiwen.cn with a manually typed captcha.
#
# The login form posts these fields:
#   __VIEWSTATE, __VIEWSTATEGENERATOR  - hidden inputs read from the page
#   from                               - page to return to after login
#   email, pwd                         - account credentials
#   code                               - captcha text
#   denglu                             - submit button value

url = 'https://so.gushiwen.cn/user/login.aspx?from=http://so.gushiwen.cn/user/collect.aspx'

headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/116.0.0.0 Safari/537.36'
    ),
}

# Account credentials must not be committed with the source.  Take them from
# the environment, falling back to an interactive prompt (the password prompt
# does not echo).  Asked up front so the captcha is not left waiting later.
email = os.environ.get('GUSHIWEN_EMAIL') or input('请输入邮箱:')
password = os.environ.get('GUSHIWEN_PWD') or getpass.getpass('请输入密码:')

# One session carries the cookies across the login page, the captcha image
# and the login POST, so all three requests share the same server-side state.
session = requests.Session()

# Fetch the login page source.
response = session.get(url=url, headers=headers, timeout=10)
content = response.text

# Parse the page to read the hidden form fields.
soup = BeautifulSoup(content, 'lxml')

# Hidden field __VIEWSTATE.
viewstate = soup.select_one('#__VIEWSTATE').attrs.get('value')
print(viewstate)

# Hidden field __VIEWSTATEGENERATOR.
viewstategenerator = soup.select_one('#__VIEWSTATEGENERATOR').attrs.get('value')
print(viewstategenerator)

# Address of the captcha image (the src attribute is site-relative).
code = soup.select_one('#imgCode').attrs.get('src')
code_url = 'https://so.gushiwen.cn' + code

# Download the captcha through the session.  Fetching it outside the session
# (for example with urllib.request.urlretrieve) requests a second, different
# captcha, so the code typed below would not match the one checked at login.
response_code = session.get(code_url, timeout=10)
# Captcha image as raw bytes.
content_code = response_code.content
# Write the image to disk so it can be viewed and typed in.
with open('code.jpg', 'wb') as fp:
    fp.write(content_code)

code_name = input('请输入你的验证码:')

data_post = {
    '__VIEWSTATE': viewstate,
    '__VIEWSTATEGENERATOR': viewstategenerator,
    'from': 'http://so.gushiwen.cn/user/collect.aspx',
    'email': email,
    'pwd': password,
    'code': code_name,
    'denglu': '登录',
}

# Submit the form to the login URL through the same session.
response_post = session.post(url=url, headers=headers, data=data_post, timeout=10)

content_post = response_post.text

# Save the returned page to check whether the login succeeded.
with open('gushiwen.html', 'w', encoding='utf-8') as fp:
    fp.write(content_post)

7.2 自动版

使用超级鹰(付费的第三方验证码识别平台)自动识别验证码,代替手动输入。