curl_cffi
爬虫工作的老伙计。支持原生模拟浏览器的 TLS/JA3 指纹,效率比 requests 要高不少,
同时支持异步请求和持久化会话(session)请求。
TLS指纹获取
https://tls.browserleaks.com/json
安装
pip install curl_cffi
异步封装-单次请求
class CurlClient:
    """Async HTTP helper that opens a fresh ``AsyncSession`` for every request.

    NOTE(review): assumes the module-level name ``requests`` is
    ``curl_cffi.requests`` and that ``Optional`` / ``BrowserTypeLiteral`` are
    imported by the surrounding file -- confirm against the file's imports.

    :param timeout: default timeout used when a call does not pass its own
    :param proxy: default proxy used when a call does not pass its own,
        e.g. ``http://xxx.xxx.xxx.xxx:xxxx``
    """

    def __init__(
        self,
        timeout: int = 30,
        proxy: Optional[str] = None,
    ):
        self.timeout = timeout
        self.proxy = proxy

    async def _request(
        self,
        method,
        url,
        params: Optional[dict] = None,
        data: Optional[dict] = None,
        json: Optional[dict] = None,
        headers: Optional[dict] = None,
        proxy: Optional[str] = None,
        allow_redirects: bool = True,
        timeout: Optional[int] = None,
        # Quoted so the annotation is not evaluated when the class is defined.
        impersonate: Optional["BrowserTypeLiteral"] = None,
    ):
        """Send one request through a short-lived session.

        :param method: HTTP verb; upper-cased before sending
        :param url: request URL
        :param params: query-string parameters
        :param data: form body
        :param json: JSON body
        :param headers: request headers
        :param proxy: per-call proxy; falls back to the instance proxy
        :param allow_redirects: whether redirects are followed
        :param timeout: per-call timeout; ``None`` falls back to the instance
            timeout set in ``__init__``
        :param impersonate: browser fingerprint to impersonate
        :return: the response object, or ``None`` if any exception was raised
        """
        # The old default of 30 made ``timeout or self.timeout`` always pick
        # 30, so the constructor's timeout was never honoured.
        effective_timeout = self.timeout if timeout is None else timeout
        try:
            async with requests.AsyncSession(
                proxy=proxy or self.proxy,
                allow_redirects=allow_redirects,
                timeout=effective_timeout,
                impersonate=impersonate,
            ) as client:
                return await client.request(
                    method=method.upper(),
                    url=url,
                    params=params,
                    headers=headers,
                    data=data,
                    json=json,
                )
        except Exception as e:
            # Deliberate best-effort: report the failure and signal it with None.
            print(f"Request failed: {e}")
            return None

    async def http_get(
        self,
        url,
        params: Optional[dict] = None,
        headers: Optional[dict] = None,
        proxy: Optional[str] = None,
        allow_redirects: bool = True,
        timeout: Optional[int] = None,
        impersonate: Optional["BrowserTypeLiteral"] = None,
    ):
        """Send a GET request.

        :param url: request URL
        :param params: query-string parameters
        :param headers: request headers
        :param proxy: request proxy, e.g. ``http://xxx.xxx.xxx.xxx:xxxx``
        :param allow_redirects: whether redirects are followed
        :param timeout: request timeout; ``None`` uses the instance timeout
        :param impersonate: browser fingerprint to impersonate
        :return: the response object, or ``None`` on failure
        """
        return await self._request(
            method="GET",
            url=url,
            params=params,
            headers=headers,
            proxy=proxy,
            allow_redirects=allow_redirects,
            timeout=timeout,
            impersonate=impersonate,
        )

    async def http_post(
        self,
        url,
        params: Optional[dict] = None,
        data: Optional[dict] = None,
        json: Optional[dict] = None,
        headers: Optional[dict] = None,
        proxy: Optional[str] = None,
        allow_redirects: bool = True,
        timeout: Optional[int] = None,
        impersonate: Optional["BrowserTypeLiteral"] = None,
    ):
        """Send a POST request.

        :param url: request URL
        :param params: query-string parameters
        :param data: form body
        :param json: JSON body
        :param headers: request headers
        :param proxy: request proxy, e.g. ``http://xxx.xxx.xxx.xxx:xxxx``
        :param allow_redirects: whether redirects are followed
        :param timeout: request timeout; ``None`` uses the instance timeout
        :param impersonate: browser fingerprint to impersonate
        :return: the response object, or ``None`` on failure
        """
        return await self._request(
            method="POST",
            url=url,
            params=params,
            data=data,
            json=json,
            headers=headers,
            proxy=proxy,
            allow_redirects=allow_redirects,
            timeout=timeout,
            impersonate=impersonate,
        )
异步封装-持久化session
class CurlLongClient:
    """Async HTTP helper that reuses one ``AsyncSession`` across requests.

    The session is created in ``__init__`` and must be released with
    :meth:`close`, or by using the instance as ``async with CurlLongClient()``.

    NOTE(review): assumes the module-level name ``requests`` is
    ``curl_cffi.requests`` and that ``Optional`` / ``BrowserTypeLiteral`` are
    imported by the surrounding file -- confirm against the file's imports.

    :param timeout: timeout configured on the shared session
    :param proxy: proxy configured on the shared session
    """

    def __init__(
        self,
        timeout: int = 30,
        proxy: Optional[str] = None,
    ):
        self.timeout = timeout
        self.proxy = proxy
        self.client = requests.AsyncSession(
            proxy=self.proxy,
            timeout=self.timeout,
        )

    async def __aenter__(self):
        """Return the client itself so it can be used with ``async with``."""
        return self

    async def __aexit__(self, exc_type, exc, tb):
        """Close the shared session when the ``async with`` block exits."""
        await self.close()

    async def _request(
        self,
        method,
        url,
        params: Optional[dict] = None,
        data: Optional[dict] = None,
        json: Optional[dict] = None,
        headers: Optional[dict] = None,
        allow_redirects: bool = True,
        # Quoted so the annotation is not evaluated when the class is defined.
        impersonate: Optional["BrowserTypeLiteral"] = None,
        timeout: Optional[int] = None,
    ):
        """Send one request through the shared session.

        :param method: HTTP verb; upper-cased before sending
        :param url: request URL
        :param params: query-string parameters
        :param data: form body
        :param json: JSON body
        :param headers: request headers
        :param allow_redirects: whether redirects are followed
        :param impersonate: browser fingerprint to impersonate
        :param timeout: optional per-request timeout; when ``None`` no
            ``timeout`` argument is passed to the session's ``request`` call
        :return: the response object, or ``None`` if any exception was raised
        """
        request_kwargs = dict(
            method=method.upper(),
            url=url,
            params=params,
            headers=headers,
            data=data,
            json=json,
            allow_redirects=allow_redirects,
            impersonate=impersonate,
        )
        # Forward the override only when the caller supplied one, so the
        # session-level setting is not replaced by an explicit None.
        if timeout is not None:
            request_kwargs["timeout"] = timeout
        try:
            return await self.client.request(**request_kwargs)
        except Exception as e:
            # Deliberate best-effort: report the failure and signal it with None.
            print(f"Request failed: {e}")
            return None

    async def http_get(
        self,
        url,
        params: Optional[dict] = None,
        headers: Optional[dict] = None,
        allow_redirects: bool = True,
        impersonate: Optional["BrowserTypeLiteral"] = None,
        timeout: Optional[int] = None,
    ):
        """Send a GET request on the shared session.

        :param url: request URL
        :param params: query-string parameters
        :param headers: request headers
        :param allow_redirects: whether redirects are followed
        :param impersonate: browser fingerprint to impersonate
        :param timeout: optional per-request timeout override
        :return: the response object, or ``None`` on failure
        """
        return await self._request(
            method="GET",
            url=url,
            params=params,
            headers=headers,
            allow_redirects=allow_redirects,
            impersonate=impersonate,
            timeout=timeout,
        )

    async def http_post(
        self,
        url,
        params: Optional[dict] = None,
        data: Optional[dict] = None,
        json: Optional[dict] = None,
        headers: Optional[dict] = None,
        allow_redirects: bool = True,
        # Previously defaulted to 30 but was never forwarded; None keeps the
        # old default behaviour while letting explicit values take effect.
        timeout: Optional[int] = None,
        impersonate: Optional["BrowserTypeLiteral"] = None,
    ):
        """Send a POST request on the shared session.

        :param url: request URL
        :param params: query-string parameters
        :param data: form body
        :param json: JSON body
        :param headers: request headers
        :param allow_redirects: whether redirects are followed
        :param timeout: optional per-request timeout override
        :param impersonate: browser fingerprint to impersonate
        :return: the response object, or ``None`` on failure
        """
        return await self._request(
            method="POST",
            url=url,
            params=params,
            data=data,
            json=json,
            headers=headers,
            allow_redirects=allow_redirects,
            impersonate=impersonate,
            timeout=timeout,
        )

    async def close(self):
        """Close the shared session once all requests are done."""
        await self.client.close()
同步封装-单次请求
class SyncCurlClient:
    """Synchronous HTTP helper that opens a fresh ``Session`` per request.

    NOTE(review): assumes the module-level name ``requests`` is
    ``curl_cffi.requests`` and that ``Optional`` is imported by the
    surrounding file -- confirm against the file's imports.

    :param timeout: default timeout used when a call does not pass its own
    :param proxy: default proxy used when a call does not pass its own
    """

    def __init__(
        self,
        timeout: int = 30,
        proxy: Optional[str] = None,
    ):
        self.timeout = timeout
        self.proxy = proxy

    def _request(
        self,
        method,
        url,
        params: Optional[dict] = None,
        data: Optional[dict] = None,
        json: Optional[dict] = None,
        headers: Optional[dict] = None,
        proxy: Optional[str] = None,
        allow_redirects: bool = True,
        timeout: Optional[int] = None,
        impersonate: Optional[str] = None,
    ):
        """Send one request through a short-lived session.

        :param method: HTTP verb; upper-cased before sending
        :param url: request URL
        :param params: query-string parameters
        :param data: form body
        :param json: JSON body
        :param headers: request headers
        :param proxy: per-call proxy; falls back to the instance proxy
        :param allow_redirects: whether redirects are followed
        :param timeout: per-call timeout; ``None`` falls back to the instance
            timeout set in ``__init__``
        :param impersonate: browser fingerprint to impersonate
        :return: the response object, or ``None`` if any exception was raised
        """
        # The old default of 30 made ``timeout or self.timeout`` always pick
        # 30, so the constructor's timeout was never honoured.
        effective_timeout = self.timeout if timeout is None else timeout
        try:
            with requests.Session(
                # Fall back to the instance proxy; it was previously ignored here.
                proxy=proxy or self.proxy,
                allow_redirects=allow_redirects,
                impersonate=impersonate,
            ) as client:
                return client.request(
                    method=method.upper(),
                    url=url,
                    params=params,
                    headers=headers,
                    data=data,
                    json=json,
                    timeout=effective_timeout,
                )
        except Exception as e:
            # Deliberate best-effort: report the failure and signal it with None.
            print(f"Request failed: {e}")
            return None

    def http_get(
        self,
        url,
        params: Optional[dict] = None,
        headers: Optional[dict] = None,
        proxy: Optional[str] = None,
        allow_redirects: bool = True,
        timeout: Optional[int] = None,
        impersonate: Optional[str] = None,
    ):
        """Send a synchronous GET request.

        :param url: request URL
        :param params: query-string parameters
        :param headers: request headers
        :param proxy: request proxy; ``None`` uses the instance proxy
        :param allow_redirects: whether redirects are followed
        :param timeout: request timeout; ``None`` uses the instance timeout
        :param impersonate: browser fingerprint to impersonate
        :return: the response object, or ``None`` on failure
        """
        return self._request(
            method="GET",
            url=url,
            params=params,
            headers=headers,
            proxy=proxy,
            timeout=timeout,
            allow_redirects=allow_redirects,
            impersonate=impersonate,
        )

    def http_post(
        self,
        url,
        params: Optional[dict] = None,
        data: Optional[dict] = None,
        json: Optional[dict] = None,
        headers: Optional[dict] = None,
        proxy: Optional[str] = None,
        allow_redirects: bool = True,
        timeout: Optional[int] = None,
        impersonate: Optional[str] = None,
    ):
        """Send a synchronous POST request.

        :param url: request URL
        :param params: query-string parameters
        :param data: form body
        :param json: JSON body
        :param headers: request headers
        :param proxy: request proxy; ``None`` uses the instance proxy
        :param allow_redirects: whether redirects are followed
        :param timeout: request timeout; ``None`` uses the instance timeout
        :param impersonate: browser fingerprint to impersonate
        :return: the response object, or ``None`` on failure
        """
        return self._request(
            method="POST",
            url=url,
            params=params,
            data=data,
            json=json,
            headers=headers,
            proxy=proxy,
            timeout=timeout,
            allow_redirects=allow_redirects,
            impersonate=impersonate,
        )
© 版权声明
文章版权归作者所有,未经允许请勿转载。
THE END
暂无评论内容