Python中aiohttp如何使用-Python教程-PHP中文网

Python中aiohttp如何使用

WBOY

发布： 2023-05-11 09:22:05

转载

1968 人浏览过

1.定义

aiohttp 是一个基于 asyncio 的异步 HTTP 网络模块，它既提供了服务端，又提供了客户端

2.基本使用

import aiohttp
import asyncio


async def fetch(session, url):
    """Download ``url`` through ``session`` and return ``(body_text, status)``.

    ``session.get(url)`` is used as an asynchronous context manager, so the
    response is released when the block exits.
    """
    async with session.get(url) as response:
        # response.text() is a coroutine, so it has to be awaited.
        body = await response.text()
        return body, response.status


async def main():
    """Fetch the demo page and print the first 100 characters and the status."""
    # ClientSession is an asynchronous context manager; leaving the block
    # closes the session.
    async with aiohttp.ClientSession() as session:
        html, status = await fetch(session, 'https://cuiqingcai.com')
        print(f'html: {html[:100]}...')
        print(f'status: {status}')


if __name__ == '__main__':
    # asyncio.run() (Python 3.7+) creates a fresh event loop, runs main() to
    # completion and closes the loop. asyncio.get_event_loop() is deprecated
    # for this purpose and raises RuntimeError on Python 3.14+ when no loop
    # is current.
    asyncio.run(main())

登录后复制

3.请求类型

# One helper per HTTP method. Like session.get(), each call is meant to be
# used inside a coroutine as `async with session.post(...) as response:`.
session.post('http://httpbin.org/post', data=b'data')
session.put('http://httpbin.org/put', data=b'data')
session.delete('http://httpbin.org/delete')
session.head('http://httpbin.org/get')
session.options('http://httpbin.org/get')
session.patch('http://httpbin.org/patch', data=b'data')

登录后复制

4.响应字段

print(&#39;status:&#39;, response.status) # 状态码
print(&#39;headers:&#39;, response.headers)# 响应头
print(&#39;body:&#39;, await response.text())# 响应体
print(&#39;bytes:&#39;, await response.read())# 响应体二进制内容
print(&#39;json:&#39;, await response.json())# 响应体json数据

登录后复制

5.超时设置

import aiohttp
import asyncio
async def main():
    """Request httpbin with a 1-second total timeout and print the status.

    Raises:
        asyncio.TimeoutError: if the request does not finish within the
            timeout (per the aiohttp client documentation).
    """
    # total=1 limits the whole request to one second.
    timeout = aiohttp.ClientTimeout(total=1)
    async with aiohttp.ClientSession(timeout=timeout) as session:
        async with session.get('https://httpbin.org/get') as response:
            print('status:', response.status)
if __name__ == '__main__':
    # asyncio.run() (Python 3.7+) owns the event loop's lifecycle;
    # asyncio.get_event_loop() is deprecated for this purpose and raises
    # RuntimeError on Python 3.14+ when no loop is current.
    asyncio.run(main())

登录后复制

6.并发限制

import asyncio
import aiohttp
# Maximum number of requests allowed in flight at the same time.
CONCURRENCY = 5
# Shared by every scrape_api() call to enforce the limit above.
semaphore = asyncio.Semaphore(CONCURRENCY)
URL = 'https://www.baidu.com'

# Placeholder for the shared aiohttp.ClientSession; main() assigns it.
session = None
async def scrape_api():
    """Fetch ``URL`` once under the module-level semaphore and return its text."""
    # At most CONCURRENCY coroutines are inside this block at any moment; the
    # rest wait here until a slot is released.
    async with semaphore:
        print('scraping', URL)
        async with session.get(URL) as response:
            # Hold the slot for a second so the concurrency cap is visible
            # in the output.
            await asyncio.sleep(1)
            return await response.text()
    
async def main():
    """Run 10000 scrape_api() calls concurrently over one shared session."""
    global session
    # `async with` closes the session (and its connections) even if a task
    # raises; previously the session was created but never closed.
    async with aiohttp.ClientSession() as session:
        scrape_index_tasks = [asyncio.ensure_future(scrape_api()) for _ in range(10000)]
        await asyncio.gather(*scrape_index_tasks)
if __name__ == '__main__':
    # asyncio.run() (Python 3.7+) owns the event loop's lifecycle;
    # asyncio.get_event_loop() is deprecated for this purpose and raises
    # RuntimeError on Python 3.14+ when no loop is current.
    # NOTE: a module-level asyncio.Semaphore binds to the running loop lazily
    # on Python 3.10+; on 3.7-3.9 create the semaphore inside main() instead.
    asyncio.run(main())

登录后复制

7.实际应用

import asyncio
import aiohttp
import logging
import json
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s - %(levelname)s: %(message)s')
# Paginated listing endpoint; {offset} is filled in per page.
INDEX_URL = 'https://dynamic5.scrape.center/api/book/?limit=18&offset={offset}'
# Per-item endpoint; {id} is filled in per book.
DETAIL_URL = 'https://dynamic5.scrape.center/api/book/{id}'
PAGE_SIZE = 18
PAGE_NUMBER = 100
# Maximum number of requests allowed in flight at the same time.
CONCURRENCY = 5

semaphore = asyncio.Semaphore(CONCURRENCY)
# Placeholder for the shared aiohttp.ClientSession; main() assigns it.
session = None

async def scrape_api(url):
    """Fetch ``url`` under the module-level semaphore and return its JSON body.

    Returns:
        The decoded JSON payload, or ``None`` when an ``aiohttp.ClientError``
        is raised (the error is logged with its traceback).
    """
    async with semaphore:
        try:
            logging.info('scraping %s', url)
            async with session.get(url) as response:
                return await response.json()
        except aiohttp.ClientError:
            logging.error('error occurred while scraping %s', url, exc_info=True)
            # Best-effort: a failed page yields None instead of aborting the
            # whole gather() in main().
            return None

async def scrape_index(page):
    """Scrape one listing page; ``page`` is 1-based."""
    # Page 1 starts at offset 0, page 2 at PAGE_SIZE, and so on.
    offset = PAGE_SIZE * (page - 1)
    index_url = INDEX_URL.format(offset=offset)
    payload = await scrape_api(index_url)
    return payload

async def main():
    """Scrape every index page concurrently and log the combined results."""
    global session
    # `async with` closes the session (and its connections) even if a task
    # raises; previously the session was created but never closed.
    async with aiohttp.ClientSession() as session:
        scrape_index_tasks = [asyncio.ensure_future(scrape_index(page)) for page in range(1, PAGE_NUMBER + 1)]
        results = await asyncio.gather(*scrape_index_tasks)
    logging.info('results %s', json.dumps(results, ensure_ascii=False, indent=2))
   

if __name__ == '__main__':
    # asyncio.run() (Python 3.7+) owns the event loop's lifecycle;
    # asyncio.get_event_loop() is deprecated for this purpose and raises
    # RuntimeError on Python 3.14+ when no loop is current.
    # NOTE: a module-level asyncio.Semaphore binds to the running loop lazily
    # on Python 3.10+; on 3.7-3.9 create the semaphore inside main() instead.
    asyncio.run(main())

登录后复制

以上是Python中aiohttp如何使用的详细内容。更多信息请关注PHP中文网其他相关文章！