import httpx
from typing import Callable, Awaitable, Optional


# Streaming POST helper used for talking to an LLM endpoint.
async def stream_post_request(
    url,
    headers=None,
    json=None,
    chunk_handler: Optional[Callable[[bytes], Awaitable[bytes]]] = None,
):
    """Send a POST request and yield the response body chunk by chunk.

    This is an async generator: a fresh ``httpx.AsyncClient`` (created with
    ``http2=True``) is opened for each call, the request is issued through
    ``client.stream``, and every chunk produced by ``response.aiter_bytes()``
    is yielded to the caller as it arrives.

    Args:
        url: Target URL of the POST request.
        headers: Optional request headers, passed to httpx unchanged.
        json: Optional payload, passed to httpx as the ``json`` argument.
        chunk_handler: Optional async callable. When it is truthy, each raw
            chunk is awaited through it and the handler's return value is
            yielded in place of the raw chunk.

    Yields:
        Each response chunk, or the handler's result for that chunk when a
        handler is supplied.
    """
    request_kwargs = {
        "method": "POST",
        "url": url,
        "headers": headers,
        "json": json,
    }
    # Client and streamed response are tied to the same scope; both are
    # closed when the generator finishes or is closed.
    async with httpx.AsyncClient(http2=True) as client, client.stream(
        **request_kwargs
    ) as response:
        async for raw_chunk in response.aiter_bytes():
            if chunk_handler:
                # The handler is awaited, so asynchronous processing of
                # each chunk is supported.
                yield await chunk_handler(raw_chunk)
            else:
                yield raw_chunk