# Example: query a locally hosted vLLM server through the OpenAI-compatible API.
from openai import OpenAI

# Endpoint of the locally hosted vLLM server (OpenAI-compatible API).
BASE_URL = "http://localhost:8000/v1"  # address mapped by the container
# Must match the --api-key flag passed when the server was launched.
API_KEY = "token-abc123"

client = OpenAI(base_url=BASE_URL, api_key=API_KEY)

# Conversation payload for the chat-completions request.
chat_messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "请用中文介绍一下你自己"},
]

# Ask the model registered via --served-model-name for a reply.
response = client.chat.completions.create(
    model="gemma-4-E2B-it",
    messages=chat_messages,
    temperature=0.7,
    max_tokens=512,
)

print(response.choices[0].message.content)