Files
pj_llama/local_api_test.py
aken1023 8a929936ad Initial commit with Llama API client and docs
Add Python scripts for Llama API chat clients, endpoint testing, and quick tests. Include documentation (README, CONTRIBUTING, 操作指南), license, and .gitignore. Supports multiple endpoints and models for OpenAI-compatible Llama API usage.
2025-09-19 21:44:02 +08:00

243 lines
7.1 KiB
Python

"""
內網 Llama API 測試程式
使用 OpenAI 相容格式連接到本地 API 端點
"""
from openai import OpenAI
import requests
import json
from datetime import datetime
# API 配置
API_KEY = "paVrIT+XU1NhwCAOb0X4aYi75QKogK5YNMGvQF1dCyo="
# 內網端點列表
LOCAL_ENDPOINTS = [
"http://192.168.0.6:21180/v1",
"http://192.168.0.6:21181/v1",
"http://192.168.0.6:21182/v1",
"http://192.168.0.6:21183/v1"
]
# 可用模型
MODELS = [
"gpt-oss-120b",
"deepseek-r1-671b",
"qwen3-embedding-8b"
]
def test_endpoint_with_requests(endpoint, model="gpt-oss-120b"):
"""使用 requests 測試端點"""
print(f"\n[使用 requests 測試]")
print(f"端點: {endpoint}")
print(f"模型: {model}")
headers = {
"Authorization": f"Bearer {API_KEY}",
"Content-Type": "application/json"
}
data = {
"model": model,
"messages": [
{"role": "user", "content": "Say 'Hello, I am working!' if you can see this."}
],
"temperature": 0.7,
"max_tokens": 50
}
try:
response = requests.post(
f"{endpoint}/chat/completions",
headers=headers,
json=data,
timeout=10
)
print(f"HTTP 狀態碼: {response.status_code}")
if response.status_code == 200:
result = response.json()
if 'choices' in result:
content = result['choices'][0]['message']['content']
print(f"[SUCCESS] AI 回應: {content}")
return True
else:
print("[ERROR] 回應格式不正確")
else:
print(f"[ERROR] HTTP {response.status_code}")
if response.status_code != 502: # 避免顯示 HTML 錯誤頁
print(f"詳情: {response.text[:200]}")
except requests.exceptions.ConnectTimeout:
print("[TIMEOUT] 連接超時")
except requests.exceptions.ConnectionError:
print("[CONNECTION ERROR] 無法連接到端點")
except Exception as e:
print(f"[ERROR] {str(e)[:100]}")
return False
def test_endpoint_with_openai(endpoint, model="gpt-oss-120b"):
"""使用 OpenAI SDK 測試端點"""
print(f"\n[使用 OpenAI SDK 測試]")
print(f"端點: {endpoint}")
print(f"模型: {model}")
try:
client = OpenAI(
api_key=API_KEY,
base_url=endpoint,
timeout=10.0
)
response = client.chat.completions.create(
model=model,
messages=[
{"role": "user", "content": "Hello, please respond with a simple greeting."}
],
temperature=0.7,
max_tokens=50
)
content = response.choices[0].message.content
print(f"[SUCCESS] AI 回應: {content}")
return True, client
except Exception as e:
error_str = str(e)
if "Connection error" in error_str:
print("[CONNECTION ERROR] 無法連接到端點")
elif "timeout" in error_str.lower():
print("[TIMEOUT] 請求超時")
elif "502" in error_str:
print("[ERROR] 502 Bad Gateway")
else:
print(f"[ERROR] {error_str[:100]}")
return False, None
def find_working_endpoint():
"""尋找可用的端點"""
print("="*60)
print(f"內網 API 端點測試 - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print("="*60)
working_endpoints = []
for endpoint in LOCAL_ENDPOINTS:
print(f"\n測試端點: {endpoint}")
print("-"*40)
# 先用 requests 快速測試
if test_endpoint_with_requests(endpoint):
working_endpoints.append(endpoint)
print(f"[OK] 端點 {endpoint} 可用!")
else:
# 再用 OpenAI SDK 測試
success, _ = test_endpoint_with_openai(endpoint)
if success:
working_endpoints.append(endpoint)
print(f"[OK] 端點 {endpoint} 可用!")
return working_endpoints
def interactive_chat(endpoint, model="gpt-oss-120b"):
"""互動式對話"""
print(f"\n連接到: {endpoint}")
print(f"使用模型: {model}")
print("="*60)
print("開始對話 (輸入 'exit' 結束)")
print("="*60)
client = OpenAI(
api_key=API_KEY,
base_url=endpoint
)
messages = []
while True:
user_input = input("\n你: ").strip()
if user_input.lower() in ['exit', 'quit']:
print("對話結束")
break
if not user_input:
continue
messages.append({"role": "user", "content": user_input})
try:
print("\nAI 思考中...")
response = client.chat.completions.create(
model=model,
messages=messages,
temperature=0.7,
max_tokens=1000
)
ai_response = response.choices[0].message.content
print(f"\nAI: {ai_response}")
messages.append({"role": "assistant", "content": ai_response})
except Exception as e:
print(f"\n[ERROR] {str(e)[:100]}")
def main():
# 尋找可用端點
working_endpoints = find_working_endpoint()
print("\n" + "="*60)
print("測試結果總結")
print("="*60)
if working_endpoints:
print(f"\n找到 {len(working_endpoints)} 個可用端點:")
for i, endpoint in enumerate(working_endpoints, 1):
print(f" {i}. {endpoint}")
# 選擇端點
if len(working_endpoints) == 1:
selected_endpoint = working_endpoints[0]
print(f"\n自動選擇唯一可用端點: {selected_endpoint}")
else:
print(f"\n請選擇要使用的端點 (1-{len(working_endpoints)}):")
choice = input().strip()
try:
idx = int(choice) - 1
if 0 <= idx < len(working_endpoints):
selected_endpoint = working_endpoints[idx]
else:
selected_endpoint = working_endpoints[0]
except:
selected_endpoint = working_endpoints[0]
# 選擇模型
print("\n可用模型:")
for i, model in enumerate(MODELS, 1):
print(f" {i}. {model}")
print("\n請選擇模型 (1-3, 預設: 1):")
choice = input().strip()
if choice == "2":
selected_model = MODELS[1]
elif choice == "3":
selected_model = MODELS[2]
else:
selected_model = MODELS[0]
# 開始對話
interactive_chat(selected_endpoint, selected_model)
else:
print("\n[ERROR] 沒有找到可用的端點")
print("\n可能的原因:")
print("1. 內網 API 服務未啟動")
print("2. 防火牆阻擋了連接")
print("3. IP 地址或端口設定錯誤")
print("4. 不在同一個網路環境")
if __name__ == "__main__":
main()