# Commit note: Add Python scripts for Llama API chat clients, endpoint testing, and quick tests.
# Include documentation (README, CONTRIBUTING, 操作指南), license, and .gitignore.
# Supports multiple endpoints and models for OpenAI-compatible Llama API usage.
# File info: 243 lines, 7.1 KiB, Python
"""
|
|
Intranet Llama API test program.

Connects to local API endpoints using the OpenAI-compatible format.
|
|
"""
|
|
|
|
import json
import os
from datetime import datetime

import requests
from openai import OpenAI
|
|
|
|
# API configuration.
# NOTE(review): a credential is committed in source below. It is kept only as a
# backward-compatible fallback; prefer setting LLAMA_API_KEY in the environment
# and rotate/remove the embedded value.
API_KEY = os.environ.get("LLAMA_API_KEY") or "paVrIT+XU1NhwCAOb0X4aYi75QKogK5YNMGvQF1dCyo="

# Intranet endpoint list (base URLs passed to the OpenAI-compatible clients).
LOCAL_ENDPOINTS = [
    "http://192.168.0.6:21180/v1",
    "http://192.168.0.6:21181/v1",
    "http://192.168.0.6:21182/v1",
    "http://192.168.0.6:21183/v1",
]

# Available models.
MODELS = [
    "gpt-oss-120b",
    "deepseek-r1-671b",
    "qwen3-embedding-8b",
]
|
|
|
|
def test_endpoint_with_requests(endpoint, model="gpt-oss-120b"):
    """Probe one endpoint with a raw HTTP chat-completion request.

    Sends a single short user message to ``{endpoint}/chat/completions`` via
    ``requests`` (10 second timeout) and prints the outcome to stdout.

    Args:
        endpoint: Base URL of the API, e.g. ``http://host:port/v1``.
        model: Model name placed in the request payload.

    Returns:
        True if the server answered HTTP 200 with a readable first choice,
        otherwise False. Network and parsing errors are printed, not raised.
    """
    print("\n[使用 requests 測試]")
    print(f"端點: {endpoint}")
    print(f"模型: {model}")

    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json",
    }

    data = {
        "model": model,
        "messages": [
            {"role": "user", "content": "Say 'Hello, I am working!' if you can see this."}
        ],
        "temperature": 0.7,
        "max_tokens": 50,
    }

    try:
        response = requests.post(
            # Tolerate a trailing slash on the base URL to avoid "//chat/completions".
            f"{endpoint.rstrip('/')}/chat/completions",
            headers=headers,
            json=data,
            timeout=10,
        )

        print(f"HTTP 狀態碼: {response.status_code}")

        if response.status_code != 200:
            print(f"[ERROR] HTTP {response.status_code}")
            # Skip the body for 502 to avoid printing an HTML error page.
            if response.status_code != 502:
                print(f"詳情: {response.text[:200]}")
            return False

        result = response.json()
        try:
            content = result["choices"][0]["message"]["content"]
        except (KeyError, IndexError, TypeError):
            # Missing key, empty choices list, or a non-dict payload.
            print("[ERROR] 回應格式不正確")
            return False

        print(f"[SUCCESS] AI 回應: {content}")
        return True

    except requests.exceptions.Timeout:
        # Timeout covers both connect and read timeouts; a slow generation
        # that exceeds the limit is reported here instead of as a generic error.
        print("[TIMEOUT] 連接超時")
    except requests.exceptions.ConnectionError:
        print("[CONNECTION ERROR] 無法連接到端點")
    except Exception as e:
        # Best-effort probe: report anything else (e.g. invalid JSON) briefly.
        print(f"[ERROR] {str(e)[:100]}")

    return False
|
|
|
|
def test_endpoint_with_openai(endpoint, model="gpt-oss-120b"):
    """Probe one endpoint through the OpenAI SDK client.

    Builds an ``OpenAI`` client for *endpoint* (10 second timeout), sends one
    short chat request and prints the outcome to stdout.

    Args:
        endpoint: Base URL handed to the SDK as ``base_url``.
        model: Model name used for the chat completion.

    Returns:
        ``(True, client)`` when a reply was received, otherwise
        ``(False, None)``. Errors are printed, not raised.
    """
    print("\n[使用 OpenAI SDK 測試]")
    print(f"端點: {endpoint}")
    print(f"模型: {model}")

    try:
        client = OpenAI(
            api_key=API_KEY,
            base_url=endpoint,
            timeout=10.0,
        )

        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "user", "content": "Hello, please respond with a simple greeting."}
            ],
            temperature=0.7,
            max_tokens=50,
        )

        if not response.choices:
            # A 200 reply without any choice is unusable for chatting.
            print("[ERROR] 回應格式不正確")
            return False, None

        content = response.choices[0].message.content
        print(f"[SUCCESS] AI 回應: {content}")
        return True, client

    except Exception as e:
        error_str = str(e)
        lowered = error_str.lower()
        if "Connection error" in error_str:
            print("[CONNECTION ERROR] 無法連接到端點")
        elif "timeout" in lowered or "timed out" in lowered:
            # The SDK's timeout error reads "Request timed out.", which does
            # not contain the substring "timeout"; match both spellings.
            print("[TIMEOUT] 請求超時")
        elif "502" in error_str:
            print("[ERROR] 502 Bad Gateway")
        else:
            print(f"[ERROR] {error_str[:100]}")

    return False, None
|
|
|
|
def find_working_endpoint():
    """Probe every configured endpoint and collect the ones that respond.

    Each URL in ``LOCAL_ENDPOINTS`` is checked with the raw ``requests`` probe
    first; the OpenAI SDK probe is attempted only when that one fails.

    Returns:
        List of endpoint URLs that passed either probe, in configuration order.
    """
    banner = "=" * 60
    print(banner)
    print(f"內網 API 端點測試 - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(banner)

    reachable = []

    for url in LOCAL_ENDPOINTS:
        print(f"\n測試端點: {url}")
        print("-" * 40)

        # Quick raw-HTTP probe first; fall back to the SDK probe on failure.
        ok = test_endpoint_with_requests(url)
        if not ok:
            ok, _ = test_endpoint_with_openai(url)

        if ok:
            reachable.append(url)
            print(f"[OK] 端點 {url} 可用!")

    return reachable
|
|
|
|
def interactive_chat(endpoint, model="gpt-oss-120b"):
    """Run an interactive console chat against one endpoint.

    Reads user input in a loop, sends the accumulated conversation to the
    model and prints each reply. Typing ``exit`` or ``quit`` (or closing
    stdin / pressing Ctrl-C at the prompt) ends the session.

    Args:
        endpoint: Base URL handed to the OpenAI SDK client.
        model: Model name used for every chat completion.
    """
    print(f"\n連接到: {endpoint}")
    print(f"使用模型: {model}")
    print("="*60)
    print("開始對話 (輸入 'exit' 結束)")
    print("="*60)

    client = OpenAI(
        api_key=API_KEY,
        base_url=endpoint,
    )

    # Full conversation history, resent on every turn.
    messages = []

    while True:
        try:
            user_input = input("\n你: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl-C: end the session cleanly, like 'exit'.
            print("\n對話結束")
            break

        if user_input.lower() in ('exit', 'quit'):
            print("對話結束")
            break

        if not user_input:
            continue

        messages.append({"role": "user", "content": user_input})

        try:
            print("\nAI 思考中...")
            response = client.chat.completions.create(
                model=model,
                messages=messages,
                temperature=0.7,
                max_tokens=1000,
            )

            ai_response = response.choices[0].message.content
            print(f"\nAI: {ai_response}")
            messages.append({"role": "assistant", "content": ai_response})

        except Exception as e:
            print(f"\n[ERROR] {str(e)[:100]}")
            # Drop the unanswered user turn so a retry does not send
            # consecutive or duplicated user messages in the history.
            messages.pop()
|
|
def _pick_by_number(raw, options):
    """Return the option chosen by the 1-based number in *raw*, else the first option."""
    try:
        idx = int(raw) - 1
    except ValueError:
        # Empty or non-numeric input falls back to the default (first) option.
        return options[0]
    return options[idx] if 0 <= idx < len(options) else options[0]


def main():
    """Find working endpoints, let the user pick endpoint and model, then chat.

    Prints a summary of the endpoint probe. When at least one endpoint works,
    prompts for an endpoint (skipped if only one is available) and a model
    from ``MODELS``, then starts ``interactive_chat``. Invalid or empty
    selections fall back to the first entry. When nothing works, prints a
    list of likely causes.
    """
    # Locate usable endpoints.
    working_endpoints = find_working_endpoint()

    print("\n" + "="*60)
    print("測試結果總結")
    print("="*60)

    if not working_endpoints:
        print("\n[ERROR] 沒有找到可用的端點")
        print("\n可能的原因:")
        print("1. 內網 API 服務未啟動")
        print("2. 防火牆阻擋了連接")
        print("3. IP 地址或端口設定錯誤")
        print("4. 不在同一個網路環境")
        return

    print(f"\n找到 {len(working_endpoints)} 個可用端點:")
    for i, endpoint in enumerate(working_endpoints, 1):
        print(f" {i}. {endpoint}")

    # Choose the endpoint.
    if len(working_endpoints) == 1:
        selected_endpoint = working_endpoints[0]
        print(f"\n自動選擇唯一可用端點: {selected_endpoint}")
    else:
        print(f"\n請選擇要使用的端點 (1-{len(working_endpoints)}):")
        selected_endpoint = _pick_by_number(input().strip(), working_endpoints)

    # Choose the model.
    print("\n可用模型:")
    for i, model in enumerate(MODELS, 1):
        print(f" {i}. {model}")

    # The range is derived from MODELS so the prompt stays correct if the
    # list changes; with three models it reads exactly "1-3".
    print(f"\n請選擇模型 (1-{len(MODELS)}, 預設: 1):")
    selected_model = _pick_by_number(input().strip(), MODELS)

    # Start the conversation.
    interactive_chat(selected_endpoint, selected_model)
|
|
|
|
# Run the probe-and-chat flow only when executed as a script, not on import.
if __name__ == "__main__":
    main()