Initial commit: Llama API Client with full documentation

- Added complete Python client for Llama AI models
- Support for internal network endpoints (tested and working)
- Support for external network endpoints (configured)
- Interactive chat interface with multiple models
- Automatic endpoint testing and failover
- Response cleaning for special markers
- Full documentation in English and Chinese
- Complete test suite and examples
- MIT License and contribution guidelines
This commit is contained in:
2025-09-19 21:38:15 +08:00
commit c6cc91da7f
18 changed files with 2072 additions and 0 deletions

243
local_api_test.py Normal file
View File

@@ -0,0 +1,243 @@
"""
內網 Llama API 測試程式
使用 OpenAI 相容格式連接到本地 API 端點
"""
import json
import os
from datetime import datetime

import requests
from openai import OpenAI
# API configuration.
# NOTE(review): a credential is committed here in plain text. Set the
# LLAMA_API_KEY environment variable to override it; the literal is kept only
# as a backward-compatible fallback and should be rotated and removed.
API_KEY = (
    os.environ.get("LLAMA_API_KEY")
    or "paVrIT+XU1NhwCAOb0X4aYi75QKogK5YNMGvQF1dCyo="
)

# Internal-network endpoints.
LOCAL_ENDPOINTS = [
    "http://192.168.0.6:21180/v1",
    "http://192.168.0.6:21181/v1",
    "http://192.168.0.6:21182/v1",
    "http://192.168.0.6:21183/v1",
]

# Available models.
MODELS = [
    "gpt-oss-120b",
    "deepseek-r1-671b",
    "qwen3-embedding-8b",
]
def test_endpoint_with_requests(endpoint, model="gpt-oss-120b"):
    """Probe an endpoint with a single raw HTTP chat-completion request.

    POSTs a short prompt to ``{endpoint}/chat/completions`` via ``requests``
    with a 10-second timeout and prints the outcome to stdout.

    Args:
        endpoint: Base URL of the API; ``/chat/completions`` is appended.
        model: Model name placed in the request payload.

    Returns:
        True when the server answers HTTP 200 and the JSON body has a
        ``choices`` key whose first message content could be read; False
        otherwise. Exceptions raised by the request or while reading the
        response are caught and reported rather than propagated.
    """
    print("\n[使用 requests 測試]")
    print(f"端點: {endpoint}")
    print(f"模型: {model}")

    auth_headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json",
    }
    prompt = {
        "role": "user",
        "content": "Say 'Hello, I am working!' if you can see this.",
    }
    payload = {
        "model": model,
        "messages": [prompt],
        "temperature": 0.7,
        "max_tokens": 50,
    }

    try:
        reply = requests.post(
            f"{endpoint}/chat/completions",
            headers=auth_headers,
            json=payload,
            timeout=10,
        )
        status = reply.status_code
        print(f"HTTP 狀態碼: {status}")

        if status != 200:
            print(f"[ERROR] HTTP {status}")
            # Skip the body for 502 to avoid dumping an HTML error page.
            if status != 502:
                print(f"詳情: {reply.text[:200]}")
            return False

        body = reply.json()
        if 'choices' not in body:
            print("[ERROR] 回應格式不正確")
            return False

        answer = body['choices'][0]['message']['content']
        print(f"[SUCCESS] AI 回應: {answer}")
        return True
    except requests.exceptions.ConnectTimeout:
        print("[TIMEOUT] 連接超時")
    except requests.exceptions.ConnectionError:
        print("[CONNECTION ERROR] 無法連接到端點")
    except Exception as exc:
        print(f"[ERROR] {str(exc)[:100]}")

    return False
def test_endpoint_with_openai(endpoint, model="gpt-oss-120b"):
    """Probe an endpoint with one chat completion sent through the OpenAI SDK.

    Builds an ``OpenAI`` client pointed at *endpoint* with a 10-second
    timeout, requests a short greeting, and prints the outcome to stdout.

    Args:
        endpoint: Base URL handed to the SDK as ``base_url``.
        model: Model name to request.

    Returns:
        ``(True, client)`` when a reply was received, where ``client`` is the
        SDK client that was used; ``(False, None)`` when any exception was
        raised. Failures are classified by substring matching on the
        exception text.
    """
    print("\n[使用 OpenAI SDK 測試]")
    print(f"端點: {endpoint}")
    print(f"模型: {model}")

    try:
        sdk_client = OpenAI(api_key=API_KEY, base_url=endpoint, timeout=10.0)
        completion = sdk_client.chat.completions.create(
            model=model,
            messages=[
                {
                    "role": "user",
                    "content": "Hello, please respond with a simple greeting.",
                },
            ],
            temperature=0.7,
            max_tokens=50,
        )
        greeting = completion.choices[0].message.content
        print(f"[SUCCESS] AI 回應: {greeting}")
        return True, sdk_client
    except Exception as exc:
        detail = str(exc)
        # The checks are ordered: connection errors first, then timeouts,
        # then 502, and finally a truncated copy of the raw message.
        if "Connection error" in detail:
            report = "[CONNECTION ERROR] 無法連接到端點"
        elif "timeout" in detail.lower():
            report = "[TIMEOUT] 請求超時"
        elif "502" in detail:
            report = "[ERROR] 502 Bad Gateway"
        else:
            report = f"[ERROR] {detail[:100]}"
        print(report)

    return False, None
def find_working_endpoint():
    """Probe every entry of ``LOCAL_ENDPOINTS`` and collect the usable ones.

    Each endpoint is first tried with the raw ``requests`` probe; the OpenAI
    SDK probe runs only when that first probe fails. Progress is printed to
    stdout under a timestamped banner.

    Returns:
        A list of the endpoints for which either probe succeeded, in the
        order they appear in ``LOCAL_ENDPOINTS``. Empty when none succeeded.
    """
    banner = "=" * 60
    started_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    print(banner)
    print(f"內網 API 端點測試 - {started_at}")
    print(banner)

    reachable = []
    for endpoint in LOCAL_ENDPOINTS:
        print(f"\n測試端點: {endpoint}")
        print("-" * 40)

        # Quick check with requests first; fall back to the OpenAI SDK.
        usable = test_endpoint_with_requests(endpoint)
        if not usable:
            usable, _ = test_endpoint_with_openai(endpoint)

        if usable:
            reachable.append(endpoint)
            print(f"[OK] 端點 {endpoint} 可用!")

    return reachable
def interactive_chat(endpoint, model="gpt-oss-120b"):
    """Run a console chat loop against *endpoint* using *model*.

    Reads user input from stdin, sends the accumulated conversation to the
    chat-completions API through the OpenAI SDK, and prints each reply. The
    loop ends when the user types ``exit`` or ``quit`` (case-insensitive),
    or when stdin is closed or interrupted at the prompt. Blank input is
    ignored.

    Args:
        endpoint: Base URL handed to the SDK as ``base_url``.
        model: Model name to request for every turn.

    Returns:
        None.
    """
    print(f"\n連接到: {endpoint}")
    print(f"使用模型: {model}")
    print("="*60)
    print("開始對話 (輸入 'exit' 結束)")
    print("="*60)

    client = OpenAI(
        api_key=API_KEY,
        base_url=endpoint
    )
    messages = []

    while True:
        try:
            user_input = input("\n你: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl-C at the prompt ends the chat cleanly
            # instead of surfacing a traceback.
            print("\n對話結束")
            break

        if user_input.lower() in ('exit', 'quit'):
            print("對話結束")
            break
        if not user_input:
            continue

        # Remember where this turn starts so it can be rolled back on failure.
        turn_start = len(messages)
        messages.append({"role": "user", "content": user_input})
        try:
            print("\nAI 思考中...")
            response = client.chat.completions.create(
                model=model,
                messages=messages,
                temperature=0.7,
                max_tokens=1000
            )
            ai_response = response.choices[0].message.content
            print(f"\nAI: {ai_response}")
            messages.append({"role": "assistant", "content": ai_response})
        except Exception as e:
            # Drop the unanswered user message; otherwise it would stay in
            # the history and be resent with every later request.
            del messages[turn_start:]
            print(f"\n[ERROR] {str(e)[:100]}")
def _select_option(options, raw_choice):
    """Return the entry of *options* picked by a 1-based menu number.

    Falls back to the first entry when *raw_choice* is not an integer or is
    outside ``1..len(options)``.
    """
    try:
        index = int(raw_choice) - 1
    except ValueError:
        return options[0]
    if 0 <= index < len(options):
        return options[index]
    return options[0]


def main():
    """Probe the endpoints, let the user pick one and a model, then chat.

    Prints a summary of the endpoints that responded. When at least one is
    usable, the endpoint is chosen automatically (single candidate) or from
    a numbered menu, a model is chosen from ``MODELS`` the same way, and the
    interactive chat starts. When none is usable, a list of likely causes is
    printed instead.

    Returns:
        None.
    """
    # Find usable endpoints.
    working_endpoints = find_working_endpoint()

    print("\n" + "="*60)
    print("測試結果總結")
    print("="*60)

    if not working_endpoints:
        print("\n[ERROR] 沒有找到可用的端點")
        print("\n可能的原因:")
        print("1. 內網 API 服務未啟動")
        print("2. 防火牆阻擋了連接")
        print("3. IP 地址或端口設定錯誤")
        print("4. 不在同一個網路環境")
        return

    print(f"\n找到 {len(working_endpoints)} 個可用端點:")
    for i, endpoint in enumerate(working_endpoints, 1):
        print(f"  {i}. {endpoint}")

    # Choose the endpoint.
    if len(working_endpoints) == 1:
        selected_endpoint = working_endpoints[0]
        print(f"\n自動選擇唯一可用端點: {selected_endpoint}")
    else:
        print(f"\n請選擇要使用的端點 (1-{len(working_endpoints)}):")
        # Invalid or out-of-range input falls back to the first endpoint.
        selected_endpoint = _select_option(working_endpoints, input().strip())

    # Choose the model; the menu range follows the length of MODELS.
    print("\n可用模型:")
    for i, model in enumerate(MODELS, 1):
        print(f"  {i}. {model}")
    print(f"\n請選擇模型 (1-{len(MODELS)}, 預設: 1):")
    selected_model = _select_option(MODELS, input().strip())

    # Start the conversation.
    interactive_chat(selected_endpoint, selected_model)


if __name__ == "__main__":
    main()