Initial commit: Llama API Client with full documentation

- Added complete Python client for Llama AI models
- Support for internal network endpoints (tested and working)
- Support for external network endpoints (configured)
- Interactive chat interface with multiple models
- Automatic endpoint testing and failover
- Response cleaning for special markers
- Full documentation in English and Chinese
- Complete test suite and examples
- MIT License and contribution guidelines
2025-09-19 21:38:15 +08:00
commit c6cc91da7f
18 changed files with 2072 additions and 0 deletions
--- a/local_api_test.py
+++ b/local_api_test.py
@@ -0,0 +1,243 @@
+"""
+內網 Llama API 測試程式
+使用 OpenAI 相容格式連接到本地 API 端點
+"""
+
+import json
+import os
+from datetime import datetime
+
+import requests
+from openai import OpenAI
+
+# API configuration.
+# NOTE(review): a credential is committed in source below. It is kept only as a
+# backward-compatible fallback; supply LLAMA_API_KEY through the environment,
+# then rotate this key and delete the literal.
+API_KEY = (
+    os.environ.get("LLAMA_API_KEY")
+    or "paVrIT+XU1NhwCAOb0X4aYi75QKogK5YNMGvQF1dCyo="
+)
+
+# Internal-network base URLs, probed in this order.
+LOCAL_ENDPOINTS = [
+    "http://192.168.0.6:21180/v1",
+    "http://192.168.0.6:21181/v1",
+    "http://192.168.0.6:21182/v1",
+    "http://192.168.0.6:21183/v1",
+]
+
+# Model names offered in the interactive menu; the first one is the default.
+MODELS = [
+    "gpt-oss-120b",
+    "deepseek-r1-671b",
+    "qwen3-embedding-8b",
+]
+
+def test_endpoint_with_requests(endpoint, model="gpt-oss-120b"):
+    """Probe an endpoint with a raw HTTP chat-completion request.
+
+    Posts one short user message to ``{endpoint}/chat/completions`` via
+    ``requests`` (10-second timeout) and prints the outcome to stdout.
+
+    Args:
+        endpoint: Base URL of the API, e.g. ``http://host:port/v1``.
+        model: Model name placed in the request body.
+
+    Returns:
+        True if the server answered HTTP 200 with a JSON body whose first
+        ``choices`` entry carries a message; False on any failure.
+    """
+    print("\n[使用 requests 測試]")
+    print(f"端點: {endpoint}")
+    print(f"模型: {model}")
+
+    headers = {
+        "Authorization": f"Bearer {API_KEY}",
+        "Content-Type": "application/json",
+    }
+    payload = {
+        "model": model,
+        "messages": [
+            {"role": "user", "content": "Say 'Hello, I am working!' if you can see this."}
+        ],
+        "temperature": 0.7,
+        "max_tokens": 50,
+    }
+
+    # Only the network call lives in the try, so each failure mode is reported
+    # by the handler that actually describes it.
+    try:
+        response = requests.post(
+            f"{endpoint}/chat/completions",
+            headers=headers,
+            json=payload,
+            timeout=10,
+        )
+    except requests.exceptions.ConnectTimeout:
+        print("[TIMEOUT] 連接超時")
+        return False
+    except requests.exceptions.Timeout:
+        # Connected, but the server did not answer in time (read timeout).
+        print("[TIMEOUT] 請求超時")
+        return False
+    except requests.exceptions.ConnectionError:
+        print("[CONNECTION ERROR] 無法連接到端點")
+        return False
+    except Exception as e:
+        print(f"[ERROR] {str(e)[:100]}")
+        return False
+
+    print(f"HTTP 狀態碼: {response.status_code}")
+
+    if response.status_code != 200:
+        print(f"[ERROR] HTTP {response.status_code}")
+        if response.status_code != 502:  # 502 bodies are HTML error pages; skip them
+            print(f"詳情: {response.text[:200]}")
+        return False
+
+    # A non-JSON body, a missing/empty ``choices`` list, or an unexpected
+    # shape all count as a malformed response.
+    try:
+        content = response.json()['choices'][0]['message']['content']
+    except (ValueError, KeyError, IndexError, TypeError):
+        print("[ERROR] 回應格式不正確")
+        return False
+
+    print(f"[SUCCESS] AI 回應: {content}")
+    return True
+
+def test_endpoint_with_openai(endpoint, model="gpt-oss-120b"):
+    """Probe an endpoint through the OpenAI SDK.
+
+    Builds an ``OpenAI`` client for ``endpoint`` (10-second timeout), sends one
+    short chat-completion request and prints the outcome to stdout.
+
+    Args:
+        endpoint: Base URL of the API, passed to the client as ``base_url``.
+        model: Model name to request.
+
+    Returns:
+        ``(True, client)`` with the client that succeeded, or
+        ``(False, None)`` on any failure.
+    """
+    # Imported here so the block stays self-contained; the module header only
+    # imports ``OpenAI``.
+    from openai import APIConnectionError, APIStatusError, APITimeoutError
+
+    print("\n[使用 OpenAI SDK 測試]")
+    print(f"端點: {endpoint}")
+    print(f"模型: {model}")
+
+    try:
+        client = OpenAI(
+            api_key=API_KEY,
+            base_url=endpoint,
+            timeout=10.0,
+        )
+        response = client.chat.completions.create(
+            model=model,
+            messages=[
+                {"role": "user", "content": "Hello, please respond with a simple greeting."}
+            ],
+            temperature=0.7,
+            max_tokens=50,
+        )
+        content = response.choices[0].message.content
+        print(f"[SUCCESS] AI 回應: {content}")
+        return True, client
+    # Classify by exception type rather than by message text, which is not a
+    # stable interface. APITimeoutError must come first because it subclasses
+    # APIConnectionError.
+    except APITimeoutError:
+        print("[TIMEOUT] 請求超時")
+    except APIConnectionError:
+        print("[CONNECTION ERROR] 無法連接到端點")
+    except APIStatusError as e:
+        if e.status_code == 502:
+            print("[ERROR] 502 Bad Gateway")
+        else:
+            print(f"[ERROR] {str(e)[:100]}")
+    except Exception as e:
+        print(f"[ERROR] {str(e)[:100]}")
+
+    return False, None
+
+def find_working_endpoint():
+    """Probe every configured endpoint and collect the ones that respond.
+
+    Each URL in ``LOCAL_ENDPOINTS`` is tried with the raw ``requests`` probe
+    first; the OpenAI SDK probe runs only if that one fails.
+
+    Returns:
+        List of endpoint URLs that passed either probe, in configured order.
+    """
+    banner = "=" * 60
+    print(banner)
+    started_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+    print(f"內網 API 端點測試 - {started_at}")
+    print(banner)
+
+    reachable = []
+    for url in LOCAL_ENDPOINTS:
+        print(f"\n測試端點: {url}")
+        print("-" * 40)
+
+        # ``or`` short-circuits, so the SDK probe is skipped when the raw
+        # HTTP probe already succeeded.
+        if not (test_endpoint_with_requests(url) or test_endpoint_with_openai(url)[0]):
+            continue
+
+        reachable.append(url)
+        print(f"[OK] 端點 {url} 可用！")
+
+    return reachable
+
+def interactive_chat(endpoint, model="gpt-oss-120b"):
+    """Run a console chat loop against an endpoint.
+
+    Reads user turns from stdin, sends the accumulated conversation to the
+    model through the OpenAI SDK and prints each reply. The loop ends when the
+    user types ``exit`` or ``quit``, or on EOF / Ctrl-C at the prompt.
+
+    Args:
+        endpoint: Base URL of the API, passed to the client as ``base_url``.
+        model: Model name used for every request.
+    """
+    print(f"\n連接到: {endpoint}")
+    print(f"使用模型: {model}")
+    print("="*60)
+    print("開始對話 (輸入 'exit' 結束)")
+    print("="*60)
+
+    client = OpenAI(
+        api_key=API_KEY,
+        base_url=endpoint,
+    )
+
+    # Full conversation history, re-sent with every request.
+    messages = []
+
+    while True:
+        try:
+            user_input = input("\n你: ").strip()
+        except (EOFError, KeyboardInterrupt):
+            # Closed stdin or Ctrl-C at the prompt ends the session cleanly
+            # instead of dumping a traceback.
+            print("\n對話結束")
+            break
+
+        if user_input.lower() in ('exit', 'quit'):
+            print("對話結束")
+            break
+
+        if not user_input:
+            continue
+
+        messages.append({"role": "user", "content": user_input})
+
+        try:
+            print("\nAI 思考中...")
+            response = client.chat.completions.create(
+                model=model,
+                messages=messages,
+                temperature=0.7,
+                max_tokens=1000,
+            )
+            ai_response = response.choices[0].message.content
+        except Exception as e:
+            # Drop the unanswered turn so it is not re-sent (and duplicated
+            # on retry) with every later request.
+            messages.pop()
+            print(f"\n[ERROR] {str(e)[:100]}")
+            continue
+
+        print(f"\nAI: {ai_response}")
+        messages.append({"role": "assistant", "content": ai_response})
+def _pick_by_number(options, raw_choice):
+    """Return the entry of ``options`` at 1-based ``raw_choice``, else the first."""
+    try:
+        index = int(raw_choice) - 1
+    except ValueError:
+        # Blank or non-numeric input falls back to the default (first) entry.
+        return options[0]
+    return options[index] if 0 <= index < len(options) else options[0]
+
+
+def main():
+    """Scan the endpoints, let the user pick endpoint and model, then chat.
+
+    Prints a summary of reachable endpoints. With none reachable it prints
+    troubleshooting hints and returns. Otherwise it selects an endpoint
+    (automatically when only one works), asks for a model from ``MODELS`` and
+    starts ``interactive_chat``. Invalid or empty menu input selects the first
+    entry.
+    """
+    # Find the endpoints that currently respond.
+    working_endpoints = find_working_endpoint()
+
+    print("\n" + "="*60)
+    print("測試結果總結")
+    print("="*60)
+
+    if not working_endpoints:
+        print("\n[ERROR] 沒有找到可用的端點")
+        print("\n可能的原因:")
+        print("1. 內網 API 服務未啟動")
+        print("2. 防火牆阻擋了連接")
+        print("3. IP 地址或端口設定錯誤")
+        print("4. 不在同一個網路環境")
+        return
+
+    print(f"\n找到 {len(working_endpoints)} 個可用端點:")
+    for i, endpoint in enumerate(working_endpoints, 1):
+        print(f"  {i}. {endpoint}")
+
+    # Choose the endpoint.
+    if len(working_endpoints) == 1:
+        selected_endpoint = working_endpoints[0]
+        print(f"\n自動選擇唯一可用端點: {selected_endpoint}")
+    else:
+        print(f"\n請選擇要使用的端點 (1-{len(working_endpoints)}):")
+        selected_endpoint = _pick_by_number(working_endpoints, input().strip())
+
+    # Choose the model; the menu range follows MODELS rather than a fixed 1-3.
+    print("\n可用模型:")
+    for i, model in enumerate(MODELS, 1):
+        print(f"  {i}. {model}")
+
+    print(f"\n請選擇模型 (1-{len(MODELS)}, 預設: 1):")
+    selected_model = _pick_by_number(MODELS, input().strip())
+
+    # Start the conversation.
+    interactive_chat(selected_endpoint, selected_model)
+
+# Run the endpoint scan and chat only when executed as a script, not on import.
+if __name__ == "__main__":
+    main()