""" 內網 Llama API 測試程式 使用 OpenAI 相容格式連接到本地 API 端點 """ from openai import OpenAI import requests import json from datetime import datetime # API 配置 API_KEY = "paVrIT+XU1NhwCAOb0X4aYi75QKogK5YNMGvQF1dCyo=" # 內網端點列表 LOCAL_ENDPOINTS = [ "http://192.168.0.6:21180/v1", "http://192.168.0.6:21181/v1", "http://192.168.0.6:21182/v1", "http://192.168.0.6:21183/v1" ] # 可用模型 MODELS = [ "gpt-oss-120b", "deepseek-r1-671b", "qwen3-embedding-8b" ] def test_endpoint_with_requests(endpoint, model="gpt-oss-120b"): """使用 requests 測試端點""" print(f"\n[使用 requests 測試]") print(f"端點: {endpoint}") print(f"模型: {model}") headers = { "Authorization": f"Bearer {API_KEY}", "Content-Type": "application/json" } data = { "model": model, "messages": [ {"role": "user", "content": "Say 'Hello, I am working!' if you can see this."} ], "temperature": 0.7, "max_tokens": 50 } try: response = requests.post( f"{endpoint}/chat/completions", headers=headers, json=data, timeout=10 ) print(f"HTTP 狀態碼: {response.status_code}") if response.status_code == 200: result = response.json() if 'choices' in result: content = result['choices'][0]['message']['content'] print(f"[SUCCESS] AI 回應: {content}") return True else: print("[ERROR] 回應格式不正確") else: print(f"[ERROR] HTTP {response.status_code}") if response.status_code != 502: # 避免顯示 HTML 錯誤頁 print(f"詳情: {response.text[:200]}") except requests.exceptions.ConnectTimeout: print("[TIMEOUT] 連接超時") except requests.exceptions.ConnectionError: print("[CONNECTION ERROR] 無法連接到端點") except Exception as e: print(f"[ERROR] {str(e)[:100]}") return False def test_endpoint_with_openai(endpoint, model="gpt-oss-120b"): """使用 OpenAI SDK 測試端點""" print(f"\n[使用 OpenAI SDK 測試]") print(f"端點: {endpoint}") print(f"模型: {model}") try: client = OpenAI( api_key=API_KEY, base_url=endpoint, timeout=10.0 ) response = client.chat.completions.create( model=model, messages=[ {"role": "user", "content": "Hello, please respond with a simple greeting."} ], temperature=0.7, max_tokens=50 ) content = response.choices[0].message.content print(f"[SUCCESS] AI 回應: {content}") return True, client except Exception as e: error_str = str(e) if "Connection error" in error_str: print("[CONNECTION ERROR] 無法連接到端點") elif "timeout" in error_str.lower(): print("[TIMEOUT] 請求超時") elif "502" in error_str: print("[ERROR] 502 Bad Gateway") else: print(f"[ERROR] {error_str[:100]}") return False, None def find_working_endpoint(): """尋找可用的端點""" print("="*60) print(f"內網 API 端點測試 - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}") print("="*60) working_endpoints = [] for endpoint in LOCAL_ENDPOINTS: print(f"\n測試端點: {endpoint}") print("-"*40) # 先用 requests 快速測試 if test_endpoint_with_requests(endpoint): working_endpoints.append(endpoint) print(f"[OK] 端點 {endpoint} 可用!") else: # 再用 OpenAI SDK 測試 success, _ = test_endpoint_with_openai(endpoint) if success: working_endpoints.append(endpoint) print(f"[OK] 端點 {endpoint} 可用!") return working_endpoints def interactive_chat(endpoint, model="gpt-oss-120b"): """互動式對話""" print(f"\n連接到: {endpoint}") print(f"使用模型: {model}") print("="*60) print("開始對話 (輸入 'exit' 結束)") print("="*60) client = OpenAI( api_key=API_KEY, base_url=endpoint ) messages = [] while True: user_input = input("\n你: ").strip() if user_input.lower() in ['exit', 'quit']: print("對話結束") break if not user_input: continue messages.append({"role": "user", "content": user_input}) try: print("\nAI 思考中...") response = client.chat.completions.create( model=model, messages=messages, temperature=0.7, max_tokens=1000 ) ai_response = response.choices[0].message.content print(f"\nAI: {ai_response}") messages.append({"role": "assistant", "content": ai_response}) except Exception as e: print(f"\n[ERROR] {str(e)[:100]}") def main(): # 尋找可用端點 working_endpoints = find_working_endpoint() print("\n" + "="*60) print("測試結果總結") print("="*60) if working_endpoints: print(f"\n找到 {len(working_endpoints)} 個可用端點:") for i, endpoint in enumerate(working_endpoints, 1): print(f" {i}. {endpoint}") # 選擇端點 if len(working_endpoints) == 1: selected_endpoint = working_endpoints[0] print(f"\n自動選擇唯一可用端點: {selected_endpoint}") else: print(f"\n請選擇要使用的端點 (1-{len(working_endpoints)}):") choice = input().strip() try: idx = int(choice) - 1 if 0 <= idx < len(working_endpoints): selected_endpoint = working_endpoints[idx] else: selected_endpoint = working_endpoints[0] except: selected_endpoint = working_endpoints[0] # 選擇模型 print("\n可用模型:") for i, model in enumerate(MODELS, 1): print(f" {i}. {model}") print("\n請選擇模型 (1-3, 預設: 1):") choice = input().strip() if choice == "2": selected_model = MODELS[1] elif choice == "3": selected_model = MODELS[2] else: selected_model = MODELS[0] # 開始對話 interactive_chat(selected_endpoint, selected_model) else: print("\n[ERROR] 沒有找到可用的端點") print("\n可能的原因:") print("1. 內網 API 服務未啟動") print("2. 防火牆阻擋了連接") print("3. IP 地址或端口設定錯誤") print("4. 不在同一個網路環境") if __name__ == "__main__": main()