Add Python scripts for Llama API chat clients, endpoint testing, and quick tests. Include documentation (README, CONTRIBUTING, 操作指南), license, and .gitignore. Supports multiple endpoints and models for OpenAI-compatible Llama API usage.
111 lines · 3.6 KiB · Python
import json
import os
from datetime import datetime

import requests
# API configuration.
# SECURITY NOTE(review): this API key was previously hard-coded in source and
# must be considered exposed — rotate it. It can now be overridden with the
# LLAMA_API_KEY environment variable; the original literal remains the default
# so existing behavior is unchanged.
API_KEY = os.environ.get(
    "LLAMA_API_KEY",
    "paVrIT+XU1NhwCAOb0X4aYi75QKogK5YNMGvQF1dCyo=",
)
# Base URL of the OpenAI-compatible Llama API.
BASE_URL = "https://llama.theaken.com/v1"
def _probe_model(headers, model, prompt):
    """Send one chat-completion request and print the outcome.

    Parameters:
        headers: dict of HTTP headers (must include the Authorization bearer).
        model: model identifier to place in the request body.
        prompt: single user message to send.

    Returns:
        True if the API answered 200 with a well-formed ``choices`` payload,
        False on any HTTP error, malformed body, timeout, or exception.
    """
    data = {
        "model": model,
        "messages": [
            {"role": "user", "content": prompt}
        ],
        "temperature": 0.5,
        "max_tokens": 20,
    }

    try:
        # Short timeout so a dead backend fails fast instead of hanging the run.
        response = requests.post(
            f"{BASE_URL}/chat/completions",
            headers=headers,
            json=data,
            timeout=15,
        )

        print(f"HTTP 狀態: {response.status_code}")

        if response.status_code == 200:
            result = response.json()
            if 'choices' in result:
                content = result['choices'][0]['message']['content']
                print(f"[SUCCESS] 成功回應: {content}")
                return True
            print("[ERROR] 回應格式錯誤")
        elif response.status_code == 502:
            print("[ERROR] 502 Bad Gateway - 伺服器無法回應")
        elif response.status_code == 401:
            print("[ERROR] 401 Unauthorized - API 金鑰可能錯誤")
        elif response.status_code == 404:
            print("[ERROR] 404 Not Found - 模型或端點不存在")
        else:
            print(f"[ERROR] 錯誤 {response.status_code}")
            # HTML error pages (e.g. gateway splash screens) are noise; only
            # show the body when it looks like a plain-text/JSON detail.
            if not response.text.startswith('<!DOCTYPE'):
                print(f"詳情: {response.text[:200]}")

    except requests.exceptions.Timeout:
        print("[TIMEOUT] 請求超時 (15秒)")
    except requests.exceptions.ConnectionError:
        # Fix: the original bound `as e` without using it and prefixed the
        # constant message with an f-string that had no placeholders.
        print("[CONNECTION ERROR] 無法連接到伺服器")
    except Exception as e:
        print(f"[UNKNOWN ERROR]: {str(e)[:100]}")

    return False


def test_endpoints():
    """Probe each configured chat model on the Llama API and print a summary.

    Sends one tiny chat-completion request per model listed below, prints a
    per-model pass/fail line, then an overall tally with either diagnostic
    hints (all failed) or a success note. Console diagnostic only: everything
    goes to stdout and nothing is returned.
    """
    print("="*60)
    print(f"Llama API 測試 - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print("="*60)

    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json",
    }

    # One probe per model; prompts are deliberately tiny (max_tokens=20) so
    # each round-trip is fast and cheap.
    tests = [
        {
            "name": "GPT-OSS-120B",
            "model": "gpt-oss-120b",
            "prompt": "Say hello in one word",
        },
        {
            "name": "DeepSeek-R1-671B",
            "model": "deepseek-r1-671b",
            "prompt": "Say hello in one word",
        },
        {
            "name": "Qwen3-Embedding-8B",
            "model": "qwen3-embedding-8b",
            "prompt": "Say hello in one word",
        },
    ]

    success_count = 0
    for test in tests:
        print(f"\n[測試 {test['name']}]")
        print("-"*40)
        if _probe_model(headers, test["model"], test["prompt"]):
            success_count += 1

    # Summary.
    print("\n" + "="*60)
    print(f"測試結果: {success_count}/{len(tests)} 成功")

    if success_count == 0:
        # NOTE(review): these diagnostics are static hints — they always cite
        # "502 Bad Gateway" regardless of which errors actually occurred above.
        # Consider deriving the cause from the observed failures.
        print("\n診斷資訊:")
        print("• 網路連接: 正常 (可 ping 通)")
        print("• API 端點: https://llama.theaken.com/v1")
        print("• 錯誤類型: 502 Bad Gateway")
        print("• 可能原因: 後端 API 服務暫時離線")
        print("\n建議行動:")
        print("1. 稍後再試 (建議 10-30 分鐘後)")
        print("2. 聯繫 API 管理員確認服務狀態")
        print("3. 檢查是否有服務維護公告")
    else:
        # Fix: dropped a no-placeholder f-string prefix on the first message.
        print("\n[OK] API 服務正常運作中!")
        print(f"[OK] 可使用的模型數: {success_count}")
# Script entry point: run the endpoint diagnostics when executed directly.
if __name__ == "__main__":
    test_endpoints()