Initial commit: Llama API Client with full documentation

- Added complete Python client for Llama AI models
- Support for internal network endpoints (tested and working)
- Support for external network endpoints (configured)
- Interactive chat interface with multiple models
- Automatic endpoint testing and failover
- Response cleaning for special markers
- Full documentation in English and Chinese
- Complete test suite and examples
- MIT License and contribution guidelines
2025-09-19 21:38:15 +08:00
commit c6cc91da7f
18 changed files with 2072 additions and 0 deletions
--- a/llama_full_api.py
+++ b/llama_full_api.py
@@ -0,0 +1,293 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+Llama API 完整對話程式
+支援內網和外網端點
+"""
+
+from openai import OpenAI
+import requests
+import sys
+import re
+from datetime import datetime
+
+# API key used as the bearer token. NOTE(review): hard-coded secret committed to source — rotate it and load it from the environment instead.
+API_KEY = "paVrIT+XU1NhwCAOb0X4aYi75QKogK5YNMGvQF1dCyo="
+
+# Endpoint catalogue keyed by network group ("內網" = intranet, "外網" = internet); each entry has a display name, a URL and its model list.
+ENDPOINTS = {
+    "內網": [
+        {
+            "name": "內網端點 1 (21180)",
+            "url": "http://192.168.0.6:21180/v1",
+            "models": ["gpt-oss-120b", "deepseek-r1-671b", "qwen3-embedding-8b"]
+        },
+        {
+            "name": "內網端點 2 (21181)",
+            "url": "http://192.168.0.6:21181/v1",
+            "models": ["gpt-oss-120b", "deepseek-r1-671b", "qwen3-embedding-8b"]
+        },
+        {
+            "name": "內網端點 3 (21182)",
+            "url": "http://192.168.0.6:21182/v1",
+            "models": ["gpt-oss-120b", "deepseek-r1-671b", "qwen3-embedding-8b"]
+        }
+    ],
+    "外網": [
+        {
+            "name": "外網 GPT-OSS-120B",
+            "url": "https://llama.theaken.com/v1/gpt-oss-120b",
+            "models": ["gpt-oss-120b"]
+        },
+        {
+            "name": "外網 DeepSeek-R1-671B",
+            "url": "https://llama.theaken.com/v1/deepseek-r1-671b",
+            "models": ["deepseek-r1-671b"]
+        },
+        {
+            "name": "外網通用端點",
+            "url": "https://llama.theaken.com/v1",
+            "models": ["gpt-oss-120b", "deepseek-r1-671b", "qwen3-embedding-8b"]
+        }
+    ]
+}
+
+def clean_response(text):
+    """Strip model control markers from an AI reply and return the visible text.
+
+    Args:
+        text: Raw message content; may be ``None`` or empty when the API returns no content.
+
+    Returns:
+        The reply without ``<think>...</think>`` blocks, with everything before the
+        last ``<|message|>`` dropped when a ``<|channel|>`` marker is present, with
+        ``<|end|>``/``<|start|>`` removed and surrounding whitespace trimmed.
+    """
+    if not text:
+        # A reply can carry no content at all; return "" instead of raising TypeError.
+        return ""
+    text = re.sub(r'<think>.*?</think>', '', text, flags=re.DOTALL)
+    if "<|channel|>" in text:
+        # Channel-formatted reply: only the segment after the last <|message|> is shown.
+        text = text.split("<|message|>")[-1]
+    return text.replace("<|end|>", "").replace("<|start|>", "").strip()
+
+def test_endpoint(endpoint_info):
+    """Probe one endpoint with a tiny chat completion and report whether it answers.
+
+    The OpenAI client is tried first; if it raises, the same request is retried as a
+    raw HTTP POST with ``requests`` against the configured URL.
+
+    Args:
+        endpoint_info: Endpoint entry with a ``"url"`` string and a non-empty
+            ``"models"`` list; the first model is used for the probe.
+
+    Returns:
+        ``True`` if the client call succeeds or the fallback POST returns HTTP 200,
+        otherwise ``False``.
+    """
+    url = endpoint_info["url"]
+    model = endpoint_info["models"][0]
+    probe = [{"role": "user", "content": "test"}]
+
+    # Model-specific URLs end in the model name; the OpenAI client gets the URL without it.
+    if "/gpt-oss-120b" in url or "/deepseek-r1-671b" in url:
+        base_url = url.rsplit("/", 1)[0]
+    else:
+        base_url = url
+
+    try:
+        client = OpenAI(api_key=API_KEY, base_url=base_url)
+        client.chat.completions.create(model=model, messages=probe, max_tokens=5, timeout=8)
+        return True
+    except Exception:
+        pass  # fall through to the raw HTTP probe below
+
+    headers = {"Authorization": f"Bearer {API_KEY}", "Content-Type": "application/json"}
+    test_url = url if url.endswith("/chat/completions") else f"{url}/chat/completions"
+    payload = {"model": model, "messages": probe, "max_tokens": 5}
+    try:
+        response = requests.post(test_url, headers=headers, json=payload, timeout=8)
+        return response.status_code == 200
+    except Exception:
+        # Best effort: any error means "unavailable", but Ctrl-C (KeyboardInterrupt) still propagates.
+        return False
+
+def test_all_endpoints():
+    """Probe every configured endpoint and collect the ones that respond.
+
+    Endpoints are probed one at a time with ``test_endpoint``, the "內網" group first
+    and the "外網" group second, printing an ``[OK]``/``[FAIL]`` verdict for each.
+
+    Returns:
+        A list of ``(network_type, endpoint)`` tuples for the endpoints that passed,
+        in probing order.
+    """
+    banner = "=" * 60
+    print("\n" + banner)
+    print("測試 API 端點連接")
+    print(banner)
+
+    reachable = []
+
+    # Fixed group order keeps the report and the returned list intranet-first.
+    for network_type in ("內網", "外網"):
+        print(f"\n[{network_type}端點測試]")
+        for candidate in ENDPOINTS[network_type]:
+            # Start the progress line now; the verdict is appended once the probe ends.
+            print(f"  測試 {candidate['name']}...", end="", flush=True)
+            alive = test_endpoint(candidate)
+            print(" [OK]" if alive else " [FAIL]")
+            if alive:
+                reachable.append((network_type, candidate))
+
+    return reachable
+
+def chat_session(endpoint_info):
+    """Run the interactive chat loop against one endpoint until the user leaves.
+
+    Commands typed at the prompt (case-insensitive): ``exit``/``quit`` end the
+    session, ``clear`` empties the history, ``model`` switches the model. Any other
+    non-empty input is sent together with the history as a chat completion; the
+    cleaned reply is printed and appended to the history. EOF also ends the loop.
+
+    Args:
+        endpoint_info: Endpoint dict with "name", "url" and a non-empty "models" list.
+    """
+    models = endpoint_info['models']
+    print("\n" + "="*60)
+    print("Llama AI 對話系統")
+    print("="*60)
+    print(f"端點: {endpoint_info['name']}")
+    print(f"URL: {endpoint_info['url']}")
+    print(f"可用模型: {', '.join(models)}")
+    print("\n指令:")
+    print("  exit/quit - 結束對話")
+    print("  clear - 清空對話歷史")
+    print("  model - 切換模型")
+    print("-"*60)
+
+    # Model-specific URLs end in the model name; the OpenAI client gets the URL without it.
+    url = endpoint_info["url"]
+    if "/gpt-oss-120b" in url or "/deepseek-r1-671b" in url:
+        base_url = url.rsplit("/", 1)[0]
+    else:
+        base_url = url
+    client = OpenAI(api_key=API_KEY, base_url=base_url)
+
+    # Ask for the starting model only when there is a choice. isdecimal() rather than
+    # isdigit(): isdigit() also accepts characters such as "²" that int() cannot parse.
+    current_model = models[0]
+    if len(models) > 1:
+        print("\n選擇模型:")
+        for i, model in enumerate(models, 1):
+            print(f"  {i}. {model}")
+        choice = input("選擇 (預設: 1): ").strip()
+        if choice.isdecimal() and 1 <= int(choice) <= len(models):
+            current_model = models[int(choice)-1]
+
+    print(f"\n使用模型: {current_model}")
+    messages = []
+
+    while True:
+        try:
+            user_input = input("\n你: ").strip()
+            if not user_input:
+                continue
+            command = user_input.lower()
+
+            if command in ('exit', 'quit'):
+                print("再見！")
+                break
+
+            if command == 'clear':
+                messages = []
+                print("[系統] 對話歷史已清空")
+                continue
+
+            if command == 'model':
+                if len(models) == 1:
+                    print(f"[系統] 此端點只支援 {models[0]}")
+                else:
+                    print("\n可用模型:")
+                    for i, m in enumerate(models, 1):
+                        print(f"  {i}. {m}")
+                    choice = input("選擇: ").strip()
+                    if choice.isdecimal() and 1 <= int(choice) <= len(models):
+                        current_model = models[int(choice)-1]
+                        print(f"[系統] 已切換到 {current_model}")
+                continue
+
+            messages.append({"role": "user", "content": user_input})
+            print("\nAI 思考中...", end="", flush=True)
+            try:
+                response = client.chat.completions.create(
+                    model=current_model, messages=messages, temperature=0.7, max_tokens=1000)
+                ai_response = clean_response(response.choices[0].message.content)
+                # Overwrite the "thinking" indicator before printing the reply.
+                print("\r" + " "*20 + "\r", end="")
+                print(f"AI: {ai_response}")
+                messages.append({"role": "assistant", "content": ai_response})
+            except Exception as e:
+                print(f"\r[錯誤] {str(e)[:100]}")
+                messages.pop()  # drop the user turn that got no reply
+
+        except KeyboardInterrupt:
+            # Drop a user turn left unanswered by Ctrl-C mid-request so roles keep alternating.
+            if messages and messages[-1]["role"] == "user":
+                messages.pop()
+            print("\n\n[中斷] 使用 exit 命令正常退出")
+            continue
+        except EOFError:
+            print("\n再見！")
+            break
+
+def main():
+    """Entry point: probe the endpoints, let the user pick one, then start chatting.
+
+    Exits the process with status 1 when no endpoint is reachable.
+    """
+    print("="*60)
+    print("Llama API 完整對話程式")
+    print(f"時間: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+    print("="*60)
+
+    available = test_all_endpoints()
+    if not available:
+        print("\n[錯誤] 沒有可用的端點")
+        print("\n可能的原因:")
+        print("1. 網路連接問題")
+        print("2. API 服務離線")
+        print("3. 防火牆阻擋")
+        sys.exit(1)
+
+    print("\n" + "="*60)
+    print(f"找到 {len(available)} 個可用端點:")
+    print("="*60)
+    for i, (network_type, endpoint) in enumerate(available, 1):
+        print(f"{i}. [{network_type}] {endpoint['name']}")
+        print(f"   URL: {endpoint['url']}")
+        print(f"   模型: {', '.join(endpoint['models'])}")
+
+    # Blank or out-of-range input falls back to the first available endpoint.
+    # isdecimal() rather than isdigit(): isdigit() also accepts characters such
+    # as "²" that int() cannot parse, which would raise ValueError here.
+    print("\n選擇端點 (預設: 1): ", end="")
+    choice = input().strip()
+    if choice.isdecimal() and 1 <= int(choice) <= len(available):
+        selected = available[int(choice)-1][1]
+    else:
+        selected = available[0][1]
+
+    chat_session(selected)
+
+if __name__ == "__main__":  # script entry point
+    import traceback
+    try:
+        main()
+    except KeyboardInterrupt:
+        print("\n\n程式已退出")  # Ctrl-C: leave quietly
+    except Exception as exc:
+        print(f"\n[錯誤] {exc}")
+        traceback.print_exc()
+        sys.exit(1)  # any other failure: report it and signal an error status