Add Python scripts for Llama API chat clients, endpoint testing, and quick tests. Includes documentation (README, CONTRIBUTING, 操作指南), a license, and a .gitignore. Supports multiple endpoints and models for OpenAI-compatible Llama API usage.

293 lines · 9.1 KiB · Python
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Interactive chat client for the Llama API.

Supports both intranet (LAN) endpoints and public internet endpoints,
all speaking the OpenAI-compatible chat-completions protocol.
"""

import os
import re
import sys
from datetime import datetime

import requests
from openai import OpenAI

# API key.
# SECURITY NOTE(review): a credential was hard-coded here. It can now be
# overridden via the LLAMA_API_KEY environment variable; the literal is kept
# only as a backward-compatible fallback — rotate it and remove it from source.
API_KEY = os.environ.get(
    "LLAMA_API_KEY",
    "paVrIT+XU1NhwCAOb0X4aYi75QKogK5YNMGvQF1dCyo=",
)

# Models served by every intranet endpoint (all three ports expose the same set).
_INTRANET_MODELS = ["gpt-oss-120b", "deepseek-r1-671b", "qwen3-embedding-8b"]

# Endpoint catalogue, grouped by network reachability.
# Each entry: "name" (display label), "url" (OpenAI-compatible base URL),
# "models" (model ids accepted by that endpoint).
ENDPOINTS = {
    "內網": [
        # The three intranet endpoints differ only by port; generate them.
        {
            "name": f"內網端點 {idx} ({port})",
            "url": f"http://192.168.0.6:{port}/v1",
            "models": list(_INTRANET_MODELS),
        }
        for idx, port in enumerate((21180, 21181, 21182), 1)
    ],
    "外網": [
        {
            "name": "外網 GPT-OSS-120B",
            "url": "https://llama.theaken.com/v1/gpt-oss-120b",
            "models": ["gpt-oss-120b"],
        },
        {
            "name": "外網 DeepSeek-R1-671B",
            "url": "https://llama.theaken.com/v1/deepseek-r1-671b",
            "models": ["deepseek-r1-671b"],
        },
        {
            "name": "外網通用端點",
            "url": "https://llama.theaken.com/v1",
            "models": ["gpt-oss-120b", "deepseek-r1-671b", "qwen3-embedding-8b"],
        },
    ],
}
def clean_response(text):
    """Strip model control tokens from an AI reply and return the clean text.

    Removes ``<think>...</think>`` reasoning sections, keeps only the final
    ``<|message|>`` payload when channel markers are present, drops stray
    ``<|start|>``/``<|end|>`` tokens, and trims surrounding whitespace.
    """
    # Remove any reasoning sections (no-op when none are present).
    text = re.sub(r'<think>.*?</think>', '', text, flags=re.DOTALL)

    # When channel markers appear, keep only the text after the LAST
    # <|message|> marker — that is the user-visible payload.
    if "<|channel|>" in text and "<|message|>" in text:
        text = text.rpartition("<|message|>")[2]

    # Scrub leftover start/end control tokens.
    for marker in ("<|end|>", "<|start|>"):
        text = text.replace(marker, "")

    return text.strip()
def test_endpoint(endpoint_info):
    """Probe one endpoint and report whether it answers a chat completion.

    Args:
        endpoint_info: dict with "url" and a non-empty "models" list.

    Returns:
        bool: True if either the OpenAI client or a raw HTTP POST got a
        successful response; False otherwise. Never raises.
    """
    url = endpoint_info["url"]
    model = endpoint_info["models"][0]  # probe with the first listed model

    # Model-specific URLs embed the model name as the last path segment;
    # the OpenAI client expects the bare /v1 base, so strip that segment.
    if "/gpt-oss-120b" in url or "/deepseek-r1-671b" in url:
        base_url = url.rsplit("/", 1)[0]
    else:
        base_url = url

    try:
        client = OpenAI(api_key=API_KEY, base_url=base_url)
        client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": "test"}],
            max_tokens=5,
            timeout=8,
        )
        return True
    except Exception:
        # The OpenAI client failed — fall through to a raw HTTP probe.
        pass

    try:
        headers = {
            "Authorization": f"Bearer {API_KEY}",
            "Content-Type": "application/json",
        }
        # Use the original URL here (not base_url) — some deployments route
        # per-model paths directly.
        test_url = url if url.endswith("/chat/completions") else f"{url}/chat/completions"
        data = {
            "model": model,
            "messages": [{"role": "user", "content": "test"}],
            "max_tokens": 5,
        }
        response = requests.post(test_url, headers=headers, json=data, timeout=8)
        return response.status_code == 200
    except requests.RequestException:
        # Narrowed from a bare `except:` so Ctrl-C / SystemExit still propagate.
        return False
def test_all_endpoints():
    """Probe every configured endpoint and return the reachable ones.

    Prints a per-endpoint OK/FAIL report while probing.

    Returns:
        list[tuple[str, dict]]: (network_type, endpoint_info) pairs for
        endpoints that responded successfully.
    """
    print("\n" + "="*60)
    print("測試 API 端點連接")
    print("="*60)

    available_endpoints = []

    # Both network groups run the identical probe/report loop — deduplicated
    # from two copy-pasted blocks. Output is byte-identical to the original.
    for network_type in ("內網", "外網"):
        print(f"\n[{network_type}端點測試]")
        for endpoint in ENDPOINTS[network_type]:
            print(f" 測試 {endpoint['name']}...", end="", flush=True)
            if test_endpoint(endpoint):
                print(" [OK]")
                available_endpoints.append((network_type, endpoint))
            else:
                print(" [FAIL]")

    return available_endpoints
def chat_session(endpoint_info):
    """Run the interactive chat loop against one endpoint.

    Args:
        endpoint_info: dict with "name", "url", and "models" keys.

    Commands understood at the prompt: exit/quit, clear, model.
    Returns when the user exits; keeps running on per-request errors.
    """
    print("\n" + "="*60)
    print("Llama AI 對話系統")
    print("="*60)
    print(f"端點: {endpoint_info['name']}")
    print(f"URL: {endpoint_info['url']}")
    print(f"可用模型: {', '.join(endpoint_info['models'])}")
    print("\n指令:")
    print(" exit/quit - 結束對話")
    print(" clear - 清空對話歷史")
    print(" model - 切換模型")
    print("-"*60)

    # Model-specific URLs embed the model name as the last path segment;
    # the OpenAI client wants the bare base URL, so strip that segment.
    url = endpoint_info["url"]
    if "/gpt-oss-120b" in url or "/deepseek-r1-671b" in url:
        base_url = url.rsplit("/", 1)[0]
    else:
        base_url = url

    client = OpenAI(api_key=API_KEY, base_url=base_url)

    # Pick the initial model: automatic when only one is offered,
    # otherwise prompt (defaulting to the first on bad/empty input).
    if len(endpoint_info['models']) == 1:
        current_model = endpoint_info['models'][0]
    else:
        print("\n選擇模型:")
        for i, model in enumerate(endpoint_info['models'], 1):
            print(f" {i}. {model}")
        choice = input("選擇 (預設: 1): ").strip()
        if choice.isdigit() and 1 <= int(choice) <= len(endpoint_info['models']):
            current_model = endpoint_info['models'][int(choice)-1]
        else:
            current_model = endpoint_info['models'][0]

    print(f"\n使用模型: {current_model}")
    # Full conversation history sent with every request.
    messages = []

    while True:
        try:
            user_input = input("\n你: ").strip()

            if not user_input:
                continue

            # Command: leave the chat loop.
            if user_input.lower() in ['exit', 'quit']:
                print("再見!")
                break

            # Command: wipe the conversation history.
            if user_input.lower() == 'clear':
                messages = []
                print("[系統] 對話歷史已清空")
                continue

            # Command: switch the active model (if the endpoint offers a choice).
            if user_input.lower() == 'model':
                if len(endpoint_info['models']) == 1:
                    print(f"[系統] 此端點只支援 {endpoint_info['models'][0]}")
                else:
                    print("\n可用模型:")
                    for i, m in enumerate(endpoint_info['models'], 1):
                        print(f" {i}. {m}")
                    choice = input("選擇: ").strip()
                    if choice.isdigit() and 1 <= int(choice) <= len(endpoint_info['models']):
                        current_model = endpoint_info['models'][int(choice)-1]
                        print(f"[系統] 已切換到 {current_model}")
                continue

            messages.append({"role": "user", "content": user_input})

            print("\nAI 思考中...", end="", flush=True)

            try:
                response = client.chat.completions.create(
                    model=current_model,
                    messages=messages,
                    temperature=0.7,
                    max_tokens=1000
                )

                ai_response = response.choices[0].message.content
                ai_response = clean_response(ai_response)

                # Overwrite the "thinking" indicator before printing the reply.
                print("\r" + " "*20 + "\r", end="")
                print(f"AI: {ai_response}")

                messages.append({"role": "assistant", "content": ai_response})

            except Exception as e:
                print(f"\r[錯誤] {str(e)[:100]}")
                # Drop the failed user turn so history stays consistent.
                messages.pop()

        except KeyboardInterrupt:
            # Ctrl-C at the prompt: stay in the loop, remind how to exit.
            print("\n\n[中斷] 使用 exit 命令正常退出")
            continue
        except EOFError:
            # Stdin closed (e.g. Ctrl-D): exit cleanly.
            print("\n再見!")
            break
def main():
    """Program entry: probe endpoints, let the user pick one, start chatting.

    Exits with status 1 when no endpoint is reachable.
    """
    print("="*60)
    print("Llama API 完整對話程式")
    print(f"時間: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print("="*60)

    # Probe every configured endpoint.
    available = test_all_endpoints()

    if not available:
        print("\n[錯誤] 沒有可用的端點")
        print("\n可能的原因:")
        print("1. 網路連接問題")
        print("2. API 服務離線")
        print("3. 防火牆阻擋")
        sys.exit(1)

    # List the reachable endpoints.
    print("\n" + "="*60)
    print(f"找到 {len(available)} 個可用端點:")
    print("="*60)

    for i, (network_type, endpoint) in enumerate(available, 1):
        print(f"{i}. [{network_type}] {endpoint['name']}")
        print(f" URL: {endpoint['url']}")
        print(f" 模型: {', '.join(endpoint['models'])}")

    # Prompt for a choice; any invalid input falls back to the first endpoint.
    print("\n選擇端點 (預設: 1): ", end="")
    choice = input().strip()

    if choice.isdigit() and 1 <= int(choice) <= len(available):
        selected = available[int(choice)-1][1]
    else:
        selected = available[0][1]

    # Hand off to the interactive chat loop.
    chat_session(selected)
if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        # Ctrl-C outside the chat loop: exit quietly.
        print("\n\n程式已退出")
    except Exception as e:
        # Unexpected failure: show the error plus a traceback, exit non-zero.
        print(f"\n[錯誤] {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)