#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Llama internal-network API chat program.

Probes several OpenAI-compatible endpoints, lets the user choose an
endpoint and a model, then runs an interactive chat REPL.
"""
from openai import OpenAI
import os
import sys
import re

# API configuration.
# SECURITY NOTE(review): a credential was hard-coded here; it is kept as the
# default for backward compatibility but can now be overridden via the
# LLAMA_API_KEY environment variable. Prefer rotating the embedded key.
API_KEY = os.environ.get(
    "LLAMA_API_KEY",
    "paVrIT+XU1NhwCAOb0X4aYi75QKogK5YNMGvQF1dCyo=",
)

# Candidate endpoints (the first 3 have been verified working).
ENDPOINTS = [
    "http://192.168.0.6:21180/v1",
    "http://192.168.0.6:21181/v1",
    "http://192.168.0.6:21182/v1",
    "http://192.168.0.6:21183/v1"
]

# Selectable model names.
MODELS = [
    "gpt-oss-120b",
    "deepseek-r1-671b",
    "qwen3-embedding-8b"
]


def clean_response(text):
    """Strip model-specific control markers from an AI response.

    Removes ``<think>...</think>`` reasoning blocks (emitted e.g. by
    deepseek-r1), Harmony-style ``<|channel|>``/``<|message|>`` markers,
    and ``<|start|>``/``<|end|>`` tokens, then trims whitespace.
    """
    # Remove reasoning blocks.
    # NOTE(review): the original condition (`if "" in text`) and pattern
    # (`r'.*?'`) had lost the literal "<think>" tags — almost certainly an
    # extraction artifact that ate angle-bracketed text. Reconstructed here;
    # confirm against actual model output.
    if "<think>" in text:
        text = re.sub(r'<think>.*?</think>', '', text, flags=re.DOTALL)

    # Keep only the final message segment after channel markers.
    if "<|channel|>" in text:
        parts = text.split("<|message|>")
        if len(parts) > 1:
            text = parts[-1]

    # Drop start/end tokens and surrounding whitespace.
    text = text.replace("<|end|>", "").replace("<|start|>", "")
    return text.strip()


def test_endpoint(endpoint):
    """Return True if *endpoint* answers a tiny chat completion within 5s."""
    try:
        client = OpenAI(api_key=API_KEY, base_url=endpoint)
        client.chat.completions.create(
            model="gpt-oss-120b",
            messages=[{"role": "user", "content": "Hi"}],
            max_tokens=10,
            timeout=5,
        )
        return True
    except Exception:
        # Narrowed from a bare `except:` so Ctrl-C / SystemExit still propagate.
        return False


def chat_session(endpoint, model):
    """Run the interactive chat loop against *endpoint* using *model*.

    In-session commands: ``exit``/``quit`` (leave), ``clear`` (reset
    history), ``model`` (switch between entries of MODELS).
    """
    print("\n" + "=" * 60)
    print("Llama AI 對話系統")
    print("=" * 60)
    print(f"端點: {endpoint}")
    print(f"模型: {model}")
    print("\n指令:")
    print(" exit/quit - 結束對話")
    print(" clear - 清空對話歷史")
    print(" model - 切換模型")
    print("-" * 60)

    client = OpenAI(api_key=API_KEY, base_url=endpoint)
    messages = []

    while True:
        try:
            user_input = input("\n你: ").strip()
            if not user_input:
                continue
            if user_input.lower() in ['exit', 'quit']:
                print("再見!")
                break
            if user_input.lower() == 'clear':
                messages = []
                print("[系統] 對話歷史已清空")
                continue
            if user_input.lower() == 'model':
                print("\n可用模型:")
                for i, m in enumerate(MODELS, 1):
                    print(f" {i}. {m}")
                choice = input("選擇 (1-3): ").strip()
                # Generalized: bounds-check against len(MODELS) instead of the
                # hard-coded ['1', '2', '3'] so the list can grow safely.
                if choice.isdigit() and 1 <= int(choice) <= len(MODELS):
                    model = MODELS[int(choice) - 1]
                    print(f"[系統] 已切換到 {model}")
                continue

            messages.append({"role": "user", "content": user_input})
            print("\nAI 思考中...", end="", flush=True)
            try:
                response = client.chat.completions.create(
                    model=model,
                    messages=messages,
                    temperature=0.7,
                    max_tokens=1000,
                )
                ai_response = clean_response(response.choices[0].message.content)
                # Erase the "thinking" progress text before printing the reply.
                print("\r" + " " * 20 + "\r", end="")
                print(f"AI: {ai_response}")
                messages.append({"role": "assistant", "content": ai_response})
            except UnicodeEncodeError:
                print("\r[錯誤] 編碼問題,請使用英文對話")
                messages.pop()  # drop the failed user turn from history
            except Exception as e:
                print(f"\r[錯誤] {str(e)[:100]}")
                messages.pop()  # drop the failed user turn from history
        except KeyboardInterrupt:
            print("\n\n[中斷] 使用 exit 命令正常退出")
            continue
        except EOFError:
            print("\n再見!")
            break


def main():
    """Probe endpoints, let the user pick endpoint and model, start chatting."""
    print("=" * 60)
    print("Llama 內網 API 對話程式")
    print("=" * 60)

    # Probe only the first 3 endpoints (the known-tested ones).
    print("\n正在檢查可用端點...")
    available = []
    for endpoint in ENDPOINTS[:3]:
        print(f" 測試 {endpoint}...", end="", flush=True)
        if test_endpoint(endpoint):
            print(" [OK]")
            available.append(endpoint)
        else:
            print(" [失敗]")

    if not available:
        print("\n[錯誤] 沒有可用的端點")
        sys.exit(1)

    # Endpoint selection (auto-pick when only one is reachable).
    if len(available) == 1:
        selected_endpoint = available[0]
        print(f"\n使用端點: {selected_endpoint}")
    else:
        print(f"\n找到 {len(available)} 個可用端點:")
        for i, ep in enumerate(available, 1):
            print(f" {i}. {ep}")
        print("\n選擇端點 (預設: 1): ", end="")
        choice = input().strip()
        if choice and choice.isdigit() and 1 <= int(choice) <= len(available):
            selected_endpoint = available[int(choice) - 1]
        else:
            selected_endpoint = available[0]

    # Model selection (defaults to the first model on invalid input).
    print("\n可用模型:")
    for i, model in enumerate(MODELS, 1):
        print(f" {i}. {model}")
    print("\n選擇模型 (預設: 1): ", end="")
    choice = input().strip()
    # Generalized: bounds-check against len(MODELS) instead of ['1', '2', '3'].
    if choice.isdigit() and 1 <= int(choice) <= len(MODELS):
        selected_model = MODELS[int(choice) - 1]
    else:
        selected_model = MODELS[0]

    chat_session(selected_endpoint, selected_model)


if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        print("\n\n程式已退出")
    except Exception as e:
        print(f"\n[錯誤] {e}")
        sys.exit(1)