pj_llama/llama_chat.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Llama 內網 API 對話程式
支援多個端點和模型選擇
"""

from openai import OpenAI
import sys
import re

# API 配置
API_KEY = "paVrIT+XU1NhwCAOb0X4aYi75QKogK5YNMGvQF1dCyo="

# 可用端點 (前 3 個已測試可用)
ENDPOINTS = [
    "http://192.168.0.6:21180/v1",
    "http://192.168.0.6:21181/v1",
    "http://192.168.0.6:21182/v1",
    "http://192.168.0.6:21183/v1"
]

# 模型列表
MODELS = [
    "gpt-oss-120b",
    "deepseek-r1-671b",
    "qwen3-embedding-8b"
]

def clean_response(text):
    """清理 AI 回應中的特殊標記"""
    # 移除思考標記
    if "<think>" in text:
        text = re.sub(r'<think>.*?</think>', '', text, flags=re.DOTALL)

    # 移除 channel 標記
    if "<|channel|>" in text:
        parts = text.split("<|message|>")
        if len(parts) > 1:
            text = parts[-1]

    # 移除結束標記
    text = text.replace("<|end|>", "").replace("<|start|>", "")

    # 清理多餘空白
    text = text.strip()

    return text

def test_endpoint(endpoint):
    """測試端點是否可用"""
    try:
        client = OpenAI(api_key=API_KEY, base_url=endpoint)
        response = client.chat.completions.create(
            model="gpt-oss-120b",
            messages=[{"role": "user", "content": "Hi"}],
            max_tokens=10,
            timeout=5
        )
        return True
    except:
        return False

def chat_session(endpoint, model):
    """對話主程式"""
    print("\n" + "="*60)
    print("Llama AI 對話系統")
    print("="*60)
    print(f"端點: {endpoint}")
    print(f"模型: {model}")
    print("\n指令:")
    print("  exit/quit - 結束對話")
    print("  clear - 清空對話歷史")
    print("  model - 切換模型")
    print("-"*60)

    client = OpenAI(api_key=API_KEY, base_url=endpoint)
    messages = []

    while True:
        try:
            user_input = input("\n你: ").strip()

            if not user_input:
                continue

            if user_input.lower() in ['exit', 'quit']:
                print("再見！")
                break

            if user_input.lower() == 'clear':
                messages = []
                print("[系統] 對話歷史已清空")
                continue

            if user_input.lower() == 'model':
                print("\n可用模型:")
                for i, m in enumerate(MODELS, 1):
                    print(f"  {i}. {m}")
                choice = input("選擇 (1-3): ").strip()
                if choice in ['1', '2', '3']:
                    model = MODELS[int(choice)-1]
                    print(f"[系統] 已切換到 {model}")
                continue

            messages.append({"role": "user", "content": user_input})

            print("\nAI 思考中...", end="", flush=True)

            try:
                response = client.chat.completions.create(
                    model=model,
                    messages=messages,
                    temperature=0.7,
                    max_tokens=1000
                )

                ai_response = response.choices[0].message.content
                ai_response = clean_response(ai_response)

                print("\r" + " "*20 + "\r", end="")  # 清除 "思考中..."
                print(f"AI: {ai_response}")

                messages.append({"role": "assistant", "content": ai_response})

            except UnicodeEncodeError:
                print("\r[錯誤] 編碼問題，請使用英文對話")
                messages.pop()  # 移除最後的用戶訊息
            except Exception as e:
                print(f"\r[錯誤] {str(e)[:100]}")
                messages.pop()  # 移除最後的用戶訊息

        except KeyboardInterrupt:
            print("\n\n[中斷] 使用 exit 命令正常退出")
            continue
        except EOFError:
            print("\n再見！")
            break

def main():
    print("="*60)
    print("Llama 內網 API 對話程式")
    print("="*60)

    # 測試端點
    print("\n正在檢查可用端點...")
    available = []
    for i, endpoint in enumerate(ENDPOINTS[:3], 1):  # 只測試前3個
        print(f"  測試 {endpoint}...", end="", flush=True)
        if test_endpoint(endpoint):
            print(" [OK]")
            available.append(endpoint)
        else:
            print(" [失敗]")

    if not available:
        print("\n[錯誤] 沒有可用的端點")
        sys.exit(1)

    # 選擇端點
    if len(available) == 1:
        selected_endpoint = available[0]
        print(f"\n使用端點: {selected_endpoint}")
    else:
        print(f"\n找到 {len(available)} 個可用端點:")
        for i, ep in enumerate(available, 1):
            print(f"  {i}. {ep}")
        print("\n選擇端點 (預設: 1): ", end="")
        choice = input().strip()
        if choice and choice.isdigit() and 1 <= int(choice) <= len(available):
            selected_endpoint = available[int(choice)-1]
        else:
            selected_endpoint = available[0]

    # 選擇模型
    print("\n可用模型:")
    for i, model in enumerate(MODELS, 1):
        print(f"  {i}. {model}")
    print("\n選擇模型 (預設: 1): ", end="")
    choice = input().strip()
    if choice in ['1', '2', '3']:
        selected_model = MODELS[int(choice)-1]
    else:
        selected_model = MODELS[0]

    # 開始對話
    chat_session(selected_endpoint, selected_model)

if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        print("\n\n程式已退出")
    except Exception as e:
        print(f"\n[錯誤] {e}")
        sys.exit(1)