From 8a929936ad01fb7ece8019d261bbba5b1a372cd6 Mon Sep 17 00:00:00 2001 From: aken1023 Date: Fri, 19 Sep 2025 21:44:02 +0800 Subject: [PATCH] Initial commit with Llama API client and docs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add Python scripts for Llama API chat clients, endpoint testing, and quick tests. Include documentation (README, CONTRIBUTING, 操作指南), license, and .gitignore. Supports multiple endpoints and models for OpenAI-compatible Llama API usage. --- .claude/settings.local.json | 15 ++ .gitignore | 102 +++++++++++++ CONTRIBUTING.md | 196 ++++++++++++++++++++++++ LICENSE | 21 +++ README.md | 201 +++++++++++++++++++++++++ demo_chat.py | 124 +++++++++++++++ llama_chat.py | 196 ++++++++++++++++++++++++ llama_full_api.py | 293 ++++++++++++++++++++++++++++++++++++ llama_test.py | 99 ++++++++++++ local_api_test.py | 243 ++++++++++++++++++++++++++++++ quick_test.py | 54 +++++++ requirements.txt | 1 + simple_llama_test.py | 46 ++++++ test_all_models.py | 143 ++++++++++++++++++ test_with_timeout.py | 111 ++++++++++++++ 使用說明.txt | 33 ++++ 操作指南.md | 181 ++++++++++++++++++++++ 連線參數.txt | 14 ++ 18 files changed, 2073 insertions(+) create mode 100644 .claude/settings.local.json create mode 100644 .gitignore create mode 100644 CONTRIBUTING.md create mode 100644 LICENSE create mode 100644 README.md create mode 100644 demo_chat.py create mode 100644 llama_chat.py create mode 100644 llama_full_api.py create mode 100644 llama_test.py create mode 100644 local_api_test.py create mode 100644 quick_test.py create mode 100644 requirements.txt create mode 100644 simple_llama_test.py create mode 100644 test_all_models.py create mode 100644 test_with_timeout.py create mode 100644 使用說明.txt create mode 100644 操作指南.md create mode 100644 連線參數.txt diff --git a/.claude/settings.local.json b/.claude/settings.local.json new file mode 100644 index 0000000..e4129ed --- /dev/null +++ b/.claude/settings.local.json @@ -0,0 +1,15 @@ +{ + "permissions": { + "allow": [ + "Bash(pip install:*)", + "Bash(python:*)", + "Bash(ping:*)", + "Bash(curl:*)", + "Bash(dir)", + "Bash(git init:*)", + "Bash(git add:*)", + "Bash(git commit:*)" + ], + "defaultMode": "acceptEdits" + } +} \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..4de4d13 --- /dev/null +++ b/.gitignore @@ -0,0 +1,102 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Virtual environments +venv/ +ENV/ +env/ +.venv/ +.env + +# IDE +.vscode/ +.idea/ +*.swp +*.swo +*~ +.DS_Store + +# Project specific +*.log +*.tmp +temp/ +tmp/ +logs/ +output/ + +# API keys and secrets (if stored in separate config) +config.ini +secrets.json +.env.local +.env.production + +# Test outputs +test_results/ +*.test.txt + +# Backup files +*.bak +*.backup +*.old + +# Windows +Thumbs.db +ehthumbs.db +Desktop.ini + +# macOS +.DS_Store +.AppleDouble +.LSOverride + +# Linux +.directory +.Trash-* \ No newline at end of file diff --git a/CONTRIBUTING.md 
b/CONTRIBUTING.md new file mode 100644 index 0000000..291a2e0 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,196 @@ +# Contributing to Llama API Client + +Thank you for your interest in contributing to Llama API Client! This document provides guidelines for contributing to the project. + +## How to Contribute + +### Reporting Bugs + +Before creating bug reports, please check existing issues to avoid duplicates. When creating a bug report, include: + +- A clear and descriptive title +- Steps to reproduce the issue +- Expected behavior +- Actual behavior +- System information (OS, Python version, etc.) +- Error messages or logs + +### Suggesting Enhancements + +Enhancement suggestions are welcome! Please provide: + +- A clear and descriptive title +- Detailed description of the proposed feature +- Use cases and benefits +- Possible implementation approach + +### Pull Requests + +1. **Fork the repository** and create your branch from `main` +2. **Follow the coding style** used in the project +3. **Write clear commit messages** +4. **Add tests** if applicable +5. **Update documentation** if needed +6. **Test your changes** thoroughly + +## Development Setup + +```bash +# Clone your fork +git clone https://github.com/yourusername/llama-api-client.git +cd llama-api-client + +# Create virtual environment +python -m venv venv +source venv/bin/activate # On Windows: venv\Scripts\activate + +# Install dependencies +pip install -r requirements.txt + +# Run tests +python quick_test.py +``` + +## Coding Standards + +### Python Style Guide + +- Follow PEP 8 +- Use meaningful variable names +- Add docstrings to functions and classes +- Keep functions focused and small +- Handle exceptions appropriately + +### Example Code Style + +```python +def clean_response(text: str) -> str: + """ + Clean AI response by removing special markers. + + Args: + text: Raw response text from AI + + Returns: + Cleaned text without special markers + """ + # Implementation here + return cleaned_text +``` + +### Commit Message Format + +Use clear and descriptive commit messages: + +- `feat:` New feature +- `fix:` Bug fix +- `docs:` Documentation changes +- `style:` Code style changes +- `refactor:` Code refactoring +- `test:` Test additions or changes +- `chore:` Maintenance tasks + +Examples: +``` +feat: Add support for new model endpoint +fix: Handle encoding errors in Windows terminals +docs: Update README with troubleshooting section +``` + +## Testing + +### Running Tests + +```bash +# Quick connection test +python quick_test.py + +# Test all models +python test_all_models.py + +# Test specific endpoint +python local_api_test.py +``` + +### Writing Tests + +When adding new features, include appropriate tests: + +```python +def test_endpoint_connection(): + """Test if endpoint is reachable""" + assert test_endpoint({"url": "...", "models": ["..."]}) +``` + +## Documentation + +- Update README.md for user-facing changes +- Update 操作指南.md for Chinese documentation +- Add docstrings to all public functions +- Include usage examples for new features + +## Code Review Process + +1. All submissions require review before merging +2. 
Reviews focus on: + - Code quality and style + - Test coverage + - Documentation completeness + - Performance implications + - Security considerations + +## Areas for Contribution + +### Current Needs + +- [ ] Add retry logic for failed connections +- [ ] Implement connection pooling +- [ ] Add streaming response support +- [ ] Create GUI interface +- [ ] Add conversation export/import +- [ ] Implement rate limiting +- [ ] Add proxy support +- [ ] Create Docker container +- [ ] Add more language examples +- [ ] Improve error messages + +### Future Features + +- Web interface +- Mobile app support +- Voice input/output +- Multi-user support +- Analytics dashboard +- Plugin system + +## Community + +### Communication Channels + +- GitHub Issues: Bug reports and feature requests +- GitHub Discussions: General questions and discussions +- Pull Requests: Code contributions + +### Code of Conduct + +- Be respectful and inclusive +- Welcome newcomers +- Provide constructive feedback +- Focus on what is best for the community +- Show empathy towards others + +## Questions? + +If you have questions about contributing, feel free to: + +1. Open an issue with the `question` label +2. Check existing documentation +3. Review closed issues for similar questions + +## License + +By contributing, you agree that your contributions will be licensed under the MIT License. + +--- + +Thank you for contributing to Llama API Client! 🚀 \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..5acfa45 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 Llama API Client Contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..b737bd8 --- /dev/null +++ b/README.md @@ -0,0 +1,201 @@ +# Llama API Client + +A Python client for connecting to Llama AI models through OpenAI-compatible API endpoints. 
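+
+The bundled clients probe several endpoints and fall back to the next one automatically. As a taste, here is a minimal sketch of that idea (`first_working_client` is a hypothetical helper written for this README; the key, endpoint URLs, and model name are the ones documented below, so adjust them for your own network):
+
+```python
+from openai import OpenAI
+
+API_KEY = "paVrIT+XU1NhwCAOb0X4aYi75QKogK5YNMGvQF1dCyo="
+
+# Internal endpoints, tried in order until one answers (see the tables below).
+ENDPOINTS = [
+    "http://192.168.0.6:21180/v1",
+    "http://192.168.0.6:21181/v1",
+    "http://192.168.0.6:21182/v1",
+]
+
+def first_working_client() -> OpenAI:
+    """Return a client bound to the first endpoint that answers a tiny probe."""
+    for url in ENDPOINTS:
+        client = OpenAI(api_key=API_KEY, base_url=url)
+        try:
+            client.chat.completions.create(
+                model="gpt-oss-120b",
+                messages=[{"role": "user", "content": "ping"}],
+                max_tokens=5,
+                timeout=5,
+            )
+            return client
+        except Exception:
+            continue  # endpoint offline or unreachable; try the next one
+    raise RuntimeError("No endpoint reachable")
+```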
+ +## Features + +- 🌐 Support for both internal network and external API endpoints +- 🤖 Multiple model support (GPT-OSS-120B, DeepSeek-R1-671B, Qwen3-Embedding-8B) +- 💬 Interactive chat interface with conversation history +- 🔄 Automatic endpoint testing and failover +- 🧹 Automatic response cleaning (removes thinking tags and special markers) +- 📝 Full conversation context management + +## Quick Start + +### Installation + +```bash +# Clone the repository +git clone https://github.com/yourusername/llama-api-client.git +cd llama-api-client + +# Install dependencies +pip install -r requirements.txt +``` + +### Basic Usage + +```python +from openai import OpenAI + +# Configure API +API_KEY = "paVrIT+XU1NhwCAOb0X4aYi75QKogK5YNMGvQF1dCyo=" +BASE_URL = "http://192.168.0.6:21180/v1" + +# Create client +client = OpenAI(api_key=API_KEY, base_url=BASE_URL) + +# Send request +response = client.chat.completions.create( + model="gpt-oss-120b", + messages=[{"role": "user", "content": "Hello!"}], + temperature=0.7, + max_tokens=200 +) + +print(response.choices[0].message.content) +``` + +### Run Interactive Chat + +```bash +# Full-featured chat with all endpoints +python llama_full_api.py + +# Internal network only +python llama_chat.py + +# Quick test +python quick_test.py +``` + +## Available Endpoints + +### Internal Network (Tested & Working ✅) + +| Endpoint | URL | Status | +|----------|-----|--------| +| Internal 1 | `http://192.168.0.6:21180/v1` | ✅ Working | +| Internal 2 | `http://192.168.0.6:21181/v1` | ✅ Working | +| Internal 3 | `http://192.168.0.6:21182/v1` | ✅ Working | +| Internal 4 | `http://192.168.0.6:21183/v1` | ❌ Error 500 | + +### External Network + +| Endpoint | URL | Status | +|----------|-----|--------| +| GPT-OSS | `https://llama.theaken.com/v1/gpt-oss-120b` | 🔄 Pending | +| DeepSeek | `https://llama.theaken.com/v1/deepseek-r1-671b` | 🔄 Pending | +| General | `https://llama.theaken.com/v1` | 🔄 Pending | + +## Project Structure + +``` +llama-api-client/ +├── README.md # This file +├── requirements.txt # Python dependencies +├── 操作指南.md # Chinese operation guide +├── llama_full_api.py # Full-featured chat client +├── llama_chat.py # Internal network chat client +├── local_api_test.py # Endpoint testing tool +├── quick_test.py # Quick connection test +├── test_all_models.py # Model testing script +└── demo_chat.py # Demo chat with fallback +``` + +## Chat Commands + +During chat sessions, you can use these commands: + +- `exit` or `quit` - End the conversation +- `clear` - Clear conversation history +- `model` - Switch between available models + +## Configuration + +### API Key +```python +API_KEY = "paVrIT+XU1NhwCAOb0X4aYi75QKogK5YNMGvQF1dCyo=" +``` + +### Available Models +- `gpt-oss-120b` - GPT Open Source 120B parameters +- `deepseek-r1-671b` - DeepSeek R1 671B parameters +- `qwen3-embedding-8b` - Qwen3 Embedding 8B parameters + +## Troubleshooting + +### Issue: 502 Bad Gateway +**Cause**: External API server is offline +**Solution**: Use internal network endpoints + +### Issue: Connection Error +**Cause**: Not on internal network or incorrect IP +**Solution**: +1. Verify network connectivity: `ping 192.168.0.6` +2. Check firewall settings +3. 
Ensure you're on the same network
+
+### Issue: Encoding Error
+**Cause**: Windows terminal encoding issues
+**Solution**: Use English for conversations or modify terminal encoding
+
+### Issue: Response Contains Special Markers
+**Description**: Responses may contain `<think>`, `<|channel|>` tags
+**Solution**: The client automatically removes these markers
+
+## Response Cleaning
+
+The client automatically removes these special markers from AI responses:
+- `<think>...</think>` - Thinking process
+- `<|channel|>...<|message|>` - Channel markers
+- `<|end|>`, `<|start|>` - End/start markers
+
+## Requirements
+
+- Python 3.7+
+- openai>=1.0.0
+- requests (optional, for direct API calls)
+
+## Development
+
+### Testing Connection
+```bash
+python -c "from openai import OpenAI; client = OpenAI(api_key='YOUR_KEY', base_url='YOUR_URL'); print(client.chat.completions.create(model='gpt-oss-120b', messages=[{'role': 'user', 'content': 'test'}], max_tokens=5).choices[0].message.content)"
+```
+
+### Adding New Endpoints
+Edit the `ENDPOINTS` dictionary in `llama_full_api.py` (its keys are the network groups `內網` and `外網`):
+```python
+ENDPOINTS = {
+    "內網": [
+        {
+            "name": "New Endpoint",
+            "url": "http://new-endpoint/v1",
+            "models": ["gpt-oss-120b"]
+        }
+    ]
+}
+```
+
+## License
+
+MIT License - see the LICENSE file for details.
+
+## Contributing
+
+1. Fork the repository
+2. Create your feature branch (`git checkout -b feature/amazing-feature`)
+3. Commit your changes (`git commit -m 'Add amazing feature'`)
+4. Push to the branch (`git push origin feature/amazing-feature`)
+5. Open a Pull Request
+
+## Support
+
+For issues or questions:
+1. Check [操作指南.md](操作指南.md) for detailed Chinese documentation
+2. Open an issue on GitHub
+3. Contact the API administrator for server-related issues
+
+## Acknowledgments
+
+- Built with the OpenAI Python SDK
+- Compatible with the OpenAI API format
+- Supports multiple Llama model variants
+
+---
+
+**Last Updated**: 2025-09-19
+**Version**: 1.0.0
+**Status**: Internal endpoints working, external endpoints pending
\ No newline at end of file
diff --git a/demo_chat.py b/demo_chat.py
new file mode 100644
index 0000000..7b65343
--- /dev/null
+++ b/demo_chat.py
@@ -0,0 +1,124 @@
+"""
+Llama API 對話程式 (示範版本)
+當 API 伺服器恢復後,可以使用此程式進行對話
+"""
+
+from openai import OpenAI
+import time
+
+# API 設定
+API_KEY = "paVrIT+XU1NhwCAOb0X4aYi75QKogK5YNMGvQF1dCyo="
+BASE_URL = "https://llama.theaken.com/v1"
+
+def simulate_chat():
+    """模擬對話功能(用於展示)"""
+    print("\n" + "="*50)
+    print("Llama AI 對話系統 - 示範模式")
+    print("="*50)
+    print("\n[注意] API 伺服器目前離線,以下為模擬對話")
+    print("當伺服器恢復後,將自動連接真實 API\n")
+
+    # 模擬回應
+    demo_responses = [
+        "你好!我是 Llama AI 助手,很高興為你服務。",
+        "這是一個示範回應。當 API 伺服器恢復後,你將收到真實的 AI 回應。",
+        "我可以回答問題、協助編程、翻譯文字等多種任務。",
+        "請問有什麼我可以幫助你的嗎?"
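+        # (示範模式的回應為固定字串;真實模式 real_chat() 會呼叫實際 API 取得回應)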
+ ] + + response_index = 0 + print("輸入 'exit' 結束對話\n") + + while True: + user_input = input("你: ").strip() + + if user_input.lower() in ['exit', 'quit']: + print("\n再見!") + break + + if not user_input: + continue + + # 模擬思考時間 + print("\nAI 思考中", end="") + for _ in range(3): + time.sleep(0.3) + print(".", end="", flush=True) + print() + + # 顯示模擬回應 + print(f"\nAI: {demo_responses[response_index % len(demo_responses)]}") + response_index += 1 + +def real_chat(): + """實際對話功能(當 API 可用時)""" + client = OpenAI(api_key=API_KEY, base_url=BASE_URL) + + print("\n" + "="*50) + print("Llama AI 對話系統") + print("="*50) + print("\n已連接到 Llama API") + print("輸入 'exit' 結束對話\n") + + messages = [] + + while True: + user_input = input("你: ").strip() + + if user_input.lower() in ['exit', 'quit']: + print("\n再見!") + break + + if not user_input: + continue + + messages.append({"role": "user", "content": user_input}) + + try: + print("\nAI 思考中...") + response = client.chat.completions.create( + model="gpt-oss-120b", + messages=messages, + temperature=0.7, + max_tokens=1000 + ) + + ai_response = response.choices[0].message.content + print(f"\nAI: {ai_response}") + messages.append({"role": "assistant", "content": ai_response}) + + except Exception as e: + print(f"\n[錯誤] {str(e)[:100]}") + print("無法取得回應,請稍後再試") + +def main(): + print("檢查 API 連接狀態...") + + # 嘗試連接 API + try: + client = OpenAI(api_key=API_KEY, base_url=BASE_URL) + + # 快速測試 + response = client.chat.completions.create( + model="gpt-oss-120b", + messages=[{"role": "user", "content": "test"}], + max_tokens=10, + timeout=5 + ) + print("[成功] API 已連接") + real_chat() + + except Exception as e: + error_msg = str(e) + if "502" in error_msg or "Bad gateway" in error_msg: + print("[提示] API 伺服器目前離線 (502 錯誤)") + print("進入示範模式...") + simulate_chat() + else: + print(f"[錯誤] 無法連接: {error_msg[:100]}") + print("\n是否要進入示範模式? 
(y/n): ", end="") + if input().lower() == 'y': + simulate_chat() + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/llama_chat.py b/llama_chat.py new file mode 100644 index 0000000..bad424b --- /dev/null +++ b/llama_chat.py @@ -0,0 +1,196 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +Llama 內網 API 對話程式 +支援多個端點和模型選擇 +""" + +from openai import OpenAI +import sys +import re + +# API 配置 +API_KEY = "paVrIT+XU1NhwCAOb0X4aYi75QKogK5YNMGvQF1dCyo=" + +# 可用端點 (前 3 個已測試可用) +ENDPOINTS = [ + "http://192.168.0.6:21180/v1", + "http://192.168.0.6:21181/v1", + "http://192.168.0.6:21182/v1", + "http://192.168.0.6:21183/v1" +] + +# 模型列表 +MODELS = [ + "gpt-oss-120b", + "deepseek-r1-671b", + "qwen3-embedding-8b" +] + +def clean_response(text): + """清理 AI 回應中的特殊標記""" + # 移除思考標記 + if "" in text: + text = re.sub(r'.*?', '', text, flags=re.DOTALL) + + # 移除 channel 標記 + if "<|channel|>" in text: + parts = text.split("<|message|>") + if len(parts) > 1: + text = parts[-1] + + # 移除結束標記 + text = text.replace("<|end|>", "").replace("<|start|>", "") + + # 清理多餘空白 + text = text.strip() + + return text + +def test_endpoint(endpoint): + """測試端點是否可用""" + try: + client = OpenAI(api_key=API_KEY, base_url=endpoint) + response = client.chat.completions.create( + model="gpt-oss-120b", + messages=[{"role": "user", "content": "Hi"}], + max_tokens=10, + timeout=5 + ) + return True + except: + return False + +def chat_session(endpoint, model): + """對話主程式""" + print("\n" + "="*60) + print("Llama AI 對話系統") + print("="*60) + print(f"端點: {endpoint}") + print(f"模型: {model}") + print("\n指令:") + print(" exit/quit - 結束對話") + print(" clear - 清空對話歷史") + print(" model - 切換模型") + print("-"*60) + + client = OpenAI(api_key=API_KEY, base_url=endpoint) + messages = [] + + while True: + try: + user_input = input("\n你: ").strip() + + if not user_input: + continue + + if user_input.lower() in ['exit', 'quit']: + print("再見!") + break + + if user_input.lower() == 'clear': + messages = [] + print("[系統] 對話歷史已清空") + continue + + if user_input.lower() == 'model': + print("\n可用模型:") + for i, m in enumerate(MODELS, 1): + print(f" {i}. {m}") + choice = input("選擇 (1-3): ").strip() + if choice in ['1', '2', '3']: + model = MODELS[int(choice)-1] + print(f"[系統] 已切換到 {model}") + continue + + messages.append({"role": "user", "content": user_input}) + + print("\nAI 思考中...", end="", flush=True) + + try: + response = client.chat.completions.create( + model=model, + messages=messages, + temperature=0.7, + max_tokens=1000 + ) + + ai_response = response.choices[0].message.content + ai_response = clean_response(ai_response) + + print("\r" + " "*20 + "\r", end="") # 清除 "思考中..." 
+ print(f"AI: {ai_response}") + + messages.append({"role": "assistant", "content": ai_response}) + + except UnicodeEncodeError: + print("\r[錯誤] 編碼問題,請使用英文對話") + messages.pop() # 移除最後的用戶訊息 + except Exception as e: + print(f"\r[錯誤] {str(e)[:100]}") + messages.pop() # 移除最後的用戶訊息 + + except KeyboardInterrupt: + print("\n\n[中斷] 使用 exit 命令正常退出") + continue + except EOFError: + print("\n再見!") + break + +def main(): + print("="*60) + print("Llama 內網 API 對話程式") + print("="*60) + + # 測試端點 + print("\n正在檢查可用端點...") + available = [] + for i, endpoint in enumerate(ENDPOINTS[:3], 1): # 只測試前3個 + print(f" 測試 {endpoint}...", end="", flush=True) + if test_endpoint(endpoint): + print(" [OK]") + available.append(endpoint) + else: + print(" [失敗]") + + if not available: + print("\n[錯誤] 沒有可用的端點") + sys.exit(1) + + # 選擇端點 + if len(available) == 1: + selected_endpoint = available[0] + print(f"\n使用端點: {selected_endpoint}") + else: + print(f"\n找到 {len(available)} 個可用端點:") + for i, ep in enumerate(available, 1): + print(f" {i}. {ep}") + print("\n選擇端點 (預設: 1): ", end="") + choice = input().strip() + if choice and choice.isdigit() and 1 <= int(choice) <= len(available): + selected_endpoint = available[int(choice)-1] + else: + selected_endpoint = available[0] + + # 選擇模型 + print("\n可用模型:") + for i, model in enumerate(MODELS, 1): + print(f" {i}. {model}") + print("\n選擇模型 (預設: 1): ", end="") + choice = input().strip() + if choice in ['1', '2', '3']: + selected_model = MODELS[int(choice)-1] + else: + selected_model = MODELS[0] + + # 開始對話 + chat_session(selected_endpoint, selected_model) + +if __name__ == "__main__": + try: + main() + except KeyboardInterrupt: + print("\n\n程式已退出") + except Exception as e: + print(f"\n[錯誤] {e}") + sys.exit(1) \ No newline at end of file diff --git a/llama_full_api.py b/llama_full_api.py new file mode 100644 index 0000000..b05bbcc --- /dev/null +++ b/llama_full_api.py @@ -0,0 +1,293 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +Llama API 完整對話程式 +支援內網和外網端點 +""" + +from openai import OpenAI +import requests +import sys +import re +from datetime import datetime + +# API 金鑰 +API_KEY = "paVrIT+XU1NhwCAOb0X4aYi75QKogK5YNMGvQF1dCyo=" + +# API 端點配置 +ENDPOINTS = { + "內網": [ + { + "name": "內網端點 1 (21180)", + "url": "http://192.168.0.6:21180/v1", + "models": ["gpt-oss-120b", "deepseek-r1-671b", "qwen3-embedding-8b"] + }, + { + "name": "內網端點 2 (21181)", + "url": "http://192.168.0.6:21181/v1", + "models": ["gpt-oss-120b", "deepseek-r1-671b", "qwen3-embedding-8b"] + }, + { + "name": "內網端點 3 (21182)", + "url": "http://192.168.0.6:21182/v1", + "models": ["gpt-oss-120b", "deepseek-r1-671b", "qwen3-embedding-8b"] + } + ], + "外網": [ + { + "name": "外網 GPT-OSS-120B", + "url": "https://llama.theaken.com/v1/gpt-oss-120b", + "models": ["gpt-oss-120b"] + }, + { + "name": "外網 DeepSeek-R1-671B", + "url": "https://llama.theaken.com/v1/deepseek-r1-671b", + "models": ["deepseek-r1-671b"] + }, + { + "name": "外網通用端點", + "url": "https://llama.theaken.com/v1", + "models": ["gpt-oss-120b", "deepseek-r1-671b", "qwen3-embedding-8b"] + } + ] +} + +def clean_response(text): + """清理 AI 回應中的特殊標記""" + # 移除思考標記 + if "" in text: + text = re.sub(r'.*?', '', text, flags=re.DOTALL) + + # 移除 channel 標記 + if "<|channel|>" in text: + parts = text.split("<|message|>") + if len(parts) > 1: + text = parts[-1] + + # 移除結束標記 + text = text.replace("<|end|>", "").replace("<|start|>", "") + + # 清理多餘空白 + text = text.strip() + + return text + +def test_endpoint(endpoint_info): + """測試端點是否可用""" + url = endpoint_info["url"] + model = 
endpoint_info["models"][0] # 使用第一個模型測試 + + try: + # 對於特定模型的 URL,需要特殊處理 + if "/gpt-oss-120b" in url or "/deepseek-r1-671b" in url: + # 這些可能是特定模型的端點 + base_url = url.rsplit("/", 1)[0] # 移除模型名稱部分 + else: + base_url = url + + client = OpenAI(api_key=API_KEY, base_url=base_url) + response = client.chat.completions.create( + model=model, + messages=[{"role": "user", "content": "test"}], + max_tokens=5, + timeout=8 + ) + return True + except Exception as e: + # 也嘗試使用 requests 直接測試 + try: + headers = { + "Authorization": f"Bearer {API_KEY}", + "Content-Type": "application/json" + } + + test_url = f"{url}/chat/completions" if not url.endswith("/chat/completions") else url + data = { + "model": model, + "messages": [{"role": "user", "content": "test"}], + "max_tokens": 5 + } + + response = requests.post(test_url, headers=headers, json=data, timeout=8) + return response.status_code == 200 + except: + return False + +def test_all_endpoints(): + """測試所有端點""" + print("\n" + "="*60) + print("測試 API 端點連接") + print("="*60) + + available_endpoints = [] + + # 測試內網端點 + print("\n[內網端點測試]") + for endpoint in ENDPOINTS["內網"]: + print(f" 測試 {endpoint['name']}...", end="", flush=True) + if test_endpoint(endpoint): + print(" [OK]") + available_endpoints.append(("內網", endpoint)) + else: + print(" [FAIL]") + + # 測試外網端點 + print("\n[外網端點測試]") + for endpoint in ENDPOINTS["外網"]: + print(f" 測試 {endpoint['name']}...", end="", flush=True) + if test_endpoint(endpoint): + print(" [OK]") + available_endpoints.append(("外網", endpoint)) + else: + print(" [FAIL]") + + return available_endpoints + +def chat_session(endpoint_info): + """對話主程式""" + print("\n" + "="*60) + print("Llama AI 對話系統") + print("="*60) + print(f"端點: {endpoint_info['name']}") + print(f"URL: {endpoint_info['url']}") + print(f"可用模型: {', '.join(endpoint_info['models'])}") + print("\n指令:") + print(" exit/quit - 結束對話") + print(" clear - 清空對話歷史") + print(" model - 切換模型") + print("-"*60) + + # 處理 URL + url = endpoint_info["url"] + if "/gpt-oss-120b" in url or "/deepseek-r1-671b" in url: + base_url = url.rsplit("/", 1)[0] + else: + base_url = url + + client = OpenAI(api_key=API_KEY, base_url=base_url) + + # 選擇初始模型 + if len(endpoint_info['models']) == 1: + current_model = endpoint_info['models'][0] + else: + print("\n選擇模型:") + for i, model in enumerate(endpoint_info['models'], 1): + print(f" {i}. {model}") + choice = input("選擇 (預設: 1): ").strip() + if choice.isdigit() and 1 <= int(choice) <= len(endpoint_info['models']): + current_model = endpoint_info['models'][int(choice)-1] + else: + current_model = endpoint_info['models'][0] + + print(f"\n使用模型: {current_model}") + messages = [] + + while True: + try: + user_input = input("\n你: ").strip() + + if not user_input: + continue + + if user_input.lower() in ['exit', 'quit']: + print("再見!") + break + + if user_input.lower() == 'clear': + messages = [] + print("[系統] 對話歷史已清空") + continue + + if user_input.lower() == 'model': + if len(endpoint_info['models']) == 1: + print(f"[系統] 此端點只支援 {endpoint_info['models'][0]}") + else: + print("\n可用模型:") + for i, m in enumerate(endpoint_info['models'], 1): + print(f" {i}. 
{m}") + choice = input("選擇: ").strip() + if choice.isdigit() and 1 <= int(choice) <= len(endpoint_info['models']): + current_model = endpoint_info['models'][int(choice)-1] + print(f"[系統] 已切換到 {current_model}") + continue + + messages.append({"role": "user", "content": user_input}) + + print("\nAI 思考中...", end="", flush=True) + + try: + response = client.chat.completions.create( + model=current_model, + messages=messages, + temperature=0.7, + max_tokens=1000 + ) + + ai_response = response.choices[0].message.content + ai_response = clean_response(ai_response) + + print("\r" + " "*20 + "\r", end="") + print(f"AI: {ai_response}") + + messages.append({"role": "assistant", "content": ai_response}) + + except Exception as e: + print(f"\r[錯誤] {str(e)[:100]}") + messages.pop() + + except KeyboardInterrupt: + print("\n\n[中斷] 使用 exit 命令正常退出") + continue + except EOFError: + print("\n再見!") + break + +def main(): + print("="*60) + print("Llama API 完整對話程式") + print(f"時間: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}") + print("="*60) + + # 測試所有端點 + available = test_all_endpoints() + + if not available: + print("\n[錯誤] 沒有可用的端點") + print("\n可能的原因:") + print("1. 網路連接問題") + print("2. API 服務離線") + print("3. 防火牆阻擋") + sys.exit(1) + + # 顯示可用端點 + print("\n" + "="*60) + print(f"找到 {len(available)} 個可用端點:") + print("="*60) + + for i, (network_type, endpoint) in enumerate(available, 1): + print(f"{i}. [{network_type}] {endpoint['name']}") + print(f" URL: {endpoint['url']}") + print(f" 模型: {', '.join(endpoint['models'])}") + + # 選擇端點 + print("\n選擇端點 (預設: 1): ", end="") + choice = input().strip() + + if choice.isdigit() and 1 <= int(choice) <= len(available): + selected = available[int(choice)-1][1] + else: + selected = available[0][1] + + # 開始對話 + chat_session(selected) + +if __name__ == "__main__": + try: + main() + except KeyboardInterrupt: + print("\n\n程式已退出") + except Exception as e: + print(f"\n[錯誤] {e}") + import traceback + traceback.print_exc() + sys.exit(1) \ No newline at end of file diff --git a/llama_test.py b/llama_test.py new file mode 100644 index 0000000..62150a3 --- /dev/null +++ b/llama_test.py @@ -0,0 +1,99 @@ +from openai import OpenAI +import sys + +API_KEY = "paVrIT+XU1NhwCAOb0X4aYi75QKogK5YNMGvQF1dCyo=" +BASE_URL = "https://llama.theaken.com/v1" + +AVAILABLE_MODELS = [ + "gpt-oss-120b", + "deepseek-r1-671b", + "qwen3-embedding-8b" +] + +def chat_with_llama(model_name="gpt-oss-120b"): + client = OpenAI( + api_key=API_KEY, + base_url=BASE_URL + ) + + print(f"\n使用模型: {model_name}") + print("-" * 50) + print("輸入 'exit' 或 'quit' 來結束對話") + print("-" * 50) + + messages = [] + + while True: + user_input = input("\n你: ").strip() + + if user_input.lower() in ['exit', 'quit']: + print("對話結束") + break + + if not user_input: + continue + + messages.append({"role": "user", "content": user_input}) + + try: + response = client.chat.completions.create( + model=model_name, + messages=messages, + temperature=0.7, + max_tokens=2000 + ) + + assistant_reply = response.choices[0].message.content + print(f"\nAI: {assistant_reply}") + + messages.append({"role": "assistant", "content": assistant_reply}) + + except Exception as e: + print(f"\n錯誤: {str(e)}") + print("請檢查網路連接和 API 設定") + +def test_connection(): + print("測試連接到 Llama API...") + + client = OpenAI( + api_key=API_KEY, + base_url=BASE_URL + ) + + try: + response = client.chat.completions.create( + model="gpt-oss-120b", + messages=[{"role": "user", "content": "Hello, this is a test message."}], + max_tokens=50 + ) + print("[OK] 連接成功!") + print(f"測試回應: 
{response.choices[0].message.content}") + return True + except Exception as e: + print(f"[ERROR] 連接失敗: {str(e)[:200]}") + return False + +def main(): + print("=" * 50) + print("Llama 模型對話測試程式") + print("=" * 50) + + print("\n可用的模型:") + for i, model in enumerate(AVAILABLE_MODELS, 1): + print(f" {i}. {model}") + + if test_connection(): + print("\n選擇要使用的模型 (輸入數字 1-3,預設: 1):") + choice = input().strip() + + if choice == "2": + model = AVAILABLE_MODELS[1] + elif choice == "3": + model = AVAILABLE_MODELS[2] + else: + model = AVAILABLE_MODELS[0] + + chat_with_llama(model) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/local_api_test.py b/local_api_test.py new file mode 100644 index 0000000..476e726 --- /dev/null +++ b/local_api_test.py @@ -0,0 +1,243 @@ +""" +內網 Llama API 測試程式 +使用 OpenAI 相容格式連接到本地 API 端點 +""" + +from openai import OpenAI +import requests +import json +from datetime import datetime + +# API 配置 +API_KEY = "paVrIT+XU1NhwCAOb0X4aYi75QKogK5YNMGvQF1dCyo=" + +# 內網端點列表 +LOCAL_ENDPOINTS = [ + "http://192.168.0.6:21180/v1", + "http://192.168.0.6:21181/v1", + "http://192.168.0.6:21182/v1", + "http://192.168.0.6:21183/v1" +] + +# 可用模型 +MODELS = [ + "gpt-oss-120b", + "deepseek-r1-671b", + "qwen3-embedding-8b" +] + +def test_endpoint_with_requests(endpoint, model="gpt-oss-120b"): + """使用 requests 測試端點""" + print(f"\n[使用 requests 測試]") + print(f"端點: {endpoint}") + print(f"模型: {model}") + + headers = { + "Authorization": f"Bearer {API_KEY}", + "Content-Type": "application/json" + } + + data = { + "model": model, + "messages": [ + {"role": "user", "content": "Say 'Hello, I am working!' if you can see this."} + ], + "temperature": 0.7, + "max_tokens": 50 + } + + try: + response = requests.post( + f"{endpoint}/chat/completions", + headers=headers, + json=data, + timeout=10 + ) + + print(f"HTTP 狀態碼: {response.status_code}") + + if response.status_code == 200: + result = response.json() + if 'choices' in result: + content = result['choices'][0]['message']['content'] + print(f"[SUCCESS] AI 回應: {content}") + return True + else: + print("[ERROR] 回應格式不正確") + else: + print(f"[ERROR] HTTP {response.status_code}") + if response.status_code != 502: # 避免顯示 HTML 錯誤頁 + print(f"詳情: {response.text[:200]}") + + except requests.exceptions.ConnectTimeout: + print("[TIMEOUT] 連接超時") + except requests.exceptions.ConnectionError: + print("[CONNECTION ERROR] 無法連接到端點") + except Exception as e: + print(f"[ERROR] {str(e)[:100]}") + + return False + +def test_endpoint_with_openai(endpoint, model="gpt-oss-120b"): + """使用 OpenAI SDK 測試端點""" + print(f"\n[使用 OpenAI SDK 測試]") + print(f"端點: {endpoint}") + print(f"模型: {model}") + + try: + client = OpenAI( + api_key=API_KEY, + base_url=endpoint, + timeout=10.0 + ) + + response = client.chat.completions.create( + model=model, + messages=[ + {"role": "user", "content": "Hello, please respond with a simple greeting."} + ], + temperature=0.7, + max_tokens=50 + ) + + content = response.choices[0].message.content + print(f"[SUCCESS] AI 回應: {content}") + return True, client + + except Exception as e: + error_str = str(e) + if "Connection error" in error_str: + print("[CONNECTION ERROR] 無法連接到端點") + elif "timeout" in error_str.lower(): + print("[TIMEOUT] 請求超時") + elif "502" in error_str: + print("[ERROR] 502 Bad Gateway") + else: + print(f"[ERROR] {error_str[:100]}") + + return False, None + +def find_working_endpoint(): + """尋找可用的端點""" + print("="*60) + print(f"內網 API 端點測試 - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}") + print("="*60) + + 
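+    # 依序測試每個內網端點:先用 requests 直接呼叫 /chat/completions,失敗時再以 OpenAI SDK 驗證一次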
working_endpoints = [] + + for endpoint in LOCAL_ENDPOINTS: + print(f"\n測試端點: {endpoint}") + print("-"*40) + + # 先用 requests 快速測試 + if test_endpoint_with_requests(endpoint): + working_endpoints.append(endpoint) + print(f"[OK] 端點 {endpoint} 可用!") + else: + # 再用 OpenAI SDK 測試 + success, _ = test_endpoint_with_openai(endpoint) + if success: + working_endpoints.append(endpoint) + print(f"[OK] 端點 {endpoint} 可用!") + + return working_endpoints + +def interactive_chat(endpoint, model="gpt-oss-120b"): + """互動式對話""" + print(f"\n連接到: {endpoint}") + print(f"使用模型: {model}") + print("="*60) + print("開始對話 (輸入 'exit' 結束)") + print("="*60) + + client = OpenAI( + api_key=API_KEY, + base_url=endpoint + ) + + messages = [] + + while True: + user_input = input("\n你: ").strip() + + if user_input.lower() in ['exit', 'quit']: + print("對話結束") + break + + if not user_input: + continue + + messages.append({"role": "user", "content": user_input}) + + try: + print("\nAI 思考中...") + response = client.chat.completions.create( + model=model, + messages=messages, + temperature=0.7, + max_tokens=1000 + ) + + ai_response = response.choices[0].message.content + print(f"\nAI: {ai_response}") + messages.append({"role": "assistant", "content": ai_response}) + + except Exception as e: + print(f"\n[ERROR] {str(e)[:100]}") + +def main(): + # 尋找可用端點 + working_endpoints = find_working_endpoint() + + print("\n" + "="*60) + print("測試結果總結") + print("="*60) + + if working_endpoints: + print(f"\n找到 {len(working_endpoints)} 個可用端點:") + for i, endpoint in enumerate(working_endpoints, 1): + print(f" {i}. {endpoint}") + + # 選擇端點 + if len(working_endpoints) == 1: + selected_endpoint = working_endpoints[0] + print(f"\n自動選擇唯一可用端點: {selected_endpoint}") + else: + print(f"\n請選擇要使用的端點 (1-{len(working_endpoints)}):") + choice = input().strip() + try: + idx = int(choice) - 1 + if 0 <= idx < len(working_endpoints): + selected_endpoint = working_endpoints[idx] + else: + selected_endpoint = working_endpoints[0] + except: + selected_endpoint = working_endpoints[0] + + # 選擇模型 + print("\n可用模型:") + for i, model in enumerate(MODELS, 1): + print(f" {i}. {model}") + + print("\n請選擇模型 (1-3, 預設: 1):") + choice = input().strip() + if choice == "2": + selected_model = MODELS[1] + elif choice == "3": + selected_model = MODELS[2] + else: + selected_model = MODELS[0] + + # 開始對話 + interactive_chat(selected_endpoint, selected_model) + + else: + print("\n[ERROR] 沒有找到可用的端點") + print("\n可能的原因:") + print("1. 內網 API 服務未啟動") + print("2. 防火牆阻擋了連接") + print("3. IP 地址或端口設定錯誤") + print("4. 不在同一個網路環境") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/quick_test.py b/quick_test.py new file mode 100644 index 0000000..1b8e3df --- /dev/null +++ b/quick_test.py @@ -0,0 +1,54 @@ +""" +快速測試內網 Llama API +""" + +from openai import OpenAI + +# API 設定 +API_KEY = "paVrIT+XU1NhwCAOb0X4aYi75QKogK5YNMGvQF1dCyo=" +BASE_URL = "http://192.168.0.6:21180/v1" # 使用第一個可用端點 + +def quick_test(): + print("連接到內網 API...") + print(f"端點: {BASE_URL}") + print("-" * 50) + + client = OpenAI( + api_key=API_KEY, + base_url=BASE_URL + ) + + # 測試對話 + test_messages = [ + "你好,請自我介紹", + "1 + 1 等於多少?", + "今天天氣如何?" 
+    ]
+
+    for msg in test_messages:
+        print(f"\n問: {msg}")
+
+        try:
+            response = client.chat.completions.create(
+                model="gpt-oss-120b",
+                messages=[
+                    {"role": "user", "content": msg}
+                ],
+                temperature=0.7,
+                max_tokens=200
+            )
+
+            answer = response.choices[0].message.content
+            # 清理可能的思考標記
+            if "</think>" in answer:
+                answer = answer.split("</think>")[-1].strip()
+            if "<|channel|>" in answer:
+                answer = answer.split("<|message|>")[-1].strip()
+
+            print(f"答: {answer}")
+
+        except Exception as e:
+            print(f"錯誤: {str(e)[:100]}")
+
+if __name__ == "__main__":
+    quick_test()
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..3ceaffc
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1 @@
+openai>=1.0.0
\ No newline at end of file
diff --git a/simple_llama_test.py b/simple_llama_test.py
new file mode 100644
index 0000000..23e7159
--- /dev/null
+++ b/simple_llama_test.py
@@ -0,0 +1,46 @@
+import requests
+import json
+
+API_KEY = "paVrIT+XU1NhwCAOb0X4aYi75QKogK5YNMGvQF1dCyo="
+BASE_URL = "https://llama.theaken.com/v1/chat/completions"
+
+def test_api():
+    headers = {
+        "Authorization": f"Bearer {API_KEY}",
+        "Content-Type": "application/json"
+    }
+
+    data = {
+        "model": "gpt-oss-120b",
+        "messages": [
+            {"role": "user", "content": "Hello, can you respond?"}
+        ],
+        "temperature": 0.7,
+        "max_tokens": 100
+    }
+
+    print("正在測試 API 連接...")
+    print(f"URL: {BASE_URL}")
+    print(f"Model: gpt-oss-120b")
+    print("-" * 50)
+
+    try:
+        response = requests.post(BASE_URL, headers=headers, json=data, timeout=30)
+
+        if response.status_code == 200:
+            result = response.json()
+            print("[成功] API 回應:")
+            print(result['choices'][0]['message']['content'])
+        else:
+            print(f"[錯誤] HTTP {response.status_code}")
+            print(f"回應內容: {response.text[:500]}")
+
+    except requests.exceptions.Timeout:
+        print("[錯誤] 請求超時")
+    except requests.exceptions.ConnectionError:
+        print("[錯誤] 無法連接到伺服器")
+    except Exception as e:
+        print(f"[錯誤] {str(e)}")
+
+if __name__ == "__main__":
+    test_api()
\ No newline at end of file
diff --git a/test_all_models.py b/test_all_models.py
new file mode 100644
index 0000000..b8b030c
--- /dev/null
+++ b/test_all_models.py
@@ -0,0 +1,143 @@
+import requests
+import json
+import time
+
+API_KEY = "paVrIT+XU1NhwCAOb0X4aYi75QKogK5YNMGvQF1dCyo="
+BASE_URL = "https://llama.theaken.com/v1"
+
+MODELS = [
+    "gpt-oss-120b",
+    "deepseek-r1-671b",
+    "qwen3-embedding-8b"
+]
+
+def test_model(model_name):
+    """測試單個模型"""
+    print(f"\n[測試模型: {model_name}]")
+    print("-" * 40)
+
+    headers = {
+        "Authorization": f"Bearer {API_KEY}",
+        "Content-Type": "application/json"
+    }
+
+    # 測試聊天完成端點
+    chat_url = f"{BASE_URL}/chat/completions"
+    data = {
+        "model": model_name,
+        "messages": [
+            {"role": "system", "content": "You are a helpful assistant."},
+            {"role": "user", "content": "Say 'Hello, I am working!' 
if you can see this message."} + ], + "temperature": 0.5, + "max_tokens": 50 + } + + try: + print(f"連接到: {chat_url}") + response = requests.post(chat_url, headers=headers, json=data, timeout=30) + + print(f"HTTP 狀態碼: {response.status_code}") + + if response.status_code == 200: + result = response.json() + if 'choices' in result and len(result['choices']) > 0: + content = result['choices'][0]['message']['content'] + print(f"[SUCCESS] AI 回應: {content}") + return True + else: + print("[ERROR] 回應格式異常") + print(f"回應內容: {json.dumps(result, indent=2)}") + else: + print(f"[ERROR] 錯誤回應") + # 檢查是否是 HTML 錯誤頁面 + if response.text.startswith('`, `<|channel|>` 等 +**處理**:程式已自動過濾這些標記 + +## 六、API 回應格式清理 + +部分模型回應可能包含思考過程標記,程式會自動清理: +- `...` - 思考過程 +- `<|channel|>...<|message|>` - 通道標記 +- `<|end|>`, `<|start|>` - 結束/開始標記 + +## 七、測試結果摘要 + +### 成功測試 +✅ 內網端點 1-3 全部正常運作 +✅ 支援 OpenAI SDK 標準格式 +✅ 可正常進行對話 + +### 待確認 +- 外網端點需等待伺服器恢復 +- DeepSeek 和 Qwen 模型需進一步測試 + +## 八、技術細節 + +### 使用 OpenAI SDK +```python +from openai import OpenAI + +client = OpenAI( + api_key="你的金鑰", + base_url="API端點URL" +) +``` + +### 使用 requests 庫 +```python +import requests + +headers = { + "Authorization": "Bearer 你的金鑰", + "Content-Type": "application/json" +} + +data = { + "model": "gpt-oss-120b", + "messages": [{"role": "user", "content": "你好"}], + "temperature": 0.7, + "max_tokens": 200 +} + +response = requests.post( + "API端點URL/chat/completions", + headers=headers, + json=data +) +``` + +## 九、建議使用方式 + +1. **開發測試**:使用內網端點(速度快、穩定) +2. **生產環境**:配置多個端點自動切換 +3. **對話應用**:使用 llama_full_api.py +4. **API 整合**:參考 quick_test.py 的實現 + +--- + +最後更新:2025-09-19 +測試環境:Windows / Python 3.13 \ No newline at end of file diff --git a/連線參數.txt b/連線參數.txt new file mode 100644 index 0000000..ca453ae --- /dev/null +++ b/連線參數.txt @@ -0,0 +1,14 @@ +可以連接 llama 的模型,ai進行對話 +他的連線資料如下: + +外網連線: +https://llama.theaken.com/v1https://llama.theaken.com/v1/gpt-oss-120b/ +https://llama.theaken.com/v1https://llama.theaken.com/v1/deepseek-r1-671b/ +https://llama.theaken.com/v1https://llama.theaken.com/v1/gpt-oss-120b/ +外網模型路徑: + 1. /gpt-oss-120b/ + 2. /deepseek-r1-671b/ + 3. /qwen3-embedding-8b/ + + +金鑰:paVrIT+XU1NhwCAOb0X4aYi75QKogK5YNMGvQF1dCyo=
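+
+最小使用範例(假設外網伺服器在線;端點與金鑰以上方清單為準,僅供參考):
+
+    from openai import OpenAI
+
+    client = OpenAI(
+        api_key="paVrIT+XU1NhwCAOb0X4aYi75QKogK5YNMGvQF1dCyo=",
+        base_url="https://llama.theaken.com/v1",
+    )
+    resp = client.chat.completions.create(
+        model="gpt-oss-120b",
+        messages=[{"role": "user", "content": "你好"}],
+    )
+    print(resp.choices[0].message.content)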