#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Debug the complete translation flow to find where translations are lost."""
import sys
import os
from pathlib import Path

# Fix encoding for Windows console
if sys.stdout.encoding != 'utf-8':
    sys.stdout.reconfigure(encoding='utf-8')
if sys.stderr.encoding != 'utf-8':
    sys.stderr.reconfigure(encoding='utf-8')

sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'app'))

from app import create_app
from app.services.document_processor import DocumentProcessor
from app.services.dify_client import DifyClient


def debug_translation_flow():
    """Debug the complete translation flow."""
    app = create_app()

    with app.app_context():
        # Use the actual job file
        job_file_path = r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\8cada04e-da42-4416-af46-f01cca5a452f\original_-OR026_8cada04e.docx"

        if not Path(job_file_path).exists():
            print(f"ERROR: Job file does not exist: {job_file_path}")
            return

        print("=== DEBUGGING TRANSLATION FLOW ===")
        print(f"File: {job_file_path}")

        # Step 1: Extract segments and filter down to the translatable ones
        print("\n1. EXTRACTING SEGMENTS...")
        processor = DocumentProcessor()
        segments = processor.extract_docx_segments(job_file_path)

        translatable_segments = [
            seg for seg in segments
            if processor.should_translate_text(seg.text, 'auto')
        ]

        print(f"Total segments: {len(segments)}")
        print(f"Translatable segments: {len(translatable_segments)}")
        print("First 3 translatable segments:")
        for i, seg in enumerate(translatable_segments[:3]):
            print(f"  {i + 1}. {repr(seg.text[:50])}")

        # Step 2: Test Dify translation on the first few segments
        print("\n2. TESTING DIFY TRANSLATIONS...")
        dify_client = DifyClient()
        translation_map = {}
        target_languages = ['en', 'vi']

        for target_lang in target_languages:
            print(f"\nTesting translation to {target_lang}:")
            for seg in translatable_segments[:3]:  # Test first 3
                try:
                    print(f"  Translating: {repr(seg.text)}")
                    result = dify_client.translate_text(
                        text=seg.text,
                        source_language='zh-cn',
                        target_language=target_lang,
                        user_id=1,
                        job_id=1
                    )
                    translated_text = result.get('translated_text', '')
                    translation_map[(target_lang, seg.text)] = translated_text
                    print(f"  Result: {repr(translated_text)}")
                    # A translation "succeeded" if it is non-empty and differs from the source
                    print(f"  Success: {bool(translated_text.strip() and translated_text != seg.text)}")
                except Exception as e:
                    print(f"  ERROR: {e}")
                    translation_map[(target_lang, seg.text)] = f"[ERROR] {seg.text}"

        # Step 3: Test translation insertion
        print("\n3. TESTING TRANSLATION INSERTION...")
        print(f"Translation map entries: {len(translation_map)}")
        for (lang, source), value in list(translation_map.items())[:6]:
            print(f"  {lang} | {repr(source[:30])} -> {repr(value[:30])}")

        # Debug: Check which segments will be matched
        print("\n3.1. SEGMENT MATCHING DEBUG...")
        target_langs_for_test = ['en']
        matched_count = 0
        for i, seg in enumerate(segments[:10]):  # Check first 10 segments
            has_translation = any((tgt, seg.text) in translation_map for tgt in target_langs_for_test)
            status = "MATCH" if has_translation else "NO MATCH"
            print(f"  Segment {i + 1}: {status} | {repr(seg.text[:40])}")
            if has_translation:
                matched_count += 1
                for tgt in target_langs_for_test:
                    if (tgt, seg.text) in translation_map:
                        translation = translation_map[(tgt, seg.text)]
                        print(f"    -> {tgt}: {repr(translation[:40])}")

        print(f"Segments that will match: {matched_count}/10 (in first 10)")
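
        # NOTE (inferred from how translation_map is built in step 2): the map is
        # keyed on the exact (target_lang, seg.text) tuple, so a segment only
        # counts as a MATCH when its text is byte-identical to what was sent to
        # Dify. Any whitespace or normalization difference between extraction and
        # insertion will surface here as NO MATCH even though a translation exists.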

        # Step 4: Check translation cache for real job data
        print("\n4. CHECKING TRANSLATION CACHE...")
        from app.models.cache import TranslationCache

        # Check if there are any cached translations for the segments
        cache_hits = 0
        cache_misses = 0
        for seg in translatable_segments[:5]:  # Check first 5
            for target_lang in ['en', 'vi']:
                cached = TranslationCache.get_translation(
                    text=seg.text,
                    source_language='zh-cn',
                    target_language=target_lang
                )
                if cached:
                    print(f"  CACHE HIT: {target_lang} | {repr(seg.text[:30])} -> {repr(cached[:30])}")
                    cache_hits += 1
                else:
                    cache_misses += 1

        print(f"Cache hits: {cache_hits}, Cache misses: {cache_misses}")

        # Step 5: Create a test output file and verify it contains translations
        output_path = str(Path(job_file_path).parent / "flow_debug_translated.docx")

        try:
            ok_count, skip_count = processor.insert_docx_translations(
                file_path=job_file_path,
                segments=segments,
                translation_map=translation_map,
                target_languages=['en'],  # Test with one language first
                output_path=output_path
            )
            print(f"Translation insertion: {ok_count} OK, {skip_count} skipped")

            if Path(output_path).exists():
                print(f"✅ Output file created: {Path(output_path).stat().st_size} bytes")

                # Verify the output contains translations
                test_segments = processor.extract_docx_segments(output_path)
                print(f"Output file segments: {len(test_segments)}")

                # Look for English words as rough evidence that translations were inserted
                translation_evidence = []
                for seg in test_segments:
                    if any(word in seg.text.lower() for word in ['purpose', 'equipment', 'maintenance', 'check']):
                        translation_evidence.append(seg.text[:50])

                print(f"Translation evidence found: {len(translation_evidence)} segments")
                for evidence in translation_evidence[:3]:
                    print(f"  - {repr(evidence)}")
            else:
                print("❌ Output file was not created")

        except Exception as e:
            print(f"ERROR during insertion: {e}")
            import traceback
            traceback.print_exc()


if __name__ == "__main__":
    debug_translation_flow()
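
# --- Hedged sketch, not part of the original debug flow ---
# If step 3.1 keeps reporting NO MATCH for segments that were clearly translated,
# one hypothesis worth testing is that exact-text keys are too strict. The helper
# below (name and approach are my own, not from DocumentProcessor) rebuilds the
# map with whitespace-collapsed source text so lookups tolerate spacing
# differences introduced between extraction and insertion. Lookups would then
# need the same " ".join(seg.text.split()) normalization on the segment side.
def build_whitespace_tolerant_map(translation_map):
    """Return a copy of translation_map keyed on (lang, space-collapsed text)."""
    tolerant = {}
    for (lang, source), translated in translation_map.items():
        tolerant[(lang, " ".join(source.split()))] = translated
    return tolerant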