fix: 修復多頁PDF頁碼分配錯誤和logging配置問題
Critical Bug #1: 多頁PDF頁碼分配錯誤

問題:
- 在處理多頁PDF時,雖然text_regions有正確的頁碼標記
- 但layout_data.elements(表格)和images_metadata(圖片)都保持page=0
- 導致所有頁面的表格和圖片都被錯誤地繪製在第1頁
- 造成嚴重的版面錯誤、元素重疊和位置錯誤

根本原因:
- ocr_service.py (第359-372行) 在累積多頁結果時
- text_regions有添加頁碼:region['page'] = page_num
- 但images_metadata和layout_data.elements沒有更新頁碼
- 它們保持單頁處理時的默認值page=0

修復方案:
- backend/app/services/ocr_service.py (第359-372行)
- 為layout_data.elements中的每個元素添加正確的頁碼
- 為images_metadata中的每個圖片添加正確的頁碼
- 確保多頁PDF的每個元素都有正確的page標記

Critical Bug #2: Logging配置被uvicorn覆蓋

問題:
- uvicorn啟動時會設置自己的logging配置
- 這會覆蓋應用程式的logging.basicConfig()
- 導致應用層的INFO/WARNING/ERROR log完全消失
- 只能看到uvicorn的HTTP請求log和第三方庫的DEBUG log
- 無法診斷PDF生成過程中的問題

修復方案:
- backend/app/main.py (第17-36行)
- 添加force=True參數強制重新配置logging (Python 3.8+)
- 顯式設置root logger的level
- 配置app-specific loggers (app.services.pdf_generator_service等)
- 啟用log propagation確保訊息能傳遞到root logger

其他修復:
- backend/app/services/pdf_generator_service.py
- 將重要的debug logging改為info level (第371, 379, 490, 613行)
  原因:預設log level是INFO,debug log不會顯示
- 修復max_cols UnboundLocalError (第507-509行)
  將logger.info()移到max_cols定義之後
- 移除危險的.get('page', 0)默認值 (第762行)
  改為.get('page'),沒有page的元素會被正確跳過

影響:
✅ 多頁PDF的表格和圖片現在會正確分配到對應頁面
✅ 詳細的PDF生成log現在可以正確顯示(座標轉換、縮放比例等)
✅ 能夠診斷文字擠壓、間距和位置錯誤的問題

測試建議:
1. 重新啟動後端清除Python cache
2. 上傳多頁PDF進行OCR處理
3. 檢查生成的JSON中每個元素是否有正確的page標記
4. 檢查終端log是否顯示詳細的PDF生成過程
5. 驗證生成的PDF中每頁的元素位置是否正確

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -1,78 +1,7 @@
|
||||
{
|
||||
"permissions": {
|
||||
"allow": [
|
||||
"Bash(openspec validate:*)",
|
||||
"Bash(openspec list:*)",
|
||||
"Bash(openspec show:*)",
|
||||
"Bash(conda env:*)",
|
||||
"Bash(alembic init:*)",
|
||||
"Bash(alembic revision:*)",
|
||||
"Bash(python -m alembic revision:*)",
|
||||
"Bash(python test_services.py:*)",
|
||||
"Bash(source ~/.zshrc)",
|
||||
"Bash(conda activate:*)",
|
||||
"Bash(brew install:*)",
|
||||
"Bash(/opt/homebrew/bin/brew install libmagic)",
|
||||
"Bash(python:*)",
|
||||
"Bash(/opt/homebrew/bin/brew install pango gdk-pixbuf libffi)",
|
||||
"Bash(export DYLD_LIBRARY_PATH:*)",
|
||||
"Bash(pip install:*)",
|
||||
"Bash(timeout 5 python:*)",
|
||||
"Bash(curl:*)",
|
||||
"Bash(pkill:*)",
|
||||
"Bash(bash -c \"source ~/.zshrc && conda activate tool_ocr && export DYLD_LIBRARY_PATH=/opt/homebrew/lib:$DYLD_LIBRARY_PATH && python -m app.main > /tmp/tool_ocr_startup.log 2>&1 &\")",
|
||||
"Bash(TOKEN=\"eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOjMsInVzZXJuYW1lIjoiYWRtaW4iLCJleHAiOjE3NjI4ODM1NDF9.sm7zPq7ShErFg3UfBSrzGWxC5m5MgC_L0owKJb7Q4J4\":*)",
|
||||
"Bash(/tmp/login_response.json)",
|
||||
"Bash(cat:*)",
|
||||
"Bash(conda run:*)",
|
||||
"Bash(alembic upgrade:*)",
|
||||
"Bash(lsof:*)",
|
||||
"Bash(xargs kill:*)",
|
||||
"Bash(brew list:*)",
|
||||
"Bash(echo:*)",
|
||||
"Bash(bash -c \"source ~/.zshrc && conda activate tool_ocr && cd /Users/egg/Projects/Tool_OCR/backend && pip list | grep pytest\")",
|
||||
"Bash(bash -c:*)",
|
||||
"Bash(find:*)",
|
||||
"Bash(TOKEN=\"eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOjMsInVzZXJuYW1lIjoiYWRtaW4iLCJleHAiOjE3NjI5MTczMzl9.x5FYcKYpF8rp1M7M7pQsDGwJS1EeQ6RdgRxtNbA2W5E\")",
|
||||
"Bash(TOKEN=\"eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOjMsInVzZXJuYW1lIjoiYWRtaW4iLCJleHAiOjE3NjI5MTczOTN9.oNPbj-SvIl_becIlulXb4DOJ6uHF70hnwlqI-Zfqs1g\")",
|
||||
"Bash(TOKEN=\"eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIzIiwidXNlcm5hbWUiOiJhZG1pbiIsImV4cCI6MTc2MjkxNzQ1NH0.wtLv3n8bR_whzkuYILehy87IBDI_ph8FWEFd7laASEU\")",
|
||||
"Bash(python3:*)",
|
||||
"Bash(TOKEN=\"eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIzIiwidXNlcm5hbWUiOiJhZG1pbiIsImV4cCI6MTc2MjkyMDUzMn0.e_uG5pRTHsnsCEO3yVZDCR4vXXne81Evkw99VDGVZQU\")",
|
||||
"Bash(unzip:*)",
|
||||
"Bash(TOKEN=\"eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIzIiwidXNlcm5hbWUiOiJhZG1pbiIsImV4cCI6MTc2MjkyMDc0OH0.zOpB_2lTi-nVf5B7VMMB9GPeanuo0i-m6iauzjyhCno\")",
|
||||
"Bash(TOKEN=\"eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIzIiwidXNlcm5hbWUiOiJhZG1pbiIsImV4cCI6MTc2MjkyMTExM30.q81VbDDIvQkL3VLl5sCvDEJlha3Rm4hkWMDQmWJyurs\")",
|
||||
"Bash(TOKEN=\"eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIzIiwidXNlcm5hbWUiOiJhZG1pbiIsImV4cCI6MTc2MjkyMTI3OH0.7CQ9NMj5yekdtaRg4v0jHYQmfsbajTZ8aK8kKOo7ixQ\")",
|
||||
"Bash(/Applications/LibreOffice.app/Contents/MacOS/soffice --headless --convert-to docx test_document.html --outdir .)",
|
||||
"Bash(env)",
|
||||
"Bash(node --version:*)",
|
||||
"Bash(npm:*)",
|
||||
"Bash(npx tailwindcss init -p)",
|
||||
"Bash(sqlite3:*)",
|
||||
"Bash(TOKEN=\"eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIzIiwidXNlcm5hbWUiOiJhZG1pbiIsImV4cCI6MTc2Mjk1ODUzOX0.S1JjFxVVmifdkN5F_dORt5jTRdTFN9MKJ8UJKuYacA8\")",
|
||||
"Bash(tree:*)",
|
||||
"Bash(done)",
|
||||
"Bash(git add:*)",
|
||||
"Bash(git commit:*)",
|
||||
"Bash(git push)",
|
||||
"Bash(docker --version:*)",
|
||||
"Bash(dpkg:*)",
|
||||
"Bash(pip3:*)",
|
||||
"Bash(chmod:*)",
|
||||
"Bash(sudo apt install:*)",
|
||||
"Bash(/usr/bin/soffice:*)",
|
||||
"Bash(git config:*)",
|
||||
"Bash(source:*)",
|
||||
"Bash(pip uninstall:*)",
|
||||
"Bash(nvidia-smi:*)",
|
||||
"Bash(journalctl:*)",
|
||||
"Bash(ss:*)",
|
||||
"Bash(pip index:*)",
|
||||
"Bash(timeout 10 python:*)",
|
||||
"Bash(alembic current:*)",
|
||||
"Bash(git clean:*)",
|
||||
"Bash(npx tsc:*)",
|
||||
"Bash(./node_modules/.bin/tsc:*)",
|
||||
"Bash(export LD_LIBRARY_PATH=/usr/lib/wsl/lib:$LD_LIBRARY_PATH echo \"Updated LD_LIBRARY_PATH:\" echo \"$LD_LIBRARY_PATH\" echo \"\" echo \"Testing CUDA library loading:\" ldconfig -p)"
|
||||
"Bash(git commit:*)"
|
||||
],
|
||||
"deny": [],
|
||||
"ask": []
|
||||
|
||||
Reference in New Issue
Block a user