Files
OCR/backend/app/main.py
egg 5cf4010c9b fix: 修復多頁PDF頁碼分配錯誤和logging配置問題
Critical Bug #1: 多頁PDF頁碼分配錯誤
問題:
- 在處理多頁PDF時,雖然text_regions有正確的頁碼標記
- 但layout_data.elements(表格)和images_metadata(圖片)都保持page=0
- 導致所有頁面的表格和圖片都被錯誤地繪製在第1頁
- 造成嚴重的版面錯誤、元素重疊和位置錯誤

根本原因:
- ocr_service.py (第359-372行) 在累積多頁結果時
- text_regions有添加頁碼:region['page'] = page_num
- 但images_metadata和layout_data.elements沒有更新頁碼
- 它們保持單頁處理時的默認值page=0

修復方案:
- backend/app/services/ocr_service.py (第359-372行)
  - 為layout_data.elements中的每個元素添加正確的頁碼
  - 為images_metadata中的每個圖片添加正確的頁碼
  - 確保多頁PDF的每個元素都有正確的page標記

Critical Bug #2: Logging配置被uvicorn覆蓋
問題:
- uvicorn啟動時會設置自己的logging配置
- 這會覆蓋應用程式的logging.basicConfig()
- 導致應用層的INFO/WARNING/ERROR log完全消失
- 只能看到uvicorn的HTTP請求log和第三方庫的DEBUG log
- 無法診斷PDF生成過程中的問題

修復方案:
- backend/app/main.py (第17-36行)
  - 添加force=True參數強制重新配置logging (Python 3.8+)
  - 顯式設置root logger的level
  - 配置app-specific loggers (app.services.pdf_generator_service等)
  - 啟用log propagation確保訊息能傳遞到root logger

其他修復:
- backend/app/services/pdf_generator_service.py
  - 將重要的debug logging改為info level (第371, 379, 490, 613行)
    原因:預設log level是INFO,debug log不會顯示
  - 修復max_cols UnboundLocalError (第507-509行)
    將logger.info()移到max_cols定義之後
  - 移除危險的.get('page', 0)默認值 (第762行)
    改為.get('page'),沒有page的元素會被正確跳過

影響:
- 多頁PDF的表格和圖片現在會正確分配到對應頁面
- 詳細的PDF生成log現在可以正確顯示(座標轉換、縮放比例等)
- 能夠診斷文字擠壓、間距和位置錯誤的問題

測試建議:
1. 重新啟動後端清除Python cache
2. 上傳多頁PDF進行OCR處理
3. 檢查生成的JSON中每個元素是否有正確的page標記
4. 檢查終端log是否顯示詳細的PDF生成過程
5. 驗證生成的PDF中每頁的元素位置是否正確

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-18 12:13:25 +08:00

247 lines
7.3 KiB
Python

"""
Tool_OCR - FastAPI Application Entry Point (V2)
Main application setup with CORS, routes, and startup/shutdown events
"""
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from contextlib import asynccontextmanager
import logging
from pathlib import Path
from app.core.config import settings
# Ensure log directory exists before configuring logging
Path(settings.log_file).parent.mkdir(parents=True, exist_ok=True)

# Resolve the configured level exactly once. The name is upper-cased so that a
# lower-case setting such as "info" maps to logging.INFO instead of raising
# AttributeError while this module is being imported. An unknown level name
# still fails loudly here rather than being silently replaced.
_log_level = getattr(logging, settings.log_level.upper())

# Configure logging. force=True (Python 3.8+) removes any handlers already
# attached to the root logger before installing ours; it is used here so this
# configuration wins over the one uvicorn sets up.
logging.basicConfig(
    level=_log_level,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    handlers=[
        # Explicit UTF-8: without it the file is opened with the locale's
        # default encoding, which can fail on non-ASCII log text
        # (for example uploaded file names).
        logging.FileHandler(settings.log_file, encoding="utf-8"),
        logging.StreamHandler(),
    ],
    force=True,
)

# Also set the level on the root logger explicitly.
root_logger = logging.getLogger()
root_logger.setLevel(_log_level)

# Pin the level on the application loggers and make sure their records are
# passed up to the root logger's handlers configured above.
for logger_name in ['app', 'app.services', 'app.services.pdf_generator_service', 'app.services.ocr_service']:
    app_logger = logging.getLogger(logger_name)
    app_logger.setLevel(_log_level)
    app_logger.propagate = True  # Ensure logs propagate to root logger

logger = logging.getLogger(__name__)
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Lifespan context for the application.

    The code before ``yield`` runs once at startup and the code after it
    runs once at shutdown. The ``app`` argument is not used by the body.
    """
    # --- startup ---
    logger.info("Starting Tool_OCR V2 application...")

    # Have the settings object create/verify the directories it manages.
    settings.ensure_directories()

    for startup_message in (
        "All directories created/verified",
        "Application startup complete",
    ):
        logger.info(startup_message)

    # The application serves requests while suspended here.
    yield

    # --- shutdown ---
    logger.info("Shutting down Tool_OCR application...")
# Application object; startup/shutdown handling is delegated to `lifespan`.
app = FastAPI(
    lifespan=lifespan,
    title="Tool_OCR V2",
    version="2.0.0",
    description="OCR Processing System with External Authentication & Task Isolation",
)

# CORS policy: origins come from settings; every method and header is allowed
# and credentials are permitted.
_cors_options = {
    "allow_origins": settings.cors_origins_list,
    "allow_credentials": True,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_cors_options)
# Health check endpoint
@app.get("/health")
async def health_check():
    """Report service health together with GPU status."""
    # Imported lazily, inside the handler, exactly as the original does.
    from app.services.ocr_service import OCRService

    payload = {
        "status": "healthy",
        "service": "Tool_OCR V2",
        "version": "2.0.0",
    }

    try:
        # A throwaway OCRService is built on every call just to query the GPU.
        # In production, this should be a singleton service.
        gpu_status = OCRService().get_gpu_status()

        gpu_info = {
            "available": gpu_status.get("gpu_available", False),
            "enabled": gpu_status.get("gpu_enabled", False),
            "device_name": gpu_status.get("device_name", "N/A"),
            "device_count": gpu_status.get("device_count", 0),
            "compute_capability": gpu_status.get("compute_capability", "N/A"),
        }

        # Memory figures are only reported when a total is present and truthy.
        if gpu_status.get("memory_total_mb"):
            memory_fields = (
                ("total_mb", "memory_total_mb"),
                ("allocated_mb", "memory_allocated_mb"),
                ("utilization_percent", "memory_utilization"),
            )
            gpu_info["memory"] = {
                out_key: round(gpu_status.get(status_key, 0), 2)
                for out_key, status_key in memory_fields
            }

        # Surface the reason only when the GPU is reported as unavailable.
        if not gpu_status.get("gpu_available"):
            reason = gpu_status.get("reason")
            if reason:
                gpu_info["reason"] = reason
    except Exception as exc:
        # Any failure while collecting GPU data degrades to an error entry;
        # the endpoint itself still answers.
        logger.warning(f"Failed to get GPU status: {exc}")
        gpu_info = {
            "available": False,
            "error": str(exc),
        }

    payload["gpu"] = gpu_info
    return payload
# Root endpoint
@app.get("/")
async def root():
    """Return basic API information and pointers to the docs and health routes."""
    api_info = dict(
        message="Tool_OCR API V2 - External Authentication",
        version="2.0.0",
        docs_url="/docs",
        health_check="/health",
    )
    return api_info
# Include V2 API routers
from app.routers import auth, tasks, admin
from fastapi import UploadFile, File, Depends, HTTPException, status
from sqlalchemy.orm import Session
import hashlib
from app.core.deps import get_db, get_current_user
from app.models.user import User
from app.models.task import TaskFile
from app.schemas.task import UploadResponse, TaskStatusEnum
from app.services.task_service import task_service
# Register the routers in the same order as before: auth, tasks, admin.
for _router_module in (auth, tasks, admin):
    app.include_router(_router_module.router)
def _sanitize_upload_filename(filename):
    """Return only the final path component of a client-supplied filename.

    Forward slashes and backslashes are both treated as separators so that
    directory parts are dropped regardless of the client's platform.
    Returns an empty string when nothing usable remains (``None``, an empty
    string, ``.``, ``..`` or a name consisting only of separators).
    """
    if not filename:
        return ""
    base_name = Path(filename.replace("\\", "/")).name
    return "" if base_name in {".", ".."} else base_name


# File upload endpoint
@app.post("/api/v2/upload", response_model=UploadResponse, tags=["Upload"], summary="Upload file for OCR")
async def upload_file(
    file: UploadFile = File(..., description="File to upload (PNG, JPG, PDF, etc.)"),
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    Upload a file for OCR processing

    Creates a new task and uploads the file

    - **file**: File to upload

    Responds with 400 when the filename is missing or its extension is not
    allowed, and with 500 when storing the file fails.
    """
    try:
        # The filename is client-controlled. Reject a missing/empty one with
        # a 400 (it used to surface as a 500), and keep only its basename for
        # anything that touches the filesystem.
        safe_name = _sanitize_upload_filename(file.filename)
        if not safe_name:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="No filename provided"
            )

        # Validate file extension
        file_ext = Path(safe_name).suffix.lower().lstrip('.')
        if file_ext not in settings.allowed_extensions_list:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=f"File type .{file_ext} not allowed. Allowed types: {', '.join(settings.allowed_extensions_list)}"
            )

        # Read file content (the whole upload is held in memory)
        file_content = await file.read()
        file_size = len(file_content)

        # Calculate file hash
        file_hash = hashlib.sha256(file_content).hexdigest()

        # Create task; the name as sent by the client is kept for the record
        task = task_service.create_task(
            db=db,
            user_id=current_user.id,
            filename=file.filename,
            file_type=file.content_type
        )

        # Save file to disk
        upload_dir = Path(settings.upload_dir)
        upload_dir.mkdir(parents=True, exist_ok=True)

        # Create unique filename using task_id. Only the sanitized basename
        # is used so the stored file always lands directly in upload_dir.
        unique_filename = f"{task.task_id}_{safe_name}"
        file_path = upload_dir / unique_filename

        # Write file
        with open(file_path, "wb") as f:
            f.write(file_content)

        # Create TaskFile record
        task_file = TaskFile(
            task_id=task.id,
            original_name=file.filename,
            stored_path=str(file_path),
            file_size=file_size,
            mime_type=file.content_type,
            file_hash=file_hash
        )
        db.add(task_file)
        db.commit()

        logger.info(f"Uploaded file {file.filename} ({file_size} bytes) for task {task.task_id}, user {current_user.email}")

        return {
            "task_id": task.task_id,
            "filename": file.filename,
            "file_size": file_size,
            "file_type": file.content_type or "application/octet-stream",
            "status": TaskStatusEnum.PENDING
        }
    except HTTPException:
        # Deliberate HTTP errors (the 400s above) pass through untouched.
        raise
    except Exception as e:
        logger.exception(f"Failed to upload file for user {current_user.id}")
        # NOTE(review): the detail echoes the internal exception text to the
        # client; kept for backward compatibility, consider a generic message.
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to upload file: {str(e)}"
        ) from e
if __name__ == "__main__":
    # Development entry point: serve this module's app with uvicorn.
    import uvicorn

    server_options = {
        "host": "0.0.0.0",
        "port": settings.backend_port,
        "reload": True,
        # The configured level name is passed to uvicorn in lower case.
        "log_level": settings.log_level.lower(),
    }
    uvicorn.run("app.main:app", **server_options)