diff --git a/frontend/src/components/PreprocessingSettings.tsx b/frontend/src/components/PreprocessingSettings.tsx new file mode 100644 index 0000000..c31b6ff --- /dev/null +++ b/frontend/src/components/PreprocessingSettings.tsx @@ -0,0 +1,203 @@ +import { cn } from '@/lib/utils' +import { Check, Wand2, Settings2, Ban, Eye } from 'lucide-react' +import { useTranslation } from 'react-i18next' +import type { PreprocessingMode, PreprocessingConfig, PreprocessingContrast } from '@/types/apiV2' + +interface PreprocessingSettingsProps { + mode: PreprocessingMode + config: PreprocessingConfig + onModeChange: (mode: PreprocessingMode) => void + onConfigChange: (config: PreprocessingConfig) => void + onPreview?: () => void + disabled?: boolean + className?: string +} + +const MODE_ICONS: Record = { + auto: , + manual: , + disabled: , +} + +export default function PreprocessingSettings({ + mode, + config, + onModeChange, + onConfigChange, + onPreview, + disabled = false, + className, +}: PreprocessingSettingsProps) { + const { t } = useTranslation() + const modes: PreprocessingMode[] = ['auto', 'manual', 'disabled'] + const contrastOptions: PreprocessingContrast[] = ['none', 'histogram', 'clahe'] + + const getModeInfo = (m: PreprocessingMode) => ({ + label: t(`processing.preprocessing.mode.${m}`), + description: t(`processing.preprocessing.mode.${m}Desc`), + }) + + const handleConfigChange = (field: keyof PreprocessingConfig, value: any) => { + onConfigChange({ ...config, [field]: value }) + } + + return ( +
+ {/* Header */} +
+
+ +

+ {t('processing.preprocessing.title')} +

+
+ {onPreview && mode !== 'disabled' && ( + + )} +
+ + {/* Mode Selection */} +
+ {modes.map((m) => { + const info = getModeInfo(m) + const isSelected = mode === m + + return ( + + ) + })} +
+ + {/* Manual Configuration (shown only when mode is 'manual') */} + {mode === 'manual' && ( +
+

+ {t('processing.preprocessing.manualConfig')} +

+ + {/* Contrast Enhancement */} +
+ + +
+ + {/* Sharpen Toggle */} + + + {/* Binarize Toggle */} + +
+ )} + + {/* Info Note */} +
+

+ {t('processing.preprocessing.note')} +

+
+
+ ) +} diff --git a/frontend/src/i18n/locales/zh-TW.json b/frontend/src/i18n/locales/zh-TW.json index 4102c2b..d4296eb 100644 --- a/frontend/src/i18n/locales/zh-TW.json +++ b/frontend/src/i18n/locales/zh-TW.json @@ -63,6 +63,30 @@ "cdlaDesc": "CDLA 版面分析模型 (~86% mAP) - 專用中文版面分析", "recommended": "推薦", "note": "版面模型會影響文件結構(表格、文字區塊、圖片)的偵測效果。請根據您的文件類型選擇適合的模型。" + }, + "preprocessing": { + "title": "影像前處理", + "mode": { + "auto": "自動模式", + "autoDesc": "系統自動分析影像品質,決定最佳的前處理方式", + "manual": "手動模式", + "manualDesc": "手動選擇前處理選項,完全控制處理流程", + "disabled": "停用前處理", + "disabledDesc": "不進行任何前處理,直接使用原始影像" + }, + "recommended": "推薦", + "preview": "預覽效果", + "manualConfig": "手動設定選項", + "contrast": { + "label": "對比度增強", + "none": "不增強", + "histogram": "直方圖均衡化", + "clahe": "CLAHE 自適應均衡化" + }, + "sharpen": "邊緣銳化", + "binarize": "二值化處理", + "binarizeWarning": "可能影響顏色資訊", + "note": "前處理僅影響版面偵測階段,用於改善表格和文字區塊的識別。原始影像仍用於最終的 OCR 文字提取,確保最佳識別品質。" } }, "results": { diff --git a/frontend/src/pages/ProcessingPage.tsx b/frontend/src/pages/ProcessingPage.tsx index 105c411..59b3b86 100644 --- a/frontend/src/pages/ProcessingPage.tsx +++ b/frontend/src/pages/ProcessingPage.tsx @@ -10,9 +10,10 @@ import { useToast } from '@/components/ui/toast' import { apiClientV2 } from '@/services/apiV2' import { Play, CheckCircle, FileText, AlertCircle, Clock, Activity, Loader2 } from 'lucide-react' import LayoutModelSelector from '@/components/LayoutModelSelector' +import PreprocessingSettings from '@/components/PreprocessingSettings' import TaskNotFound from '@/components/TaskNotFound' import { useTaskValidation } from '@/hooks/useTaskValidation' -import type { LayoutModel, ProcessingOptions } from '@/types/apiV2' +import type { LayoutModel, ProcessingOptions, PreprocessingMode, PreprocessingConfig } from '@/types/apiV2' export default function ProcessingPage() { const { t } = useTranslation() @@ -34,6 +35,14 @@ export default function ProcessingPage() { // Layout model state (default to 'chinese' for best Chinese document support) const [layoutModel, setLayoutModel] = useState('chinese') + // Preprocessing state + const [preprocessingMode, setPreprocessingMode] = useState('auto') + const [preprocessingConfig, setPreprocessingConfig] = useState({ + contrast: 'clahe', + sharpen: true, + binarize: false, + }) + // Start OCR processing const processOCRMutation = useMutation({ mutationFn: () => { @@ -41,6 +50,8 @@ export default function ProcessingPage() { use_dual_track: true, language: 'ch', layout_model: layoutModel, + preprocessing_mode: preprocessingMode, + preprocessing_config: preprocessingMode === 'manual' ? preprocessingConfig : undefined, } return apiClientV2.startTask(taskId!, options) @@ -342,13 +353,25 @@ export default function ProcessingPage() { )} - {/* Layout Model Selection (only show when task is pending) */} + {/* Processing Options (only show when task is pending) */} {isPending && ( - +
+ {/* Layout Model Selection */} + + + {/* Preprocessing Settings */} + +
)} ) diff --git a/frontend/src/types/apiV2.ts b/frontend/src/types/apiV2.ts index 0da24eb..716c966 100644 --- a/frontend/src/types/apiV2.ts +++ b/frontend/src/types/apiV2.ts @@ -76,12 +76,62 @@ export interface DocumentAnalysisResponse { /** * Layout detection model selection for OCR track. * Different models are optimized for different document types: - * - chinese: PP-DocLayout-S - Best for Chinese forms, contracts, invoices + * - chinese: PP-DocLayout_plus-L - Best for Chinese forms, contracts, invoices * - default: PubLayNet-based - Best for English academic papers * - cdla: Specialized for Chinese document layout analysis */ export type LayoutModel = 'chinese' | 'default' | 'cdla' +/** + * Preprocessing mode for layout detection enhancement. + * - auto: Analyze image quality and automatically apply optimal preprocessing + * - manual: Use user-specified preprocessing configuration + * - disabled: Skip preprocessing entirely + */ +export type PreprocessingMode = 'auto' | 'manual' | 'disabled' + +/** + * Contrast enhancement method for preprocessing. + */ +export type PreprocessingContrast = 'none' | 'histogram' | 'clahe' + +/** + * Preprocessing configuration for layout detection enhancement. + */ +export interface PreprocessingConfig { + contrast: PreprocessingContrast + sharpen: boolean + binarize: boolean +} + +/** + * Image quality metrics from auto-analysis. + */ +export interface ImageQualityMetrics { + contrast: number + edge_strength: number +} + +/** + * Request for preprocessing preview. + */ +export interface PreprocessingPreviewRequest { + page?: number + mode?: PreprocessingMode + config?: PreprocessingConfig +} + +/** + * Response for preprocessing preview. + */ +export interface PreprocessingPreviewResponse { + original_url: string + preprocessed_url: string + quality_metrics: ImageQualityMetrics + auto_config: PreprocessingConfig + mode_used: PreprocessingMode +} + export interface ProcessingOptions { use_dual_track?: boolean force_track?: ProcessingTrack @@ -89,6 +139,8 @@ export interface ProcessingOptions { include_layout?: boolean include_images?: boolean layout_model?: LayoutModel // Layout detection model selection (OCR track only) + preprocessing_mode?: PreprocessingMode // Preprocessing mode (OCR track only) + preprocessing_config?: PreprocessingConfig // Manual preprocessing config } export interface TaskCreate {