This commit is contained in:
beabigegg
2025-11-12 22:53:17 +08:00
commit da700721fa
130 changed files with 23393 additions and 0 deletions

View File

@@ -0,0 +1,163 @@
#!/usr/bin/env python3
"""
Create demo images for testing Tool_OCR
"""
from PIL import Image, ImageDraw, ImageFont
from pathlib import Path
# Root folder that receives the generated demo images.
# NOTE(review): this is a machine-specific absolute path; adjust it when
# running the script from a different checkout location.
DEMO_DIR = Path("/Users/egg/Projects/Tool_OCR") / "demo_docs"
def create_text_image(text, filename, size=(800, 600), font_size=40):
    """Render ``text`` centered on a white canvas and save it to ``filename``.

    Args:
        text: The string to draw. It is measured and drawn as a single unit,
            so embedded newlines are handled by Pillow's text routines.
        filename: Destination path (``str`` or ``Path``). Missing parent
            directories are created.
        size: ``(width, height)`` of the image in pixels.
        font_size: Size requested from the TrueType fonts. It is not applied
            when the built-in default font is used as a last resort.
    """
    img = Image.new('RGB', size, color='white')
    draw = ImageDraw.Draw(img)

    # Try the candidate system fonts in order of preference and fall back to
    # Pillow's built-in font when none of them can be loaded.
    font = None
    for font_path in (
        "/System/Library/Fonts/STHeiti Light.ttc",
        "/System/Library/Fonts/Helvetica.ttc",
    ):
        try:
            font = ImageFont.truetype(font_path, font_size)
            break
        except (OSError, ImportError):
            # OSError: font file missing/unreadable.
            # ImportError: Pillow was built without FreeType support.
            continue
    if font is None:
        font = ImageFont.load_default()

    # The bounding box is measured with the anchor at (0, 0); its left/top
    # edges are usually not zero, so subtract them to center the actual glyphs
    # rather than the anchor point.
    left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
    text_width = right - left
    text_height = bottom - top
    position = (
        (size[0] - text_width) // 2 - left,
        (size[1] - text_height) // 2 - top,
    )

    draw.text(position, text, fill='black', font=font)

    # Make sure the destination folder exists; Image.save does not create it.
    Path(filename).parent.mkdir(parents=True, exist_ok=True)
    img.save(filename)
    print(f"Created: {filename}")
def create_multiline_text_image(lines, filename, size=(800, 1000), font_size=30):
    """Draw each entry of ``lines`` on its own row and save the image.

    Args:
        lines: Iterable of strings, drawn top to bottom starting at (50, 50).
            Empty strings simply leave a blank row.
        filename: Destination path (``str`` or ``Path``). Missing parent
            directories are created.
        size: ``(width, height)`` of the image in pixels.
        font_size: Size requested from the TrueType fonts; also used to
            derive the vertical distance between rows.
    """
    img = Image.new('RGB', size, color='white')
    draw = ImageDraw.Draw(img)

    # Try the candidate system fonts in order of preference and fall back to
    # Pillow's built-in font when none of them can be loaded.
    font = None
    for font_path in (
        "/System/Library/Fonts/STHeiti Light.ttc",
        "/System/Library/Fonts/Helvetica.ttc",
    ):
        try:
            font = ImageFont.truetype(font_path, font_size)
            break
        except (OSError, ImportError):
            # OSError: font file missing/unreadable.
            # ImportError: Pillow was built without FreeType support.
            continue
    if font is None:
        font = ImageFont.load_default()

    # Fixed left margin of 50 px; each row advances by the font size plus
    # 20 px of spacing.
    line_step = font_size + 20
    y = 50
    for line in lines:
        draw.text((50, y), line, fill='black', font=font)
        y += line_step

    # Make sure the destination folder exists; Image.save does not create it.
    Path(filename).parent.mkdir(parents=True, exist_ok=True)
    img.save(filename)
    print(f"Created: {filename}")
def create_table_image(filename, size=(800, 600)):
    """Draw a fixed 4-column, 4-row demo table and save it to ``filename``.

    The table consists of one header row and three data rows. Its geometry is
    hard-coded in pixels (x from 50 to 750, y from 50 to 250) and does not
    scale with ``size``.

    Args:
        filename: Destination path (``str`` or ``Path``). Missing parent
            directories are created.
        size: ``(width, height)`` of the image in pixels.
    """
    img = Image.new('RGB', size, color='white')
    draw = ImageDraw.Draw(img)

    # Try the candidate system fonts in order of preference and fall back to
    # Pillow's built-in font when none of them can be loaded.
    font = None
    for font_path in (
        "/System/Library/Fonts/STHeiti Light.ttc",
        "/System/Library/Fonts/Helvetica.ttc",
    ):
        try:
            font = ImageFont.truetype(font_path, 24)
            break
        except (OSError, ImportError):
            # OSError: font file missing/unreadable.
            # ImportError: Pillow was built without FreeType support.
            continue
    if font is None:
        font = ImageFont.load_default()

    # Table content: the first tuple is the header row.
    rows = (
        ("姓名", "年齡", "部門", "職位"),
        ("張三", "28", "技術部", "工程師"),
        ("李四", "32", "銷售部", "經理"),
        ("王五", "25", "人事部", "專員"),
    )

    # Grid geometry in pixels.
    column_lefts = (50, 250, 450, 650)
    table_left = 50
    table_right = 750
    table_top = 50
    row_height = 50
    table_bottom = table_top + row_height * len(rows)

    # Outline of every row (header first).
    for row_index in range(len(rows)):
        row_top = table_top + row_index * row_height
        draw.rectangle(
            [table_left, row_top, table_right, row_top + row_height],
            outline='black',
            width=2,
        )

    # Inner column separators; the outer edges come from the row outlines.
    for x in column_lefts[1:]:
        draw.line([x, table_top, x, table_bottom], fill='black', width=2)

    # Cell text, padded 10 px from the left and 15 px from the top of a cell.
    for row_index, row in enumerate(rows):
        row_top = table_top + row_index * row_height
        for x, cell in zip(column_lefts, row):
            draw.text((x + 10, row_top + 15), cell, fill='black', font=font)

    # Make sure the destination folder exists; Image.save does not create it.
    Path(filename).parent.mkdir(parents=True, exist_ok=True)
    img.save(filename)
    print(f"Created: {filename}")
def main():
    """Generate all demo images under ``DEMO_DIR`` and print a summary.

    Three groups are produced: single-text images in ``basic/``, a multi-line
    document in ``layout/`` and a table in ``tables/``.
    """
    basic_dir = DEMO_DIR / "basic"
    layout_dir = DEMO_DIR / "layout"
    tables_dir = DEMO_DIR / "tables"

    # Create the output folders up front so saving the images cannot fail on
    # a fresh checkout where they do not exist yet.
    for directory in (basic_dir, layout_dir, tables_dir):
        directory.mkdir(parents=True, exist_ok=True)

    # Create basic text images
    basic_samples = (
        ("這是中文繁體測試文檔\nTool_OCR 系統測試", "chinese_traditional.png"),
        ("这是中文简体测试文档\nTool_OCR 系统测试", "chinese_simple.png"),
        ("This is English Test Document\nTool_OCR System Testing", "english.png"),
    )
    for text, image_name in basic_samples:
        create_text_image(text, basic_dir / image_name)

    # Create multiline document
    layout_lines = [
        "Tool_OCR 文檔處理系統",
        "",
        "一、系統簡介",
        "Tool_OCR 是一個強大的文檔識別系統,支援批次處理、",
        "版面分析、表格識別等功能。",
        "",
        "二、主要功能",
        "1. 批次文件上傳與處理",
        "2. OCR 文字識別(支援中英文)",
        "3. 版面保留 PDF 導出",
        "4. 表格結構識別",
        "5. 多種格式導出TXT, JSON, Excel, MD, PDF",
    ]
    create_multiline_text_image(layout_lines, layout_dir / "document.png")

    # Create table image
    create_table_image(tables_dir / "simple_table.png")

    print("\n✅ Demo images created successfully!")
    print(f"\n📁 Location: {DEMO_DIR}")
    print("\nYou can now test these images with Tool_OCR:")
    print(" - Basic OCR: demo_docs/basic/")
    print(" - Layout: demo_docs/layout/")
    print(" - Tables: demo_docs/tables/")
# Generate the demo images only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()