# SalesPipeline/backend/read_spec.py

import docx
import sys

def read_docx(file_path) -> str:
    """Extract the text of a .docx file, keeping paragraphs and tables in order.

    Args:
        file_path: Passed unchanged to ``docx.Document``.

    Returns:
        A newline-joined string with one line per non-blank paragraph and one
        line per table row (cell texts stripped and joined with " | ").
    """
    doc = docx.Document(file_path)
    content = []

    # Walk the direct children of the body so paragraphs and tables stay
    # interleaved in the order they appear in the document.
    for element in doc.element.body:
        tag = element.tag
        if not isinstance(tag, str):
            # Non-element nodes (e.g. XML comments) do not carry a string tag.
            continue

        # Compare the exact local name ("{namespace}p" -> "p") instead of a
        # suffix match, so unrelated tags ending in the same letters are not
        # mistaken for paragraphs or tables.
        local_name = tag.rpartition('}')[2]

        if local_name == 'p':  # Paragraph
            para = docx.text.paragraph.Paragraph(element, doc)
            # Blank paragraphs are dropped; kept ones retain their original
            # (unstripped) text.
            if para.text.strip():
                content.append(para.text)
        elif local_name == 'tbl':  # Table
            table = docx.table.Table(element, doc)
            # NOTE(review): python-docx appears to report a merged cell once
            # per grid column it spans, so merged text may repeat in a row --
            # confirm whether that is wanted.
            for row in table.rows:
                row_text = [cell.text.strip() for cell in row.cells]
                content.append(" | ".join(row_text))

    return '\n'.join(content)

if __name__ == "__main__":
    # Usage: read_spec.py [input.docx] [output.txt]
    # Both arguments are optional; when omitted, the original hard-coded
    # input document and "spec_content.txt" output file are used.
    default_path = r"c:\Users\USER\Desktop\SampleOrderAssistant\data\業務資料比對與轉換率分析系統 - 邏輯規格書 (v1.0).docx"
    path = sys.argv[1] if len(sys.argv) > 1 else default_path
    out_path = sys.argv[2] if len(sys.argv) > 2 else "spec_content.txt"

    content = read_docx(path)
    # Write as UTF-8 explicitly so non-ASCII text in the document survives
    # regardless of the platform's default encoding.
    with open(out_path, "w", encoding="utf-8") as f:
        f.write(content)
    print(f"Content written to {out_path}")