28 lines
1016 B
Python
28 lines
1016 B
Python
import docx
|
|
import sys
|
|
|
|
def read_docx(file_path):
    """Return the text of a .docx file with paragraphs and tables in document order.

    The document body is walked child by child so that tables appear at the
    position where they occur between paragraphs, rather than all paragraphs
    first and all tables afterwards.

    Args:
        file_path: Location of the .docx file; passed straight to
            ``docx.Document``.

    Returns:
        A single string with one line per non-blank paragraph and one line
        per table row. Cells within a row are stripped and joined with
        ``" | "``.
    """
    doc = docx.Document(file_path)
    content = []

    for element in doc.element.body:
        tag = element.tag
        # Comment / processing-instruction nodes expose a non-string tag;
        # they carry no document text, so skip them instead of crashing.
        if not isinstance(tag, str):
            continue

        # Tags are in Clark notation ("{namespace}local"). Compare the exact
        # local name: a suffix test such as endswith('p') would also match
        # any other element whose name merely ends in "p".
        local_name = tag.rsplit('}', 1)[-1]

        if local_name == 'p':  # Paragraph
            para = docx.text.paragraph.Paragraph(element, doc)
            if para.text.strip():
                content.append(para.text)
        elif local_name == 'tbl':  # Table
            table = docx.table.Table(element, doc)
            # NOTE(review): for merged cells python-docx is documented to
            # return the same cell once per spanned column, so merged text
            # may repeat within a row — confirm whether that is desired.
            for row in table.rows:
                row_text = [cell.text.strip() for cell in row.cells]
                content.append(" | ".join(row_text))

    return '\n'.join(content)
|
|
|
|
if __name__ == "__main__":
    # Usage: python <script> [path-to-docx]
    # When no path is supplied, fall back to the original hard-coded document
    # so existing invocations keep working unchanged.
    default_path = r"c:\Users\USER\Desktop\SampleOrderAssistant\data\業務資料比對與轉換率分析系統 - 邏輯規格書 (v1.0).docx"
    path = sys.argv[1] if len(sys.argv) > 1 else default_path

    content = read_docx(path)

    # Write as UTF-8 explicitly: the extracted text may contain non-ASCII
    # characters that the platform default encoding cannot represent.
    with open("spec_content.txt", "w", encoding="utf-8") as f:
        f.write(content)

    print("Content written to spec_content.txt")
|