import uuid
import docx
from docx import Document
from docx.text.paragraph import Paragraph
# document = Document('深圳證券交易所股票上市規(guī)則.docx').add_
def _heading_level(style_name):
    """Return N for a style named exactly ``Heading N`` (N >= 1), else ``None``."""
    prefix, _, suffix = style_name.partition(' ')
    if prefix == 'Heading' and suffix.isdecimal() and int(suffix) >= 1:
        return int(suffix)
    return None


def catalogue_get(doc):
    """Render a .docx file as an HTML fragment with numbered heading anchors.

    Paragraphs are visited in document order:

    * The very first paragraph is always emitted as a centred ``<h1>`` title
      (and, if it is itself a numbered heading, once more as that heading).
    * A paragraph styled ``Heading 1`` .. ``Heading 3`` receives a dotted
      outline number such as ``1.2.`` and is emitted as ``<h3>`` .. ``<h5>``
      (level + 2) with that number as its ``id`` attribute.
    * Headings of level 4 and deeper are skipped.
    * Every other paragraph after the first is emitted as ``<p>``.

    NOTE(review): the nesting above was reconstructed from a copy of this
    function whose indentation had been lost; confirm that dropping level 4+
    headings is intended.

    Args:
        doc: Passed straight to ``Document``, e.g. the file name of a .docx.

    Returns:
        The generated HTML markup as a single string.
    """
    # Use a name that does not shadow the imported ``docx`` module.
    document = Document(doc)
    counters = [1]      # outline counters, one entry per open heading level
    previous_level = 0  # stays 0 until the first numbered heading is seen
    fragments = []      # joined once at the end instead of repeated ``+=``

    # NOTE(review): paragraph text is interpolated unescaped, so a literal
    # '<' or '&' in the document ends up as raw markup; consider html.escape
    # if the output is served to a browser.
    for index, paragraph in enumerate(document.paragraphs):
        text = paragraph.text
        if index == 0:
            fragments.append(f'<div><center><h1>{text}</h1></center></div> ')

        level = _heading_level(paragraph.style.name)
        if level is None:
            # Body text; the first paragraph was already emitted as the title.
            if index != 0:
                fragments.append(f'<p>{text}</p>')
            continue
        if level >= 4:
            continue

        if level == 1 and previous_level == 0:
            # First heading of the document is level 1: numbering starts at "1.".
            pass
        elif level > previous_level:
            # Going deeper opens one new counter, even if levels were skipped.
            counters.append(1)
        elif level == previous_level:
            counters[-1] += 1
        else:
            # Going back up: bump the counter of this level, drop deeper ones.
            counters[level - 1] += 1
            del counters[level:]
        previous_level = level

        heading_id = ''.join(f'{count}.' for count in counters)
        size = level + 2
        fragments.append(f"<h{size} id='{heading_id}'>{text}</h{size}>")

    return ''.join(fragments)
# Example run: convert one document, addressed by a relative file name, and
# print whatever catalogue_get returns for it.
# NOTE(review): the file name below looks mis-encoded (e.g. "規(guī)則");
# confirm it matches the actual file on disk.
path = '深圳證券交易所股票上市規(guī)則.docx'
data = catalogue_get(path)
print(data)
目錄樹
©著作權歸作者所有,轉載或內容合作請聯系作者
【社區內容提示】社區部分內容疑似由AI輔助生成,瀏覽時請結合常識與多方信息審慎甄別。
平臺聲明:文章內容(如有圖片或視頻亦包括在內)由作者上傳并發布,文章內容僅代表作者本人觀點,簡書系信息發布平臺,僅提供信息存儲服務。
【社區(qū)內(nèi)容提示】社區(qū)部分內(nèi)容疑似由AI輔助生成,瀏覽時請結(jié)合常識與多方信息審慎甄別。
平臺聲明:文章內(nèi)容(如有圖片或視頻亦包括在內(nèi))由作者上傳并發(fā)布,文章內(nèi)容僅代表作者本人觀點,簡書系信息發(fā)布平臺,僅提供信息存儲服務(wù)。
相關閱讀更多精彩內容
- 最近看到vscode tree生成目錄樹結構插件挺好玩的,自己琢磨實現了一個簡單版本,支持文件,文件夾過濾 最終效...
- 最近有個 task,是把其它研發組的 React JS 代碼整合到自家項目里。先從度娘效率云上面 clone 了下來~~
- PyQt可以使用QTreeView來顯示一個目錄結構,要將treeview顯示為目錄樹,需要將其模型設置為QFil...
- 經常看到github上有項目會在readme中顯示項目的目錄結構,一個清晰的目錄結構有利于其他人快速的了解整個項目...