@@ -110,24 +110,51 @@ def get_image_id(image_id):
110
110
return get_image_id
111
111
112
112
113
# Font-size bands, in points, used by get_title_level to guess a heading level
# for paragraphs that have no heading style. Each entry is
# [inclusive lower bound, exclusive upper bound]; the 1-based position of the
# matching entry is the heading level.
title_font_list = [
    [36, 100],  # level 1
    [26, 36],  # level 2
    [24, 26],  # level 3
    [22, 24],  # level 4
    [18, 22],  # level 5
    [16, 18],  # level 6
]
121
+
122
+
123
def get_title_level(paragraph: Paragraph):
    """Infer the Markdown heading level of a paragraph.

    The paragraph's style name is checked first: a name starting with
    ``Heading``, ``TOC 标题`` or ``标题`` marks a heading, and whatever is left
    after removing those markers is parsed as the level. Otherwise, when the
    paragraph consists of exactly one run, that run's font size in points is
    matched against the ``[lower, upper)`` bands in ``title_font_list`` and
    the 1-based position of the matching band is the level.

    :param paragraph: the paragraph to inspect.
    :return: the heading level as an ``int``, or ``None`` when no level can be
        determined.
    """
    try:
        style = paragraph.style
        if style is not None:
            style_name = style.name
            if style_name.startswith(('Heading', 'TOC 标题', '标题')):
                level_text = style_name.replace("Heading ", '').replace('TOC 标题', '').replace('标题', '')
                # A heading style without a numeric suffix (e.g. a bare
                # "标题") makes int() raise ValueError; the handler below
                # reports that as "no level".
                return int(level_text)

        runs = paragraph.runs
        if len(runs) != 1:
            return None

        font_size = runs[0].font.size
        if font_size is None:
            # No size is set directly on the run, so there is nothing to
            # compare against the bands.
            return None

        pt = font_size.pt
        for level, (lower, upper) in enumerate(title_font_list, start=1):
            if lower <= pt < upper:
                return level
    except Exception:
        # Deliberate best-effort: any unexpected document structure means
        # "not a title" rather than a failure of the whole conversion.
        return None
    return None
140
+
141
+
113
142
class DocSplitHandle (BaseSplitHandle ):
114
143
@staticmethod
115
144
def paragraph_to_md (paragraph : Paragraph , doc : Document , images_list , get_image_id ):
116
145
try :
117
- psn = paragraph .style .name
118
- if psn .startswith ('Heading' ) or psn .startswith ('TOC 标题' ) or psn .startswith ('标题' ):
119
- title = "" .join (["#" for i in range (
120
- int (psn .replace ("Heading " , '' ).replace ('TOC 标题' , '' ).replace ('标题' ,
121
- '' )))]) + " " + paragraph .text
146
+ title_level = get_title_level (paragraph )
147
+ if title_level is not None :
148
+ title = "" .join (["#" for i in range (title_level )]) + " " + paragraph .text
122
149
images = reduce (lambda x , y : [* x , * y ],
123
150
[get_paragraph_element_images (e , doc , images_list , get_image_id ) for e in
124
151
paragraph ._element ],
125
152
[])
126
-
127
153
if len (images ) > 0 :
128
154
return title + '\n ' + images_to_string (images , doc , images_list , get_image_id ) if len (
129
155
paragraph .text ) > 0 else images_to_string (images , doc , images_list , get_image_id )
130
156
return title
157
+
131
158
except Exception as e :
132
159
traceback .print_exc ()
133
160
return paragraph .text
0 commit comments