@@ -490,7 +490,7 @@ def insert_lines_into_block(block_bbox, line_height, page_w, page_h):
490
490
return [[x0 , y0 , x1 , y1 ]]
491
491
492
492
493
- def sort_lines_by_model (fix_blocks , page_w , page_h , line_height ):
493
+ def sort_lines_by_model (fix_blocks , page_w , page_h , line_height , footnote_blocks ):
494
494
page_line_list = []
495
495
496
496
def add_lines_to_block (b ):
@@ -519,6 +519,10 @@ def add_lines_to_block(b):
519
519
block ['real_lines' ] = copy .deepcopy (block ['lines' ])
520
520
add_lines_to_block (block )
521
521
522
+ for block in footnote_blocks :
523
+ footnote_block = {'bbox' : block [:4 ]}
524
+ add_lines_to_block (footnote_block )
525
+
522
526
if len (page_line_list ) > 200 : # layoutreader supports at most 512 lines
523
527
return None
524
528
@@ -779,7 +783,7 @@ def merge_two_blocks(b1, b2):
779
783
# The interline_equation_blocks parameter is not accurate enough; switch to interline_equations later
780
784
interline_equation_blocks = []
781
785
if len (interline_equation_blocks ) > 0 :
782
- all_bboxes , all_discarded_blocks = ocr_prepare_bboxes_for_layout_split_v2 (
786
+ all_bboxes , all_discarded_blocks , footnote_blocks = ocr_prepare_bboxes_for_layout_split_v2 (
783
787
img_body_blocks , img_caption_blocks , img_footnote_blocks ,
784
788
table_body_blocks , table_caption_blocks , table_footnote_blocks ,
785
789
discarded_blocks ,
@@ -790,7 +794,7 @@ def merge_two_blocks(b1, b2):
790
794
page_h ,
791
795
)
792
796
else :
793
- all_bboxes , all_discarded_blocks = ocr_prepare_bboxes_for_layout_split_v2 (
797
+ all_bboxes , all_discarded_blocks , footnote_blocks = ocr_prepare_bboxes_for_layout_split_v2 (
794
798
img_body_blocks , img_caption_blocks , img_footnote_blocks ,
795
799
table_body_blocks , table_caption_blocks , table_footnote_blocks ,
796
800
discarded_blocks ,
@@ -866,7 +870,7 @@ def merge_two_blocks(b1, b2):
866
870
line_height = get_line_height (fix_blocks )
867
871
868
872
"""获取所有line并对line排序"""
869
- sorted_bboxes = sort_lines_by_model (fix_blocks , page_w , page_h , line_height )
873
+ sorted_bboxes = sort_lines_by_model (fix_blocks , page_w , page_h , line_height , footnote_blocks )
870
874
871
875
"""根据line的中位数算block的序列关系"""
872
876
fix_blocks = cal_block_index (fix_blocks , sorted_bboxes )
0 commit comments