File tree Expand file tree Collapse file tree 1 file changed +3
-2
lines changed
unstructured/partition/pdf_image Expand file tree Collapse file tree 1 file changed +3
-2
lines changed Original file line number Diff line number Diff line change 88from pdfminer .pdftypes import PDFObjRef
99from pdfminer .utils import open_filename
1010from unstructured_inference .config import inference_config
11- from unstructured_inference .constants import FULL_PAGE_REGION_THRESHOLD
11+ from unstructured_inference .constants import FULL_PAGE_REGION_THRESHOLD , IsExtracted
1212from unstructured_inference .inference .elements import Rectangle
1313
1414from unstructured .documents .coordinates import PixelSpace , PointSpace
@@ -647,13 +647,14 @@ def merge_inferred_with_extracted_layout(
647647 merged_layout = sort_text_regions (merged_layout , SORT_MODE_BASIC )
648648 # so that we can modify the text without worrying about hitting length limit
649649 merged_layout .texts = merged_layout .texts .astype (object )
650-
650+ merged_layout . is_extracted_array = merged_layout . is_extracted_array . astype ( object )
651651 for i , text in enumerate (merged_layout .texts ):
652652 if text is None :
653653 text = aggregate_embedded_text_by_block (
654654 target_region = merged_layout .slice ([i ]),
655655 source_regions = extracted_page_layout ,
656656 )
657+ merged_layout .is_extracted_array [i ] = IsExtracted .TRUE
657658 merged_layout .texts [i ] = remove_control_characters (text )
658659
659660 inferred_page .elements_array = merged_layout
You can’t perform that action at this time.
0 commit comments