File tree: 1 file changed, +2 -0 lines changed
unstructured/partition/pdf_image: 1 file changed, +2 -0 lines changed
Original file line number | Diff line number | Diff line change
@@ -128,6 +128,7 @@ def _merge_extracted_into_inferred_when_almost_the_same(
128 | 128 |   extracted_to_remove = extracted_layout.slice(extracted_almost_the_same_as_inferred)
129 | 129 |   # copy here in case we change the extracted layout later
130 | 130 |   inferred_layout.texts[inferred_indices_to_update] = extracted_to_remove.texts.copy()
    | 131 | + inferred_layout.is_extracted_array[inferred_indices_to_update] = extracted_to_remove.is_extracted_array.copy()
131 | 132 |   # use coords that can bound BOTH the inferred and extracted region as final bounding box coords
132 | 133 |   inferred_layout.element_coords[inferred_indices_to_update] = _minimum_containing_coords(
133 | 134 |       inferred_layout.slice(inferred_indices_to_update),
@@ -426,6 +427,7 @@ def process_page_layout_from_pdfminer(
426 | 427 |       element_class_ids=np.array(element_class),
427 | 428 |       element_class_id_map={0: ElementType.UNCATEGORIZED_TEXT, 1: ElementType.IMAGE},
428 | 429 |       sources=np.array([Source.PDFMINER] * len(element_class)),
    | 430 | +     is_extracted_array=np.array([IsExtracted.TRUE] * len(element_class)),
429 | 431 |   ),
430 | 432 |   urls_metadata,
431 | 433 | )
You can’t perform that action at this time.
0 commit comments