Skip to content

Commit d52304b

Browse files
authored
Merge pull request #3 from BioinfoMachineLearning/develop
[Bugfix] Fix an issue where re-running the inference pipeline could result in a file-not-found error
2 parents 20ea17c + edd23fd commit d52304b

File tree

1 file changed

+17
-3
lines changed

1 file changed

+17
-3
lines changed

project/utils/deepinteract_utils.py

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -721,13 +721,27 @@ def convert_input_pdb_files_to_pair(left_pdb_filepath: str, right_pdb_filepath:
721721
pruned_dataset=os.path.join(input_dataset_dir, 'interim', 'parsed'),
722722
hhsuite_db=hhsuite_db,
723723
output_dir=os.path.join(input_dataset_dir, 'interim', 'external_feats'))
724-
# Only a single pair file is produced in this case
725-
pair_filepath = launch_postprocessing_of_pruned_pairs(
724+
# Postprocess any pruned pairs that have not already been postprocessed
725+
pair_filepaths = launch_postprocessing_of_pruned_pairs(
726726
raw_pdb_dir=os.path.join(input_dataset_dir, 'raw'),
727727
pruned_pairs_dir=os.path.join(input_dataset_dir, 'interim', 'pairs'),
728728
external_feats_dir=os.path.join(input_dataset_dir, 'interim', 'external_feats'),
729729
output_dir=os.path.join(input_dataset_dir, 'final', 'raw')
730-
)[0]
730+
)
731+
if len(pair_filepaths) > 0:
732+
# Retrieve the filepath of the single input pair produced in this case
733+
pair_filepath = pair_filepaths[0]
734+
else:
735+
# Manually construct the already-postprocessed input pair's filepath since no pairs needed postprocessing
736+
pruned_pairs_dir = os.path.join(input_dataset_dir, 'interim', 'pairs')
737+
output_dir = os.path.join(input_dataset_dir, 'final', 'raw')
738+
produced_filenames = db.get_structures_filenames(output_dir, extension='.dill')
739+
produced_keys = [db.get_pdb_name(x) for x in produced_filenames if db.get_pdb_name(x) in left_pdb_filepath]
740+
pdb_filename = [os.path.join(pruned_pairs_dir, db.get_pdb_code(key)[1:3], key)
741+
for key in produced_keys][0]
742+
sub_dir = output_dir + '/' + db.get_pdb_code(pdb_filename)[1:3]
743+
pair_filepath = sub_dir + '/' + db.get_pdb_name(pdb_filename)
744+
# Impute any missing feature values in the postprocessed input pairs
731745
impute_missing_feature_values(output_dir=os.path.join(input_dataset_dir, 'final', 'raw'))
732746
# Load preprocessed pair
733747
with open(pair_filepath, 'rb') as f:

0 commit comments

Comments
 (0)