3030from pytorch_lightning .loggers import TensorBoardLogger , WandbLogger
3131
3232from project .utils .deepinteract_constants import FEAT_COLS , ALLOWABLE_FEATS , D3TO1
33- from project .utils .dips_plus_utils import postprocess_pruned_pairs
33+ from project .utils .dips_plus_utils import postprocess_pruned_pairs , impute_postprocessed_missing_feature_values
3434from project .utils .graph_utils import prot_df_to_dgl_graph_feats
3535from project .utils .protein_feature_utils import GeometricProteinFeatures
3636
@@ -573,9 +573,11 @@ def create_input_dir_struct(input_dataset_dir: str, pdb_code: str):
573573 _ , _ = dir_struct_create_proc .communicate () # Wait until the directory structure creation cmd is finished
574574
575575
576- def copy_input_to_raw_dir (input_dataset_dir : str , pdb_filepath : str , pdb_code : str ):
576+ def copy_input_to_raw_dir (input_dataset_dir : str , pdb_filepath : str , pdb_code : str , chain_indic : str ):
577577 """Make a copy of the input PDB file in the newly-created raw directory."""
578- input_copy_cmd = f'cp { pdb_filepath } { os .path .join (input_dataset_dir , "raw" , pdb_code )} '
578+ filename = db .get_pdb_code (pdb_filepath ) + f'_{ chain_indic } .pdb' \
579+ if chain_indic not in pdb_filepath else db .get_pdb_name (pdb_filepath )
580+ input_copy_cmd = f'cp { pdb_filepath } { os .path .join (input_dataset_dir , "raw" , pdb_code , filename )} '
579581 input_copy_proc = subprocess .Popen (input_copy_cmd .split (), stdout = subprocess .PIPE , cwd = os .getcwd ())
580582 _ , _ = input_copy_proc .communicate () # Wait until the input copy cmd is finished
581583
@@ -590,6 +592,7 @@ def make_dataset(input_dataset_dir='datasets/Input/raw', output_dir='datasets/In
590592 pa .parse_all (input_dataset_dir , parsed_dir , num_cpus )
591593
592594 complexes_dill = os .path .join (output_dir , 'complexes/complexes.dill' )
595+ os .remove (complexes_dill ) # Ensure that pairs are made every time this function is called
593596 comp .complexes (parsed_dir , complexes_dill , source_type )
594597 complexes = comp .read_complexes (complexes_dill )
595598 pairs_dir = os .path .join (output_dir , 'pairs' )
@@ -697,7 +700,7 @@ def impute_missing_feature_values(output_dir='datasets/Input/final/raw',
697700 inputs = [(pair_filename .as_posix (), pair_filename .as_posix (), impute_atom_features , advanced_logging )
698701 for pair_filename in Path (output_dir ).rglob ('*.dill' )]
699702 # Without impute_atom_features set to True, non-CA atoms will be filtered out after writing updated pairs
700- par .submit_jobs (impute_missing_feature_values , inputs , num_cpus )
703+ par .submit_jobs (impute_postprocessed_missing_feature_values , inputs , num_cpus )
701704
702705
703706def convert_input_pdb_files_to_pair (left_pdb_filepath : str , right_pdb_filepath : str , input_dataset_dir : str ,
@@ -707,8 +710,8 @@ def convert_input_pdb_files_to_pair(left_pdb_filepath: str, right_pdb_filepath:
707710 pdb_code = db .get_pdb_group (list (ca .get_complex_pdb_codes ([left_pdb_filepath , right_pdb_filepath ]))[0 ])
708711 # Iteratively execute the PDB file feature generation process
709712 create_input_dir_struct (input_dataset_dir , pdb_code )
710- copy_input_to_raw_dir (input_dataset_dir , left_pdb_filepath , pdb_code )
711- copy_input_to_raw_dir (input_dataset_dir , right_pdb_filepath , pdb_code )
713+ copy_input_to_raw_dir (input_dataset_dir , left_pdb_filepath , pdb_code , 'l_u' )
714+ copy_input_to_raw_dir (input_dataset_dir , right_pdb_filepath , pdb_code , 'r_u' )
712715 make_dataset (os .path .join (input_dataset_dir , 'raw' ), os .path .join (input_dataset_dir , 'interim' ))
713716 generate_psaia_features (psaia_dir = psaia_dir ,
714717 psaia_config = psaia_config ,
0 commit comments