2323from foldingdiff import tmalign
2424
2525# :)
26- SEED = int (float .fromhex ("2254616977616e2069732061206672656520636f756e74727922" ) % 10000 )
26+ SEED = int (
27+ float .fromhex ("2254616977616e2069732061206672656520636f756e74727922" ) % 10000
28+ )
29+
2730
2831def int_getter (x : str ) -> int :
2932 """Fetches integer value out of a string"""
@@ -35,7 +38,10 @@ def int_getter(x: str) -> int:
3538def get_pairwise_tmscores (
3639 fnames : Collection [str ], sctm_scores_json : Optional [str ] = None
3740) -> pd .DataFrame :
38- """Get the pairwise TM scores across all fnames"""
41+ """
42+ Get the pairwise TM scores across all fnames. If sctm_scores_json is given,
43+ the fnames are filtered down to those with passing scTM scores.
44+ """
3945 logging .info (f"Computing pairwise distances between { len (fnames )} pdb files" )
4046
4147 bname_getter = lambda x : os .path .splitext (os .path .basename (x ))[0 ]
@@ -74,7 +80,13 @@ def build_parser():
7480 parser .add_argument (
7581 "--sctm" , type = str , required = False , default = "" , help = "scTM scores to filter by"
7682 )
77- parser .add_argument ("-o" , "--output" , type = str , default = "tmscore_hclust.pdf" , help = "PDF file to write output clustering plot" )
83+ parser .add_argument (
84+ "-o" ,
85+ "--output" ,
86+ type = str ,
87+ default = "tmscore_hclust.pdf" ,
88+ help = "PDF file to write output clustering plot" ,
89+ )
7890 return parser
7991
8092
@@ -98,7 +110,9 @@ def main():
98110 seq_trim_strategy = "discard" ,
99111 )
100112 rng = np .random .default_rng (SEED )
101- idx = rng .choice (len (test_subset .filenames ), size = args .testsubset , replace = False )
113+ idx = rng .choice (
114+ len (test_subset .filenames ), size = args .testsubset , replace = False
115+ )
102116 fnames = [test_subset .filenames [i ] for i in idx ]
103117 else :
104118 raise NotImplementedError
0 commit comments