1+ from operator import itemgetter
2+ # import numpy as np
3+
def sort_by_fuzzy(query, choices):
    """Order *choices* from best to worst fuzzy match against *query*.

    Each choice is scored with ``levenshtein_ratio(query, choice)`` and the
    choices are returned highest score first.  Choices with equal scores
    keep their input order, and repeated choices are all kept.

    Args:
        query: The text to match; if falsy, no ranking is done.
        choices: The candidate strings to rank.

    Returns:
        *choices* itself when *query* or *choices* is falsy, otherwise a new
        list containing every choice, sorted by descending score.
    """
    if not query or not choices:
        return choices

    def score(choice):
        return levenshtein_ratio(query, choice)

    # Sort the choices directly rather than through a dict keyed by choice:
    # a dict would collapse duplicate choices and require them to be
    # hashable.  sorted() is stable even with reverse=True, so choices with
    # equal scores stay in their original relative order.
    return sorted(choices, key=score, reverse=True)
13+
def levenshtein_ratio(s, t):
    """Return a case-insensitive Levenshtein similarity ratio of *s* and *t*.

    The edit distance counts insertions, deletions and substitutions at a
    cost of 1 each, comparing characters after lower-casing them one at a
    time.  The ratio is ``(len(s) + len(t) - distance) / (len(s) + len(t))``.

    When *s* is shorter than *t*, *s* is additionally compared against the
    first ``len(s)`` characters of *t* and the better of the two ratios is
    returned, so a string that is a prefix of *t* scores 1.0.  Because of
    this the function is not symmetric in its arguments.

    Empty inputs are handled explicitly: two empty strings give 1.0, an
    empty *t* with a non-empty *s* gives 0.0, and an empty *s* (a prefix of
    any string) gives 1.0.

    Args:
        s: The string being matched.
        t: The string it is matched against.

    Returns:
        A float between 0.0 and 1.0; 1.0 means a perfect (or perfect-prefix)
        match.
    """
    # Lower-case character by character (not the whole string) so each
    # sequence keeps exactly one entry per original character.
    s_chars = [ch.lower() for ch in s]
    t_chars = [ch.lower() for ch in t]
    s_len = len(s_chars)
    t_len = len(t_chars)
    total = s_len + t_len
    if total == 0:
        # Both strings are empty: they are identical, and the ratio formula
        # below would otherwise divide by zero.
        return 1.0

    # Only the last row of the distance matrix is ever read, so keep two
    # rows instead of the full matrix.  Row 0 is the cost of building each
    # prefix of t from the empty string: 0, 1, ..., len(t).
    previous = list(range(t_len + 1))
    for i, s_ch in enumerate(s_chars, start=1):
        # Column 0: reducing the first i characters of s to "" takes i
        # deletions.
        current = [i]
        for j, t_ch in enumerate(t_chars, start=1):
            # A substitution costs 1; matching characters cost nothing.
            cost = 0 if s_ch == t_ch else 1
            current.append(min(
                previous[j] + 1,         # deletion
                current[j - 1] + 1,      # insertion
                previous[j - 1] + cost,  # substitution or match
            ))
        previous = current

    # previous[j] is now the distance between all of s and the first j
    # characters of t.
    ratio = (total - previous[t_len]) / total
    if s_len < t_len:
        # Prefix bonus: also score s against the first len(s) characters
        # of t and keep whichever ratio is higher.
        ratio = max(ratio, (total - previous[s_len]) / total)
    return ratio
53+
54+ # if __name__ == '__main__':
55+ # print(levenshtein_ratio('test', 'test'))
56+ # print(levenshtein_ratio('test', 'test tt'))
57+ # print(levenshtein_ratio('test', 'taebsct'))
58+ # print(levenshtein_ratio('test', 'tabtest'))
59+ # print(levenshtein_ratio('test', 'tst tt'))
60+ # print(sort_by_fuzzy('def', ['advanced_new_file', 'AdvancedNewFile.py', 'AdvancedNewFile.sublime-settings', 'Default (Linux).sublime-keymap', 'Default (OSX).sublime-keymap', 'Default (Windows).sublime-keymap', 'Default.sublime-commands']))
0 commit comments