@@ -44,6 +44,19 @@ function DictDB(x::CharacterNGrams)
4444end
4545
4646
"""
    generate_base_dict_db(x)

Internal function for generating a base `DictDB` object for `WordNGrams` and
`MecabNGrams`.

# Arguments
* `x`: object passed through unchanged as the first argument of the `DictDB` constructor

# Returns
* `DictDB`: built from `x`, an empty `String[]`, and three empty default dictionaries
  keyed by `Int`
"""
function generate_base_dict_db(x)
    # Type shorthands shared by the containers below.
    StringSet = Set{String}
    FeatureKey = Tuple{SubArray{SubString{String}},Int}
    OrderedInner = DefaultOrderedDict{FeatureKey,StringSet}
    PlainInner = DefaultDict{FeatureKey,StringSet}

    strings = String[]
    # Missing Int keys get a fresh empty Set{String}.
    by_size = DefaultDict{Int,StringSet}(() -> StringSet())
    # Missing Int keys get a fresh inner dictionary whose own default is `Set{String}`.
    ordered_by_size = DefaultDict{Int,OrderedInner}(() -> OrderedInner(StringSet))
    plain_by_size = DefaultDict{Int,PlainInner}(() -> PlainInner(StringSet))

    return DictDB(x, strings, by_size, ordered_by_size, plain_by_size)
end
59+
4760"""
4861 DictDB(x::WordNGrams)
4962
@@ -60,15 +73,28 @@ db = DictDB(WordNGrams(2, " ", " "))
6073# Returns
6174* `DictDB`: A DictDB object with additional containers and Metadata for WordNGrams
6275"""
function DictDB(x::WordNGrams)
    # All container setup lives in `generate_base_dict_db`; this method only dispatches.
    return generate_base_dict_db(x)
end
77+
78+
79+
"""
    DictDB(x::MecabNGrams)

Initialize a dict DB with additional containers and Metadata for MecabNGrams.

# Arguments
* `x`: MecabNGrams object

# Example
```julia
db = DictDB(MecabNGrams(2, " ", Mecab()))
```

# Returns
* `DictDB`: A DictDB object with additional containers and Metadata for MecabNGrams
"""
function DictDB(x::MecabNGrams)
    # All container setup lives in `generate_base_dict_db`; this method only dispatches.
    return generate_base_dict_db(x)
end
97+
7298
7399
74100
@@ -96,20 +122,20 @@ describe_collection(db)
96122"""
function describe_collection(db::DictDB)
    # Number of strings held in the collection
    string_count = length(db.string_collection)

    # Unweighted mean over the distinct keys of the size map
    # (an empty map yields 0 / 0 == NaN)
    size_keys = collect(keys(db.string_size_map))
    mean_size = sum(size_keys) / length(size_keys)

    # Sum of the lengths of every value stored in the feature map
    ngram_count = mapreduce(length, +, values(db.string_feature_map); init=0)

    return (
        total_collection=string_count,
        avg_size_ngrams=mean_size,
        total_ngrams=ngram_count,
    )
end
114140
115141
0 commit comments