
Commit 8d0c377

update tutorial and api references
1 parent be1aef1 commit 8d0c377

11 files changed: +355 -78 lines changed


EduNLP/I2V/i2v.py

Lines changed: 26 additions & 1 deletion
@@ -97,19 +97,43 @@ def __call__(self, items, *args, **kwargs):
         return self.infer_vector(items, *args, **kwargs)
 
     def tokenize(self, items, *args, key=lambda x: x, **kwargs) -> list:
-        # """tokenize item"""
+        """
+        tokenize item
+
+        Parameter
+        ----------
+        items: a list of questions
+
+        Return
+        ----------
+        tokens: list
+        """
         return self.tokenizer(items, *args, key=key, **kwargs)
 
     def infer_vector(self, items, key=lambda x: x, **kwargs) -> tuple:
+        """
+        get question embedding
+
+        NotImplemented
+        """
         raise NotImplementedError
 
     def infer_item_vector(self, tokens, *args, **kwargs) -> ...:
+        """NotImplemented"""
         return self.infer_vector(tokens, *args, **kwargs)[0]
 
     def infer_token_vector(self, tokens, *args, **kwargs) -> ...:
+        """NotImplemented"""
         return self.infer_vector(tokens, *args, **kwargs)[1]
 
     def save(self, config_path):
+        """
+        save model weights in config_path
+
+        Parameter:
+        ----------
+        config_path: str
+        """
         with open(config_path, "w", encoding="utf-8") as wf:
             json.dump(self.params, wf, ensure_ascii=False, indent=2)
 
@@ -126,6 +150,7 @@ def load(cls, config_path, *args, **kwargs):
 
     @classmethod
     def from_pretrained(cls, name, model_dir=MODEL_DIR, *args, **kwargs):
+        """NotImplemented"""
         raise NotImplementedError
 
     @property
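
A minimal usage sketch of the I2V interface documented above. `ConcreteI2V` and its constructor arguments are hypothetical stand-ins for any subclass that implements `infer_vector`; the base class itself raises NotImplementedError.

# Hypothetical sketch: ConcreteI2V stands for any I2V subclass that implements
# infer_vector(); the base class above leaves it as NotImplementedError.
items = ["question text 1", "question text 2"]

i2v = ConcreteI2V(...)                      # hypothetical subclass and constructor arguments
tokens = i2v.tokenize(items)                # tokenize(): one token list per question
item_vectors, token_vectors = i2v(items)    # __call__ delegates to infer_vector(items)
i2v.save("i2v_config.json")                 # save(): dumps self.params to config_path as JSON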

EduNLP/Vector/disenqnet/disenqnet.py

Lines changed: 10 additions & 0 deletions
@@ -44,6 +44,16 @@ def infer_vector(self, items: dict, vector_type=None, **kwargs) -> torch.Tensor:
 
     def infer_tokens(self, items: dict, **kwargs) -> torch.Tensor:
         embeded, _, _ = self(items)
+        """
+        get tokens embedding with DisenQModel
+
+        Parameters
+        ----------
+        items: dict, {'content_idx': tensor(),'content_len': tensor()}, the tokens about question items after tokenizer processing
+
+        Returns:
+        torch.Tensor: token embedding
+        """
         return embeded
 
     @property
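
A sketch of the input layout described by the new `infer_tokens` docstring. Only the `'content_idx'`/`'content_len'` keys come from the docstring; the tensor shapes and the `disen_q_net` handle are illustrative assumptions.

import torch

# Illustrative batch: 2 tokenized questions padded to length 6 (shapes assumed).
items = {
    "content_idx": torch.randint(0, 100, (2, 6)),  # token ids after tokenizer processing
    "content_len": torch.tensor([6, 4]),           # true (unpadded) lengths
}
# token_embs = disen_q_net.infer_tokens(items)     # hypothetical model handle -> token embeddings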

EduNLP/Vector/elmo_vec.py

Lines changed: 20 additions & 0 deletions
@@ -21,6 +21,16 @@ def __call__(self, items: dict):
         return outputs
 
     def infer_vector(self, items: dict, **kwargs) -> torch.Tensor:
+        """
+        get sentence vector embedding with ElmoModel
+
+        Parameters
+        ----------
+        items: dict, {'seq_idx': tensor(),'seq_len':tensor()}, the tokens about question after tokenizer processing
+
+        Returns:
+        torch.Tensor: sentence embedding
+        """
         outputs = self(items)
         item_embeds = torch.cat(
             (outputs.forward_output[torch.arange(len(items["seq_len"])), torch.tensor(items["seq_len"]) - 1],
@@ -29,6 +39,16 @@ def infer_vector(self, items: dict, **kwargs) -> torch.Tensor:
         return item_embeds
 
     def infer_tokens(self, items, **kwargs) -> torch.Tensor:
+        """
+        get tokens embedding with ElmoModel
+
+        Parameters
+        ----------
+        items: dict, {'seq_idx': tensor()}, the tokens about question after tokenizer processing
+
+        Returns:
+        torch.Tensor: token embedding
+        """
         outputs = self(items)
         forward_hiddens = outputs.forward_output
         backward_hiddens = outputs.backward_output
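
The matching input layout for the ElmoModel methods documented above. The `'seq_idx'`/`'seq_len'` keys follow the docstrings; the shapes and the `elmo` handle are assumptions.

import torch

# Illustrative batch: 2 tokenized questions padded to length 8 (shapes assumed).
items = {
    "seq_idx": torch.randint(0, 100, (2, 8)),  # token ids after tokenizer processing
    "seq_len": [8, 5],                          # true lengths; infer_vector wraps this in torch.tensor
}
# item_embeds = elmo.infer_vector(items)        # one sentence vector per question (forward + backward states)
# token_embeds = elmo.infer_tokens(items)       # one vector per token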

EduNLP/Vector/gensim_vec.py

Lines changed: 69 additions & 0 deletions
@@ -66,11 +66,36 @@ def __getitem__(self, item):
         return self.wv[item] if index not in self.constants.values() else np.zeros((self.vector_size,))
 
     def infer_vector(self, items, agg="mean", **kwargs) -> list:
+        """
+        get sentence embedding with word2vec model
+
+        Parameters
+        ----------
+        item: list, the tokens after tokenizer processing
+
+        Return
+        ------
+        vector: list
+        [array(), ..., array()]
+        """
         token_vectors = self.infer_tokens(items, **kwargs)
         # return [eval("np.%s" % agg)(item, axis=0) if item else np.array([]) for item in token_vectors]
         return [eval("np.%s" % agg)(item, axis=0) if item else np.zeros(self.vector_size,) for item in token_vectors]
 
     def infer_tokens(self, items, **kwargs) -> list:
+        """
+        get token embedding with word2vec model
+
+        Parameters
+        ----------
+        item: list
+            the tokens after tokenizer processing
+
+        Return
+        ------
+        vector: list
+        [[array(), ..., array()], [...], [...]]
+        """
         return [list(self(*item)) for item in items]
 
 
@@ -95,6 +120,19 @@ def __init__(self, filepath):
         self.dictionary = corpora.Dictionary.load(filepath)
 
     def infer_vector(self, item, return_vec=False):
+        """
+        get Bow vector
+
+        Parameters
+        ----------
+        item: list
+            the tokens after tokenizer processing
+
+        Return
+        ------
+        vector: list
+        [array(), ..., array()]
+        """
         item = self.dictionary.doc2bow(item)
         if not return_vec:
             return item  # return dic as default
@@ -121,6 +159,19 @@ def __init__(self, filepath):
         self.dictionary = corpora.Dictionary.load(dictionary_path)
 
     def infer_vector(self, item, return_vec=False):
+        """
+        get Tf-idf vector
+
+        Parameters
+        ----------
+        item: list
+            the tokens after tokenizer processing
+
+        Return
+        ------
+        vector: list
+        [array(), ..., array()]
+        """
         dic_item = self.dictionary.doc2bow(item)
         tfidf_item = self.tfidf_model[dic_item]
         # return dic as default
@@ -181,7 +232,25 @@ def vector_size(self):
         return self.d2v.vector_size
 
     def infer_vector(self, items, *args, **kwargs) -> list:
+        """
+        get vector with D2V model
+
+        Parameters
+        ----------
+        item: list
+            the tokens after tokenizer processing
+
+        Return
+        ------
+        vector: list
+        [array(), ..., array()]
+        """
         return [self(item) for item in items]
 
     def infer_tokens(self, item, *args, **kwargs) -> ...:
+        """
+        get token embeddings with D2V
+
+        NotImplemented
+        """
         raise NotImplementedError
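
A hedged sketch tying together the four vectorizers documented above. The method names and return shapes follow the docstrings in this diff; the file paths and constructor arguments are placeholders.

# Placeholder paths and constructor arguments; the calls mirror the docstrings above.
tokens = [["triangle", "area", "formula"], ["solve", "the", "equation"]]

# w2v = W2V("path/to/w2v_model")                           # word2vec-backed vectorizer
# sentence_vectors = w2v.infer_vector(tokens, agg="mean")  # one averaged array per question
# token_vectors = w2v.infer_tokens(tokens)                 # nested lists of per-token arrays

# bow = BowLoader("path/to/dictionary")
# bow_vec = bow.infer_vector(tokens[0], return_vec=True)   # bag-of-words vector (dict form by default)

# tfidf = TfidfLoader("path/to/tfidf_model")
# tfidf_vec = tfidf.infer_vector(tokens[0], return_vec=True)

# d2v = D2V("path/to/d2v_model.bin")
# doc_vectors = d2v.infer_vector(tokens)                   # one doc2vec array per question; infer_tokens is NotImplemented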

EduNLP/Vector/t2v.py

Lines changed: 52 additions & 2 deletions
@@ -49,8 +49,10 @@ class T2V(object):
     ... 如图$\\FigureID{088f15ea-8b7c-11eb-897e-b46bfc50aa29}$,若$x,y$满足约束条件$\\SIFSep$,则$z=x+7 y$的最大值为$\\SIFBlank$'}]
     >>> model_dir = "examples/test_model/d2v"
     >>> url, model_name, *args = get_pretrained_model_info('d2v_test_256')
-    >>> (); path = get_data(url, model_dir); () # doctest: +ELLIPSIS
-    (...)
+    >>> path = get_data(url, model_dir); # doctest: +ELLIPSIS
+    downloader, INFO http://base.ustc.edu.cn/data/model_zoo/modelhub/doc2vec_pub/1/d2v_test_256.zip is saved as examples\test_model\d2v\d2v_test_256.zip
+    Downloading examples\test_model\d2v\d2v_test_256.zip 100.00%: 4.73MB | 4.73MB
+    downloader, INFO examples\test_model\d2v\d2v_test_256.zip is unzip to examples\test_model\d2v\d2v_test_256
     >>> path = path_append(path, os.path.basename(path) + '.bin', to_str=True)
     >>> t2v = T2V('d2v',filepath=path)
     >>> print(t2v(item))
@@ -69,9 +71,28 @@ def __call__(self, items, *args, **kwargs):
         return self.i2v.infer_vector(items, *args, **kwargs)
 
     def infer_vector(self, items, *args, **kwargs):
+        """
+        get question embedding with T2V
+
+        Parameters
+        ----------
+        items:list
+            a list of question
+
+        Returns
+        -------
+        vector:list
+            numpy.ndarray([dtype=float32)]
+
+        """
         return self.i2v.infer_vector(items, *args, **kwargs)
 
     def infer_tokens(self, items, *args, **kwargs):
+        """
+        get token embeddings with T2V
+
+        NotImplemented
+        """
         return self.i2v.infer_tokens(items, *args, **kwargs)
 
     @property
@@ -80,6 +101,27 @@ def vector_size(self) -> int:
 
 
 def get_pretrained_model_info(name):
+    """
+    get the pretrained model information with the given name
+
+    Parameters
+    ----------
+    name:str
+        select the pretrained model
+        e.g.:
+        d2v_math_300
+        w2v_math_300
+        elmo_math_2048
+        bert_math_768
+        bert_taledu_768
+        disenq_math_256
+        quesnet_math_512
+
+    Returns
+    --------
+    list: [model url (where to download), model name]
+
+    """
     url = MODELHUB_URL + 'getPretrainedModel'
     param = {'name': name}
     r = requests.get(url, params=param)
@@ -89,6 +131,14 @@ def get_pretrained_model_info(name):
 
 
 def get_all_pretrained_models():
+    """
+    get all pretrained models' name
+
+    Returns
+    -------
+    the pretrained models' name:list
+        e.g.['bert_bio_ptc', 'bert_geo_ptc', 'bert_math_768', ... ]
+    """
     url = MODELHUB_URL + 'getPretrainedModelList'
     r = requests.get(url)
     assert r.status_code == 200, r.status_code
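
A usage sketch of the model-hub helpers and the T2V wrapper documented above, following the doctest earlier in this file. The import path is an assumption, network access to MODELHUB_URL is required, and 'd2v_test_256' is simply the model name used in that doctest.

from EduNLP.Vector import T2V, get_pretrained_model_info, get_all_pretrained_models  # assumed import path

print(get_all_pretrained_models())                                   # e.g. ['bert_bio_ptc', 'bert_geo_ptc', ...]
url, model_name, *args = get_pretrained_model_info("d2v_test_256")   # [model url (where to download), model name]

# After downloading and unzipping `url` (see the doctest above), point T2V at the model file:
# t2v = T2V('d2v', filepath="examples/test_model/d2v/d2v_test_256/d2v_test_256.bin")
# vectors = t2v(items)                                               # __call__ -> infer_vector(items)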

docs/README.md

Lines changed: 6 additions & 1 deletion
@@ -3,26 +3,31 @@ EduNLP document and tutorial folder
 
 Requirements
 ------------
+
 See the requirements `docs_deps` in `setup.py`:
+
 ```sh
 pip install -e .[doc]
 ```
 
-
 Build documents
 ---------------
+
 First, clean up existing files:
+
 ```
 make clean
 ```
 
 Then build:
+
 ```
 make html
 ```
 
 Render locally
 --------------
+
 ```
 cd build/html
 python3 -m http.server 8000

docs/source/api/vector.rst

Lines changed: 2 additions & 2 deletions
@@ -10,13 +10,13 @@ EduNLP.Vector.t2v
 
 
 EduNLP.Vector.disenqnet
---------------------
+-------------------------
 
 .. automodule:: EduNLP.Vector.disenqnet.disenqnet
     :members:
 
 EduNLP.Vector.quesnet
---------------------
+-------------------------
 
 .. automodule:: EduNLP.Vector.quesnet.quesnet
     :members:

docs/source/conf.py

Lines changed: 2 additions & 0 deletions
@@ -114,3 +114,5 @@ def copy_tree(src, tar):
     'undoc-members': True,
 }
 autodoc_member_order = 'bysource'
+
+nbsphinx_allow_errors = True

docs/source/tutorial/zh/pipeline.rst

Lines changed: 3 additions & 5 deletions
@@ -2,9 +2,7 @@
 流水线
 =======
 
-.. nbgallery::
-    :caption: This is a thumbnail gallery:
-    :name: pipleine_gallery
-    :glob:
+.. nbinfo::
+    notebook:
 
-    流水线 <../../build/blitz/pipeline/pipeline.ipynb>
+    `流水线 <../../build/blitz/pipeline/pipeline.ipynb>`_
