@@ -56,7 +56,7 @@ def __init__(self, name):
5656 self ._id = int (self )
5757
5858 def __repr__ (self ):
59- return "Task('{}')" .format (self ._name )
59+ return "Task('{}')" .format (self .name )
6060
6161
6262class BaseModel (object ):
@@ -157,31 +157,32 @@ def _validate(self, input):
157157 name , var_type = feature ['name' ], feature ['type' ]
158158 default = feature .get ('default' , None )
159159 categories = feature .get ('categories' , None )
160+ accepts_missing = feature .get ('accepts_missing' , True )
160161 if name not in df .columns :
161162 df [name ] = default or np .nan
162163 else :
164+ has_missing = df [name ].isnull ().any ()
165+ if has_missing and not accepts_missing :
166+ raise ValueError (f'Feature { name } has unexpected missing values' )
163167 if var_type == 'numeric' :
164168 var_type = float
165169 elif var_type == 'string' :
166170 var_type = str
167171 elif var_type == 'category' :
168172 if categories is not None :
169173 var_type = CategoricalDtype (categories = categories , ordered = True )
170- new_cat = set (df [name ].unique ()).difference (categories )
174+ new_cat = set (df [name ].dropna (). unique ()).difference (categories )
171175 if len (new_cat ):
172- msg = 'Unexpected categorical value for {}: {}' .format (name , new_cat )
173- raise ValueError (msg )
176+ raise ValueError (f'Unexpected categorical value for { name } : { new_cat } ' )
174177 else :
175- msg = 'Missing "categories" for "{}" in metadata' .format (name )
176- raise ValueError (msg )
178+ raise ValueError (f'Missing "categories" for "{ name } " in metadata' )
177179 else :
178- msg = 'Unknown variable type: {}' .format (var_type )
179- raise ValueError (msg )
180+ raise ValueError (f'Unknown variable type: { var_type } ' )
180181
181182 if default is None :
182- df [name ] = df [name ].astype (var_type )
183+ df [name ] = df [name ].astype (var_type )
183184 else :
184- df [name ] = df [name ].fillna (default ).astype (var_type )
185+ df [name ] = df [name ].fillna (default ).astype (var_type )
185186 # TO DO: add more validation logic
186187 return df
187188
0 commit comments