Diff summary: 1 file changed, +3 −1 lines.

@@ -3429,6 +3429,7 @@ def load_model(self) -> None:
34293429 with HabanaMemoryProfiler () as m_inc :
34303430 from neural_compressor .torch .quantization import (FP8Config , convert , prepare )
34313431 config = FP8Config .from_json_file (os .getenv ("QUANT_CONFIG" , "" ))
3432+ disable_mark_scales_as_const = os .getenv ("VLLM_DISABLE_MARK_SCALES_AS_CONST" , "false" ) in ("1" , "true" )
34323433 self ._inc_preprocess ()
34333434 if config .measure :
34343435 self .model = prepare (self .model , config )
@@ -3437,7 +3438,8 @@ def load_model(self) -> None:
34373438 else :
34383439 raise ValueError ("Unknown quantization config mode,"
34393440 "please validate quantization config file" )
3440- htcore .hpu_initialize (self .model , mark_only_scales_as_const = True )
3441+ if not disable_mark_scales_as_const :
3442+ htcore .hpu_initialize (self .model , mark_only_scales_as_const = True )
34413443 self .inc_initialized_successfully = True
34423444 self .model_memory_usage = m_inc .consumed_device_memory
34433445 logger .info ("Preparing model with INC took %.4f GB" , self .model_memory_usage / float (2 ** 30 ))
You can’t perform that action at this time.
0 commit comments