|
@@ -23,7 +23,7 @@ def load_model(model_name, quantization, use_fast_kernels, **kwargs):
|
|
|
|
|
|
kwargs = {}
|
|
kwargs = {}
|
|
if bnb_config:
|
|
if bnb_config:
|
|
- kwargs["quantization_config"]=bnb_config,
|
|
|
|
|
|
+ kwargs["quantization_config"]=bnb_config
|
|
kwargs["device_map"]="auto"
|
|
kwargs["device_map"]="auto"
|
|
kwargs["low_cpu_mem_usage"]=True
|
|
kwargs["low_cpu_mem_usage"]=True
|
|
kwargs["attn_implementation"]="sdpa" if use_fast_kernels else None
|
|
kwargs["attn_implementation"]="sdpa" if use_fast_kernels else None
|