model_utils.py

# Copyright (c) Meta Platforms, Inc. and affiliates.
# This software may be used and distributed according to the terms of the GNU General Public License version 3.

from warnings import warn

from llama_recipes.configs import quantization_config as QUANT_CONFIG
from llama_recipes.utils.config_utils import update_config
from peft import PeftModel
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    LlamaConfig,
    LlamaForCausalLM,
    MllamaConfig,
    MllamaForConditionalGeneration,
)

# Function to load the main model for text generation
def load_model(model_name, quantization, use_fast_kernels, **kwargs):
    if isinstance(quantization, bool):
        warn(
            "Quantization (--quantization) is a boolean, please specify quantization as '4bit' or '8bit'. Defaulting to '8bit' but this might change in the future.",
            FutureWarning,
        )
        quantization = "8bit"

    bnb_config = None
    if quantization:
        quant_config = QUANT_CONFIG()
        update_config(quant_config, **kwargs)
        bnb_config = quant_config.create_bnb_config(quantization)

    print(f"use_fast_kernels: {use_fast_kernels}")

    # Rebind kwargs: the caller's overrides were consumed by update_config above;
    # from here on, kwargs holds the arguments forwarded to from_pretrained.
    kwargs = {}
    if bnb_config:
        kwargs["quantization_config"] = bnb_config
    kwargs["device_map"] = "auto"
    kwargs["low_cpu_mem_usage"] = True
    kwargs["attn_implementation"] = "sdpa" if use_fast_kernels else None
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        return_dict=True,
        **kwargs,
    )
    return model
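
# Example usage (a sketch, not part of the original file): the model id and
# flags below are illustrative assumptions.
#
#   model = load_model(
#       "meta-llama/Llama-3.1-8B-Instruct", quantization="4bit", use_fast_kernels=True
#   )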

# Function to load a PEFT adapter (e.g. LoRA weights) on top of the base model
def load_peft_model(model, peft_model):
    peft_model = PeftModel.from_pretrained(model, peft_model)
    return peft_model
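
# Example usage (a sketch): "path/to/adapter" is a hypothetical directory of
# adapter weights saved by a PEFT fine-tuning run.
#
#   model = load_peft_model(model, "path/to/adapter")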

# Load the model from a config so that FSDP checkpoints can be loaded into it
def load_llama_from_config(config_path):
    config = AutoConfig.from_pretrained(config_path)
    if config.model_type == "mllama":
        model = MllamaForConditionalGeneration(config=config)
    elif config.model_type == "llama":
        model = LlamaForCausalLM(config=config)
    else:
        raise ValueError(
            f"Unsupported model type: {config.model_type}. Please use a llama or mllama model."
        )
    return model
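
# Minimal smoke test (a sketch, not part of the original file). The model id is
# an illustrative assumption and may require Hugging Face Hub access; note that
# instantiating the skeleton allocates the full (randomly initialized) weights
# in memory, so prefer a small config when trying this.
if __name__ == "__main__":
    model = load_llama_from_config("meta-llama/Llama-3.2-1B")
    print(type(model).__name__)  # expected: LlamaForCausalLM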