123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384 |
- import fire
- from lm_eval.base import LM
- from lm_eval import tasks, evaluator
- from transformers import AutoModelForCausalLM, AutoTokenizer
- import torch
class HuggingFaceModel(LM):
    """lm-eval-harness adapter wrapping a Hugging Face causal language model.

    Loads a model/tokenizer pair from the HF hub and exposes the interface
    the eval harness expects (loglikelihood scoring, tokenization helpers,
    and the device/length/batch properties).
    """

    def __init__(self, model_name, tokenizer_name):
        """Load the model and tokenizer by name (hub id or local path)."""
        self.model = AutoModelForCausalLM.from_pretrained(model_name)
        self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
        # Cache device and context length once so the properties below
        # (which the original left reading never-set attributes) work.
        self._device = next(self.model.parameters()).device
        self._max_seq_length = getattr(
            self.model.config, "max_position_embeddings", 2048
        )
        self.model.eval()

    def loglikelihood(self, ctx, cont):
        """Return (total log-likelihood of ``cont`` given ``ctx``, #cont tokens).

        Fix: the original fed the full sequence as labels, so the mean loss
        covered the context too; multiplying that by ``len(cont_ids)`` gave a
        wrong total. We mask context positions with -100 so cross-entropy is
        computed over the continuation tokens only.
        """
        ctx_ids = self.tokenizer.encode(ctx, add_special_tokens=False)
        cont_ids = self.tokenizer.encode(cont, add_special_tokens=False)
        input_ids = ctx_ids + cont_ids
        # -100 is the ignore_index used by HF causal-LM loss computation.
        labels = [-100] * len(ctx_ids) + cont_ids
        with torch.no_grad():
            outputs = self.model(
                input_ids=torch.tensor([input_ids], device=self._device),
                labels=torch.tensor([labels], device=self._device),
            )
        # outputs.loss is the mean NLL per scored (continuation) token;
        # multiply back to recover the summed log-likelihood.
        log_likelihood = -outputs.loss.item() * len(cont_ids)
        return log_likelihood, len(cont_ids)

    @property
    def eot_token_id(self):
        # Original read self._tokenizer.eos_id(); HF tokenizers expose the
        # end-of-sequence id as the `eos_token_id` attribute.
        return self.tokenizer.eos_token_id

    @property
    def max_length(self):
        # Maximum context length, derived from the model config in __init__.
        return self._max_seq_length

    @property
    def max_gen_toks(self):
        # Generation cap required by the harness interface (unused here,
        # since _model_generate is not implemented).
        return 50

    @property
    def batch_size(self):
        return 1

    @property
    def device(self):
        return self._device

    def tok_encode(self, string: str) -> list:
        """Encode ``string`` to a plain list of token ids.

        The harness expects a Python list, not a tensor. The original called
        an undefined ``encode_tokens`` helper on a never-set ``_tokenizer``.
        """
        return self.tokenizer.encode(string, add_special_tokens=False)

    def tok_decode(self, tokens) -> str:
        """Decode a sequence of token ids back to text."""
        return self.tokenizer.decode(tokens)

    def _model_call(self, inputs):
        """Run a forward pass and return the logits tensor.

        Fix: the original's parameter was misspelled (``inputss``) and the
        body referenced bare ``model``/``inputs`` — a NameError on every call.
        """
        with torch.no_grad():
            return self.model(inputs).logits

    def _model_generate(self, context, max_length, eos_token_id):
        # Free-form generation is deliberately unsupported by this adapter.
        raise NotImplementedError("generation is not supported by this adapter")
- # Implement other required methods if needed
def evaluate_model(model_name, tokenizer_name, task_list):
    """Evaluate a Hugging Face model on a comma-separated list of tasks.

    Args:
        model_name: HF hub id or local path of the causal LM to evaluate.
        tokenizer_name: HF hub id or local path of the tokenizer.
        task_list: Comma-separated task names, e.g. "lambada,hellaswag".

    Prints the raw results dict returned by the eval harness.
    """
    model = HuggingFaceModel(model_name, tokenizer_name)
    # Fix: a plain split(',') keeps surrounding whitespace and empty entries
    # ("a, b" -> " b"), which breaks task-name lookup; strip and filter.
    task_names = [name.strip() for name in task_list.split(',') if name.strip()]
    results = evaluator.evaluate(
        lm=model,
        tasks=tasks.get_task_dict(task_names),
        provide_description=True,
    )
    print(results)
# CLI entry point: python-fire maps evaluate_model's parameters onto
# command-line arguments (e.g. --model_name ... --task_list "a,b").
if __name__ == "__main__":
    fire.Fire(evaluate_model)
|