123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235 |
- import hashlib
- from django.db import models
- from django.core.validators import MinValueValidator, MaxValueValidator
# Default system prompt used by EvalConfig.sys_prompt.
TEMPLATE_SYSTEM_PROMPT = (
    "You are a helpful assistant designed to answer questions.\n"
    "Provide concise and accurate responses, ensuring you follow "
    "the provided instructions thoroughly."
)

# Dataset splits; the stored value and the display label are the same text.
TARGET_CHOICES = [
    (split, split) for split in ('test', 'train', 'dev', 'validation')
]

# Chat roles available to RoleMessage.role.
ROLE_CHOICES = [(role, role) for role in ('assistant', 'user')]

# (stored value, display label) pairs for LLMBackend.client_type.
# NOTE(review): 'togheter' looks like a misspelling of 'together'; it is a
# stored value, so correcting it would also need a data migration.
CLIENT_CHOICES = [
    ('openai', 'OpenAI'),
    ('ollama', 'Ollama'),
    ('genai', 'Google GenAI'),
    ('anthropic', 'Anthropic'),
    ('togheter', 'Togheter.ai'),
    ('groq', 'Groq'),
]
class Dataset(models.Model):
    """A named dataset with a free-text description and timestamps.

    QA rows and EvalConfig rows point at a Dataset through foreign keys.
    """

    name = models.CharField(max_length=100)
    description = models.TextField()
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        db_table = 'dataset'
        verbose_name = 'Dataset'
        verbose_name_plural = 'Datasets'

    def __str__(self):
        """Return the dataset name as the display label."""
        return self.name
class QA(models.Model):
    """A single question/answer item belonging to a Dataset.

    ``hash`` is recomputed from the row's content on every ``save()`` and is
    declared unique, so the database rejects a second row whose content
    produces the same digest.
    """

    class Meta:
        db_table = 'qa'
        verbose_name = 'QA'
        verbose_name_plural = 'QAs'

    dataset = models.ForeignKey('Dataset', on_delete=models.CASCADE)
    question = models.TextField()
    category = models.CharField(
        max_length=256, null=True, blank=True, db_index=True
    )
    extra_info = models.JSONField(null=True, blank=True)
    correct_answer = models.TextField()
    target = models.CharField(
        max_length=100,
        default='test',
        choices=TARGET_CHOICES,
        db_index=True,
    )
    options = models.JSONField(null=True, blank=True)
    xid = models.CharField(max_length=100, null=True, blank=True)
    context = models.TextField(null=True, blank=True)
    correct_answer_idx = models.CharField(max_length=100)
    hash = models.CharField(max_length=100, unique=True)

    def __str__(self):
        """Return the first 50 characters of the question, with '...' if cut."""
        preview = self.question[:50]
        if self.question[50:]:
            preview += '...'
        return preview

    @staticmethod
    def _json_fragment(value):
        """Return the text a JSON field value contributes to the content hash.

        A list of strings is concatenated and a dict contributes its keys
        only (the historical behaviour, kept so existing digests remain
        reproducible). Values that a plain join cannot handle -- numbers,
        nested containers, non-iterable scalars -- are serialised with
        ``json.dumps`` instead of raising ``TypeError``.
        """
        import json

        try:
            return ''.join(value)
        except TypeError:
            # sort_keys keeps the text independent of dict insertion order.
            return json.dumps(value, sort_keys=True)

    def get_hash(self):
        """Return the MD5 hex digest identifying this row's content.

        The digest covers the dataset name and question, followed by every
        non-empty optional field in a fixed order. Fields are concatenated
        without a separator.
        """
        parts = [self.dataset.name, self.question]
        optional_text = (
            self.correct_answer,
            self.correct_answer_idx,
            self.xid,
            self.target,
            self.category,
        )
        parts.extend(value for value in optional_text if value)
        if self.extra_info:
            parts.append(self._json_fragment(self.extra_info))
        if self.context:
            parts.append(self.context)
        if self.options:
            parts.append(self._json_fragment(self.options))
        if self.context:
            # NOTE(review): context contributes twice (before and after
            # options). This looks accidental, but dropping it would change
            # every digest already stored, so it is preserved.
            parts.append(self.context)
        return hashlib.md5(''.join(parts).encode('utf-8')).hexdigest()

    def save(self, *args, **kwargs):
        """Refresh ``hash`` from the current field values, then persist."""
        self.hash = self.get_hash()
        super().save(*args, **kwargs)
class LLMBackend(models.Model):
    """An LLM provider entry: a name, a client type and optional JSON parameters."""

    name = models.CharField(max_length=100)
    client_type = models.CharField(
        max_length=100,
        default='openai',
        choices=CLIENT_CHOICES,
        db_index=True,
    )
    parameters = models.JSONField(null=True, blank=True)

    class Meta:
        # NOTE: the table name is spelled 'llm_backed' (sic); renaming it
        # would require a migration.
        db_table = 'llm_backed'
        verbose_name = 'LLM Backend'
        verbose_name_plural = 'LLM Backends'

    def __str__(self):
        """Return the backend name as the display label."""
        return self.name
class LLMModel(models.Model):
    """A named model served by an LLMBackend, with optional JSON parameters."""

    name = models.CharField(max_length=100)
    backend = models.ForeignKey('LLMBackend', on_delete=models.CASCADE)
    parameters = models.JSONField(null=True, blank=True)
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        db_table = 'llm_model'
        verbose_name = 'LLM Model'
        verbose_name_plural = 'LLM Models'

    def __str__(self):
        """Return '[<id>] <name>' as the display label."""
        return "[{}] {}".format(self.id, self.name)
class EvalConfig(models.Model):
    """Evaluation settings tied to a Dataset.

    Holds the system prompt and a regular expression whose default matches
    ``Final Answer: <letter>`` case-insensitively and captures one letter
    from A to E.
    """

    class Meta:
        db_table = 'eval_config'
        verbose_name = 'EvalConfig'
        verbose_name_plural = 'EvalConfig'

    name = models.CharField(max_length=100)
    description = models.TextField(null=True, blank=True)
    dataset = models.ForeignKey('Dataset', on_delete=models.CASCADE)
    sys_prompt = models.TextField(default=TEMPLATE_SYSTEM_PROMPT)
    # Raw string: '\s' in a normal literal is an invalid escape sequence and
    # triggers a SyntaxWarning on recent Python versions. The runtime value
    # is unchanged.
    final_answer_pattern = models.TextField(
        default=r'(?i)Final Answer\s*:\s*([A-E])'
    )
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    def __str__(self):
        """Return the configuration name as the display label."""
        return self.name
-
class RoleMessage(models.Model):
    """A message with a role ('assistant' or 'user') attached to an EvalConfig."""

    role = models.CharField(
        max_length=100,
        choices=ROLE_CHOICES,
        db_index=True,
    )
    content = models.TextField()
    eval_config = models.ForeignKey('EvalConfig', on_delete=models.CASCADE)

    class Meta:
        db_table = 'role_message'
        verbose_name = 'RoleMessage'
        verbose_name_plural = 'RoleMessage'

    def __str__(self):
        """Return the first 50 characters of the content, with '...' if cut."""
        preview = self.content[:50]
        if len(self.content) > 50:
            preview += '...'
        return preview
-
class EvalSession(models.Model):
    """One evaluation run: an EvalConfig paired with an LLMModel.

    ``parameters`` is filled from the selected model's parameters when the
    session is first saved.
    """

    class Meta:
        db_table = 'eval_session'
        verbose_name = 'EvalSession'
        verbose_name_plural = 'EvalSession'

    name = models.CharField(max_length=100)
    config = models.ForeignKey('EvalConfig', on_delete=models.CASCADE)
    llm_model = models.ForeignKey('LLMModel', on_delete=models.CASCADE)
    answer_interpreter = models.ForeignKey(
        'AnswerInterpreter',
        on_delete=models.SET_NULL,
        null=True,
        blank=True,
    )
    parameters = models.JSONField(null=True, blank=True)
    dataset_target = models.CharField(
        max_length=100,
        default='test',
        choices=TARGET_CHOICES,
        db_index=True,
    )
    request_delay = models.IntegerField(
        null=True,
        blank=True,
        validators=[MinValueValidator(1), MaxValueValidator(600)],
        help_text="Request delay in seconds",
    )
    is_active = models.BooleanField(default=False)
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    def __str__(self):
        """Return '<name> [<config name> - <model name>]'."""
        return f"{self.name} [{self.config.name} - {self.llm_model.name}]"

    def save(self, *args, **kwargs):
        """Persist the session, copying the model's parameters on first save.

        When the row has no primary key yet, ``parameters`` is replaced with
        ``llm_model.parameters``; any value assigned beforehand is
        overwritten. Later saves leave ``parameters`` untouched.
        """
        if self.pk is None:
            self.parameters = self.llm_model.parameters
        super().save(*args, **kwargs)

    @property
    def accuracy(self):
        """Fraction of this session's answers that are marked correct.

        Returns:
            ``0`` when the session has no answers, otherwise
            ``correct / total`` as a float.
        """
        total = self.evalanswer_set.count()
        if total == 0:
            return 0
        correct = self.evalanswer_set.filter(is_correct=True).count()
        # Reuse the total fetched above: it saves a query and keeps the
        # zero guard and the divisor consistent.
        return correct / total
-
class EvalAnswer(models.Model):
    """The answer recorded for one QA item within an EvalSession.

    Stores the instruction text, the assistant's answer, whether it was
    marked correct, and the backend/model the row is linked to.
    """

    eval_session = models.ForeignKey('EvalSession', on_delete=models.CASCADE)
    question = models.ForeignKey('QA', on_delete=models.CASCADE)
    instruction = models.TextField()
    assistant_answer = models.TextField()
    is_correct = models.BooleanField()
    created_at = models.DateTimeField(auto_now_add=True)
    hash = models.CharField(max_length=100)
    llm_backend = models.ForeignKey('LLMBackend', on_delete=models.CASCADE)
    llm_model = models.ForeignKey('LLMModel', on_delete=models.CASCADE)

    class Meta:
        db_table = 'eval_answer'
        verbose_name = 'EvalAnswer'
        verbose_name_plural = 'EvalAnswer'

    def __str__(self):
        """Return '<question> - <is_correct>' as the display label."""
        return "{} - {}".format(self.question, self.is_correct)
-
class AnswerInterpreter(models.Model):
    """A named prompt bound to an LLMModel; EvalSession may reference one."""

    name = models.CharField(max_length=100)
    prompt = models.TextField()
    llm_model = models.ForeignKey('LLMModel', on_delete=models.CASCADE)
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        db_table = 'answer_interpreter'
        verbose_name = 'AnswerInterpreter'
        verbose_name_plural = 'AnswerInterpreter'

    def __str__(self):
        """Return the interpreter name as the display label."""
        return self.name
|