models.py 7.5 KB


  1. import hashlib
  2. from django.db import models
  3. from django.core.validators import MinValueValidator, MaxValueValidator
  # Default text for EvalConfig.sys_prompt (two lines, no surrounding whitespace).
  TEMPLATE_SYSTEM_PROMPT = (
      "You are a helpful assistant designed to answer questions.\n"
      "Provide concise and accurate responses, ensuring you follow the provided instructions thoroughly."
  )

  # Dataset splits; the stored value and the display label are the same string.
  TARGET_CHOICES = [
      (split, split) for split in ('test', 'train', 'dev', 'validation')
  ]

  # Chat roles; the stored value and the display label are the same string.
  ROLE_CHOICES = [(role, role) for role in ('assistant', 'user')]

  # (stored value, display label) pairs for the supported LLM client types.
  CLIENT_CHOICES = [
      ('openai', 'OpenAI'),
      ('ollama', 'Ollama'),
      ('genai', 'Google GenAI'),
      ('anthropic', 'Anthropic'),
  ]
  class Dataset(models.Model):
      """A named dataset with a free-text description and timestamps."""

      name = models.CharField(max_length=100)
      description = models.TextField()
      # Set once on insert / refreshed on every save, respectively.
      created_at = models.DateTimeField(auto_now_add=True)
      updated_at = models.DateTimeField(auto_now=True)

      class Meta:
          db_table = 'dataset'
          verbose_name = 'Dataset'
          verbose_name_plural = 'Datasets'

      def __str__(self):
          """Return the dataset name."""
          return self.name
  class QA(models.Model):
      """A question/answer item belonging to a Dataset.

      ``hash`` is an MD5 hex digest derived from the dataset name and the
      item's other fields; it is recomputed on every ``save()`` and is
      declared unique.
      """

      dataset = models.ForeignKey('Dataset', on_delete=models.CASCADE)
      question = models.TextField()
      category = models.CharField(max_length=256, null=True, blank=True, db_index=True)
      extra_info = models.JSONField(null=True, blank=True)
      correct_answer = models.TextField()
      target = models.CharField(
          max_length=100,
          default='test',
          choices=TARGET_CHOICES,
          db_index=True,
      )
      options = models.JSONField(null=True, blank=True)
      xid = models.CharField(max_length=100, null=True, blank=True)
      context = models.TextField(null=True, blank=True)
      correct_answer_idx = models.CharField(max_length=100)
      hash = models.CharField(max_length=100, unique=True)

      class Meta:
          db_table = 'qa'
          verbose_name = 'QA'
          verbose_name_plural = 'QAs'

      def __str__(self):
          """Return the first 50 characters of the question, with '...' if truncated."""
          return self.question[:50] + (self.question[50:] and '...')

      @staticmethod
      def _hash_fragment(value):
          """Turn a JSON field value into the text that is fed to the hash.

          Strings are used as-is. Other iterables contribute the
          concatenation of ``str()`` of each element (for a dict that means
          its keys), which matches the former ``''.join(value)`` for every
          input that call accepted. Non-string elements and non-iterable
          scalars are stringified instead of raising ``TypeError``.
          """
          if isinstance(value, str):
              return value
          try:
              return ''.join(map(str, value))
          except TypeError:
              # Scalar JSON (number / boolean): not iterable.
              return str(value)

      def get_hash(self):
          """Return the MD5 hex digest identifying this item.

          The digest covers, in order: dataset name, question, then each of
          correct_answer, correct_answer_idx, xid, target, category,
          extra_info, context, options and context again, skipping any that
          are falsy. Values are concatenated without separators.
          """
          parts = [self.dataset.name, self.question]
          if self.correct_answer:
              parts.append(self.correct_answer)
          if self.correct_answer_idx:
              parts.append(self.correct_answer_idx)
          if self.xid:
              parts.append(self.xid)
          if self.target:
              parts.append(self.target)
          if self.category:
              parts.append(self.category)
          if self.extra_info:
              parts.append(self._hash_fragment(self.extra_info))
          if self.context:
              parts.append(self.context)
          if self.options:
              parts.append(self._hash_fragment(self.options))
          if self.context:
              # NOTE(review): context is deliberately still appended a second
              # time; dropping it would change the digest of every stored row
              # that has a context and break the unique ``hash`` matching.
              parts.append(self.context)
          return hashlib.md5(''.join(parts).encode('utf-8')).hexdigest()

      def save(self, *args, **kwargs):
          """Recompute ``hash`` and save.

          When the caller restricts the write with a non-empty
          ``update_fields``, ``hash`` is added to it so the recomputed digest
          is actually persisted along with the changed fields.
          """
          self.hash = self.get_hash()
          update_fields = kwargs.get('update_fields')
          if update_fields:
              kwargs['update_fields'] = set(update_fields) | {'hash'}
          super().save(*args, **kwargs)
  class LLMBackend(models.Model):
      """An LLM backend: a name, a client type and optional JSON parameters."""

      name = models.CharField(max_length=100)
      client_type = models.CharField(
          choices=CLIENT_CHOICES,
          default='openai',
          max_length=100,
          db_index=True,
      )
      parameters = models.JSONField(null=True, blank=True)

      class Meta:
          # NOTE(review): 'llm_backed' looks like a typo for 'llm_backend';
          # kept verbatim because renaming the table requires a migration.
          db_table = 'llm_backed'
          verbose_name = 'LLM Backend'
          verbose_name_plural = 'LLM Backends'

      def __str__(self):
          """Return the backend name."""
          return self.name
  class LLMModel(models.Model):
      """A named model served by an LLMBackend, with optional JSON parameters."""

      name = models.CharField(max_length=100)
      backend = models.ForeignKey('LLMBackend', on_delete=models.CASCADE)
      parameters = models.JSONField(null=True, blank=True)
      created_at = models.DateTimeField(auto_now_add=True)
      updated_at = models.DateTimeField(auto_now=True)

      class Meta:
          db_table = 'llm_model'
          verbose_name = 'LLM Model'
          verbose_name_plural = 'LLM Models'

      def __str__(self):
          """Return the model name prefixed with its id in square brackets."""
          return "[{}] {}".format(self.id, self.name)
  class EvalConfig(models.Model):
      """Evaluation configuration: a dataset, a system prompt and an answer pattern."""

      name = models.CharField(max_length=100)
      description = models.TextField(null=True, blank=True)
      dataset = models.ForeignKey('Dataset', on_delete=models.CASCADE)
      sys_prompt = models.TextField(default=TEMPLATE_SYSTEM_PROMPT)
      # Raw string: '\s' in a normal literal is an invalid escape sequence
      # (a warning today, slated to become an error). The value is unchanged.
      # The default captures a single letter A-E after "Final Answer:",
      # case-insensitively.
      final_answer_pattern = models.TextField(default=r'(?i)Final Answer\s*:\s*([A-E])')
      created_at = models.DateTimeField(auto_now_add=True)
      updated_at = models.DateTimeField(auto_now=True)

      class Meta:
          db_table = 'eval_config'
          verbose_name = 'EvalConfig'
          verbose_name_plural = 'EvalConfig'

      def __str__(self):
          """Return the configuration name."""
          return self.name
  class RoleMessage(models.Model):
      """A message with a role ('assistant' or 'user') attached to an EvalConfig."""

      role = models.CharField(max_length=100, choices=ROLE_CHOICES, db_index=True)
      content = models.TextField()
      eval_config = models.ForeignKey('EvalConfig', on_delete=models.CASCADE)

      class Meta:
          db_table = 'role_message'
          verbose_name = 'RoleMessage'
          verbose_name_plural = 'RoleMessage'

      def __str__(self):
          """Return the first 50 characters of the content, with '...' if truncated."""
          head, rest = self.content[:50], self.content[50:]
          return head + ('...' if rest else rest)
  class EvalSession(models.Model):
      """One evaluation run of an LLMModel against an EvalConfig."""

      name = models.CharField(max_length=100)
      config = models.ForeignKey('EvalConfig', on_delete=models.CASCADE)
      llm_model = models.ForeignKey('LLMModel', on_delete=models.CASCADE)
      answer_interpreter = models.ForeignKey(
          'AnswerInterpreter', on_delete=models.SET_NULL, null=True, blank=True
      )
      parameters = models.JSONField(null=True, blank=True)
      dataset_target = models.CharField(
          max_length=100,
          default='test',
          choices=TARGET_CHOICES,
          db_index=True,
      )
      request_delay = models.IntegerField(
          null=True, blank=True,
          validators=[MinValueValidator(1), MaxValueValidator(600)],
          help_text="Request delay in seconds",
      )
      is_active = models.BooleanField(default=False)
      created_at = models.DateTimeField(auto_now_add=True)
      updated_at = models.DateTimeField(auto_now=True)

      class Meta:
          db_table = 'eval_session'
          verbose_name = 'EvalSession'
          verbose_name_plural = 'EvalSession'

      def __str__(self):
          """Return the session name followed by its config and model names."""
          return f"{self.name} [{self.config.name} - {self.llm_model.name}]"

      def save(self, *args, **kwargs):
          """Save the session; on first save, take ``parameters`` from the model.

          While the instance has no primary key yet, ``parameters`` is
          overwritten with ``llm_model.parameters`` (any value set by the
          caller is replaced). Later saves leave ``parameters`` untouched.
          """
          if self.pk is None:
              self.parameters = self.llm_model.parameters
          super().save(*args, **kwargs)

      @property
      def accuracy(self):
          """Fraction of this session's answers marked correct.

          Returns 0 when the session has no answers. The total is counted
          once and reused as the denominator, so it is the same number that
          was checked against zero and no extra COUNT query is issued.
          """
          answers = self.evalanswer_set
          total_counts = answers.count()
          if total_counts == 0:
              return 0
          return answers.filter(is_correct=True).count() / total_counts
  class EvalAnswer(models.Model):
      """The assistant's answer to one QA item within an EvalSession."""

      eval_session = models.ForeignKey('EvalSession', on_delete=models.CASCADE)
      question = models.ForeignKey('QA', on_delete=models.CASCADE)
      instruction = models.TextField()
      assistant_answer = models.TextField()
      is_correct = models.BooleanField()
      created_at = models.DateTimeField(auto_now_add=True)
      hash = models.CharField(max_length=100)
      llm_backend = models.ForeignKey('LLMBackend', on_delete=models.CASCADE)
      llm_model = models.ForeignKey('LLMModel', on_delete=models.CASCADE)

      class Meta:
          db_table = 'eval_answer'
          verbose_name = 'EvalAnswer'
          verbose_name_plural = 'EvalAnswer'

      def __str__(self):
          """Return the question's string form and the correctness flag."""
          return "{} - {}".format(self.question, self.is_correct)
  class AnswerInterpreter(models.Model):
      """A named prompt paired with the LLMModel it refers to."""

      name = models.CharField(max_length=100)
      prompt = models.TextField()
      llm_model = models.ForeignKey('LLMModel', on_delete=models.CASCADE)
      created_at = models.DateTimeField(auto_now_add=True)
      updated_at = models.DateTimeField(auto_now=True)

      class Meta:
          db_table = 'answer_interpreter'
          verbose_name = 'AnswerInterpreter'
          verbose_name_plural = 'AnswerInterpreter'

      def __str__(self):
          """Return the interpreter name."""
          return self.name