erik-dunteman преди 7 месеца
родител
ревизия
ea9dbe1404
променени са 2 файла, в които са добавени 13 реда и са изтрити 0 реда
  1. 7 0
      recipes/3p_integrations/modal/many-llamas-human-eval/eval.py
  2. 6 0
      recipes/3p_integrations/modal/many-llamas-human-eval/plot.py

+ 7 - 0
recipes/3p_integrations/modal/many-llamas-human-eval/eval.py

@@ -1,3 +1,10 @@
+# ## Evaluating HumanEval Results using Modal Sandboxes
+# This script will take generated results and evaluate them.
+# We use Modal Sandboxes to safely evaluate LLM-generated results.
+#
+# Run it with:
+#    modal run eval.py
+
 from pathlib import Path
 
 import modal

+ 6 - 0
recipes/3p_integrations/modal/many-llamas-human-eval/plot.py

@@ -1,3 +1,9 @@
+# ## Plotting HumanEval Results
+# This script will calculate pass@k and fail@k for our experiment and plot them.
+#
+# Run it with:
+#    modal run plot.py
+
 import io
 import json
 from pathlib import Path