1 年之前 · ea9dbe1404
--- a/recipes/3p_integrations/modal/many-llamas-human-eval/eval.py
+++ b/recipes/3p_integrations/modal/many-llamas-human-eval/eval.py
@@ -1,3 +1,10 @@
 
				+# ## Evaluating HumanEval Results using Modal Sandboxes
			
 
				+# This script takes the generated results and evaluates them.
			
 
				+# We use Modal Sandboxes to safely evaluate LLM-generated results.
			
 
				+#
			
 
				+# Run it with:
			
 
				+#    modal run eval
			
 
				+
			
 
				 from pathlib import Path
			
 
				 
			
 
				 import modal
			
--- a/recipes/3p_integrations/modal/many-llamas-human-eval/plot.py
+++ b/recipes/3p_integrations/modal/many-llamas-human-eval/plot.py
@@ -1,3 +1,9 @@
 
				+# ## Plotting HumanEval Results
			
 
				+# This script calculates pass@k and fail@k for our experiment and plots them.
			
 
				+#
			
 
				+# Run it with:
			
 
				+#    modal run plot
			
 
				+
			
 
				 import io
			
 
				 import json
			
 
				 from pathlib import Path