{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Imports & Env Setup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/justinai/.conda/envs/prompt-migration/lib/python3.10/site-packages/pydantic/_internal/_config.py:345: UserWarning: Valid config keys have changed in V2:\n",
      "* 'fields' has been removed\n",
      " warnings.warn(message, UserWarning)\n"
     ]
    }
   ],
   "source": [
    "%reload_ext autoreload\n",
    "%autoreload 2\n",
    "import os\n",
    "import sys\n",
    "\n",
    "from dotenv import load_dotenv\n",
    "\n",
    "# Read variables from a local .env file; kept ahead of the dspy import,\n",
    "# matching the original cell ordering.\n",
    "load_dotenv()\n",
    "\n",
    "import dspy\n",
    "\n",
    "# Make the parent directory importable so the local `benchmarks` package\n",
    "# resolves. Guarded so re-running this cell does not pile up duplicate entries.\n",
    "PARENT_DIR = os.path.abspath('../')\n",
    "if PARENT_DIR not in sys.path:\n",
    "    sys.path.append(PARENT_DIR)\n",
    "from benchmarks import llama_mmlu_pro, leaderboard_mmlu_pro"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Configuration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "NUM_THREADS = 16\n",
    "\n",
    "FEW_SHOTS = 5\n",
    "\n",
    "# Task and prompt models point at the same endpoint.\n",
    "# See https://docs.litellm.ai/docs/providers/vllm for details\n",
    "MODEL_NAME = \"hosted_vllm/meta-llama/Llama-3.3-70B-Instruct\"\n",
    "API_BASE = \"http://localhost:8000/v1\"\n",
    "\n",
    "# Completion budget per call. Recorded runs failed on some items with the\n",
    "# `answer` field missing after `reasoning`, presumably because the completion\n",
    "# was cut off mid-reasoning; raise this if those errors persist.\n",
    "MAX_TOKENS = 2048\n",
    "\n",
    "TASK_MODEL = dspy.LM(MODEL_NAME, api_base=API_BASE, max_tokens=MAX_TOKENS)\n",
    "PROMPT_MODEL = dspy.LM(MODEL_NAME, api_base=API_BASE, max_tokens=MAX_TOKENS)\n",
    "\n",
    "dspy.configure(lm=TASK_MODEL)\n",
    "\n",
    "# Benchmark under test; swap in leaderboard_mmlu_pro (imported above) to run\n",
    "# the other variant.\n",
    "benchmark = llama_mmlu_pro\n",
    "\n",
    "# Initial system prompt (may be left blank). Keep it about the task only:\n",
    "# the DSPy adapter already dictates and parses the output fields, and the\n",
    "# recorded baseline that additionally demanded a raw JSON object scored 0.0%.\n",
    "BASE_INSTRUCTIONS = \"You are a helpful assistant designed to help with multiple choice questions.\"\n",
    "\n",
    "# Without chain of thought:\n",
    "# program = dspy.Predict(\n",
    "#     benchmark.signature(BASE_INSTRUCTIONS)\n",
    "# )\n",
    "\n",
    "# With chain of thought:\n",
    "program = dspy.ChainOfThought(\n",
    "    benchmark.signature(BASE_INSTRUCTIONS)\n",
    ")\n",
    "\n",
    "# Shared evaluator. `devset` is a placeholder here; each evaluation cell\n",
    "# passes the real devset at call time.\n",
    "evaluate = dspy.Evaluate(\n",
    "    devset=[],\n",
    "    metric=benchmark.metric,\n",
    "    num_threads=NUM_THREADS,\n",
    "    display_progress=True,\n",
    "    display_table=True,\n",
    "    return_all_scores=True,\n",
    "    return_outputs=True,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Load dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(1197, 2156, 8626)"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "trainset, valset, testset = benchmark.datasets(\n",
    "    train_size=0.1,\n",
    "    validation_size=0.2,\n",
    ")\n",
    "\n",
    "len(trainset), len(valset), len(testset)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Baseline Benchmark"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "BASE PROMPT:\n",
      " Multiple choice question answering with reasoning.\n",
      "CPU times: user 170 μs, sys: 21 μs, total: 191 μs\n",
      "Wall time: 171 μs\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "print(\"BASE PROMPT:\\n\", program.predict.signature.instructions)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Starting execution...\n",
      "Average Metric: 0.00 / 101 (0.0%): 1%|▉ | 101/8626 [00:57<1:31:30, 1.55it/s]"
     ]
    },
    {
"name": "stderr", "output_type": "stream", "text": [ "2025/01/22 00:51:43 ERROR dspy.utils.parallelizer: Error processing item Example({'question': 'A compressible gas flows over a flat plate. Properties of the gasare closely similar to those of air. The flow is at a temperatureand pressure of 700°F and 30psia, respectively. The plate is 1 in. in length and is assumed to beperfectly insulated. If the gas is moving at a speed of 500 ft/sec, calculate the surface temperature of the plate. (Note: the speed is too great to neglect the effects of viscous dissipation.)', 'options': {'A': '700.5°F', 'B': '700°F', 'C': '780°F', 'D': '772°F', 'E': '735°F', 'F': '800°F', 'G': '750°F', 'H': '685°F', 'I': '716.25°F', 'J': '810°F'}, 'reasoning': '', 'answer': 'I'}) (input_keys={'question', 'options'}): Expected dict_keys(['reasoning', 'answer']) but got dict_keys(['reasoning']). Set `provide_traceback=True` to see the stack trace.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Average Metric: 0.00 / 116 (0.0%): 1%|█▏ | 116/8626 [01:08<1:13:52, 1.92it/s]" ] }, { "name": "stderr", "output_type": "stream", "text": [ "2025/01/22 00:51:53 ERROR dspy.utils.parallelizer: Error processing item Example({'question': 'A mass $m$ moves in one dimension and is subject to a constant force $+F_0$ when $x<0$ and to a constant force $-F_0$ when $x>0$. Describe the motion by constructing a phase diagram. 
Calculate the period of the motion in terms of $m, F_0$, and the amplitude $A$ (disregard damping) .', 'options': {'A': '2 $\\\\sqrt{\\\\frac{m A}{F_0}}$', 'B': '6 $\\\\sqrt{\\\\frac{2 m A}{F_0}}$', 'C': '4 $\\\\sqrt{\\\\frac{m A}{F_0}}$', 'D': '2 $\\\\sqrt{\\\\frac{2 m A}{F_0}}$', 'E': '$\\\\pi \\\\sqrt{\\\\frac{2 m A}{F_0}}$', 'F': '$\\\\sqrt{\\\\frac{8 m A}{F_0}}$', 'G': '4 $\\\\sqrt{\\\\frac{m A}{2 F_0}}$', 'H': '$\\\\sqrt{\\\\frac{m A}{2 F_0}}$', 'I': ' 4 $\\\\sqrt{\\\\frac{2 m A}{F_0}}$', 'J': '$\\\\sqrt{\\\\frac{4 m A}{F_0}}$'}, 'reasoning': '', 'answer': 'D'}) (input_keys={'question', 'options'}): Expected dict_keys(['reasoning', 'answer']) but got dict_keys(['reasoning']). Set `provide_traceback=True` to see the stack trace.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Average Metric: 0.00 / 121 (0.0%): 1%|█▏ | 123/8626 [01:11<1:49:26, 1.29it/s]" ] }, { "name": "stderr", "output_type": "stream", "text": [ "2025/01/22 00:51:57 ERROR dspy.utils.parallelizer: Error processing item Example({'question': '(a) 10 annual mortgage payments of $1,000, (b) 12 monthly payments of $100 on his automobile, (c) a bill for $2,000 due in two years, (d) a bill for $1,000 due today. Using an annual interest rate of 12 percent (nominal rate on the automobile loan and effective rate on all other debts), determine the annual amount necessary to retire the entire debt in 15 years.', 'options': {'A': '$1,225.00', 'B': '$1,500.00', 'C': '$1,580.42', 'D': '$1,250.00', 'E': '$1,450.00', 'F': '$1,375.69', 'G': '$1,600.00', 'H': '$1,700.00', 'I': '$1,305.75', 'J': '$1,520.34'}, 'reasoning': '', 'answer': 'D'}) (input_keys={'question', 'options'}): Expected dict_keys(['reasoning', 'answer']) but got dict_keys(['reasoning']). 
Set `provide_traceback=True` to see the stack trace.\n" ] },
{ "name": "stdout", "output_type": "stream", "text": [ "Average Metric: 0.00 / 143 (0.0%): 2%|█▍ | 146/8626 [01:24<1:19:06, 1.79it/s]" ] },
{ "name": "stderr", "output_type": "stream", "text": [ "2025/01/22 00:52:09 WARNING dspy.utils.parallelizer: Received SIGINT. Cancelling execution.\n" ] } ],
"source": [
 "%%time\n",
 "\n",
 "print(\"Starting execution...\")\n",
 "eval_subset_size = len(testset)  # lower this for a quick smoke test\n",
 "\n",
 "# `evaluate` is configured with return_all_scores/return_outputs, so unpack the\n",
 "# result the same way the optimized-program evaluation does. This keeps the\n",
 "# baseline available for comparison instead of echoing the whole tuple as the\n",
 "# cell output.\n",
 "baseline_score, baseline_results, baseline_all_scores = evaluate(\n",
 "    program,\n",
 "    devset=testset[:eval_subset_size],\n",
 ")\n",
 "baseline_score"
] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### Optimize Subset + Evaluation" ] },
{ "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [
{ "name": "stderr", "output_type": "stream", "text": [ "2025/01/21 15:46:14 INFO dspy.teleprompt.mipro_optimizer_v2: \n", "RUNNING WITH THE FOLLOWING LIGHT AUTO RUN SETTINGS:\n", "num_trials: 7\n", "minibatch: False\n", "num_candidates: 5\n", "valset size: 20\n", "\n", "2025/01/21 15:46:14 INFO dspy.teleprompt.mipro_optimizer_v2: \n", "==> STEP 1: BOOTSTRAP FEWSHOT EXAMPLES <==\n", "2025/01/21 15:46:14 INFO dspy.teleprompt.mipro_optimizer_v2: These will be used as few-shot example candidates for our program and for creating instructions.\n", "\n", "2025/01/21 15:46:14 INFO dspy.teleprompt.mipro_optimizer_v2: Bootstrapping N=5 sets of demonstrations...\n" ] },
{ "name": "stdout", "output_type": "stream", "text": [ "Bootstrapping set 1/5\n", "Bootstrapping set 2/5\n", "Bootstrapping set 3/5\n" ] },
{ "name": "stderr", "output_type": "stream", "text": [ " 25%|███████████████████████████████▎ | 5/20 [00:19<00:59, 3.98s/it]\n" ] },
{ "name": "stdout", "output_type": "stream", "text": [ "Bootstrapped 4 full traces after 5 examples for up to 1 rounds, amounting to 5 attempts.\n", "Bootstrapping set 4/5\n" ] },
{ "name": "stderr", "output_type": "stream", "text": [ " 20%|█████████████████████████ | 4/20 [00:34<02:17, 8.61s/it]\n" ] },
{ "name": "stdout", "output_type":
"stream", "text": [ "Bootstrapped 4 full traces after 4 examples for up to 1 rounds, amounting to 4 attempts.\n", "Bootstrapping set 5/5\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 30%|█████████████████████████████████████▌ | 6/20 [00:39<01:31, 6.52s/it]\n", "2025/01/21 15:47:48 INFO dspy.teleprompt.mipro_optimizer_v2: \n", "==> STEP 2: PROPOSE INSTRUCTION CANDIDATES <==\n", "2025/01/21 15:47:48 INFO dspy.teleprompt.mipro_optimizer_v2: We will use the few-shot examples from the previous step, a generated dataset summary, a summary of the program code, and a randomly selected prompting tip to propose instructions.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Bootstrapped 2 full traces after 6 examples for up to 1 rounds, amounting to 6 attempts.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "2025/01/21 15:48:03 INFO dspy.teleprompt.mipro_optimizer_v2: \n", "Proposing instructions...\n", "\n", "2025/01/21 15:48:30 INFO dspy.teleprompt.mipro_optimizer_v2: Proposed Instructions for Predictor 0:\n", "\n", "2025/01/21 15:48:30 INFO dspy.teleprompt.mipro_optimizer_v2: 0: You are a helpful assistant designed to help with multiple choice question.\n", "\n", "2025/01/21 15:48:30 INFO dspy.teleprompt.mipro_optimizer_v2: 1: You are a helpful assistant designed to help with multiple choice questions from various domains, including physics, mathematics, biology, economics, law, and social sciences. Given a question and a set of options, please provide a step-by-step reasoning process to arrive at the correct answer, and then select the correct answer from the options provided. 
Your response should include a detailed explanation of your thought process, ensuring that each step is clearly described and logically connected to the next, ultimately leading to the selection of the correct answer.\n", "\n", "2025/01/21 15:48:30 INFO dspy.teleprompt.mipro_optimizer_v2: 2: To answer multiple-choice questions accurately, I will carefully analyze the question and options, generate a step-by-step reasoning process based on the context and available information, and then select the correct answer from the options provided. I will utilize my knowledge of various subjects, including physics, mathematics, biology, economics, law, and social sciences, to provide well-reasoned and informative responses. My goal is to provide clear and concise reasoning for each question, making it easier to understand the thought process behind the answer. I will consider the different theories, concepts, and formulas relevant to each subject area to ensure that my responses are accurate and helpful.\n", "\n", "2025/01/21 15:48:30 INFO dspy.teleprompt.mipro_optimizer_v2: 3: You are a knowledgeable assistant skilled in solving multiple-choice questions across various domains, including physics, mathematics, biology, economics, law, and social sciences. Your task is to provide step-by-step reasoning for a given question and set of options, ultimately selecting the correct answer. To accomplish this, carefully read the question, analyze the provided options, and apply relevant formulas, principles, and concepts from the respective field to deduce the correct answer. Generate a detailed explanation of your thought process, making your reasoning transparent and easy to follow. Ensure your response includes the correct answer choice (A, B, C, D, etc.) 
and a clear, step-by-step justification for your selection.\n", "\n", "2025/01/21 15:48:30 INFO dspy.teleprompt.mipro_optimizer_v2: 4: You are a highly skilled expert witness in a high-stakes courtroom trial, and your task is to analyze complex multiple-choice questions and provide step-by-step reasoning to support your answer. The questions will cover a wide range of subjects, including physics, mathematics, biology, economics, law, and social sciences. Your goal is to think critically and arrive at the correct answer, while also providing a clear and coherent explanation of your thought process. The jury is counting on your expertise to make an informed decision, and the outcome of the trial hangs in the balance. Please respond with the correct answer and your reasoning.\n", "\n", "2025/01/21 15:48:30 INFO dspy.teleprompt.mipro_optimizer_v2: \n", "\n", "2025/01/21 15:48:30 INFO dspy.teleprompt.mipro_optimizer_v2: Evaluating the default program...\n", "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Average Metric: 15.00 / 20 (75.0%): 100%|████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:21<00:00, 1.09s/it]" ] }, { "name": "stderr", "output_type": "stream", "text": [ "2025/01/21 15:48:52 INFO dspy.evaluate.evaluate: Average Metric: 15 / 20 (75.0%)\n", "2025/01/21 15:48:52 INFO dspy.teleprompt.mipro_optimizer_v2: Default program score: 75.0\n", "\n", "2025/01/21 15:48:52 INFO dspy.teleprompt.mipro_optimizer_v2: ==> STEP 3: FINDING OPTIMAL PROMPT PARAMETERS <==\n", "2025/01/21 15:48:52 INFO dspy.teleprompt.mipro_optimizer_v2: We will evaluate the program over a series of trials with different combinations of instructions and few-shot examples to find the optimal combination using Bayesian Optimization.\n", "\n", "/home/justinai/.conda/envs/prompt-migration/lib/python3.10/site-packages/optuna/_experimental.py:31: ExperimentalWarning: Argument ``multivariate`` is an experimental feature. 
The interface can change in the future.\n", " warnings.warn(\n", "2025/01/21 15:48:52 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 1 / 7 =====\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Average Metric: 14.00 / 20 (70.0%): 100%|████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:15<00:00, 1.30it/s]" ] }, { "name": "stderr", "output_type": "stream", "text": [ "2025/01/21 15:49:08 INFO dspy.evaluate.evaluate: Average Metric: 14 / 20 (70.0%)\n", "2025/01/21 15:49:08 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 70.0 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 1'].\n", "2025/01/21 15:49:08 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [75.0, 70.0]\n", "2025/01/21 15:49:08 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 75.0\n", "2025/01/21 15:49:08 INFO dspy.teleprompt.mipro_optimizer_v2: =======================\n", "\n", "\n", "2025/01/21 15:49:08 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 2 / 7 =====\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Average Metric: 15.00 / 20 (75.0%): 100%|████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:15<00:00, 1.31it/s]" ] }, { "name": "stderr", "output_type": "stream", "text": [ "2025/01/21 15:49:23 INFO dspy.evaluate.evaluate: Average Metric: 15 / 20 (75.0%)\n", "2025/01/21 15:49:23 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 75.0 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 1'].\n", "2025/01/21 15:49:23 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [75.0, 70.0, 75.0]\n", "2025/01/21 15:49:23 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 75.0\n", "2025/01/21 15:49:23 INFO dspy.teleprompt.mipro_optimizer_v2: =======================\n", "\n", "\n", "2025/01/21 15:49:23 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 3 / 7 =====\n" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "\n", "Average Metric: 15.00 / 20 (75.0%): 100%|████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:13<00:00, 1.45it/s]" ] }, { "name": "stderr", "output_type": "stream", "text": [ "2025/01/21 15:49:37 INFO dspy.evaluate.evaluate: Average Metric: 15 / 20 (75.0%)\n", "2025/01/21 15:49:37 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 75.0 with parameters ['Predictor 0: Instruction 4', 'Predictor 0: Few-Shot Set 1'].\n", "2025/01/21 15:49:37 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [75.0, 70.0, 75.0, 75.0]\n", "2025/01/21 15:49:37 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 75.0\n", "2025/01/21 15:49:37 INFO dspy.teleprompt.mipro_optimizer_v2: =======================\n", "\n", "\n", "2025/01/21 15:49:37 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 4 / 7 =====\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Average Metric: 15.00 / 20 (75.0%): 100%|██████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:00<00:00, 3087.23it/s]" ] }, { "name": "stderr", "output_type": "stream", "text": [ "2025/01/21 15:49:37 INFO dspy.evaluate.evaluate: Average Metric: 15 / 20 (75.0%)\n", "2025/01/21 15:49:37 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 75.0 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 1'].\n", "2025/01/21 15:49:37 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [75.0, 70.0, 75.0, 75.0, 75.0]\n", "2025/01/21 15:49:37 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 75.0\n", "2025/01/21 15:49:37 INFO dspy.teleprompt.mipro_optimizer_v2: =======================\n", "\n", "\n", "2025/01/21 15:49:37 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 5 / 7 =====\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Average Metric: 16.00 / 20 (80.0%): 
100%|████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:15<00:00, 1.31it/s]" ] }, { "name": "stderr", "output_type": "stream", "text": [ "2025/01/21 15:49:53 INFO dspy.evaluate.evaluate: Average Metric: 16 / 20 (80.0%)\n", "2025/01/21 15:49:53 INFO dspy.teleprompt.mipro_optimizer_v2: \u001b[92mBest full score so far!\u001b[0m Score: 80.0\n", "2025/01/21 15:49:53 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 80.0 with parameters ['Predictor 0: Instruction 4', 'Predictor 0: Few-Shot Set 3'].\n", "2025/01/21 15:49:53 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [75.0, 70.0, 75.0, 75.0, 75.0, 80.0]\n", "2025/01/21 15:49:53 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 80.0\n", "2025/01/21 15:49:53 INFO dspy.teleprompt.mipro_optimizer_v2: =======================\n", "\n", "\n", "2025/01/21 15:49:53 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 6 / 7 =====\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Average Metric: 14.00 / 20 (70.0%): 100%|████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:15<00:00, 1.30it/s]" ] }, { "name": "stderr", "output_type": "stream", "text": [ "2025/01/21 15:50:08 INFO dspy.evaluate.evaluate: Average Metric: 14 / 20 (70.0%)\n", "2025/01/21 15:50:08 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 70.0 with parameters ['Predictor 0: Instruction 0', 'Predictor 0: Few-Shot Set 1'].\n", "2025/01/21 15:50:08 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [75.0, 70.0, 75.0, 75.0, 75.0, 80.0, 70.0]\n", "2025/01/21 15:50:08 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 80.0\n", "2025/01/21 15:50:08 INFO dspy.teleprompt.mipro_optimizer_v2: =======================\n", "\n", "\n", "2025/01/21 15:50:08 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 7 / 7 =====\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Average Metric: 14.00 / 20 (70.0%): 
100%|████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:13<00:00, 1.45it/s]" ] },
{ "name": "stderr", "output_type": "stream", "text": [ "2025/01/21 15:50:22 INFO dspy.evaluate.evaluate: Average Metric: 14 / 20 (70.0%)\n", "2025/01/21 15:50:22 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 70.0 with parameters ['Predictor 0: Instruction 4', 'Predictor 0: Few-Shot Set 4'].\n", "2025/01/21 15:50:22 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [75.0, 70.0, 75.0, 75.0, 75.0, 80.0, 70.0, 70.0]\n", "2025/01/21 15:50:22 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 80.0\n", "2025/01/21 15:50:22 INFO dspy.teleprompt.mipro_optimizer_v2: =======================\n", "\n", "\n", "2025/01/21 15:50:22 INFO dspy.teleprompt.mipro_optimizer_v2: Returning best identified program with score 80.0!\n" ] },
{ "name": "stdout", "output_type": "stream", "text": [ "\n" ] } ],
"source": [
 "%%time\n",
 "subset_size = 20\n",
 "\n",
 "# Optimizer settings gathered in one place, then handed to MIPROv2.\n",
 "mipro_settings = dict(\n",
 "    metric=benchmark.metric,\n",
 "    auto=\"light\",\n",
 "    num_threads=NUM_THREADS,\n",
 "    task_model=TASK_MODEL,\n",
 "    prompt_model=PROMPT_MODEL,\n",
 "    max_labeled_demos=FEW_SHOTS,\n",
 ")\n",
 "optimizer = dspy.MIPROv2(**mipro_settings)\n",
 "\n",
 "# Compile against the first `subset_size` examples of the train and validation splits.\n",
 "train_subset = trainset[:subset_size]\n",
 "val_subset = valset[:subset_size]\n",
 "optimized_program = optimizer.compile(\n",
 "    program,\n",
 "    trainset=train_subset,\n",
 "    valset=val_subset,\n",
 "    requires_permission_to_run=False,\n",
 ")"
] },
{ "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [
{ "name": "stdout", "output_type": "stream", "text": [ "BEST PROMPT:\n", " You are a highly skilled expert witness in a high-stakes courtroom trial, and your task is to analyze complex multiple-choice questions and provide step-by-step reasoning to support your answer. The questions will cover a wide range of subjects, including physics, mathematics, biology, economics, law, and social sciences. Your goal is to think critically and arrive at the correct answer, while also providing a clear and coherent explanation of your thought process. The jury is counting on your expertise to make an informed decision, and the outcome of the trial hangs in the balance. Please respond with the correct answer and your reasoning.\n" ] } ],
"source": [
 "%%time\n",
 "# Show the instruction text the optimizer selected for the predictor.\n",
 "best_instructions = optimized_program.predict.signature.instructions\n",
 "print(\"BEST PROMPT:\\n\", best_instructions)"
] },
{ "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [
{ "name": "stdout", "output_type": "stream", "text": [ "Average Metric: 417.00 / 570 (73.2%): 7%|█████▏ | 574/8626 [6:54:48<96:58:50, 43.36s/it]\n", "Average Metric: 218.00 / 286 (76.2%): 3%|██▊ | 286/8626 [02:32<59:39, 2.33it/s]" ] },
{ "name": "stderr", "output_type": "stream", "text": [ "2025/01/21 23:37:28 ERROR dspy.utils.parallelizer: Error processing item Example({'question': 'Consider a thoroughly mixed vessel where a salt is dis-solved inwater. The volume of the fresh water initially in the tank is 100lbm.The inlet conditions are,ṁ_w= 150 lb/hr. and ṁ_s= 30 lb/hr. The resulting solution leaves at a rate of 120 lb/hr. If the flow in and out remain constant, compute the outletconcentration after one hour.', 'options': {'A': '0.86', 'B': '0.76', 'C': '0.46', 'D': '0.16', 'E': '0.06', 'F': '0.26', 'G': '0.96', 'H': '0.56', 'I': '0.36', 'J': '0.66'}, 'answer': ''}) (input_keys={'options', 'question'}): Expected dict_keys(['reasoning', 'answer']) but got dict_keys(['reasoning']).
Set `provide_traceback=True` to see the stack trace.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Average Metric: 452.00 / 604 (74.8%): 7%|█████▋ | 604/8626 [05:18<1:17:58, 1.71it/s]" ] }, { "name": "stderr", "output_type": "stream", "text": [ "2025/01/21 23:40:13 ERROR dspy.utils.parallelizer: Error processing item Example({'question': 'Two identical conducting spheres, fixed in place, attract each other with an electrostatic force of $0.108 \\\\mathrm{~N}$ when their center-to-center separation is $50.0 \\\\mathrm{~cm}$. The spheres are then connected by a thin conducting wire. When the wire is removed, the spheres repel each other with an electrostatic force of $0.0360 \\\\mathrm{~N}$. Of the initial charges on the spheres, with a positive net charge, what was (a) the negative charge on one of them?', 'options': {'A': '$-2.00 \\\\mu \\\\mathrm{C}$', 'B': '$-2.50 \\\\mu \\\\mathrm{C}$', 'C': '$-0.50 \\\\mu \\\\mathrm{C}$', 'D': '$-1.75 \\\\mu \\\\mathrm{C}$', 'E': '$-0.75 \\\\mu \\\\mathrm{C}$', 'F': ' $-1.00 \\\\mu \\\\mathrm{C}$$ \\\\mu \\\\mathrm{C}$', 'G': '$-0.25 \\\\mu \\\\mathrm{C}$', 'H': '$-1.50 \\\\mu \\\\mathrm{C}$', 'I': '$-3.00 \\\\mu \\\\mathrm{C}$', 'J': '$-1.25 \\\\mu \\\\mathrm{C}$'}, 'answer': ''}) (input_keys={'options', 'question'}): Expected dict_keys(['reasoning', 'answer']) but got dict_keys(['reasoning']). Set `provide_traceback=True` to see the stack trace.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Average Metric: 1075.00 / 1439 (74.7%): 17%|█████████████▎ | 1441/8626 [12:30<51:52, 2.31it/s]" ] }, { "name": "stderr", "output_type": "stream", "text": [ "2025/01/21 23:47:24 ERROR dspy.utils.parallelizer: Error processing item Example({'question': 'Assume that all gases are perfect and that data refer to 298 K unless otherwise stated. 
Find an expression for the fugacity coefficient of a gas that obeys the equation of state $p V_{\\\\mathrm{m}}=R T\\\\left(1+B / V_{\\\\mathrm{m}}+C / V_{\\\\mathrm{m}}^2\\\\right)$. Use the resulting expression to estimate the fugacity of argon at 1.00 atm and $100 \\\\mathrm{~K}$ using $B=-21.13 \\\\mathrm{~cm}^3 \\\\mathrm{~mol}^{-1}$ and $C=1054 \\\\mathrm{~cm}^6 \\\\mathrm{~mol}^{-2}$.', 'options': {'A': '1.0236$\\\\text{atm}$', 'B': '1.0567 atm', 'C': '0.9852 atm', 'D': ' 0.9974$\\\\text{atm}$ ', 'E': '0.9321 atm', 'F': '1.0000 atm', 'G': '1.0150 atm', 'H': '0.9125$\\\\text{atm}$', 'I': '1.1024$\\\\text{atm}$', 'J': '0.9500 atm'}, 'answer': ''}) (input_keys={'options', 'question'}): 'list' object has no attribute 'items'. Set `provide_traceback=True` to see the stack trace.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Average Metric: 1319.00 / 1758 (75.0%): 20%|████████████████▎ | 1761/8626 [15:12<57:12, 2.00it/s]" ] }, { "name": "stderr", "output_type": "stream", "text": [ "2025/01/21 23:50:08 ERROR dspy.utils.parallelizer: Error processing item Example({'question': 'The ${ }^7 \\\\mathrm{Li}^1 \\\\mathrm{H}$ ground electronic state has $D_0=2.4287 \\\\mathrm{eV}, \\\\nu_e / c=1405.65 \\\\mathrm{~cm}^{-1}$, and $\\\\nu_e x_e / c=23.20 \\\\mathrm{~cm}^{-1}$, where $c$ is the speed of light. (These last two quantities are usually designated $\\\\omega_e$ and $\\\\omega_e x_e$ in the literature.) Calculate $D_e$ for ${ }^7 \\\\mathrm{Li}^1 \\\\mathrm{H}$.', 'options': {'A': '2.4000 eV', 'B': '2.6000 $\\\\mathrm{eV}$', 'C': '2.5500 eV', 'D': '2.3000 $\\\\mathrm{eV}$', 'E': '2.4287 $\\\\mathrm{eV}$', 'F': '2.3500 eV', 'G': ' 2.5151 $\\\\mathrm{eV}$', 'H': '2.4500 eV', 'I': '2.4850 eV', 'J': '2.5350 eV'}, 'answer': 'G'}) (input_keys={'options', 'question'}): 'list' object has no attribute 'items'. 
Set `provide_traceback=True` to see the stack trace.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Average Metric: 1353.00 / 1805 (75.0%): 21%|████████████████▊ | 1809/8626 [15:38<54:42, 2.08it/s]" ] }, { "ename": "ValueError", "evalue": "Expected dict_keys(['reasoning', 'answer']) but got dict_keys(['reasoning'])", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", "File \u001b[0;32m~/.conda/envs/prompt-migration/lib/python3.10/site-packages/dspy/adapters/base.py:30\u001b[0m, in \u001b[0;36mAdapter.__call__\u001b[0;34m(self, lm, lm_kwargs, signature, demos, inputs)\u001b[0m\n\u001b[1;32m 28\u001b[0m output, output_logprobs \u001b[38;5;241m=\u001b[39m output[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtext\u001b[39m\u001b[38;5;124m\"\u001b[39m], output[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlogprobs\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[0;32m---> 30\u001b[0m value \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mparse\u001b[49m\u001b[43m(\u001b[49m\u001b[43msignature\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43moutput\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 32\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mset\u001b[39m(value\u001b[38;5;241m.\u001b[39mkeys()) \u001b[38;5;241m==\u001b[39m \u001b[38;5;28mset\u001b[39m(signature\u001b[38;5;241m.\u001b[39moutput_fields\u001b[38;5;241m.\u001b[39mkeys()), \\\n\u001b[1;32m 33\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mExpected \u001b[39m\u001b[38;5;132;01m{\u001b[39;00msignature\u001b[38;5;241m.\u001b[39moutput_fields\u001b[38;5;241m.\u001b[39mkeys()\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m but got 
\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mvalue\u001b[38;5;241m.\u001b[39mkeys()\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n", "File \u001b[0;32m~/.conda/envs/prompt-migration/lib/python3.10/site-packages/dspy/utils/callback.py:234\u001b[0m, in \u001b[0;36mwith_callbacks..wrapper\u001b[0;34m(instance, *args, **kwargs)\u001b[0m\n\u001b[1;32m 233\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m callbacks:\n\u001b[0;32m--> 234\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[43minstance\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 236\u001b[0m \u001b[38;5;66;03m# Generate call ID as the unique identifier for the call, this is useful for instrumentation.\u001b[39;00m\n", "File \u001b[0;32m~/.conda/envs/prompt-migration/lib/python3.10/site-packages/dspy/adapters/chat_adapter.py:84\u001b[0m, in \u001b[0;36mChatAdapter.parse\u001b[0;34m(self, signature, completion)\u001b[0m\n\u001b[1;32m 83\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m fields\u001b[38;5;241m.\u001b[39mkeys() \u001b[38;5;241m!=\u001b[39m signature\u001b[38;5;241m.\u001b[39moutput_fields\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[0;32m---> 84\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mExpected \u001b[39m\u001b[38;5;132;01m{\u001b[39;00msignature\u001b[38;5;241m.\u001b[39moutput_fields\u001b[38;5;241m.\u001b[39mkeys()\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m but got \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfields\u001b[38;5;241m.\u001b[39mkeys()\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 86\u001b[0m 
\u001b[38;5;28;01mreturn\u001b[39;00m fields\n", "\u001b[0;31mValueError\u001b[0m: Expected dict_keys(['reasoning', 'answer']) but got dict_keys(['reasoning'])", "\nDuring handling of the above exception, another exception occurred:\n", "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[11], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m eval_subset_size \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlen\u001b[39m(testset)\n\u001b[0;32m----> 2\u001b[0m score, results, all_scores \u001b[38;5;241m=\u001b[39m \u001b[43mevaluate\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[43moptimized_program\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[43mdevset\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtestset\u001b[49m\u001b[43m[\u001b[49m\u001b[43m:\u001b[49m\u001b[43meval_subset_size\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/.conda/envs/prompt-migration/lib/python3.10/site-packages/dspy/evaluate/evaluate.py:112\u001b[0m, in \u001b[0;36mEvaluate.__call__\u001b[0;34m(self, program, metric, devset, num_threads, display_progress, display_table, return_all_scores, return_outputs)\u001b[0m\n\u001b[1;32m 108\u001b[0m program\u001b[38;5;241m.\u001b[39m_suggest_failures \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m dspy\u001b[38;5;241m.\u001b[39msettings\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msuggest_failures\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 110\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m prediction, score\n\u001b[0;32m--> 112\u001b[0m results \u001b[38;5;241m=\u001b[39m \u001b[43mexecutor\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexecute\u001b[49m\u001b[43m(\u001b[49m\u001b[43mprocess_item\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdevset\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 113\u001b[0m 
\u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(devset) \u001b[38;5;241m==\u001b[39m \u001b[38;5;28mlen\u001b[39m(results)\n\u001b[1;32m 115\u001b[0m results \u001b[38;5;241m=\u001b[39m [((dspy\u001b[38;5;241m.\u001b[39mPrediction(), \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfailure_score) \u001b[38;5;28;01mif\u001b[39;00m r \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m r) \u001b[38;5;28;01mfor\u001b[39;00m r \u001b[38;5;129;01min\u001b[39;00m results]\n", "File \u001b[0;32m~/.conda/envs/prompt-migration/lib/python3.10/site-packages/dspy/utils/parallelizer.py:39\u001b[0m, in \u001b[0;36mParallelExecutor.execute\u001b[0;34m(self, function, data)\u001b[0m\n\u001b[1;32m 37\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_execute_isolated_single_thread(wrapped_function, data)\n\u001b[1;32m 38\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m---> 39\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_execute_multi_thread\u001b[49m\u001b[43m(\u001b[49m\u001b[43mwrapped_function\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/.conda/envs/prompt-migration/lib/python3.10/site-packages/dspy/utils/parallelizer.py:180\u001b[0m, in \u001b[0;36mParallelExecutor._execute_multi_thread\u001b[0;34m(self, function, data)\u001b[0m\n\u001b[1;32m 172\u001b[0m pbar \u001b[38;5;241m=\u001b[39m tqdm\u001b[38;5;241m.\u001b[39mtqdm(\n\u001b[1;32m 173\u001b[0m total\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mlen\u001b[39m(data),\n\u001b[1;32m 174\u001b[0m dynamic_ncols\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,\n\u001b[1;32m 175\u001b[0m disable\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdisable_progress_bar,\n\u001b[1;32m 176\u001b[0m 
file\u001b[38;5;241m=\u001b[39msys\u001b[38;5;241m.\u001b[39mstdout\n\u001b[1;32m 177\u001b[0m )\n\u001b[1;32m 179\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m future \u001b[38;5;129;01min\u001b[39;00m as_completed(futures):\n\u001b[0;32m--> 180\u001b[0m index, result \u001b[38;5;241m=\u001b[39m \u001b[43mfuture\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresult\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 182\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m result \u001b[38;5;129;01mis\u001b[39;00m job_cancelled:\n\u001b[1;32m 183\u001b[0m \u001b[38;5;28;01mcontinue\u001b[39;00m\n", "File \u001b[0;32m~/.conda/envs/prompt-migration/lib/python3.10/concurrent/futures/_base.py:451\u001b[0m, in \u001b[0;36mFuture.result\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m 449\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m CancelledError()\n\u001b[1;32m 450\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_state \u001b[38;5;241m==\u001b[39m FINISHED:\n\u001b[0;32m--> 451\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m__get_result\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 453\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_condition\u001b[38;5;241m.\u001b[39mwait(timeout)\n\u001b[1;32m 455\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_state \u001b[38;5;129;01min\u001b[39;00m [CANCELLED, CANCELLED_AND_NOTIFIED]:\n", "File \u001b[0;32m~/.conda/envs/prompt-migration/lib/python3.10/concurrent/futures/_base.py:403\u001b[0m, in \u001b[0;36mFuture.__get_result\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 401\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_exception:\n\u001b[1;32m 402\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 403\u001b[0m 
\u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_exception\n\u001b[1;32m 404\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 405\u001b[0m \u001b[38;5;66;03m# Break a reference cycle with the exception in self._exception\u001b[39;00m\n\u001b[1;32m 406\u001b[0m \u001b[38;5;28mself\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n", "File \u001b[0;32m~/.conda/envs/prompt-migration/lib/python3.10/concurrent/futures/thread.py:58\u001b[0m, in \u001b[0;36m_WorkItem.run\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 55\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m\n\u001b[1;32m 57\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m---> 58\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 59\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mBaseException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[1;32m 60\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfuture\u001b[38;5;241m.\u001b[39mset_exception(exc)\n", "File \u001b[0;32m~/.conda/envs/prompt-migration/lib/python3.10/site-packages/dspy/utils/parallelizer.py:158\u001b[0m, in \u001b[0;36mParallelExecutor._execute_multi_thread..cancellable_function\u001b[0;34m(parent_overrides, index_item)\u001b[0m\n\u001b[1;32m 155\u001b[0m thread_local_overrides\u001b[38;5;241m.\u001b[39moverrides \u001b[38;5;241m=\u001b[39m parent_overrides\u001b[38;5;241m.\u001b[39mcopy()\n\u001b[1;32m 157\u001b[0m 
\u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 158\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m index, \u001b[43mfunction\u001b[49m\u001b[43m(\u001b[49m\u001b[43mitem\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 159\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 160\u001b[0m thread_local_overrides\u001b[38;5;241m.\u001b[39moverrides \u001b[38;5;241m=\u001b[39m original_overrides\n", "File \u001b[0;32m~/.conda/envs/prompt-migration/lib/python3.10/site-packages/dspy/utils/parallelizer.py:54\u001b[0m, in \u001b[0;36mParallelExecutor._wrap_function..wrapped\u001b[0;34m(item)\u001b[0m\n\u001b[1;32m 52\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m current_error_count \u001b[38;5;241m>\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmax_errors:\n\u001b[1;32m 53\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcancel_jobs\u001b[38;5;241m.\u001b[39mset()\n\u001b[0;32m---> 54\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n\u001b[1;32m 55\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mprovide_traceback:\n\u001b[1;32m 56\u001b[0m logger\u001b[38;5;241m.\u001b[39merror(\n\u001b[1;32m 57\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mError processing item \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mitem\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00me\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124mStack trace:\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00mtraceback\u001b[38;5;241m.\u001b[39mformat_exc()\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 58\u001b[0m )\n", "File \u001b[0;32m~/.conda/envs/prompt-migration/lib/python3.10/site-packages/dspy/utils/parallelizer.py:47\u001b[0m, in 
\u001b[0;36mParallelExecutor._wrap_function..wrapped\u001b[0;34m(item)\u001b[0m\n\u001b[1;32m 45\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 46\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m---> 47\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunction\u001b[49m\u001b[43m(\u001b[49m\u001b[43mitem\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 48\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 49\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39merror_lock:\n", "File \u001b[0;32m~/.conda/envs/prompt-migration/lib/python3.10/site-packages/dspy/evaluate/evaluate.py:101\u001b[0m, in \u001b[0;36mEvaluate.__call__..process_item\u001b[0;34m(example)\u001b[0m\n\u001b[1;32m 100\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mprocess_item\u001b[39m(example):\n\u001b[0;32m--> 101\u001b[0m prediction \u001b[38;5;241m=\u001b[39m \u001b[43mprogram\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mexample\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minputs\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 102\u001b[0m score \u001b[38;5;241m=\u001b[39m metric(example, prediction)\n\u001b[1;32m 104\u001b[0m \u001b[38;5;66;03m# Increment assert and suggest failures to program's attributes\u001b[39;00m\n", "File \u001b[0;32m~/.conda/envs/prompt-migration/lib/python3.10/site-packages/dspy/utils/callback.py:234\u001b[0m, in \u001b[0;36mwith_callbacks..wrapper\u001b[0;34m(instance, *args, **kwargs)\u001b[0m\n\u001b[1;32m 232\u001b[0m \u001b[38;5;66;03m# If no callbacks are provided, just call the function\u001b[39;00m\n\u001b[1;32m 233\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m callbacks:\n\u001b[0;32m--> 234\u001b[0m 
\u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[43minstance\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 236\u001b[0m \u001b[38;5;66;03m# Generate call ID as the unique identifier for the call, this is useful for instrumentation.\u001b[39;00m\n\u001b[1;32m 237\u001b[0m call_id \u001b[38;5;241m=\u001b[39m uuid\u001b[38;5;241m.\u001b[39muuid4()\u001b[38;5;241m.\u001b[39mhex\n", "File \u001b[0;32m~/.conda/envs/prompt-migration/lib/python3.10/site-packages/dspy/primitives/program.py:22\u001b[0m, in \u001b[0;36mModule.__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 20\u001b[0m \u001b[38;5;129m@with_callbacks\u001b[39m\n\u001b[1;32m 21\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__call__\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m---> 22\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mforward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/.conda/envs/prompt-migration/lib/python3.10/site-packages/dspy/predict/chain_of_thought.py:20\u001b[0m, in \u001b[0;36mChainOfThought.forward\u001b[0;34m(self, **kwargs)\u001b[0m\n\u001b[1;32m 19\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m---> 20\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m 
\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpredict\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/.conda/envs/prompt-migration/lib/python3.10/site-packages/dspy/utils/callback.py:234\u001b[0m, in \u001b[0;36mwith_callbacks..wrapper\u001b[0;34m(instance, *args, **kwargs)\u001b[0m\n\u001b[1;32m 232\u001b[0m \u001b[38;5;66;03m# If no callbacks are provided, just call the function\u001b[39;00m\n\u001b[1;32m 233\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m callbacks:\n\u001b[0;32m--> 234\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[43minstance\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 236\u001b[0m \u001b[38;5;66;03m# Generate call ID as the unique identifier for the call, this is useful for instrumentation.\u001b[39;00m\n\u001b[1;32m 237\u001b[0m call_id \u001b[38;5;241m=\u001b[39m uuid\u001b[38;5;241m.\u001b[39muuid4()\u001b[38;5;241m.\u001b[39mhex\n", "File \u001b[0;32m~/.conda/envs/prompt-migration/lib/python3.10/site-packages/dspy/predict/predict.py:81\u001b[0m, in \u001b[0;36mPredict.__call__\u001b[0;34m(self, **kwargs)\u001b[0m\n\u001b[1;32m 79\u001b[0m \u001b[38;5;129m@with_callbacks\u001b[39m\n\u001b[1;32m 80\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__call__\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m---> 81\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m 
\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mforward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/.conda/envs/prompt-migration/lib/python3.10/site-packages/dspy/predict/predict.py:111\u001b[0m, in \u001b[0;36mPredict.forward\u001b[0;34m(self, **kwargs)\u001b[0m\n\u001b[1;32m 109\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mdspy\u001b[39;00m\n\u001b[1;32m 110\u001b[0m adapter \u001b[38;5;241m=\u001b[39m dspy\u001b[38;5;241m.\u001b[39msettings\u001b[38;5;241m.\u001b[39madapter \u001b[38;5;129;01mor\u001b[39;00m dspy\u001b[38;5;241m.\u001b[39mChatAdapter()\n\u001b[0;32m--> 111\u001b[0m completions \u001b[38;5;241m=\u001b[39m \u001b[43madapter\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlm\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlm_kwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconfig\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msignature\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msignature\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdemos\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdemos\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minputs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 113\u001b[0m pred \u001b[38;5;241m=\u001b[39m Prediction\u001b[38;5;241m.\u001b[39mfrom_completions(completions, signature\u001b[38;5;241m=\u001b[39msignature)\n\u001b[1;32m 115\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m kwargs\u001b[38;5;241m.\u001b[39mpop(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_trace\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mTrue\u001b[39;00m) \u001b[38;5;129;01mand\u001b[39;00m dspy\u001b[38;5;241m.\u001b[39msettings\u001b[38;5;241m.\u001b[39mtrace \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m 
\u001b[38;5;28;01mNone\u001b[39;00m:\n", "File \u001b[0;32m~/.conda/envs/prompt-migration/lib/python3.10/site-packages/dspy/adapters/base.py:45\u001b[0m, in \u001b[0;36mAdapter.__call__\u001b[0;34m(self, lm, lm_kwargs, signature, demos, inputs)\u001b[0m\n\u001b[1;32m 43\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mjson_adapter\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m JSONAdapter\n\u001b[1;32m 44\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(\u001b[38;5;28mself\u001b[39m, JSONAdapter):\n\u001b[0;32m---> 45\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mJSONAdapter\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlm\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlm_kwargs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msignature\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdemos\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minputs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 46\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n", "File \u001b[0;32m~/.conda/envs/prompt-migration/lib/python3.10/site-packages/dspy/adapters/json_adapter.py:60\u001b[0m, in \u001b[0;36mJSONAdapter.__call__\u001b[0;34m(self, lm, lm_kwargs, signature, demos, inputs)\u001b[0m\n\u001b[1;32m 57\u001b[0m values \u001b[38;5;241m=\u001b[39m []\n\u001b[1;32m 59\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m output \u001b[38;5;129;01min\u001b[39;00m outputs:\n\u001b[0;32m---> 60\u001b[0m value \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mparse\u001b[49m\u001b[43m(\u001b[49m\u001b[43msignature\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43moutput\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 61\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m 
\u001b[38;5;28mset\u001b[39m(value\u001b[38;5;241m.\u001b[39mkeys()) \u001b[38;5;241m==\u001b[39m \u001b[38;5;28mset\u001b[39m(\n\u001b[1;32m 62\u001b[0m signature\u001b[38;5;241m.\u001b[39moutput_fields\u001b[38;5;241m.\u001b[39mkeys()\n\u001b[1;32m 63\u001b[0m ), \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mExpected \u001b[39m\u001b[38;5;132;01m{\u001b[39;00msignature\u001b[38;5;241m.\u001b[39moutput_fields\u001b[38;5;241m.\u001b[39mkeys()\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m but got \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mvalue\u001b[38;5;241m.\u001b[39mkeys()\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 64\u001b[0m values\u001b[38;5;241m.\u001b[39mappend(value)\n", "File \u001b[0;32m~/.conda/envs/prompt-migration/lib/python3.10/site-packages/dspy/utils/callback.py:234\u001b[0m, in \u001b[0;36mwith_callbacks..wrapper\u001b[0;34m(instance, *args, **kwargs)\u001b[0m\n\u001b[1;32m 232\u001b[0m \u001b[38;5;66;03m# If no callbacks are provided, just call the function\u001b[39;00m\n\u001b[1;32m 233\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m callbacks:\n\u001b[0;32m--> 234\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[43minstance\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 236\u001b[0m \u001b[38;5;66;03m# Generate call ID as the unique identifier for the call, this is useful for instrumentation.\u001b[39;00m\n\u001b[1;32m 237\u001b[0m call_id \u001b[38;5;241m=\u001b[39m uuid\u001b[38;5;241m.\u001b[39muuid4()\u001b[38;5;241m.\u001b[39mhex\n", "File \u001b[0;32m~/.conda/envs/prompt-migration/lib/python3.10/site-packages/dspy/adapters/json_adapter.py:102\u001b[0m, in 
\u001b[0;36mJSONAdapter.parse\u001b[0;34m(self, signature, completion)\u001b[0m\n\u001b[1;32m 99\u001b[0m fields[k] \u001b[38;5;241m=\u001b[39m parse_value(v, signature\u001b[38;5;241m.\u001b[39moutput_fields[k]\u001b[38;5;241m.\u001b[39mannotation)\n\u001b[1;32m 101\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m fields\u001b[38;5;241m.\u001b[39mkeys() \u001b[38;5;241m!=\u001b[39m signature\u001b[38;5;241m.\u001b[39moutput_fields\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[0;32m--> 102\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mExpected \u001b[39m\u001b[38;5;132;01m{\u001b[39;00msignature\u001b[38;5;241m.\u001b[39moutput_fields\u001b[38;5;241m.\u001b[39mkeys()\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m but got \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfields\u001b[38;5;241m.\u001b[39mkeys()\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 104\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m fields\n", "\u001b[0;31mValueError\u001b[0m: Expected dict_keys(['reasoning', 'answer']) but got dict_keys(['reasoning'])" ] } ], "source": [ "%%time\n", "eval_subset_size = len(testset)\n", "score, results, all_scores = evaluate(\n", " optimized_program,\n", " devset=testset[:eval_subset_size],\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Medium Optimization" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%%time\n", "subset_size = 500\n", "optimizer = dspy.MIPROv2(\n", " metric=benchmark.metric,\n", " auto=\"medium\",\n", " num_threads=NUM_THREADS,\n", " task_model=TASK_MODEL,\n", " prompt_model=PROMPT_MODEL,\n", " max_labeled_demos=FEW_SHOTS,\n", ")\n", "\n", "optimized_program = optimizer.compile(\n", " program,\n", " trainset=trainset[:subset_size],\n", " valset=valset[:subset_size],\n", " requires_permission_to_run=False,\n", ")" ] }, { "cell_type": "code", 
"execution_count": null, "metadata": {}, "outputs": [], "source": [ "print(\"BEST PROMPT:\\n\", optimized_program.predict.signature.instructions)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print(\"BEST PROMPT:\\n\", optimized_program.predict.signature.instructions)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print(\"BEST EXAMPLES:\\n\", optimized_program.predict.demos)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%%time\n", "score, results, all_scores = evaluate(\n", " optimized_program,\n", " devset=testset[:subset_size],\n", " display_table=False,\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%%time\n", "eval_medium_subset_size = 300\n", "score, results, all_scores = evaluate(\n", " optimized_program,\n", " devset=testset[:eval_medium_subset_size],\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Heavy Optimization" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "optimizer = dspy.MIPROv2(\n", " metric=benchmark.metric,\n", " auto=\"heavy\",\n", " num_threads=NUM_THREADS,\n", " task_model=TASK_MODEL,\n", " prompt_model=PROMPT_MODEL,\n", " max_labeled_demos=FEW_SHOTS,\n", ")\n", "\n", "optimized_program = optimizer.compile(\n", " program,\n", " trainset=trainset,\n", " valset=valset,\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print(\"BEST PROMPT:\\n\", optimized_program.predict.signature.instructions)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "score, results, all_scores = evaluate(\n", " optimized_program,\n", " devset=testset,\n", " display_table=False,\n", ")" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", 
"name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.16" } }, "nbformat": 4, "nbformat_minor": 4 }