@@ -36,11 +36,12 @@ logger = logging.getLogger(__name__)

 from finetune_pipeline.data.data_loader import load_and_format_data, read_config
 from finetune_pipeline.finetuning.run_finetuning import run_torch_tune
-from finetune_pipeline.inference.run_inference import (
-    run_vllm_batch_inference_on_dataset,
-    save_inference_results,
-)
-from finetune_pipeline.inference.start_vllm_server import start_vllm_server
+# from finetune_pipeline.inference.run_inference import (
+#     run_vllm_batch_inference_on_dataset,
+#     save_inference_results,
+# )
+
+# from finetune_pipeline.inference.start_vllm_server import start_vllm_server


 def run_data_loading(config_path: str) -> Tuple[List[str], List[str]]:
@@ -57,7 +58,7 @@ def run_data_loading(config_path: str) -> Tuple[List[str], List[str]]:

     # Read the configuration
     config = read_config(config_path)
-    formatter_config = config.get("formatter", {})
+    data_config = config.get("data", {})
     output_dir = config.get("output_dir", "/tmp/finetune-pipeline/data/")

     # Create the output directory if it doesn't exist
@@ -66,7 +67,7 @@ def run_data_loading(config_path: str) -> Tuple[List[str], List[str]]:
     # Load and format the data
     try:
         formatted_data_paths, conversation_data_paths = load_and_format_data(
-            formatter_config, output_dir
+            data_config, output_dir
         )
         logger.info(f"Data loading and formatting complete. Saved to {output_dir}")
         logger.info(f"Formatted data paths: {formatted_data_paths}")
@@ -133,149 +134,149 @@ def run_finetuning(config_path: str, formatted_data_paths: List[str]) -> str:
         raise


-def run_vllm_server(config_path: str, model_path: str) -> str:
-    """
-    Start the vLLM server.
-
-    Args:
-        config_path: Path to the configuration file
-        model_path: Path to the fine-tuned model
-
-    Returns:
-        URL of the vLLM server
-    """
-    logger.info("=== Step 3: Starting vLLM Server ===")
-
-    # Read the configuration
-    config = read_config(config_path)
-    inference_config = config.get("inference", {})
-
-    model_path = inference_config.get(
-        "model_path", "/home/ubuntu/yash-workspace/medgemma-4b-it"
-    )
-
-    # # Update the model path in the inference config
-    # inference_config["model_path"] = model_path
-
-    # Extract server parameters
-    port = inference_config.get("port", 8000)
-    host = inference_config.get("host", "0.0.0.0")
-    tensor_parallel_size = inference_config.get("tensor_parallel_size", 1)
-    max_model_len = inference_config.get("max_model_len", 4096)
-    max_num_seqs = inference_config.get("max_num_seqs", 256)
-    quantization = inference_config.get("quantization")
-    gpu_memory_utilization = inference_config.get("gpu_memory_utilization", 0.9)
-    enforce_eager = inference_config.get("enforce_eager", False)
-
-    # Start the server in a separate process
-    try:
-        logger.info(f"Starting vLLM server with model {model_path}")
-        result = start_vllm_server(
-            model_path,
-            port,
-            host,
-            tensor_parallel_size,
-            max_model_len,
-            max_num_seqs,
-            quantization,
-            gpu_memory_utilization,
-            enforce_eager,
-        )
-        if result.returncode == 0:
-            server_url = f"http://{host}:{port}/v1"
-            logger.info(f"vLLM server started at {server_url}")
-            return server_url
-        else:
-            logger.error(f"vLLM server failed to start")
-            raise RuntimeError("vLLM server failed to start")
-    except Exception as e:
-        logger.error(f"Error starting vLLM server: {e}")
-        raise
-
-
-def run_inference(
-    config_path: str, formatted_data_paths: List[str], model_path: str = ""
-) -> str:
-    """
-    Run inference on the fine-tuned model.
-
-    Args:
-        config_path: Path to the configuration file
-        formatted_data_paths: Paths to the formatted data (for compatibility)
-
-    Returns:
-        Path to the inference results
-    """
-    logger.info("=== Step 4: Running Inference ===")
-
-    config = read_config(config_path)
-    inference_config = config.get("inference", {})
-    formatter_config = config.get("formatter", {})
-    output_dir = config.get("output_dir", "/tmp/finetune-pipeline/")
-
-    # Model parameters
-    if model_path == "":
-        model_path = inference_config.get("model_path", None)
-    if model_path is None:
-        raise ValueError("model_path must be specified in the config")
-
-    # Get inference data configuration
-    inference_data_kwargs = inference_config.get("inference_data_kwargs", {})
-    if not inference_data_kwargs or not inference_data_kwargs.get("data_path"):
-        raise ValueError(
-            "inference_data_kwargs with data_path must be specified in config"
-        )
-
-    # Performance parameters
-    gpu_memory_utilization = inference_config.get("gpu_memory_utilization", 0.95)
-    max_model_len = inference_config.get("max_model_len", 512)
-
-    # Generation parameters
-    max_tokens = inference_config.get("max_tokens", 100)
-    temperature = inference_config.get("temperature", 0.0)
-    top_p = inference_config.get("top_p", 1.0)
-    seed = inference_config.get("seed")
-    structured = inference_config.get("structured", False)
-
-    # Load inference data using the new function
-    try:
-        logger.info("Loading inference data...")
-        from finetune_pipeline.inference.run_inference import load_inference_data
-
-        inference_data = load_inference_data(
-            inference_data_kwargs=inference_data_kwargs,
-            formatter_config=formatter_config,
-        )
-        logger.info(f"Loaded {len(inference_data)} samples for inference")
-
-    except Exception as e:
-        logger.error(f"Failed to load inference data: {e}")
-        raise
-
-    # Run inference
-    try:
-        logger.info(f"Running inference with model: {model_path}")
-        results = run_vllm_batch_inference_on_dataset(
-            inference_data=inference_data,
-            model_path=model_path,
-            temperature=temperature,
-            top_p=top_p,
-            max_tokens=max_tokens,
-            seed=seed,
-            structured=structured,
-            gpu_memory_utilization=gpu_memory_utilization,
-            max_model_len=max_model_len,
-        )
-
-        # Save the results
-        results_path = os.path.join(output_dir, "inference_results.json")
-        save_inference_results(results, results_path)
-
-        logger.info(f"Inference complete. Results saved to {results_path}")
-        return results_path
-    except Exception as e:
-        logger.error(f"Error during inference: {e}")
-        raise
+# def run_vllm_server(config_path: str, model_path: str) -> str:
+#     """
+#     Start the vLLM server.
+
+#     Args:
+#         config_path: Path to the configuration file
+#         model_path: Path to the fine-tuned model
+
+#     Returns:
+#         URL of the vLLM server
+#     """
+#     logger.info("=== Step 3: Starting vLLM Server ===")
+
+#     # Read the configuration
+#     config = read_config(config_path)
+#     inference_config = config.get("inference", {})
+
+#     model_path = inference_config.get(
+#         "model_path", "/home/ubuntu/yash-workspace/medgemma-4b-it"
+#     )
+
+#     # # Update the model path in the inference config
+#     # inference_config["model_path"] = model_path
+
+#     # Extract server parameters
+#     port = inference_config.get("port", 8000)
+#     host = inference_config.get("host", "0.0.0.0")
+#     tensor_parallel_size = inference_config.get("tensor_parallel_size", 1)
+#     max_model_len = inference_config.get("max_model_len", 4096)
+#     max_num_seqs = inference_config.get("max_num_seqs", 256)
+#     quantization = inference_config.get("quantization")
+#     gpu_memory_utilization = inference_config.get("gpu_memory_utilization", 0.9)
+#     enforce_eager = inference_config.get("enforce_eager", False)
+
+#     # Start the server in a separate process
+#     try:
+#         logger.info(f"Starting vLLM server with model {model_path}")
+#         result = start_vllm_server(
+#             model_path,
+#             port,
+#             host,
+#             tensor_parallel_size,
+#             max_model_len,
+#             max_num_seqs,
+#             quantization,
+#             gpu_memory_utilization,
+#             enforce_eager,
+#         )
+#         if result.returncode == 0:
+#             server_url = f"http://{host}:{port}/v1"
+#             logger.info(f"vLLM server started at {server_url}")
+#             return server_url
+#         else:
+#             logger.error(f"vLLM server failed to start")
+#             raise RuntimeError("vLLM server failed to start")
+#     except Exception as e:
+#         logger.error(f"Error starting vLLM server: {e}")
+#         raise
+
+
+# def run_inference(
+#     config_path: str, formatted_data_paths: List[str], model_path: str = ""
+# ) -> str:
+#     """
+#     Run inference on the fine-tuned model.
+
+#     Args:
+#         config_path: Path to the configuration file
+#         formatted_data_paths: Paths to the formatted data (for compatibility)
+
+#     Returns:
+#         Path to the inference results
+#     """
+#     logger.info("=== Step 4: Running Inference ===")
+
+#     config = read_config(config_path)
+#     inference_config = config.get("inference", {})
+#     formatter_config = config.get("formatter", {})
+#     output_dir = config.get("output_dir", "/tmp/finetune-pipeline/")
+
+#     # Model parameters
+#     if model_path == "":
+#         model_path = inference_config.get("model_path", None)
+#     if model_path is None:
+#         raise ValueError("model_path must be specified in the config")
+
+#     # Get inference data configuration
+#     inference_data_kwargs = inference_config.get("inference_data_kwargs", {})
+#     if not inference_data_kwargs or not inference_data_kwargs.get("data_path"):
+#         raise ValueError(
+#             "inference_data_kwargs with data_path must be specified in config"
+#         )
+
+#     # Performance parameters
+#     gpu_memory_utilization = inference_config.get("gpu_memory_utilization", 0.95)
+#     max_model_len = inference_config.get("max_model_len", 512)
+
+#     # Generation parameters
+#     max_tokens = inference_config.get("max_tokens", 100)
+#     temperature = inference_config.get("temperature", 0.0)
+#     top_p = inference_config.get("top_p", 1.0)
+#     seed = inference_config.get("seed")
+#     structured = inference_config.get("structured", False)
+
+#     # Load inference data using the new function
+#     try:
+#         logger.info("Loading inference data...")
+#         from finetune_pipeline.inference.run_inference import load_inference_data
+
+#         inference_data = load_inference_data(
+#             inference_data_kwargs=inference_data_kwargs,
+#             formatter_config=formatter_config,
+#         )
+#         logger.info(f"Loaded {len(inference_data)} samples for inference")
+
+#     except Exception as e:
+#         logger.error(f"Failed to load inference data: {e}")
+#         raise
+
+#     # Run inference
+#     try:
+#         logger.info(f"Running inference with model: {model_path}")
+#         results = run_vllm_batch_inference_on_dataset(
+#             inference_data=inference_data,
+#             model_path=model_path,
+#             temperature=temperature,
+#             top_p=top_p,
+#             max_tokens=max_tokens,
+#             seed=seed,
+#             structured=structured,
+#             gpu_memory_utilization=gpu_memory_utilization,
+#             max_model_len=max_model_len,
+#         )
+
+#         # Save the results
+#         results_path = os.path.join(output_dir, "inference_results.json")
+#         save_inference_results(results, results_path)
+
+#         logger.info(f"Inference complete. Results saved to {results_path}")
+#         return results_path
+#     except Exception as e:
+#         logger.error(f"Error during inference: {e}")
+#         raise


 def run_pipeline(
@@ -366,18 +367,18 @@ def run_pipeline(
         model_path = os.path.join(output_dir, "finetuned_model")

     time.sleep(5)
-    # Step 3: Inference
-    if not skip_inference:
-        try:
-            results_path = run_inference(config_path, formatted_data_paths, model_path)
-            logger.info(
-                f"Pipeline completed successfully. Results saved to {results_path}"
-            )
-        except Exception as e:
-            logger.error(f"Pipeline failed at inference step: {e}")
-            sys.exit(1)
-    else:
-        logger.info("Skipping inference step")
+    # # Step 3: Inference
+    # if not skip_inference:
+    #     try:
+    #         results_path = run_inference(config_path, formatted_data_paths, model_path)
+    #         logger.info(
+    #             f"Pipeline completed successfully. Results saved to {results_path}"
+    #         )
+    #     except Exception as e:
+    #         logger.error(f"Pipeline failed at inference step: {e}")
+    #         sys.exit(1)
+    # else:
+    #     logger.info("Skipping inference step")

     logger.info("Pipeline execution complete")