
* Add logging
* Add README
* Add walkthrough notebook

Suraj Subramanian, 7 months ago
parent
commit
cd31ee99ac
18 changed files with 2571 additions and 55 deletions
  1. +60 -0    recipes/use_cases/github_triage/README.md
  2. +3 -5     recipes/use_cases/github_triage/config.yaml
  3. +10 -8    recipes/use_cases/github_triage/llm.py
  4. +1846 -0  recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/annotated_issues.csv
  5. +6 -0     recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/challenges.csv
  6. +2 -0     recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/overview.csv
  7. BIN       recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/commits.png
  8. BIN       recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/engagement_sankey.png
  9. BIN       recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/expertise.png
  10. BIN      recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/sentiment.png
  11. BIN      recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/severity.png
  12. BIN      recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/themes.png
  13. BIN      recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/report.pdf
  14. +35 -5   recipes/use_cases/github_triage/pdf_report.py
  15. +5 -1    recipes/use_cases/github_triage/plots.py
  16. +18 -24  recipes/use_cases/github_triage/triage.py
  17. +19 -12  recipes/use_cases/github_triage/utils.py
  18. +567 -0  recipes/use_cases/github_triage/walkthrough.ipynb

+ 60 - 0
recipes/use_cases/github_triage/README.md

@@ -0,0 +1,60 @@
+# Automatic Issue Triaging with Llama
+
+This tool uses an off-the-shelf Llama model to analyze a repository's issue threads, generate insights, and create a report summarizing the state of the repository. It serves as a reference implementation for using Llama to build custom reporting and data-analytics applications.
+
+## Features
+
+The tool performs the following tasks:
+
+* Fetches issue threads from a specified repository
+* Analyzes issue discussions and generates annotations such as category, severity, component affected, etc.
+* Categorizes all issues by theme
+* Synthesizes key challenges faced by users, along with probable causes and remediations
+* Generates a high-level executive summary providing insights on diagnosing and improving the developer experience
+
+For a step-by-step look, check out the [walkthrough notebook](walkthrough.ipynb).
+
+## Getting Started
+
+### Installation
+
+```bash
+pip install -r requirements.txt
+```
+
+### Setup
+
+1. **API Keys and Model Service**: Set your GitHub token for API calls. Some privileged information may not be available if you don't have push access to the target repository.
+2. **Model Configuration**: Set the appropriate values in the `model` section of [config.yaml](config.yaml) for using Llama via VLLM or Groq.
+3. **JSON Schemas**: Edit the output JSON schemas in [config.yaml](config.yaml) to ensure consistency in outputs. VLLM supports JSON-decoding via the `guided_json` generation argument, while Groq requires passing the schema in the system prompt.
+
+### Running the Tool
+
+```bash
+python triage.py --repo_name='meta-llama/llama-recipes' --start_date='2024-08-14' --end_date='2024-08-27'
+```
+
+### Output
+
+The tool generates:
+
+* CSV files with `annotations`, `challenges`, and `overview` data, which can be persisted in SQL tables for downstream analyses and reporting.
+* Graphical matplotlib plots of repository traffic, maintenance activity, and issue attributes.
+* A PDF report for easier reading and sharing.
+
+## Config
+
+The tool's configuration is stored in [config.yaml](config.yaml). The following sections can be edited:
+
+* **github_token**: Use a token that has push access to the target repo.
+* **model**: Specify the model service (`vllm` or `groq`) and set the endpoints and API keys as applicable.
+* **prompts**: For each of the 3 tasks Llama does in this tool, we specify a prompt and an output JSON schema:
+  * `parse_issue`: Parses each issue thread and generates annotations
+  * `assign_category`: Assigns each issue to a category specified in an enum in the corresponding JSON schema
+  * `get_overview`: Generates a high-level executive summary and analysis of all the parsed and generated data
+
+## Troubleshooting
+
+* If you encounter issues with API calls, ensure that your GitHub token is set correctly and that you have the necessary permissions.
+* If you encounter issues with the model service, check the configuration values in [config.yaml](config.yaml).
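The structured-output setup described in step 3 of the Setup section can be sketched as follows. This is an illustrative snippet, not code from the repo: the schema, the prompt text, and the `extra_body` wiring are assumptions about how one would pass `guided_json` to a vLLM OpenAI-compatible endpoint versus embedding the schema in a Groq system prompt.

```python
import json

# Hypothetical output schema -- the real schemas live in config.yaml.
schema = {
    "type": "object",
    "properties": {"severity": {"type": "string"}},
    "required": ["severity"],
}

# vLLM: the OpenAI-compatible server accepts the schema via the
# `guided_json` generation argument (forwarded through extra_body).
vllm_kwargs = {"extra_body": {"guided_json": schema}}

# Groq: no decoding-time guidance, so the schema is embedded in the
# system prompt instead.
system_prompt = (
    "Analyze the issue and respond with JSON matching this schema:\n"
    + json.dumps(schema)
)
```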

File diff is too large
+ 3 - 5
recipes/use_cases/github_triage/config.yaml


+ 10 - 8
recipes/use_cases/github_triage/llm.py

@@ -7,7 +7,8 @@ import json
 from openai import OpenAI
 import groq
 
-log = logging.getLogger(__name__)
+logger = logging.getLogger(__name__)
+logger.addHandler(logging.StreamHandler())
 CFG = yaml.safe_load(open("config.yaml", "r"))
 
 class LlamaVLLM():
@@ -47,7 +48,7 @@ class LlamaGroq():
     def __init__(self, key, model_id):
         self.model_id = model_id
         self.client = groq.Groq(api_key=key)
-        print(f"Using Groq:{self.model_id} for inference")
+        logger.debug(f"Using Groq:{self.model_id} for inference")
 
     def chat(
         self, 
@@ -78,13 +79,13 @@ class LlamaGroq():
                 output = completion.choices[0].message.content
                 break
             except groq.RateLimitError as e:
-                wait = response.headers['X-Ratelimit-Reset']
+                wait = float(e.response.headers['X-Ratelimit-Reset'])
                 response = e.response
                 print(e)
-                print(f"waiting for {wait} to prevent ratelimiting")
+                print(f"[groq] waiting for {wait} to prevent ratelimiting")
                 time.sleep(wait)
-            except:
-                print(f"inference failed for input: {inputs}")
+            except Exception as e:
+                logger.error(f"INFERENCE FAILED with Error: {e}! for input:\n{inputs[-1]['content'][:300]}")
 
         return output
 
@@ -107,7 +108,6 @@ def run_llm_inference(
     Returns:
     - Union[str, List[str]]: The response(s) from the LLM.
     """
-    log.info(f"[run_llm_inference] {prompt_name}")
     
     # initialize appropriate LLM accessor
     if CFG['model']['use'] == 'vllm':
@@ -117,6 +117,8 @@ def run_llm_inference(
     else:
         raise ValueError("Invalid model type in config.yaml")
     
+    logger.debug(f"Running `{prompt_name}` inference with {CFG['model']['use']}")
+    
     _batch = True
     if isinstance(inputs, str):
         _batch = False
@@ -150,7 +152,7 @@ def run_llm_inference(
                     responses_json.append(json.loads(r, strict=False))
                     continue
                 except json.JSONDecodeError:
-                    log.error(f"Error decoding JSON: {r}")
+                    logger.error(f"Error decoding JSON: {r}")
             responses_json.append(None)
         responses = responses_json
 
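The rate-limit handling in `LlamaGroq.chat` can be sketched in isolation. `RateLimitError` below is a local stand-in for `groq.RateLimitError` (assuming, as the diff does, that the exception exposes the HTTP response and its `X-Ratelimit-Reset` header); note the header value is a string and must be cast to a number before sleeping:

```python
import time

class RateLimitError(Exception):
    """Stand-in for groq.RateLimitError, which carries the HTTP response."""
    def __init__(self, headers):
        super().__init__("rate limited")
        self.response = type("Resp", (), {"headers": headers})()

def chat_with_retry(call, max_retries=3):
    # Retry the chat call, sleeping for the interval the server reports
    # in the X-Ratelimit-Reset header before trying again.
    for _ in range(max_retries):
        try:
            return call()
        except RateLimitError as e:
            wait = float(e.response.headers["X-Ratelimit-Reset"])
            time.sleep(wait)
    return None
```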

File diff is too large
+ 1846 - 0
recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/annotated_issues.csv


File diff is too large
+ 6 - 0
recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/challenges.csv


File diff is too large
+ 2 - 0
recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/overview.csv


BIN
recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/commits.png


BIN
recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/engagement_sankey.png


BIN
recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/expertise.png


BIN
recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/sentiment.png


BIN
recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/severity.png


BIN
recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/plots/themes.png


BIN
recipes/use_cases/github_triage/output/pytorch/pytorch/2024-08-28_2024-08-28/report.pdf


+ 35 - 5
recipes/use_cases/github_triage/pdf_report.py

@@ -1,6 +1,10 @@
 from fpdf import FPDF
 import os
 from datetime import datetime
+import logging
+
+logger = logging.getLogger(__name__)
+logger.addHandler(logging.StreamHandler())
 
 class ReportPDF(FPDF):
     def __init__(self, repository_name, start_date, end_date):
@@ -38,7 +42,12 @@ class ReportPDF(FPDF):
         self.cell(0, 10, "Possible Causes", 0, 0, 'L')
         self.ln(5)
         self.set_font('Arial', '', 10)
-        for x in challenge_data['possible_causes']:
+
+        x_list = challenge_data['possible_causes']
+        if isinstance(x_list, str):
+            x_list = x_list.split(',')
+
+        for x in x_list:
             self.cell(0, 10, "* " + x, 0, 0, 'L')
             self.ln(5)
         self.ln(3)
@@ -48,7 +57,12 @@ class ReportPDF(FPDF):
         self.cell(0, 10, "Remediations", 0, 0, 'L')
         self.ln(5)
         self.set_font('Arial', '', 10)
-        for x in challenge_data['remediations']:
+
+        x_list = challenge_data['remediations']
+        if isinstance(x_list, str):
+            x_list = x_list.split(',')
+
+        for x in x_list:
             self.cell(0, 10, "* " + x, 0, 0, 'L')
             self.ln(5)
         self.ln(3)
@@ -56,9 +70,15 @@ class ReportPDF(FPDF):
         # affected issues
         self.set_font('Arial', 'B', 10)
         self.cell(30, 10, f"Affected issues: ", 0, 0, 'L')
-        for iss in challenge_data['affected_issues']:
+        
+        x_list = challenge_data['affected_issues']
+        if isinstance(x_list, str):
+            x_list = x_list.split(',')
+            
+        for iss in x_list:
             self.set_text_color(0,0,255)
-            self.cell(8, 10, str(iss), 0, 0, 'L', link=f"https://github.com/{self.repo}/issues/{iss}")
+            self.cell(12, 10, str(iss), 0, 0, 'L', link=f"https://github.com/{self.repo}/issues/{iss}")
+            
         self.set_text_color(0,0,0)
         self.ln(15)
 
@@ -75,6 +95,10 @@ class ReportPDF(FPDF):
         self.cell(0, 8, 'Open Questions', 'B', 0, 'L')
         self.ln(10)
         self.set_font('Arial', '', 10)
+
+        if isinstance(open_questions, str):
+            open_questions = open_questions.split(',')
+                    
         for qq in open_questions:
             self.multi_cell(0, 5, "* " + qq, 0, 0, 'L')
             self.ln(5)
@@ -87,6 +111,9 @@ class ReportPDF(FPDF):
         for path in plot_paths:
             if os.path.exists(path):
                 self.add_plot(path)
+            else:
+                self.set_font('Arial', 'BI', 10)
+                self.cell(0, 8, '< Plot not found, make sure you have push-access to this repo >', 0, 0)
         self.ln(10)
             
     def add_plot(self, img):
@@ -96,6 +123,9 @@ class ReportPDF(FPDF):
     
     
 def create_report_pdf(repo_name, start_date, end_date, key_challenges_data, executive_summary, open_questions, out_folder):#, image1, image2):
+    out_path = f'{out_folder}/report.pdf'
+    logger.info(f"Creating PDF report at {out_path}")
+    
     pdf = ReportPDF(repo_name, start_date, end_date)
     pdf.add_page()
     pdf.exec_summary(executive_summary)
@@ -107,5 +137,5 @@ def create_report_pdf(repo_name, start_date, end_date, key_challenges_data, exec
     pdf.add_graphs_section("Traffic in the last 2 weeks", [f'{out_folder}/plots/{x}.png' for x in ['views_clones','resources', 'referrers']])
     pdf.add_page()
     pdf.add_graphs_section("New issues in the last 2 weeks", [f'{out_folder}/plots/{x}.png' for x in ['themes', 'severity', 'sentiment', 'expertise']])
-    pdf.output(f'{out_folder}/report.pdf', 'F')
+    pdf.output(out_path, 'F')
 
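The repeated `isinstance(..., str)` checks this diff adds normalize fields that may arrive either as lists or as comma-separated strings (e.g. after a CSV round-trip stringifies list columns). A minimal sketch of that pattern:

```python
def as_list(value):
    # Accept either a list or a comma-separated string and always
    # return a list of items, stripping stray whitespace around each.
    if isinstance(value, str):
        return [v.strip() for v in value.split(",")]
    return list(value)
```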

+ 5 - 1
recipes/use_cases/github_triage/plots.py

@@ -2,7 +2,10 @@ import matplotlib.pyplot as plt
 import pandas as pd
 import plotly.graph_objects as go
 from utils import fetch_github_endpoint
+import logging
 
+logger = logging.getLogger(__name__)
+logger.addHandler(logging.StreamHandler())
 
 def plot_views_clones(repo_name, out_folder):
     def json_to_df(json_data, key):
@@ -124,7 +127,6 @@ def plot_themes(df, out_folder):
     plt.figure(figsize=(10, 6))
     plt.barh(data['Theme'], data['Count'])
     plt.xlabel('Count', fontsize=18)
-    # plt.ylabel('Theme', fontsize=18)
     plt.title('Themes', fontsize=24)
     plt.tight_layout()
     plt.savefig(f'{out_folder}/themes.png', dpi=120)
@@ -164,11 +166,13 @@ def issue_activity_sankey(df, out_folder):
 def draw_all_plots(repo_name, out_folder, overview):
     func1 = [plot_views_clones, plot_high_traffic_resources, plot_high_traffic_referrers, plot_commit_activity]
     func2 = [plot_user_expertise, plot_severity, plot_sentiment, plot_themes, issue_activity_sankey]
+    logger.info("Plotting traffic trends...")
     for func in func1:
         try:
             func(repo_name, out_folder)
         except:
             print(f"Github fetch failed for {func}. Make sure you have push-access to {repo_name}!")
+    logger.info("Plotting issue trends...")
     for func in func2:
         func(overview, out_folder)
     
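The per-function try/except in `draw_all_plots` keeps one failing traffic plot (which requires push access) from aborting the rest. A generic sketch of that pattern, with hypothetical function names:

```python
def run_all(funcs, *args):
    # Call each plotting function in turn, collecting failures
    # instead of letting one exception abort the whole run.
    failures = {}
    for fn in funcs:
        try:
            fn(*args)
        except Exception as e:
            failures[fn.__name__] = e
    return failures
```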

+ 18 - 24
recipes/use_cases/github_triage/triage.py

@@ -1,3 +1,4 @@
+import logging
 import os
 from typing import Optional, Tuple, Dict
 import pandas as pd
@@ -8,9 +9,12 @@ from utils import fetch_repo_issues, validate_df_values
 from plots import draw_all_plots
 from pdf_report import create_report_pdf
 
+logging.basicConfig(level=logging.INFO, filename='log.txt', format='%(asctime)s [%(levelname)-5.5s] %(message)s')
+logger = logging.getLogger(__name__)
+logger.addHandler(logging.StreamHandler())
+
 def generate_issue_annotations(
-    issues_df: pd.DataFrame,
-    save_folder: Optional[str] = None
+    issues_df: pd.DataFrame
 ) -> Tuple[pd.DataFrame, Dict[str, int]]:
     """
     Get the annotations for the given issues.
@@ -64,6 +68,8 @@ def generate_issue_annotations(
         }
         return themes, theme_count
 
+    logger.info(f"Generating annotations for {len(issues_df)} issues")
+    
     discussions = issues_df["discussion"].tolist()
     metadata = run_llm_inference(
         "parse_issue",
@@ -83,10 +89,7 @@ def generate_issue_annotations(
 
     themes, theme_count = _categorize_issues(issues_metadata_df)
     issues_metadata_df["themes"] = themes
-    
-    if save_folder:
-        save_df(issues_metadata_df, save_folder, 'annotated_issues')
-        
+
     return issues_metadata_df, theme_count
 
 
@@ -111,7 +114,8 @@ def generate_executive_reports(
     Returns:
     - Tuple[pd.DataFrame, pd.DataFrame]: A tuple containing the challenges DataFrame and the overview DataFrame.
     """
-
+    logger.info(f"Generating high-level summaries from annotations...")
+    
     report = {
         "repo_name": repo_name,
         "start_date": start_date,
@@ -181,6 +185,8 @@ def generate_executive_reports(
             overview_df[f"{col}_{k}"] = v
 
     overview_df = pd.DataFrame(overview_df)
+    
+    logger.info(f"Identifying key-challenges faced by users...")
 
     challenges_df = {k: report[k] for k in ["repo_name", "start_date", "end_date"]}
     challenges_df["key_challenge"] = [
@@ -198,18 +204,7 @@ def generate_executive_reports(
     challenges_df = pd.DataFrame(challenges_df)
 
     return challenges_df, overview_df
-
-
-def create_report(repo_name, start_date, end_date, challenges, overview, out_folder):
-    
-    # generate pdf report
-    challenges = validate_df_values(challenges)
-    overview = validate_df_values(overview)
-    exec_summary = overview['executive_summary'].iloc[0]
-    open_qs = overview['open_questions'].iloc[0]
-    key_challenges_data = challenges[['key_challenge', 'possible_causes', 'remediations', 'affected_issues']].to_dict('records')
-    create_report_pdf(repo_name, start_date, end_date, key_challenges_data, exec_summary, open_qs, out_folder)
-       
+   
    
 def main(repo_name, start_date, end_date):
     out_folder = f'output/{repo_name}/{start_date}_{end_date}'
@@ -220,14 +215,14 @@ def main(repo_name, start_date, end_date):
     
     # Generate annotations and metadata
     annotated_issues, theme_counts = generate_issue_annotations(issues_df)
+    # Validate and save generated data
+    annotated_issues = validate_df_values(annotated_issues, out_folder, 'annotated_issues')
     
     # Generate high-level analysis
     challenges, overview = generate_executive_reports(annotated_issues, theme_counts, repo_name, start_date, end_date)
-    
     # Validate and save generated data
-    annotated_issues = validate_df_values(annotated_issues)
-    challenges = validate_df_values(challenges)
-    overview = validate_df_values(overview)
+    challenges = validate_df_values(challenges, out_folder, 'challenges')
+    overview = validate_df_values(overview, out_folder, 'overview')
     
     # Create graphs and charts
     plot_folder = out_folder + "/plots"
@@ -241,6 +236,5 @@ def main(repo_name, start_date, end_date):
     create_report_pdf(repo_name, start_date, end_date, key_challenges_data, exec_summary, open_qs, out_folder)
     
 
-
 if __name__ == "__main__":
     fire.Fire(main)
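The logging pattern this commit adds to every module (a `basicConfig` pointed at `log.txt` in `triage.py`, plus a module-level logger with an explicit `StreamHandler`) can be sketched as follows; the logger name and the `StringIO` stream are illustrative choices that make the output easy to inspect, where the real code writes to stderr and `log.txt`:

```python
import io
import logging

# Module-level logger with an explicit StreamHandler, mirroring the
# pattern added to llm.py, plots.py, utils.py, and triage.py.
stream = io.StringIO()
logger = logging.getLogger("triage_example")
logger.setLevel(logging.INFO)
logger.addHandler(logging.StreamHandler(stream))

logger.info("Generating annotations for 42 issues")
```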

+ 19 - 12
recipes/use_cases/github_triage/utils.py

@@ -1,16 +1,20 @@
 import requests
 import yaml
 import pandas as pd
+import logging
 
+logger = logging.getLogger(__name__)
+logger.addHandler(logging.StreamHandler())
 
 CFG = yaml.safe_load(open("config.yaml", "r"))
 
 
 def fetch_github_endpoint(url):
     headers = {
-        "Authorization": f"Bearer {CFG['tokens']['github']}",
+        "Authorization": f"Bearer {CFG['github_token']}",
         "Content-Type": "application/json"
     }
+    logger.debug(f"Requesting url: {url}")
     response = requests.get(url, headers=headers, timeout=10)
     return response
 
@@ -27,13 +31,12 @@ def fetch_repo_issues(repo, start_date=None, end_date=None):
     url = f"https://api.github.com/search/issues?per_page=100&sort=created&order=asc&q=repo:{repo}+is:issue{time_filter}"
 
     samples = []
-    print(f"[{repo}/issues] Fetching page: ", end=" ", flush=True)
+    logger.info(f"Fetching issues on {repo} from {start_date} to {end_date}")
 
     while True:
         response = fetch_github_endpoint(url)
-
+        
         if response.status_code == 200:
-            print(". ", end=" ", flush=True)
             issues = response.json()['items']
             for issue in issues:
                 if issue['body'] is None:
@@ -58,9 +61,9 @@ def fetch_repo_issues(repo, start_date=None, end_date=None):
             else:
                 break
         else:
-            print(f"Error: {response.status_code}")
-            break
-
+            raise Exception(f"Fetching issues failed with Error: {response.status_code}")
     rows = [{
         "repo_name": repo,
         "number": d['number'],
@@ -71,6 +74,8 @@ def fetch_repo_issues(repo, start_date=None, end_date=None):
         "discussion": d['discussion'],
     } for d in samples]
     
+    logger.info(f"Fetched {len(samples)} issues on {repo} from {start_date} to {end_date}")
+    
     return pd.DataFrame(rows)
 
 
@@ -88,10 +93,12 @@ def fetch_repo_stats(repo):
 
 def validate_df_values(df, out_folder=None, name=None):
     df.columns = df.columns.str.lower().str.replace(" ", "_").str.replace("-", "_")
-    for c in df.columns:
-        x = df[c].iloc[0]
-        if isinstance(x, str) and '[' in x:
-            df[c] = df[c].apply(lambda x: eval(x))
+    # for c in df.columns:
+    #     x = df[c].iloc[0]
+    #     if isinstance(x, str) and '[' in x:
+    #         df[c] = df[c].apply(lambda x: eval(x))
     if out_folder is not None:
-        df.to_csv(f"{out_folder}/{name}.csv", index=False)
+        path = f"{out_folder}/{name}.csv"
+        df.to_csv(path, index=False)
+        logger.info(f"Data saved to {path}")
     return df
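The column normalization that `validate_df_values` applies before optionally saving a frame to CSV can be sketched in isolation (a minimal example, not the repo's full function):

```python
import pandas as pd

def normalize_columns(df):
    # Lower-case column names and replace spaces/hyphens with
    # underscores so downstream code can use attribute-style names.
    out = df.copy()
    out.columns = (out.columns.str.lower()
                              .str.replace(" ", "_")
                              .str.replace("-", "_"))
    return out
```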

File diff is too large
+ 567 - 0
recipes/use_cases/github_triage/walkthrough.ipynb