Browse Source

improve readability, logging, fix spellcheck test

Suraj Subramanian cách đây 7 tháng
mục cha
commit
713206fdab

+ 4 - 0
.github/scripts/spellcheck_conf/wordlist.txt

@@ -1451,3 +1451,7 @@ openhathi
 sarvam
 subtask
 acc
+Triaging
+matplotlib
+remediations
+walkthrough

+ 7 - 4
recipes/use_cases/github_triage/llm.py

@@ -4,6 +4,7 @@ import yaml
 import time
 import json
 
+from tqdm import tqdm
 from openai import OpenAI
 import groq
 
@@ -37,7 +38,7 @@ class LlamaVLLM():
             )
             output = response.choices[0].message
         except Exception as e:
-            log.error(
+            logger.error(
                 f"FAILED to generate inference for input {inputs}\nError: {str(e)}"
             )
             output = None
@@ -85,7 +86,8 @@ class LlamaGroq():
                 print(f"[groq] waiting for {wait} to prevent ratelimiting")
                 time.sleep(wait)
             except Exception as e:
-                logger.error(f"INFERENCE FAILED with Error: {e.response.status_code}! for input:\n{inputs[-1]['content'][:300]}")
+                logger.error(f"INFERENCE FAILED with Error: {e.response.status_code} for input:\n{inputs[-1]['content'][:300]}")
+                break
 
         return output
 
@@ -141,7 +143,8 @@ def run_llm_inference(
         )
 
     responses = [
-        LLM.chat(i, generation_kwargs, guided_decode_json_schema) for i in inputs
+        LLM.chat(i, generation_kwargs, guided_decode_json_schema) 
+        for i in tqdm(inputs, desc=f"Inference[{prompt_name}]")
     ]
 
     if guided_decode_json_schema is not None:
@@ -159,4 +162,4 @@ def run_llm_inference(
     if not _batch:
         responses = responses[0]
 
-    return responses
+    return responses

+ 1 - 1
recipes/use_cases/github_triage/triage.py

@@ -68,7 +68,7 @@ def generate_issue_annotations(
         }
         return themes, theme_count
 
-    logger.info(f"Generating annotations for {len(issues_df)}")
+    logger.info(f"Generating annotations for {len(issues_df)} issues")
     
     discussions = issues_df["discussion"].tolist()
     metadata = run_llm_inference(

+ 2 - 8
recipes/use_cases/github_triage/utils.py

@@ -31,7 +31,6 @@ def fetch_repo_issues(repo, start_date=None, end_date=None):
     url = f"https://api.github.com/search/issues?per_page=100&sort=created&order=asc&q=repo:{repo}+is:issue{time_filter}"
 
     samples = []
-    logger.info(f"Fetching issues on {repo} from {start_date} to {end_date}")
 
     while True:
         response = fetch_github_endpoint(url)
@@ -61,8 +60,7 @@ def fetch_repo_issues(repo, start_date=None, end_date=None):
             else:
                 break
         else:
-            raise Exception(f"Fetching issues failed with Error: {response.status_code}")
-        print()
+            raise Exception(f"Fetching issues failed with Error: {response.status_code} on url {url}")
         
     rows = [{
         "repo_name": repo,
@@ -93,12 +91,8 @@ def fetch_repo_stats(repo):
 
 def validate_df_values(df, out_folder=None, name=None):
     df.columns = df.columns.str.lower().str.replace(" ", "_").str.replace("-", "_")
-    # for c in df.columns:
-    #     x = df[c].iloc[0]
-    #     if isinstance(x, str) and '[' in x:
-    #         df[c] = df[c].apply(lambda x: eval(x))
     if out_folder is not None:
         path = f"{out_folder}/{name}.csv"
         df.to_csv(path, index=False)
         logger.info(f"Data saved to {path}")
-    return df
+    return df

Những thay đổi đã bị hủy bỏ vì nó quá lớn
+ 654 - 562
recipes/use_cases/github_triage/walkthrough.ipynb