Browse Source

improve readability, logging, fix spellcheck test

Suraj Subramanian cách đây 7 tháng
mục cha
commit
713206fdab

+ 4 - 0
.github/scripts/spellcheck_conf/wordlist.txt

@@ -1451,3 +1451,7 @@ openhathi
 sarvam
 subtask
 acc
+Triaging
+matplotlib
+remediations
+walkthrough

+ 7 - 4
recipes/use_cases/github_triage/llm.py

@@ -4,6 +4,7 @@ import yaml
 import time
 import json
 
+from tqdm import tqdm
 from openai import OpenAI
 import groq
 
@@ -37,7 +38,7 @@ class LlamaVLLM():
             )
             output = response.choices[0].message
         except Exception as e:
-            log.error(
+            logger.error(
                 f"FAILED to generate inference for input {inputs}\nError: {str(e)}"
             )
             output = None
@@ -85,7 +86,8 @@ class LlamaGroq():
                 print(f"[groq] waiting for {wait} to prevent ratelimiting")
                 time.sleep(wait)
             except Exception as e:
-                logger.error(f"INFERENCE FAILED with Error: {e.response.status_code}! for input:\n{inputs[-1]['content'][:300]}")
+                logger.error(f"INFERENCE FAILED with Error: {e.response.status_code} for input:\n{inputs[-1]['content'][:300]}")
+                break
 
         return output
 
@@ -141,7 +143,8 @@ def run_llm_inference(
         )
 
     responses = [
-        LLM.chat(i, generation_kwargs, guided_decode_json_schema) for i in inputs
+        LLM.chat(i, generation_kwargs, guided_decode_json_schema) 
+        for i in tqdm(inputs, desc=f"Inference[{prompt_name}]")
     ]
 
     if guided_decode_json_schema is not None:
@@ -159,4 +162,4 @@ def run_llm_inference(
     if not _batch:
         responses = responses[0]
 
-    return responses
+    return responses

+ 1 - 1
recipes/use_cases/github_triage/triage.py

@@ -68,7 +68,7 @@ def generate_issue_annotations(
         }
         return themes, theme_count
 
-    logger.info(f"Generating annotations for {len(issues_df)}")
+    logger.info(f"Generating annotations for {len(issues_df)} issues")
     
     discussions = issues_df["discussion"].tolist()
     metadata = run_llm_inference(

+ 2 - 8
recipes/use_cases/github_triage/utils.py

@@ -31,7 +31,6 @@ def fetch_repo_issues(repo, start_date=None, end_date=None):
     url = f"https://api.github.com/search/issues?per_page=100&sort=created&order=asc&q=repo:{repo}+is:issue{time_filter}"
 
     samples = []
-    logger.info(f"Fetching issues on {repo} from {start_date} to {end_date}")
 
     while True:
         response = fetch_github_endpoint(url)
@@ -61,8 +60,7 @@ def fetch_repo_issues(repo, start_date=None, end_date=None):
             else:
                 break
         else:
-            raise Exception(f"Fetching issues failed with Error: {response.status_code}")
-        print()
+            raise Exception(f"Fetching issues failed with Error: {response.status_code} on url {url}")
         
     rows = [{
         "repo_name": repo,
@@ -93,12 +91,8 @@ def fetch_repo_stats(repo):
 
 def validate_df_values(df, out_folder=None, name=None):
     df.columns = df.columns.str.lower().str.replace(" ", "_").str.replace("-", "_")
-    # for c in df.columns:
-    #     x = df[c].iloc[0]
-    #     if isinstance(x, str) and '[' in x:
-    #         df[c] = df[c].apply(lambda x: eval(x))
     if out_folder is not None:
         path = f"{out_folder}/{name}.csv"
         df.to_csv(path, index=False)
         logger.info(f"Data saved to {path}")
-    return df
+    return df

Những thay đổi đã bị hủy bỏ vì nó quá lớn
+ 654 - 562
recipes/use_cases/github_triage/walkthrough.ipynb