Hamid Shojanazeri 1 year ago
Parent
Current commit
7ec3deac7b
1 file changed, 2 insertions(+), 3 deletions(-)
  1. 2 3
      inference/inference.py

+ 2 - 3
inference/inference.py

@@ -33,6 +33,7 @@ def main(
     enable_azure_content_safety: bool=False, # Enable safety check with Azure content safety api
     enable_sensitive_topics: bool=False, # Enable check for sensitive topics using AuditNLG APIs
     enable_saleforce_content_safety: bool=True, # Enable safety check with Salesforce safety Flan-T5
+    max_padding_length: int=0, # specifies the max padding length to pad the context/prompt
     **kwargs
 ):
     if prompt_file is not None:
@@ -85,10 +86,8 @@ def main(
         model = load_peft_model(model, peft_model)
 
     model.eval()
-    max_length = 350
-    batch = tokenizer(user_prompt, pad_to_max_length=True, max_length=max_length, return_tensors="pt")
+    batch = tokenizer(user_prompt, pad_to_max_length=True, max_length=max_padding_length, return_tensors="pt")
     batch = {k: v.to("cuda") for k, v in batch.items()}
-    print("lentgh******", batch["input_ids"].size())
     start = time.perf_counter()
     with torch.no_grad():
         outputs = model.generate(