Allen 1 年之前
父節點
當前提交
9fb1080e17

File diff suppressed because it is too large
+ 13 - 0
research/long-context-llama/H2O/README.md


+ 7 - 7
research/long-context-llama/H2O/exp.sh

@@ -4,13 +4,13 @@
 # --model-name meta-llama/Llama-2-7b-hf 
 # 20.46/4.9/15.11
 
-# CUDA_VISIBLE_DEVICES=$1 python -u generation.py \
-# --input-path data/summarization/xsum.jsonl \
-# --output-path summarization_output/xsum_h2o.jsonl \
-# --model-name meta-llama/Llama-2-7b-hf \
-# --enable_h2o_generation 
-
 CUDA_VISIBLE_DEVICES=$1 python -u generation.py \
 --input-path data/summarization/xsum.jsonl \
 --output-path summarization_output/xsum_h2o.jsonl \
---model-name meta-llama/Llama-2-7b-hf
+--model-name meta-llama/Llama-2-7b-hf \
+--enable_h2o_generation 
+
+# CUDA_VISIBLE_DEVICES=$1 python -u generation.py \
+# --input-path data/summarization/xsum.jsonl \
+# --output-path summarization_output/xsum_h2o.jsonl \
+# --model-name meta-llama/Llama-2-7b-hf

+ 1 - 1
research/long-context-llama/H2O/generation.py

@@ -34,7 +34,7 @@ if __name__ == '__main__':
 
     parser.add_argument("--enable_h2o_generation", action='store_true')
     parser.add_argument("--num_heavy_hitter_tokens", type=int, default=256)
-    parser.add_argument("--num_window_length", type=int, default=1024)
+    parser.add_argument("--num_window_length", type=int, default=512)
 
     parser.add_argument("--enable_position_rolling", action='store_true')