Allen committed 1 year ago
commit 228d7103ef

+ 7 - 7
research/long-context-llama/H2O/exp.sh

@@ -4,13 +4,13 @@
 # --model-name meta-llama/Llama-2-7b-hf 
 # 20.46/4.9/15.11
 
-# CUDA_VISIBLE_DEVICES=$1 python -u generation.py \
-# --input-path data/summarization/xsum.jsonl \
-# --output-path summarization_output/xsum_h2o.jsonl \
-# --model-name meta-llama/Llama-2-7b-hf \
-# --enable_h2o_generation 
-
 CUDA_VISIBLE_DEVICES=$1 python -u generation.py \
 --input-path data/summarization/xsum.jsonl \
 --output-path summarization_output/xsum_h2o.jsonl \
---model-name meta-llama/Llama-2-7b-hf
+--model-name meta-llama/Llama-2-7b-hf \
+--enable_h2o_generation 
+
+# CUDA_VISIBLE_DEVICES=$1 python -u generation.py \
+# --input-path data/summarization/xsum.jsonl \
+# --output-path summarization_output/xsum_h2o.jsonl \
+# --model-name meta-llama/Llama-2-7b-hf
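
This hunk simply swaps which of the two invocations is live: the run with --enable_h2o_generation becomes the active command, and the vanilla Llama-2 baseline is kept below it as a comment. A minimal usage sketch, assuming the script's only positional argument is the GPU index (it feeds CUDA_VISIBLE_DEVICES; the exact calling convention is an assumption):

    # assumed invocation: the first argument selects the GPU
    bash exp.sh 0    # H2O-enabled XSum summarization run on GPU 0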

+ 3 - 0
research/long-context-llama/H2O/utils_llama.py

@@ -300,6 +300,9 @@ class H2OLlamaAttention(nn.Module):
 
         if not output_attentions:
             attn_weights = None
+        
+        if layer_idx == 0:
+            print(past_key_value.key_cache[0].shape, past_key_value.value_cache[0].shape, past_key_value.accumulated_attention_scores[0][0,0,0].item())
 
         return attn_output, attn_weights, past_key_value
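
For context, the added print is a lightweight debug probe on the first decoder layer: it reports the shapes of the layer-0 key/value caches and one entry of the accumulated attention scores that H2O uses to rank "heavy hitter" tokens for cache eviction. A minimal sketch of the cache fields the probe reads (field names are taken from the diff; the container class and tensor shapes are assumptions):

    import torch

    class H2OCacheSketch:
        """Stand-in for the repo's H2O cache; only the probed fields are modeled."""
        def __init__(self):
            # one tensor per layer: [batch, num_heads, seq_len, head_dim]
            self.key_cache = []
            self.value_cache = []
            # running attention mass per cached token, assumed [batch, num_heads, seq_len];
            # H2O keeps the tokens with the largest scores when evicting
            self.accumulated_attention_scores = []

    cache = H2OCacheSketch()
    cache.key_cache.append(torch.zeros(1, 32, 128, 128))
    cache.value_cache.append(torch.zeros(1, 32, 128, 128))
    cache.accumulated_attention_scores.append(torch.zeros(1, 32, 128))

    # mirrors the commit's debug print (layer 0 only)
    print(cache.key_cache[0].shape,
          cache.value_cache[0].shape,
          cache.accumulated_attention_scores[0][0, 0, 0].item())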