@@ -301,7 +301,7 @@ class H2OLlamaAttention(nn.Module):
if not output_attentions:
attn_weights = None
- if layer_idx == 0:
+ if self.layer_idx == 0:
print(past_key_value.key_cache[0].shape, past_key_value.value_cache[0].shape, past_key_value.accumulated_attention_scores[0][0,0,0].item())
return attn_output, attn_weights, past_key_value
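
The fix qualifies `layer_idx` with `self.`: inside `forward` there is no local named `layer_idx`, so the bare reference raises a `NameError`; the index is stored as a module attribute at construction time (as in Hugging Face's `LlamaAttention`). A minimal runnable sketch of that pattern, using a hypothetical `ToyAttention` stand-in rather than the actual `H2OLlamaAttention` class:

```python
import torch
import torch.nn as nn

class ToyAttention(nn.Module):
    """Hypothetical stand-in mirroring how the attention module is
    assumed to receive and store its layer index at construction."""

    def __init__(self, layer_idx: int):
        super().__init__()
        self.layer_idx = layer_idx  # module attribute, not a forward() local

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # A bare `layer_idx` here would raise NameError; the attribute
        # must be read through `self`, matching the patched line above.
        if self.layer_idx == 0:
            print("layer 0 debug:", x.shape)
        return x

layer0 = ToyAttention(layer_idx=0)
layer1 = ToyAttention(layer_idx=1)
layer0(torch.zeros(1, 4))  # prints the debug line
layer1(torch.zeros(1, 4))  # silent: only layer 0 reports
```

Gating the debug print on `self.layer_idx == 0` keeps the output to one line per forward pass instead of one per decoder layer, which is why the guard is worth fixing rather than deleting.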