```python
import torch
from transformers import AutoTokenizer, AutoModel
from llama_index.core.base.embeddings.base import BaseEmbedding

device = "cuda" if torch.cuda.is_available() else "cpu"

# Load tokenizer and model
model_id = "jinaai/jina-embeddings-v2-base-en"  # alternative: "jinaai/jina-embeddings-v3"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModel.from_pretrained(model_id, trust_remote_code=True).to(device)


# Generate an embedding by mean-pooling the last hidden state over the token dimension
def get_embedding(text):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True).to(device)
    with torch.no_grad():
        outputs = model(**inputs)
    # cast with .to(torch.float32) before .numpy() if needed
    return outputs.last_hidden_state.mean(dim=1).squeeze().cpu().numpy()


class LocalJinaEmbedding(BaseEmbedding):
    def __init__(self):
        super().__init__()

    def _get_text_embedding(self, text):
        return get_embedding(text).tolist()  # plain lists for compatibility with LlamaIndex

    def _get_query_embedding(self, query):
        return get_embedding(query).tolist()

    async def _aget_query_embedding(self, query: str) -> list:
        return get_embedding(query).tolist()


def test():  # Note: this alternative did not produce reasonable results, for reasons unclear
    # pip install llama-index-embeddings-huggingface
    from llama_index.embeddings.huggingface import HuggingFaceEmbedding

    embed_model = HuggingFaceEmbedding(model_name=model_id)


if __name__ == "__main__":
    emb = get_embedding("hi there")
    print(emb.shape)
```
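To sanity-check the custom class end to end, the sketch below wires `LocalJinaEmbedding` into LlamaIndex and runs a pure-embedding retrieval, which exercises both `_get_text_embedding` and `_get_query_embedding` without needing an LLM. This is a minimal sketch assuming `llama-index-core` (0.10+) is installed and the code above is in scope; the sample documents and `similarity_top_k` value are illustrative.

```python
from llama_index.core import Document, Settings, VectorStoreIndex

# Route all of LlamaIndex's embedding calls through the local Jina model
Settings.embed_model = LocalJinaEmbedding()

docs = [
    Document(text="Jina embeddings are computed locally on the GPU."),
    Document(text="LlamaIndex retrieves nodes by embedding similarity."),
]
index = VectorStoreIndex.from_documents(docs)

# A retriever only uses the embedding path, so no LLM is required
retriever = index.as_retriever(similarity_top_k=1)
for hit in retriever.retrieve("How are the embeddings computed?"):
    print(f"{hit.score:.3f}  {hit.node.get_content()}")
```

Setting `Settings.embed_model` globally keeps the rest of the pipeline unchanged; the embedding model can also be passed per-index via the `embed_model` argument if a global default is undesirable.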
 |