Visual Document Retrieval
Transformers
Safetensors
Vietnamese
English
Chinese
internvl_chat
feature-extraction
custom_code
Instructions to use 5CD-AI/Vintern-Embedding-1B with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use 5CD-AI/Vintern-Embedding-1B with Transformers:
```python
# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("5CD-AI/Vintern-Embedding-1B", trust_remote_code=True, dtype="auto")
```

- Notebooks
- Google Colab
- Kaggle
| from abc import ABC, abstractmethod | |
| from typing import List, Optional, Tuple, Union | |
| import torch | |
| from PIL import Image | |
| from transformers import BatchEncoding, BatchFeature | |
| from .torch_utils import get_torch_device | |
class BaseVisualRetrieverProcessor(ABC):
    """
    Base class for visual retriever processors.

    Subclasses implement the conversion of raw images and text queries into
    model-ready batches (`process_images`, `process_queries`) plus a `score`
    method comparing query and passage embeddings. Two concrete scorers are
    provided as static helpers: `score_single_vector` (plain dot product) and
    `score_multi_vector` (ColBERT-style MaxSim).
    """

    # NOTE(review): `abstractmethod` is imported by this module but the
    # decorators were missing from the interface methods; restored here so the
    # ABC contract is actually enforced. Likewise `@staticmethod` was missing
    # on the two scorers, which take no `self` — without it, an instance call
    # would bind the instance as `qs` and fail.

    @abstractmethod
    def process_images(
        self,
        images: List[Image.Image],
    ) -> Union[BatchFeature, BatchEncoding]:
        """Convert a list of PIL images into a model-ready batch."""
        pass

    @abstractmethod
    def process_queries(
        self,
        queries: List[str],
        max_length: int = 50,
        suffix: Optional[str] = None,
    ) -> Union[BatchFeature, BatchEncoding]:
        """
        Convert a list of text queries into a model-ready batch.

        Args:
            queries: Raw query strings.
            max_length: Maximum tokenized length per query.
            suffix: Optional string appended to each query before tokenization.
        """
        pass

    @abstractmethod
    def score(
        self,
        qs: List[torch.Tensor],
        ps: List[torch.Tensor],
        device: Optional[Union[str, torch.device]] = None,
        **kwargs,
    ) -> torch.Tensor:
        """Compute pairwise similarity scores between query and passage embeddings."""
        pass

    @staticmethod
    def score_single_vector(
        qs: List[torch.Tensor],
        ps: List[torch.Tensor],
        device: Optional[Union[str, torch.device]] = None,
    ) -> torch.Tensor:
        """
        Compute the dot product score for the given single-vector query and passage embeddings.

        Args:
            qs: Query embeddings, one 1-D tensor of shape (dim,) per query.
            ps: Passage embeddings, one 1-D tensor of shape (dim,) per passage.
            device: Device for the computation; defaults to the best available one.

        Returns:
            Float32 tensor of shape (len(qs), len(ps)).

        Raises:
            ValueError: If `qs` or `ps` is empty.
        """
        device = device or get_torch_device("auto")

        if len(qs) == 0:
            raise ValueError("No queries provided")
        if len(ps) == 0:
            raise ValueError("No passages provided")

        qs_stacked = torch.stack(qs).to(device)
        ps_stacked = torch.stack(ps).to(device)

        # (num_queries, dim) x (num_passages, dim) -> (num_queries, num_passages)
        scores = torch.einsum("bd,cd->bc", qs_stacked, ps_stacked)
        assert scores.shape[0] == len(qs), f"Expected {len(qs)} scores, got {scores.shape[0]}"

        scores = scores.to(torch.float32)
        return scores

    @staticmethod
    def score_multi_vector(
        qs: List[torch.Tensor],
        ps: List[torch.Tensor],
        batch_size: int = 128,
        device: Optional[Union[str, torch.device]] = None,
    ) -> torch.Tensor:
        """
        Compute the MaxSim score (ColBERT-like) for the given multi-vector query and passage embeddings.

        Args:
            qs: Query embeddings, one tensor of shape (n_query_tokens, dim) per
                query; token counts may vary between queries.
            ps: Passage embeddings, one tensor of shape (n_passage_tokens, dim)
                per passage; token counts may vary between passages.
            batch_size: Number of queries/passages scored per chunk, bounding
                peak memory of the pairwise einsum.
            device: Device for the computation; defaults to the best available one.

        Returns:
            Float32 tensor of shape (len(qs), len(ps)), assembled on CPU.

        Raises:
            ValueError: If `qs` or `ps` is empty.
        """
        device = device or get_torch_device("auto")

        if len(qs) == 0:
            raise ValueError("No queries provided")
        if len(ps) == 0:
            raise ValueError("No passages provided")

        scores_list: List[torch.Tensor] = []

        for i in range(0, len(qs), batch_size):
            scores_batch = []
            # Pad variable-length query token sequences to a common length;
            # zero vectors contribute nothing to the dot products below.
            qs_batch = torch.nn.utils.rnn.pad_sequence(
                qs[i : i + batch_size], batch_first=True, padding_value=0
            ).to(device)
            for j in range(0, len(ps), batch_size):
                ps_batch = torch.nn.utils.rnn.pad_sequence(
                    ps[j : j + batch_size], batch_first=True, padding_value=0
                ).to(device)
                # MaxSim: for each query token take its best-matching passage
                # token (max over s), then sum over query tokens (n).
                scores_batch.append(
                    torch.einsum("bnd,csd->bcns", qs_batch, ps_batch).max(dim=3)[0].sum(dim=2)
                )
            # Move each assembled row-block to CPU to free device memory early.
            scores_batch = torch.cat(scores_batch, dim=1).cpu()
            scores_list.append(scores_batch)

        scores = torch.cat(scores_list, dim=0)
        assert scores.shape[0] == len(qs), f"Expected {len(qs)} scores, got {scores.shape[0]}"

        scores = scores.to(torch.float32)
        return scores

    @abstractmethod
    def get_n_patches(
        self,
        image_size: Tuple[int, int],
        patch_size: int = 14,
        *args,
        **kwargs,
    ) -> Tuple[int, int]:
        """
        Get the number of patches (n_patches_x, n_patches_y) that will be used to process an
        image of size (height, width) with the given patch size.
        """
        pass