metadata
# Model card metadata: license, dataset, tags, and MTEB evaluation results.
license: mit
datasets:
- mteb/mtop_intent
language:
- en
pipeline_tag: text-classification
library_name: sentence-transformers
tags:
- mteb
- text
- transformers
- text-embeddings-inference
- sparse-encoder
- sparse
- csr
# One result entry per MTOPIntentClassification language config, each on the
# test split of the same dataset revision. In every entry main_score carries
# the same value as accuracy.
model-index:
- name: CSR
  results:
  - dataset:
      name: MTEB MTOPIntentClassification (en)
      type: mteb/mtop_intent
      revision: ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba
      config: en
      split: test
      languages:
      - eng-Latn
    metrics:
    - type: accuracy
      value: 0.906407
    - type: f1
      value: 0.694457
    - type: f1_weighted
      value: 0.917326
    - type: main_score
      value: 0.906407
    task:
      type: Classification
  - dataset:
      name: MTEB MTOPIntentClassification (de)
      type: mteb/mtop_intent
      revision: ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba
      config: de
      split: test
      languages:
      - deu-Latn
    metrics:
    - type: accuracy
      value: 0.851
    - type: f1
      value: 0.601279
    - type: f1_weighted
      value: 0.863969
    - type: main_score
      value: 0.851
    task:
      type: Classification
  - dataset:
      name: MTEB MTOPIntentClassification (es)
      type: mteb/mtop_intent
      revision: ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba
      config: es
      split: test
      languages:
      - spa-Latn
    metrics:
    - type: accuracy
      value: 0.906738
    - type: f1
      value: 0.642295
    - type: f1_weighted
      value: 0.910882
    - type: main_score
      value: 0.906738
    task:
      type: Classification
  - dataset:
      name: MTEB MTOPIntentClassification (fr)
      type: mteb/mtop_intent
      revision: ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba
      config: fr
      split: test
      languages:
      - fra-Latn
    metrics:
    - type: accuracy
      value: 0.849045
    - type: f1
      value: 0.59923
    - type: f1_weighted
      value: 0.863301
    - type: main_score
      value: 0.849045
    task:
      type: Classification
  - dataset:
      name: MTEB MTOPIntentClassification (hi)
      type: mteb/mtop_intent
      revision: ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba
      config: hi
      split: test
      languages:
      - hin-Deva
    metrics:
    - type: accuracy
      value: 0.751094
    - type: f1
      value: 0.44095
    - type: f1_weighted
      value: 0.762567
    - type: main_score
      value: 0.751094
    task:
      type: Classification
  - dataset:
      name: MTEB MTOPIntentClassification (th)
      type: mteb/mtop_intent
      revision: ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba
      config: th
      split: test
      languages:
      - tha-Thai
    metrics:
    - type: accuracy
      value: 0.75566
    - type: f1
      value: 0.498529
    - type: f1_weighted
      value: 0.76994
    - type: main_score
      value: 0.75566
    task:
      type: Classification
base_model:
- nvidia/NV-Embed-v2
For more details, including benchmark evaluation, hardware requirements, and inference performance, please refer to our GitHub repository.
Usage
📌 Tip: For NV-Embed-V2, using Transformers versions later than 4.47.0 may lead to performance degradation, as model_type=bidir_mistral in config.json is no longer supported.
We recommend using Transformers 4.47.0.
Sentence Transformers Usage
You can evaluate this model loaded by Sentence Transformers with the following code snippet:
import mteb
from sentence_transformers import SparseEncoder
# Load the sparse encoder checkpoint; trust_remote_code=True is passed because
# the checkpoint is loaded with custom code from its repository.
model = SparseEncoder(
    "Y-Research-Group/CSR-NV_Embed_v2-Classification-MTOPIntent",
    trust_remote_code=True,
)

# Instruction prompt registered under the MTEB task name.
model.prompts = {
    "MTOPIntentClassification": "Instruct: Classify the intent of the given utterance in task-oriented conversation\nQuery:"
}

task = mteb.get_tasks(tasks=["MTOPIntentClassification"])
evaluation = mteb.MTEB(tasks=task)

# MTEB doesn't support sparse tensors yet, so we need to convert to dense
# tensors (convert_to_sparse_tensor=False).
evaluation.run(
    model,
    eval_splits=["test"],
    output_folder="./results/MTOPIntentClassification",
    show_progress_bar=True,
    encode_kwargs={"convert_to_sparse_tensor": False, "batch_size": 8},
)
Citation
@inproceedings{wenbeyond,
title={Beyond Matryoshka: Revisiting Sparse Coding for Adaptive Representation},
author={Wen, Tiansheng and Wang, Yifei and Zeng, Zequn and Peng, Zhong and Su, Yudi and Liu, Xinyang and Chen, Bo and Liu, Hongwei and Jegelka, Stefanie and You, Chenyu},
booktitle={Forty-second International Conference on Machine Learning}
}