mteb/scifact
Viewer • Updated • 7.55k • 21.1k • 5
How to use Y-Research-Group/CSR-NV_Embed_v2-Retrieval-SciFACT with sentence-transformers:
from sentence_transformers import SentenceTransformer

# Hub repository id of the checkpoint to load.
repo_id = "Y-Research-Group/CSR-NV_Embed_v2-Retrieval-SciFACT"

# trust_remote_code=True permits running custom code from the model repository.
model = SentenceTransformer(repo_id, trust_remote_code=True)

# Three short example inputs to embed.
sentences = [
    "The weather is lovely today.",
    "It's so sunny outside!",
    "He drove to the stadium.",
]

# Encode all sentences, then score every sentence against every other one.
embeddings = model.encode(sentences)
similarities = model.similarity(embeddings, embeddings)

# One similarity score per sentence pair.
print(similarities.shape)
# [3, 3]

How to use Y-Research-Group/CSR-NV_Embed_v2-Retrieval-SciFACT with Transformers:
# Load model directly
from transformers import AutoModel
model = AutoModel.from_pretrained("Y-Research-Group/CSR-NV_Embed_v2-Retrieval-SciFACT", trust_remote_code=True, dtype="auto")

For more details, including benchmark evaluation, hardware requirements, and inference performance, please refer to our GitHub repository.
📌 Tip: For NV-Embed-V2, using Transformers versions later than 4.47.0 may lead to performance degradation, as model_type=bidir_mistral in config.json is no longer supported.
We recommend using Transformers 4.47.0.
You can evaluate this model loaded by Sentence Transformers with the following code snippet:
import mteb
from sentence_transformers import SparseEncoder

# Load the checkpoint as a sparse encoder. The repository id must not contain
# stray whitespace, otherwise the Hub lookup fails.
model = SparseEncoder(
    "Y-Research-Group/CSR-NV_Embed_v2-Retrieval-SciFACT",
    trust_remote_code=True,
)

# Instruction prefix applied to SciFact queries.
# NOTE(review): spelled "Instruct:" on the assumption that it follows the
# NV-Embed-v2 instruction format -- confirm against the training setup.
model.prompts = {
    "SciFact-query": "Instruct: Given a scientific claim, retrieve documents that support or refute the claim\nQuery:"
}

# Select the SciFact retrieval task and evaluate on its test split.
task = mteb.get_tasks(tasks=["SciFact"])
evaluation = mteb.MTEB(tasks=task)
evaluation.run(
    model,
    eval_splits=["test"],
    output_folder="./results/SciFact",
    show_progress_bar=True,
    # MTEB does not support sparse tensors yet, so ask the encoder for dense
    # tensors instead.
    encode_kwargs={"convert_to_sparse_tensor": False, "batch_size": 8},
)
@inproceedings{wenbeyond,
title={Beyond Matryoshka: Revisiting Sparse Coding for Adaptive Representation},
author={Wen, Tiansheng and Wang, Yifei and Zeng, Zequn and Peng, Zhong and Su, Yudi and Liu, Xinyang and Chen, Bo and Liu, Hongwei and Jegelka, Stefanie and You, Chenyu},
booktitle={Forty-second International Conference on Machine Learning}
}
from sentence_transformers import SentenceTransformer

# trust_remote_code=True permits running custom code from the model repository.
model = SentenceTransformer("Y-Research-Group/CSR-NV_Embed_v2-Retrieval-SciFACT", trust_remote_code=True)

sentences = [
    "The weather is lovely today.",
    "It's so sunny outside!",
    "He drove to the stadium.",
]

# Encode the sentences and compute their pairwise similarity scores.
embeddings = model.encode(sentences)
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]