Skip to content

PairClassification

  • Number of tasks: 44

ArEntail

A manually-curated Arabic natural language inference dataset from news headlines.

Dataset: arbml/ArEntail • License: not specified • Learn more →

Task category Score Languages Domains Annotations Creators Sample Creation
text to text (t2t) max_ap ara News, Written human-annotated found
Citation
@article{obeidat2024arentail,
  author    = {Obeidat, Rasha and Al-Harahsheh, Yara and Al-Ayyoub, Mahmoud and Gharaibeh, Maram},
  title     = {{ArEntail}: manually-curated {Arabic} natural language inference dataset from news headlines},
  journal   = {Language Resources and Evaluation},
  pages     = {1--27},
  publisher = {Springer},
  year      = {2024},
}

ArmenianParaphrasePC

asparius/Armenian-Paraphrase-PC

Dataset: asparius/Armenian-Paraphrase-PC • License: apache-2.0 • Learn more →

Task category Score Languages Domains Annotations Creators Sample Creation
text to text (t2t) max_ap hye News, Written derived found
Citation
@misc{malajyan2020arpa,
  author        = {Malajyan, Arthur and Avetisyan, Karen and Ghukasyan, Tsolak},
  title         = {{ARPA}: {Armenian} Paraphrase Detection Corpus and Models},
  year          = {2020},
  eprint        = {2009.12615},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
}

Assin2RTE

Recognizing Textual Entailment part of the ASSIN 2, an evaluation shared task collocated with STIL 2019.

Dataset: nilc-nlp/assin2 • License: not specified • Learn more →

Task category Score Languages Domains Annotations Creators Sample Creation
text to text (t2t) max_ap por Written human-annotated found
Citation
@inproceedings{real2020assin,
  author    = {Real, Livy and Fonseca, Erick and Oliveira, Hugo Gon{\c{c}}alo},
  title     = {The {ASSIN} 2 shared task: a quick overview},
  booktitle = {International Conference on Computational Processing of the {Portuguese} Language},
  pages     = {406--412},
  publisher = {Springer},
  year      = {2020},
}

CDSC-E

Compositional Distributional Semantics Corpus for textual entailment.

Dataset: PL-MTEB/cdsce-pairclassification • License: cc-by-nc-sa-4.0 • Learn more →

Task category Score Languages Domains Annotations Creators Sample Creation
text to text (t2t) max_ap pol Written human-annotated found
Citation
@inproceedings{wroblewska-krasnowska-kieras-2017-polish,
  author    = {Wr{\'o}blewska, Alina and Krasnowska-Kiera{\'s}, Katarzyna},
  title     = {{Polish} evaluation dataset for compositional distributional semantics models},
  booktitle = {Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  editor    = {Barzilay, Regina and Kan, Min-Yen},
  pages     = {784--792},
  publisher = {Association for Computational Linguistics},
  address   = {Vancouver, Canada},
  year      = {2017},
  month     = jul,
  doi       = {10.18653/v1/P17-1073},
  url       = {https://aclanthology.org/P17-1073},
  abstract  = {The paper presents a procedure of building an evaluation dataset for the validation of compositional distributional semantics models estimated for languages other than English. The procedure generally builds on steps designed to assemble the SICK corpus, which contains pairs of English sentences annotated for semantic relatedness and entailment, because we aim at building a comparable dataset. However, the implementation of particular building steps significantly differs from the original SICK design assumptions, which is caused by both lack of necessary extraneous resources for an investigated language and the need for language-specific transformation rules. The designed procedure is verified on Polish, a fusional language with a relatively free word order, and contributes to building a Polish evaluation dataset. The resource consists of 10K sentence pairs which are human-annotated for semantic relatedness and entailment. The dataset may be used for the evaluation of compositional distributional semantics models of Polish.},
}

CExaPPC

ExaPPC is a large paraphrase corpus consisting of monolingual sentence-level paraphrases using different sources.

Dataset: PNLPhub/C-ExaPPC • License: not specified • Learn more →

Task category Score Languages Domains Annotations Creators Sample Creation
text to text (t2t) max_ap fas Social, Web derived found
Citation
@inproceedings{9786243,
  author    = {Sadeghi, Reyhaneh and Karbasi, Hamed and Akbari, Ahmad},
  title     = {{ExaPPC}: a Large-Scale {Persian} Paraphrase Detection Corpus},
  booktitle = {2022 8th International Conference on Web Research ({ICWR})},
  pages     = {168--175},
  year      = {2022},
  doi       = {10.1109/ICWR54782.2022.9786243},
  keywords  = {Data mining;Task analysis;Paraphrase Identification;Semantic Similarity;Deep Learning;Paraphrasing Corpora},
}

CTKFactsNLI

Czech Natural Language Inference dataset of around 3K evidence-claim pairs labelled with SUPPORTS, REFUTES or NOT ENOUGH INFO veracity labels. Extracted from a round of fact-checking experiments.

Dataset: ctu-aic/ctkfacts_nli • License: cc-by-sa-3.0 • Learn more →

Task category Score Languages Domains Annotations Creators Sample Creation
text to text (t2t) max_ap ces News, Written human-annotated found
Citation
@article{ullrich2023csfever,
  author    = {Ullrich, Herbert and Drchal, Jan and R{\'y}par, Martin and Vincourov{\'a}, Hana and Moravec, V{\'a}clav},
  title     = {{CsFEVER} and {CTKFacts}: acquiring {Czech} data for fact verification},
  journal   = {Language Resources and Evaluation},
  volume    = {57},
  number    = {4},
  pages     = {1571--1605},
  publisher = {Springer},
  year      = {2023},
}

Cmnli

Chinese Multi-Genre NLI

Dataset: C-MTEB/CMNLI • License: not specified • Learn more →

Task category Score Languages Domains Annotations Creators Sample Creation
text to text (t2t) max_accuracy cmn not specified not specified not specified
Citation
@inproceedings{xu-etal-2020-clue,
  author    = {Xu, Liang and
               Hu, Hai and
               Zhang, Xuanwei and
               Li, Lu and
               Cao, Chenjie and
               Li, Yudong and
               Xu, Yechen and
               Sun, Kai and
               Yu, Dian and
               Yu, Cong and
               Tian, Yin and
               Dong, Qianqian and
               Liu, Weitang and
               Shi, Bo and
               Cui, Yiming and
               Li, Junyi and
               Zeng, Jun and
               Wang, Rongzhao and
               Xie, Weijian and
               Li, Yanting and
               Patterson, Yina and
               Tian, Zuoyu and
               Zhang, Yiwen and
               Zhou, He and
               Liu, Shaoweihua and
               Zhao, Zhe and
               Zhao, Qipeng and
               Yue, Cong and
               Zhang, Xinrui and
               Yang, Zhengliang and
               Richardson, Kyle and
               Lan, Zhenzhong},
  title     = {{CLUE}: A {C}hinese Language Understanding Evaluation Benchmark},
  booktitle = {Proceedings of the 28th International Conference on Computational Linguistics},
  pages     = {4762--4772},
  publisher = {International Committee on Computational Linguistics},
  address   = {Barcelona, Spain (Online)},
  year      = {2020},
  month     = dec,
  doi       = {10.18653/v1/2020.coling-main.419},
  url       = {https://aclanthology.org/2020.coling-main.419},
}

DisCoTexPairClassification

The DisCoTEX dataset aims at assessing discourse coherence in Italian texts. This dataset focuses on Italian real-world texts and provides resources to model coherence in natural language.

Dataset: MattiaSangermano/DisCoTex-last-sentence • License: not specified • Learn more →

Task category Score Languages Domains Annotations Creators Sample Creation
text to text (t2t) max_ap ita Social, Written derived found
Citation
@inproceedings{brunato2023discotex,
  author       = {Brunato, Dominique and Colla, Davide and Dell'Orletta, Felice and Dini, Irene and Radicioni, Daniele Paolo and Ravelli, Andrea Amelio and others},
  title        = {{DisCoTex} at {EVALITA} 2023: overview of the assessing discourse coherence in {Italian} texts task},
  booktitle    = {{CEUR} Workshop Proceedings},
  volume       = {3473},
  pages        = {1--8},
  organization = {CEUR},
  year         = {2023},
}

FalseFriendsGermanEnglish

A dataset to identify False Friends / false cognates between English and German. A generally challenging task for multilingual models.

Dataset: aari1995/false_friends_de_en_mteb • License: mit • Learn more →

Task category Score Languages Domains Annotations Creators Sample Creation
text to text (t2t) max_ap deu Written human-annotated created
Citation
@misc{Chibb_2022,
  author   = {Chibb, Aaron},
  title    = {{German-English} False Friends in Multilingual Transformer Models: An Evaluation on Robustness and Word-to-Word Fine-Tuning},
  year     = {2022},
  month    = sep,
  abstract = {This paper explores the robustness of multilingual language models against false friends. False friends are words that sound or are written the same in two different languages but have different meaning. Generally, it is argued that multilingual models, such as XLM-RoBERTA, can outperform monolingual models in most tasks on conventional datasets. However, false friends are not considered in these tests. In this paper, experiments with a false friends dataset show that multilingual models are not robust against false friends; they have problems creating monolingual representations and differentiating between meanings of similarly written words in different languages. An attempt of word-based finetuning multilingual models on false friends pairs is promising, however the results do not generally solve the presented problem and still, monolingual models are more robust against false friends.},
}

FarsTail

This dataset, named FarsTail, includes 10,367 samples which are provided in both the Persian language as well as the indexed format to be useful for non-Persian researchers. The samples are generated from 3,539 multiple-choice questions with the least amount of annotator interventions in a way similar to the SciTail dataset

Dataset: azarijafari/FarsTail • License: not specified • Learn more →

Task category Score Languages Domains Annotations Creators Sample Creation
text to text (t2t) max_ap fas Academic, Written human-annotated found
Citation
@article{amirkhani2023farstail,
  author    = {Amirkhani, Hossein and AzariJafari, Mohammad and Faridan-Jahromi, Soroush and Kouhkan, Zeinab and Pourjafari, Zohreh and Amirak, Azadeh},
  title     = {{FarsTail}: a {Persian} natural language inference dataset},
  journal   = {Soft Computing},
  publisher = {Springer},
  year      = {2023},
  doi       = {10.1007/s00500-023-08959-3},
}

FarsiParaphraseDetection

Farsi Paraphrase Detection

Dataset: alighasemi/farsi_paraphrase_detection • License: not specified • Learn more →

Task category Score Languages Domains Annotations Creators Sample Creation
text to text (t2t) max_ap fas not specified derived found
Citation

IndicXnliPairClassification

INDICXNLI is similar to existing XNLI dataset in shape/form, but focuses on the Indic language family. The train (392,702), validation (2,490), and evaluation sets (5,010) of English XNLI were translated from English into each of the eleven Indic languages. IndicTrans is a large Transformer-based sequence to sequence model. It is trained on Samanantar dataset (Ramesh et al., 2021), which is the largest parallel multilingual corpus over eleven Indic languages.

Dataset: Divyanshu/indicxnli • License: cc-by-4.0 • Learn more →

Task category Score Languages Domains Annotations Creators Sample Creation
text to text (t2t) max_ap asm, ben, guj, hin, kan, ... (11) Fiction, Government, Non-fiction, Written derived machine-translated
Citation
@misc{aggarwal_gupta_kunch_22,
  author    = {Aggarwal, Divyanshu and Gupta, Vivek and Kunchukuttan, Anoop},
  title     = {{IndicXNLI}: Evaluating Multilingual Inference for {Indian} Languages},
  publisher = {arXiv},
  year      = {2022},
  doi       = {10.48550/ARXIV.2204.08776},
  url       = {https://arxiv.org/abs/2204.08776},
  copyright = {Creative Commons Attribution 4.0 International},
}

KLUE-NLI

Textual Entailment between a hypothesis sentence and a premise sentence. Part of the Korean Language Understanding Evaluation (KLUE).

Dataset: klue/klue • License: cc-by-sa-4.0 • Learn more →

Task category Score Languages Domains Annotations Creators Sample Creation
text to text (t2t) max_ap kor Encyclopaedic, News, Written human-annotated found
Citation
@misc{park2021klue,
  author        = {Sungjoon Park and Jihyung Moon and Sungdong Kim and Won Ik Cho and Jiyoon Han and Jangwon Park and Chisung Song and Junseong Kim and Yongsook Song and Taehwan Oh and Joohong Lee and Juhyun Oh and Sungwon Lyu and Younghoon Jeong and Inkwon Lee and Sangwoo Seo and Dongjun Lee and Hyunwoo Kim and Myeonghwa Lee and Seongbo Jang and Seungwon Do and Sunkyoung Kim and Kyungtae Lim and Jongwon Lee and Kyumin Park and Jamin Shin and Seonghyun Kim and Lucy Park and Alice Oh and Jungwoo Ha and Kyunghyun Cho},
  title         = {{KLUE}: {Korean} Language Understanding Evaluation},
  year          = {2021},
  eprint        = {2105.09680},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
}

LegalBenchPC

This LegalBench pair classification task is a combination of the following datasets:

    - Citation Prediction Classification: Given a legal statement and a case citation, determine if the citation is supportive of the legal statement.
    - Consumer Contracts QA: The task consists of 400 yes/no questions relating to consumer contracts (specifically, online terms of service) and is relevant to the legal skill of contract interpretation.
    - Contract QA: Answer yes/no questions about whether contractual clauses discuss particular issues like confidentiality requirements, BIPA consent, PII data breaches, breach of contract etc.
    - Hearsay: Classify if a particular piece of evidence qualifies as hearsay. Each sample in the dataset describes (1) an issue being litigated or an assertion a party wishes to prove, and (2) a piece of evidence a party wishes to introduce. The goal is to determine if—as it relates to the issue—the evidence would be considered hearsay under the definition provided above.
    - Privacy Policy Entailment: Given a privacy policy clause and a description of the clause, determine if the description is correct. This is a binary classification task in which the LLM is provided with a clause from a privacy policy, and a description of that clause (e.g., “The policy describes collection of the user’s HTTP cookies, flash cookies, pixel tags, or similar identifiers by a party to the contract.”).
    - Privacy Policy QA: Given a question and a clause from a privacy policy, determine if the clause contains enough information to answer the question. This is a binary classification task in which the LLM is provided with a question (e.g., “do you publish my data”) and a clause from a privacy policy. The LLM must determine if the clause contains an answer to the question, and classify the question-clause pair.

Dataset: nguha/legalbench • License: cc-by-4.0 • Learn more →

Task category Score Languages Domains Annotations Creators Sample Creation
text to text (t2t) max_accuracy eng Legal, Written expert-annotated found
Citation
@misc{guha2023legalbench,
  author        = {Neel Guha and Julian Nyarko and Daniel E. Ho and Christopher R{\'e} and Adam Chilton and Aditya Narayana and Alex Chohlas-Wood and Austin Peters and Brandon Waldon and Daniel N. Rockmore and Diego Zambrano and Dmitry Talisman and Enam Hoque and Faiz Surani and Frank Fagan and Galit Sarfaty and Gregory M. Dickinson and Haggai Porat and Jason Hegland and Jessica Wu and Joe Nudell and Joel Niklaus and John Nay and Jonathan H. Choi and Kevin Tobia and Margaret Hagan and Megan Ma and Michael Livermore and Nikon Rasumov-Rahe and Nils Holzenberger and Noam Kolt and Peter Henderson and Sean Rehaag and Sharad Goel and Shang Gao and Spencer Williams and Sunny Gandhi and Tom Zur and Varun Iyer and Zehua Li},
  title         = {{LegalBench}: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models},
  year          = {2023},
  eprint        = {2308.11462},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
}

@article{kolt2022predicting,
  author    = {Kolt, Noam},
  title     = {Predicting consumer contracts},
  journal   = {Berkeley Tech. LJ},
  volume    = {37},
  pages     = {71},
  publisher = {HeinOnline},
  year      = {2022},
}

@misc{ravichander2019question,
  author        = {Ravichander, Abhilasha and Black, Alan W and Wilson, Shomir and Norton, Thomas and Sadeh, Norman},
  title         = {Question answering for privacy policies: Combining computational and legal perspectives},
  year          = {2019},
  eprint        = {1911.00841},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/1911.00841},
}

@article{zimmeck2019maps,
  author  = {Zimmeck, Sebastian and Story, Peter and Smullen, Daniel and Ravichander, Abhilasha and Wang, Ziqi and Reidenberg, Joel R and Russell, N Cameron and Sadeh, Norman},
  title   = {Maps: Scaling privacy compliance analysis to a million apps},
  journal = {Proc. Priv. Enhancing Tech.},
  volume  = {2019},
  pages   = {66},
  year    = {2019},
}

Ocnli

Original Chinese Natural Language Inference dataset

Dataset: C-MTEB/OCNLI • License: not specified • Learn more →

Task category Score Languages Domains Annotations Creators Sample Creation
text to text (t2t) max_accuracy cmn not specified not specified not specified
Citation
@misc{hu2020ocnli,
  author        = {Hu, Hai and Richardson, Kyle and Xu, Liang and Li, Lu and Kuebler, Sandra and Moss, Lawrence S.},
  title         = {{OCNLI}: Original {Chinese} Natural Language Inference},
  year          = {2020},
  eprint        = {2010.05444},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
}

OpusparcusPC

Opusparcus is a paraphrase corpus for six European languages: German, English, Finnish, French, Russian, and Swedish. The paraphrases consist of subtitles from movies and TV shows.

Dataset: GEM/opusparcus • License: cc-by-nc-4.0 • Learn more →

Task category Score Languages Domains Annotations Creators Sample Creation
text to text (t2t) max_ap deu, eng, fin, fra, rus, ... (6) Spoken, Spoken human-annotated created
Citation
@misc{creutz2018open,
  author        = {Creutz, Mathias},
  title         = {Open Subtitles Paraphrase Corpus for Six Languages},
  year          = {2018},
  eprint        = {1809.06142},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
}

PSC

Polish Summaries Corpus

Dataset: PL-MTEB/psc-pairclassification • License: cc-by-3.0 • Learn more →

Task category Score Languages Domains Annotations Creators Sample Creation
text to text (t2t) max_ap pol News, Written derived found
Citation
@inproceedings{ogrodniczuk-kopec-2014-polish,
  author    = {Ogrodniczuk, Maciej and Kope{\'c}, Mateusz},
  title     = {The {P}olish Summaries Corpus},
  booktitle = {Proceedings of the Ninth International Conference on Language Resources and Evaluation ({LREC}'14)},
  editor    = {Calzolari, Nicoletta and
               Choukri, Khalid and
               Declerck, Thierry and
               Loftsson, Hrafn and
               Maegaard, Bente and
               Mariani, Joseph and
               Moreno, Asuncion and
               Odijk, Jan and
               Piperidis, Stelios},
  pages     = {3712--3715},
  publisher = {European Language Resources Association (ELRA)},
  address   = {Reykjavik, Iceland},
  year      = {2014},
  month     = may,
  url       = {http://www.lrec-conf.org/proceedings/lrec2014/pdf/1211_Paper.pdf},
  abstract  = {This article presents the Polish Summaries Corpus, a new resource created to support the development and evaluation of the tools for automated single-document summarization of Polish. The Corpus contains a large number of manual summaries of news articles, with many independently created summaries for a single text. Such approach is supposed to overcome the annotator bias, which is often described as a problem during the evaluation of the summarization algorithms against a single gold standard. There are several summarizers developed specifically for Polish language, but their in-depth evaluation and comparison was impossible without a large, manually created corpus. We present in detail the process of text selection, annotation process and the contents of the corpus, which includes both abstract free-word summaries, as well as extraction-based summaries created by selecting text spans from the original document. Finally, we describe how that resource could be used not only for the evaluation of the existing summarization tools, but also for studies on the human summarization process in Polish language.},
}

ParsinluEntail

A Persian textual entailment task (deciding whether sent1 entails sent2). The questions are partially translated from the SNLI dataset and partially generated by expert annotators.

Dataset: persiannlp/parsinlu_entailment • License: not specified • Learn more →

Task category Score Languages Domains Annotations Creators Sample Creation
text to text (t2t) max_ap fas Reviews, Written derived found
Citation
@misc{khashabi2021parsinlusuitelanguageunderstanding,
  author        = {Daniel Khashabi and Arman Cohan and Siamak Shakeri and Pedram Hosseini and Pouya Pezeshkpour and Malihe Alikhani and Moin Aminnaseri and Marzieh Bitaab and Faeze Brahman and Sarik Ghazarian and Mozhdeh Gheini and Arman Kabiri and Rabeeh Karimi Mahabadi and Omid Memarrast and Ahmadreza Mosallanezhad and Erfan Noury and Shahab Raji and Mohammad Sadegh Rasooli and Sepideh Sadeghi and Erfan Sadeqi Azer and Niloofar Safi Samghabadi and Mahsa Shafaei and Saber Sheybani and Ali Tazarv and Yadollah Yaghoobzadeh},
  title         = {{ParsiNLU}: A Suite of Language Understanding Challenges for {Persian}},
  year          = {2021},
  eprint        = {2012.06154},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  url           = {https://arxiv.org/abs/2012.06154},
}

ParsinluQueryParaphPC

A Persian query paraphrasing task (deciding whether two questions are paraphrases of each other). The questions are partially generated from Google auto-complete, and partially translated from the Quora paraphrasing dataset.

Dataset: persiannlp/parsinlu_query_paraphrasing • License: not specified • Learn more →

Task category Score Languages Domains Annotations Creators Sample Creation
text to text (t2t) max_ap fas Reviews, Written derived found
Citation
@misc{khashabi2021parsinlusuitelanguageunderstanding,
  author        = {Daniel Khashabi and Arman Cohan and Siamak Shakeri and Pedram Hosseini and Pouya Pezeshkpour and Malihe Alikhani and Moin Aminnaseri and Marzieh Bitaab and Faeze Brahman and Sarik Ghazarian and Mozhdeh Gheini and Arman Kabiri and Rabeeh Karimi Mahabadi and Omid Memarrast and Ahmadreza Mosallanezhad and Erfan Noury and Shahab Raji and Mohammad Sadegh Rasooli and Sepideh Sadeghi and Erfan Sadeqi Azer and Niloofar Safi Samghabadi and Mahsa Shafaei and Saber Sheybani and Ali Tazarv and Yadollah Yaghoobzadeh},
  title         = {{ParsiNLU}: A Suite of Language Understanding Challenges for {Persian}},
  year          = {2021},
  eprint        = {2012.06154},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  url           = {https://arxiv.org/abs/2012.06154},
}

PawsXPairClassification

PAWS-X: A Cross-lingual Adversarial Dataset for Paraphrase Identification

Dataset: google-research-datasets/paws-x • License: https://huggingface.co/datasets/google-research-datasets/paws-x#licensing-information • Learn more →

Task category Score Languages Domains Annotations Creators Sample Creation
text to text (t2t) max_ap cmn, deu, eng, fra, jpn, ... (7) Encyclopaedic, Web, Written human-annotated human-translated
Citation
@misc{yang2019pawsx,
  author        = {Yang, Yinfei and Zhang, Yuan and Tar, Chris and Baldridge, Jason},
  title         = {{PAWS-X}: A Cross-lingual Adversarial Dataset for Paraphrase Identification},
  year          = {2019},
  eprint        = {1908.11828},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
}

PpcPC

Polish Paraphrase Corpus

Dataset: PL-MTEB/ppc-pairclassification • License: gpl-3.0 • Learn more →

Task category Score Languages Domains Annotations Creators Sample Creation
text to text (t2t) max_ap pol Fiction, News, Non-fiction, Social, Spoken, ... (7) derived found
Citation
@misc{dadas2022training,
  author        = {Dadas, S{\l}awomir},
  title         = {Training Effective Neural Sentence Encoders from Automatically Mined Paraphrases},
  year          = {2022},
  eprint        = {2207.12759},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
}

PubChemAISentenceParaphrasePC

ChemTEB evaluates the performance of text embedding models on chemical domain data.

Dataset: BASF-AI/PubChemAISentenceParaphrasePC • License: cc-by-nc-sa-4.0 • Learn more →

Task category Score Languages Domains Annotations Creators Sample Creation
text to text (t2t) max_ap eng Chemistry LM-generated created
Citation
@misc{kasmaee2024chemteb,
  author        = {Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila},
  title         = {{ChemTEB}: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain},
  year          = {2024},
  eprint        = {2412.00532},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2412.00532},
}

@article{kim2023pubchem,
  author    = {Kim, Sunghwan and Chen, Jie and Cheng, Tiejun and Gindulyte, Asta and He, Jia and He, Siqian and Li, Qingliang and Shoemaker, Benjamin A and Thiessen, Paul A and Yu, Bo and others},
  title     = {{PubChem} 2023 update},
  journal   = {Nucleic Acids Research},
  volume    = {51},
  number    = {D1},
  pages     = {D1373--D1380},
  publisher = {Oxford University Press},
  year      = {2023},
}

PubChemSMILESPC

ChemTEB evaluates the performance of text embedding models on chemical domain data.

Dataset: BASF-AI/PubChemSMILESPairClassification • License: cc-by-nc-sa-4.0 • Learn more →

Task category Score Languages Domains Annotations Creators Sample Creation
text to text (t2t) max_ap eng Chemistry derived created
Citation
@misc{kasmaee2024chemteb,
  author        = {Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila},
  title         = {{ChemTEB}: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain},
  year          = {2024},
  eprint        = {2412.00532},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2412.00532},
}

@article{kim2023pubchem,
  author    = {Kim, Sunghwan and Chen, Jie and Cheng, Tiejun and Gindulyte, Asta and He, Jia and He, Siqian and Li, Qingliang and Shoemaker, Benjamin A and Thiessen, Paul A and Yu, Bo and others},
  title     = {{PubChem} 2023 update},
  journal   = {Nucleic Acids Research},
  volume    = {51},
  number    = {D1},
  pages     = {D1373--D1380},
  publisher = {Oxford University Press},
  year      = {2023},
}

PubChemSynonymPC

ChemTEB evaluates the performance of text embedding models on chemical domain data.

Dataset: BASF-AI/PubChemSynonymPC • License: cc-by-nc-sa-4.0 • Learn more →

Task category Score Languages Domains Annotations Creators Sample Creation
text to text (t2t) max_ap eng Chemistry derived created
Citation
@misc{kasmaee2024chemteb,
  author        = {Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila},
  title         = {{ChemTEB}: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain},
  year          = {2024},
  eprint        = {2412.00532},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2412.00532},
}

@article{kim2023pubchem,
  author    = {Kim, Sunghwan and Chen, Jie and Cheng, Tiejun and Gindulyte, Asta and He, Jia and He, Siqian and Li, Qingliang and Shoemaker, Benjamin A and Thiessen, Paul A and Yu, Bo and others},
  title     = {{PubChem} 2023 update},
  journal   = {Nucleic Acids Research},
  volume    = {51},
  number    = {D1},
  pages     = {D1373--D1380},
  publisher = {Oxford University Press},
  year      = {2023},
}

PubChemWikiPairClassification

ChemTEB evaluates the performance of text embedding models on chemical domain data.

Dataset: BASF-AI/PubChemWikiMultilingualPC • License: cc-by-nc-sa-4.0 • Learn more →

Task category Score Languages Domains Annotations Creators Sample Creation
text to text (t2t) max_ap ces, deu, eng, fra, hin, ... (13) Chemistry derived created
Citation
@misc{kasmaee2024chemteb,
  author        = {Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila},
  title         = {{ChemTEB}: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain},
  year          = {2024},
  eprint        = {2412.00532},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2412.00532},
}

@article{kim2023pubchem,
  author    = {Kim, Sunghwan and Chen, Jie and Cheng, Tiejun and Gindulyte, Asta and He, Jia and He, Siqian and Li, Qingliang and Shoemaker, Benjamin A and Thiessen, Paul A and Yu, Bo and others},
  title     = {{PubChem} 2023 update},
  journal   = {Nucleic Acids Research},
  volume    = {51},
  number    = {D1},
  pages     = {D1373--D1380},
  publisher = {Oxford University Press},
  year      = {2023},
}

PubChemWikiParagraphsPC

ChemTEB evaluates the performance of text embedding models on chemical domain data.

Dataset: BASF-AI/PubChemWikiParagraphsPC • License: cc-by-nc-sa-4.0 • Learn more →

Task category Score Languages Domains Annotations Creators Sample Creation
text to text (t2t) max_ap eng Chemistry derived created
Citation
@misc{kasmaee2024chemteb,
  author        = {Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila},
  title         = {{ChemTEB}: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain},
  year          = {2024},
  eprint        = {2412.00532},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2412.00532},
}

@article{kim2023pubchem,
  author    = {Kim, Sunghwan and Chen, Jie and Cheng, Tiejun and Gindulyte, Asta and He, Jia and He, Siqian and Li, Qingliang and Shoemaker, Benjamin A and Thiessen, Paul A and Yu, Bo and others},
  title     = {{PubChem} 2023 update},
  journal   = {Nucleic Acids Research},
  volume    = {51},
  number    = {D1},
  pages     = {D1373--D1380},
  publisher = {Oxford University Press},
  year      = {2023},
}

RTE3

The Recognising Textual Entailment Challenge (RTE-3) aims to provide the NLP community with a benchmark to test progress in recognizing textual entailment.

Dataset: maximoss/rte3-multi • License: cc-by-4.0 • Learn more →

Task category Score Languages Domains Annotations Creators Sample Creation
text to text (t2t) max_ap deu, eng, fra, ita Encyclopaedic, News, Web, Written expert-annotated found
Citation
@inproceedings{giampiccolo-etal-2007-third,
  author    = {Giampiccolo, Danilo and Magnini, Bernardo and Dagan, Ido and Dolan, Bill},
  title     = {The Third {PASCAL} Recognizing Textual Entailment Challenge},
  booktitle = {Proceedings of the {ACL}-{PASCAL} Workshop on Textual Entailment and Paraphrasing},
  pages     = {1--9},
  publisher = {Association for Computational Linguistics},
  address   = {Prague},
  year      = {2007},
  month     = jun,
  url       = {https://aclanthology.org/W07-1401},
}

SICK-BR-PC

SICK-BR is a Portuguese inference corpus, human translated from SICK

Dataset: eduagarcia/sick-br • License: not specified • Learn more →

Task category Score Languages Domains Annotations Creators Sample Creation
text to text (t2t) max_ap por Web, Written human-annotated human-translated and localized
Citation
@inproceedings{real18,
  author    = {Real, Livy and
               Rodrigues, Ana and
               Vieira e Silva, Andressa and
               Albiero, Beatriz and
               Thalenberg, Bruna and
               Guide, Bruno and
               Silva, Cindy and
               de Oliveira Lima, Guilherme and
               C{\^a}mara, Igor C. S. and
               Stanojevi{\'{c}}, Milo{\v{s}} and
               Souza, Rodrigo and
               de Paiva, Valeria},
  title     = {{SICK-BR}: A {Portuguese} Corpus for Inference},
  booktitle = {Computational Processing of the {Portuguese} Language. {PROPOR} 2018},
  year      = {2018},
  doi       = {10.1007/978-3-319-99722-3_31},
  isbn      = {978-3-319-99722-3},
}

SICK-E-PL

Polish version of SICK dataset for textual entailment.

Dataset: PL-MTEB/sicke-pl-pairclassification • License: not specified • Learn more →

Task category Score Languages Domains Annotations Creators Sample Creation
text to text (t2t) max_ap pol Reviews not specified not specified
Citation
@inproceedings{dadas-etal-2020-evaluation,
  abstract = {Methods for learning sentence representations have been actively developed in recent years. However, the lack of pre-trained models and datasets annotated at the sentence level has been a problem for low-resource languages such as Polish which led to less interest in applying these methods to language-specific tasks. In this study, we introduce two new Polish datasets for evaluating sentence embeddings and provide a comprehensive evaluation of eight sentence representation methods including Polish and multilingual models. We consider classic word embedding models, recently developed contextual embeddings and multilingual sentence encoders, showing strengths and weaknesses of specific approaches. We also examine different methods of aggregating word vectors into a single sentence vector.},
  address = {Marseille, France},
  author = {Dadas, S{\l}awomir  and
Pere{\l}kiewicz, Micha{\l}  and
Po{\'s}wiata, Rafa{\l}},
  booktitle = {Proceedings of the Twelfth Language Resources and Evaluation Conference},
  editor = {Calzolari, Nicoletta  and
B{\'e}chet, Fr{\'e}d{\'e}ric  and
Blache, Philippe  and
Choukri, Khalid  and
Cieri, Christopher  and
Declerck, Thierry  and
Goggi, Sara  and
Isahara, Hitoshi  and
Maegaard, Bente  and
Mariani, Joseph  and
Mazo, H{\'e}l{\`e}ne  and
Moreno, Asuncion  and
Odijk, Jan  and
Piperidis, Stelios},
  isbn = {979-10-95546-34-4},
  language = {English},
  month = may,
  pages = {1674--1680},
  publisher = {European Language Resources Association},
  title = {Evaluation of Sentence Representations in {P}olish},
  url = {https://aclanthology.org/2020.lrec-1.207},
  year = {2020},
}

SprintDuplicateQuestions

Duplicate questions from the Sprint community.

Dataset: mteb/sprintduplicatequestions-pairclassification • License: not specified • Learn more →

Task category Score Languages Domains Annotations Creators Sample Creation
text to text (t2t) max_ap eng Programming, Written derived found
Citation
@inproceedings{shah-etal-2018-adversarial,
  abstract  = {We address the problem of detecting duplicate questions in forums, which is an important step towards automating the process of answering new questions. As finding and annotating such potential duplicates manually is very tedious and costly, automatic methods based on machine learning are a viable alternative. However, many forums do not have annotated data, i.e., questions labeled by experts as duplicates, and thus a promising solution is to use domain adaptation from another forum that has such annotations. Here we focus on adversarial domain adaptation, deriving important findings about when it performs well and what properties of the domains are important in this regard. Our experiments with StackExchange data show an average improvement of 5.6{\%} over the best baseline across multiple pairs of domains.},
  address   = {Brussels, Belgium},
  author    = {Shah, Darsh and Lei, Tao and Moschitti, Alessandro and Romeo, Salvatore and Nakov, Preslav},
  booktitle = {Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing},
  doi       = {10.18653/v1/D18-1131},
  editor    = {Riloff, Ellen and Chiang, David and Hockenmaier, Julia and Tsujii, Jun{'}ichi},
  month     = oct # {-} # nov,
  pages     = {1056--1063},
  publisher = {Association for Computational Linguistics},
  title     = {Adversarial Domain Adaptation for Duplicate Question Detection},
  url       = {https://aclanthology.org/D18-1131},
  year      = {2018},
}

SprintDuplicateQuestions-VN

A translated dataset from Duplicate questions from the Sprint community. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Cohere's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Uses LLM-as-a-judge to score the quality of the samples based on multiple criteria.

Dataset: GreenNode/sprintduplicatequestions-pairclassification-vn • License: cc-by-sa-4.0 • Learn more →

Task category Score Languages Domains Annotations Creators Sample Creation
text to category (t2c) max_ap vie Programming, Written derived machine-translated and LM verified
Citation
@misc{pham2025vnmtebvietnamesemassivetext,
  archiveprefix = {arXiv},
  author        = {Loc Pham and
                   Tung Luu and
                   Thu Vo and
                   Minh Nguyen and
                   Viet Hoang},
  eprint        = {2507.21500},
  primaryclass  = {cs.CL},
  title         = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark},
  url           = {https://arxiv.org/abs/2507.21500},
  year          = {2025},
}

SynPerChatbotRAGFAQPC

Synthetic Persian Chatbot RAG FAQ Pair Classification

Dataset: MCINext/synthetic-persian-chatbot-rag-faq-pair-classification • License: not specified • Learn more →

Task category Score Languages Domains Annotations Creators Sample Creation
text to text (t2t) max_ap fas Spoken LM-generated LM-generated and verified
Citation

SynPerQAPC

Synthetic Persian QA Pair Classification

Dataset: MCINext/synthetic-persian-qa-pair-classification • License: not specified • Learn more →

Task category Score Languages Domains Annotations Creators Sample Creation
text to text (t2t) max_ap fas Blog, News, Religious, Web LM-generated LM-generated and verified
Citation

SynPerTextKeywordsPC

Synthetic Persian Text Keywords Pair Classification

Dataset: MCINext/synthetic-persian-text-keyword-pair-classification • License: not specified • Learn more →

Task category Score Languages Domains Annotations Creators Sample Creation
text to text (t2t) max_ap fas Blog, News, Religious, Web LM-generated LM-generated and verified
Citation

TERRa

Textual Entailment Recognition for Russian. This task requires recognizing, given two text fragments, whether the meaning of one text is entailed by (can be inferred from) the other text.

Dataset: ai-forever/terra-pairclassification • License: mit • Learn more →

Task category Score Languages Domains Annotations Creators Sample Creation
text to text (t2t) max_ap rus News, Web, Written human-annotated found
Citation
@misc{shavrina2020russiansuperglue,
  archiveprefix = {arXiv},
  author = {Shavrina, Tatiana
    and Fenogenova, Alena
    and Emelyanov, Anton
    and Shevelev, Denis
    and Artemova, Ekaterina
    and Malykh, Valentin
    and Mikhailov, Vladislav
    and Tikhonova, Maria
    and Chertok, Andrey
    and Evlampiev, Andrey},
  eprint = {2010.15925},
  primaryclass = {cs.CL},
  title = {{RussianSuperGLUE}: A {Russian} Language Understanding Evaluation Benchmark},
  url = {https://arxiv.org/abs/2010.15925},
  year = {2020},
}

TalemaaderPC

The Danish Language and Literature Society has developed a dataset for evaluating language models in Danish. The dataset contains a total of 1000 Danish idioms and fixed expressions with transferred meanings based on the Danish Dictionary's collection of fixed expressions with associated definitions. For each of the 1000 idioms and fixed expressions, three false definitions have also been prepared. The dataset can be used to test the performance of language models in identifying correct definitions for Danish idioms and fixed expressions.

Dataset: mteb/talemaader_pc • License: cc-by-4.0 • Learn more →

Task category Score Languages Domains Annotations Creators Sample Creation
text to text (t2t) max_accuracy dan Academic, Written derived created
Citation
@misc{DSLDK1000Talemader,
  author = {{Det Danske Sprog- og Litteraturselskab}},
  howpublished = {Sprogteknologi.dk},
  language = {Danish},
  note = {CC-BY licensed dataset of 1000 Danish sayings and expressions},
  publisher = {Digitaliseringsstyrelsen \& Det Danske Sprog- og Litteraturselskab},
  title = {1000 danske talem{\aa}der - evalueringsdatas{\ae}t},
  url = {https://sprogteknologi.dk/dataset/1000-talemader-evalueringsdatasaet},
  year = {2024},
}

TwitterSemEval2015

Paraphrase-Pairs of Tweets from the SemEval 2015 workshop.

Dataset: mteb/twittersemeval2015-pairclassification • License: not specified • Learn more →

Task category Score Languages Domains Annotations Creators Sample Creation
text to text (t2t) max_ap eng Social, Written human-annotated found
Citation
@inproceedings{xu-etal-2015-semeval,
  address   = {Denver, Colorado},
  author    = {Xu, Wei and Callison-Burch, Chris and Dolan, Bill},
  booktitle = {Proceedings of the 9th International Workshop on Semantic Evaluation ({S}em{E}val 2015)},
  doi       = {10.18653/v1/S15-2001},
  editor    = {Nakov, Preslav and Zesch, Torsten and Cer, Daniel and Jurgens, David},
  month     = jun,
  pages     = {1--11},
  publisher = {Association for Computational Linguistics},
  title     = {{S}em{E}val-2015 Task 1: Paraphrase and Semantic Similarity in {T}witter ({PIT})},
  url       = {https://aclanthology.org/S15-2001},
  year      = {2015},
}

TwitterSemEval2015-VN

A translated dataset from Paraphrase-Pairs of Tweets from the SemEval 2015 workshop. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Cohere's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Uses LLM-as-a-judge to score the quality of the samples based on multiple criteria.

Dataset: GreenNode/twittersemeval2015-pairclassification-vn • License: cc-by-sa-4.0 • Learn more →

Task category Score Languages Domains Annotations Creators Sample Creation
text to category (t2c) max_ap vie Social, Written derived machine-translated and LM verified
Citation
@misc{pham2025vnmtebvietnamesemassivetext,
  archiveprefix = {arXiv},
  author        = {Loc Pham and
                   Tung Luu and
                   Thu Vo and
                   Minh Nguyen and
                   Viet Hoang},
  eprint        = {2507.21500},
  primaryclass  = {cs.CL},
  title         = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark},
  url           = {https://arxiv.org/abs/2507.21500},
  year          = {2025},
}

TwitterURLCorpus

Paraphrase-Pairs of Tweets.

Dataset: mteb/twitterurlcorpus-pairclassification • License: not specified • Learn more →

Task category Score Languages Domains Annotations Creators Sample Creation
text to text (t2t) max_ap eng Social, Written derived found
Citation
@inproceedings{lan-etal-2017-continuously,
  abstract  = {A major challenge in paraphrase research is the lack of parallel corpora. In this paper, we present a new method to collect large-scale sentential paraphrases from Twitter by linking tweets through shared URLs. The main advantage of our method is its simplicity, as it gets rid of the classifier or human in the loop needed to select data before annotation and subsequent application of paraphrase identification algorithms in the previous work. We present the largest human-labeled paraphrase corpus to date of 51,524 sentence pairs and the first cross-domain benchmarking for automatic paraphrase identification. In addition, we show that more than 30,000 new sentential paraphrases can be easily and continuously captured every month at {\textasciitilde}70{\%} precision, and demonstrate their utility for downstream NLP tasks through phrasal paraphrase extraction. We make our code and data freely available.},
  address   = {Copenhagen, Denmark},
  author    = {Lan, Wuwei and Qiu, Siyu and He, Hua and Xu, Wei},
  booktitle = {Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing},
  doi       = {10.18653/v1/D17-1126},
  editor    = {Palmer, Martha and Hwa, Rebecca and Riedel, Sebastian},
  month     = sep,
  pages     = {1224--1234},
  publisher = {Association for Computational Linguistics},
  title     = {A Continuously Growing Dataset of Sentential Paraphrases},
  url       = {https://aclanthology.org/D17-1126},
  year      = {2017},
}

TwitterURLCorpus-VN

A translated dataset from Paraphrase-Pairs of Tweets. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Cohere's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Uses LLM-as-a-judge to score the quality of the samples based on multiple criteria.

Dataset: GreenNode/twitterurlcorpus-pairclassification-vn • License: cc-by-sa-4.0 • Learn more →

Task category Score Languages Domains Annotations Creators Sample Creation
text to category (t2c) max_ap vie Social, Written derived machine-translated and LM verified
Citation
@misc{pham2025vnmtebvietnamesemassivetext,
  archiveprefix = {arXiv},
  author        = {Loc Pham and
                   Tung Luu and
                   Thu Vo and
                   Minh Nguyen and
                   Viet Hoang},
  eprint        = {2507.21500},
  primaryclass  = {cs.CL},
  title         = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark},
  url           = {https://arxiv.org/abs/2507.21500},
  year          = {2025},
}

XNLI

Dataset: mteb/xnli • License: not specified • Learn more →

Task category Score Languages Domains Annotations Creators Sample Creation
text to text (t2t) max_ap ara, bul, deu, ell, eng, ... (14) Fiction, Government, Non-fiction, Written expert-annotated created
Citation
@inproceedings{conneau2018xnli,
  address = {Brussels, Belgium},
  author = {Conneau, Alexis
    and Rinott, Ruty
    and Lample, Guillaume
    and Williams, Adina
    and Bowman, Samuel R.
    and Schwenk, Holger
    and Stoyanov, Veselin},
  booktitle = {Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing},
  doi = {10.18653/v1/D18-1269},
  month = oct # {-} # nov,
  pages = {2475--2485},
  publisher = {Association for Computational Linguistics},
  title = {{XNLI}: Evaluating Cross-lingual Sentence Representations},
  url = {https://aclanthology.org/D18-1269},
  year = {2018},
}

XNLIV2

This is a subset of 'XNLI 2.0: Improving XNLI dataset and performance on Cross Lingual Understanding' with languages that were not part of the original XNLI, plus three (verified) languages that are not strongly covered in MTEB.

Dataset: mteb/XNLIV2 • License: not specified • Learn more →

Task category Score Languages Domains Annotations Creators Sample Creation
text to text (t2t) max_ap asm, ben, bho, ell, guj, ... (13) Fiction, Government, Non-fiction, Written expert-annotated machine-translated and verified
Citation
@inproceedings{upadhyay2023xnli,
  author       = {Upadhyay, Ankit Kumar and
                  Upadhya, Harsit Kumar},
  booktitle    = {2023 IEEE 8th International Conference for Convergence in Technology (I2CT)},
  organization = {IEEE},
  pages        = {1--6},
  title        = {XNLI 2.0: Improving XNLI dataset and performance on Cross Lingual Understanding (XLU)},
  year         = {2023},
}

XStance

A Multilingual Multi-Target Dataset for Stance Detection in French, German, and Italian.

Dataset: ZurichNLP/x_stance • License: cc-by-nc-4.0 • Learn more →

Task category Score Languages Domains Annotations Creators Sample Creation
text to text (t2t) max_ap deu, fra, ita Social, Written human-annotated created
Citation
@inproceedings{vamvas2020xstance,
  address = {Zurich, Switzerland},
  author = {Vamvas, Jannis and Sennrich, Rico},
  booktitle = {Proceedings of the 5th Swiss Text Analytics Conference (SwissText) \& 16th Conference on Natural Language Processing (KONVENS)},
  month = jun,
  title = {{X-Stance}: A Multilingual Multi-Target Dataset for Stance Detection},
  url = {http://ceur-ws.org/Vol-2624/paper9.pdf},
  year = {2020},
}

indonli

IndoNLI is the first human-elicited Natural Language Inference (NLI) dataset for Indonesian. IndoNLI is annotated by both crowd workers and experts.

Dataset: afaji/indonli • License: cc-by-sa-4.0 • Learn more →

Task category Score Languages Domains Annotations Creators Sample Creation
text to text (t2t) max_ap ind Encyclopaedic, News, Web, Written expert-annotated found
Citation
@inproceedings{mahendra-etal-2021-indonli,
  address   = {Online and Punta Cana, Dominican Republic},
  author    = {Mahendra, Rahmad and
               Aji, Alham Fikri and
               Louvan, Samuel and
               Rahman, Fahrurrozi and
               Vania, Clara},
  booktitle = {Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing},
  month     = nov,
  pages     = {10511--10527},
  publisher = {Association for Computational Linguistics},
  title     = {{I}ndo{NLI}: A Natural Language Inference Dataset for {I}ndonesian},
  url       = {https://aclanthology.org/2021.emnlp-main.821},
  year      = {2021},
}