Skip to content

Text Model

  • Number of models: 204

Instruction Model

Alibaba-NLP/gte-Qwen1.5-7B-instruct

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
32.8K 4.1K 7.7B 28.8 GB 2024-04-20 eng-Latn

Alibaba-NLP/gte-Qwen2-1.5B-instruct

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
32.8K 9.0K 1.8B 6.6 GB 2024-07-29 eng-Latn

Alibaba-NLP/gte-Qwen2-7B-instruct

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
32.8K 3.6K 7.6B 28.4 GB 2024-06-15 not specified
Citation
@article{li2023towards,
  author  = {Li, Zehan and Zhang, Xin and Zhang, Yanzhao and Long, Dingkun and Xie, Pengjun and Zhang, Meishan},
  title   = {Towards general text embeddings with multi-stage contrastive learning},
  journal = {arXiv preprint arXiv:2308.03281},
  year    = {2023},
}

BAAI/bge-base-en

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 109.0M 390.0 MB 2023-08-05 eng-Latn

BAAI/bge-base-en-v1.5

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 109.0M 390.0 MB 2023-09-11 eng-Latn
Citation
@misc{bge_embedding,
  author        = {Shitao Xiao and Zheng Liu and Peitian Zhang and Niklas Muennighoff},
  title         = {C-Pack: Packaged Resources To Advance General Chinese Embedding},
  year          = {2023},
  eprint        = {2309.07597},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
}

BAAI/bge-base-zh

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 109.0M 390.0 MB 2023-08-05 zho-Hans

BAAI/bge-base-zh-v1.5

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 109.0M 416.0 MB 2023-09-11 zho-Hans

BAAI/bge-large-en

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 1.0K 335.0M 1.2 GB 2023-08-05 eng-Latn

BAAI/bge-large-en-v1.5

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 1.0K 335.0M 1.2 GB 2023-09-12 eng-Latn
Citation
@misc{bge_embedding,
  author        = {Shitao Xiao and Zheng Liu and Peitian Zhang and Niklas Muennighoff},
  title         = {C-Pack: Packaged Resources To Advance General Chinese Embedding},
  year          = {2023},
  eprint        = {2309.07597},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
}

BAAI/bge-large-zh

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 1.0K 335.0M 1.2 GB 2023-08-02 zho-Hans

BAAI/bge-large-zh-v1.5

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 1.0K 335.0M 1.2 GB 2023-09-12 zho-Hans

BAAI/bge-small-en

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 512 33.4M 127.0 MB 2023-08-05 eng-Latn

BAAI/bge-small-en-v1.5

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 512 33.4M 127.0 MB 2023-09-12 eng-Latn
Citation
@misc{bge_embedding,
  author        = {Shitao Xiao and Zheng Liu and Peitian Zhang and Niklas Muennighoff},
  title         = {C-Pack: Packaged Resources To Advance General Chinese Embedding},
  year          = {2023},
  eprint        = {2309.07597},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
}

BAAI/bge-small-zh

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 512 33.4M 127.0 MB 2023-08-05 zho-Hans

BAAI/bge-small-zh-v1.5

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 512 33.4M 91.0 MB 2023-09-12 zho-Hans

BeastyZ/e5-R-mistral-7b

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
32.8K 4.1K 7.2B 27.0 GB 2024-06-28 eng-Latn

ByteDance-Seed/Seed1.5-Embedding

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
32.8K 2.0K not specified not specified 2025-04-25 eng-Latn, zho-Hans

Bytedance/Seed1.6-embedding

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
32.8K 2.0K not specified not specified 2025-06-18 eng-Latn, zho-Hans

Cohere/Cohere-embed-english-light-v3.0

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 384 not specified not specified 2023-11-02 eng-Latn

Cohere/Cohere-embed-english-v3.0

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 1.0K not specified not specified 2023-11-02 eng-Latn

Cohere/Cohere-embed-multilingual-light-v3.0

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 384 not specified not specified 2023-11-02 afr-Latn, amh-Ethi, ara-Arab, asm-Beng, aze-Latn, ... (109)

Cohere/Cohere-embed-multilingual-v3.0

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
not specified 512 not specified not specified 2023-11-02 afr-Latn, amh-Ethi, ara-Arab, asm-Beng, aze-Latn, ... (109)

GeoGPT-Research-Project/GeoEmbedding

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
32.8K 4.1K 7.2B 27.0 GB 2025-04-22 eng-Latn

GritLM/GritLM-7B

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
4.1K 4.1K 7.2B 13.5 GB 2024-02-15 deu-Latn, eng-Latn, fra-Latn, ita-Latn, spa-Latn
Citation
@misc{muennighoff2024generative,
  author        = {Niklas Muennighoff and Hongjin Su and Liang Wang and Nan Yang and Furu Wei and Tao Yu and Amanpreet Singh and Douwe Kiela},
  title         = {Generative Representational Instruction Tuning},
  year          = {2024},
  eprint        = {2402.09906},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
}

GritLM/GritLM-8x7B

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
4.1K 4.1K 57.9B 87.0 GB 2024-02-15 deu-Latn, eng-Latn, fra-Latn, ita-Latn, spa-Latn
Citation
@misc{muennighoff2024generative,
  author        = {Niklas Muennighoff and Hongjin Su and Liang Wang and Nan Yang and Furu Wei and Tao Yu and Amanpreet Singh and Douwe Kiela},
  title         = {Generative Representational Instruction Tuning},
  year          = {2024},
  eprint        = {2402.09906},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
}

HIT-TMG/KaLM-embedding-multilingual-mini-instruct-v1

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 896 494.0M 1.8 GB 2024-10-23 eng-Latn, zho-Hans

HIT-TMG/KaLM-embedding-multilingual-mini-instruct-v1.5

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 896 494.0M 1.8 GB 2024-12-26 eng-Latn, zho-Hans

HIT-TMG/KaLM-embedding-multilingual-mini-instruct-v2

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 896 494.0M 942.0 MB 2025-06-25 eng-Latn, zho-Hans

Kingsoft-LLM/QZhou-Embedding

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
8.2K 3.6K 7.1B 28.4 GB 2025-08-24 eng-Latn, zho-Hans

Linq-AI-Research/Linq-Embed-Mistral

License: cc-by-nc-4.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
32.8K 4.1K 7.1B 13.2 GB 2024-05-29 eng-Latn

McGill-NLP/LLM2Vec-Llama-2-7b-chat-hf-mntp-supervised

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
32.8K 4.1K 7.1B 26.5 GB 2024-04-09 eng-Latn
Citation
@misc{behnamghader2024llm2veclargelanguagemodels,
  author        = {Parishad BehnamGhader and Vaibhav Adlakha and Marius Mosbach and Dzmitry Bahdanau and Nicolas Chapados and Siva Reddy},
  title         = {LLM2Vec: Large Language Models Are Secretly Powerful Text Encoders},
  year          = {2024},
  eprint        = {2404.05961},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2404.05961},
}

McGill-NLP/LLM2Vec-Llama-2-7b-chat-hf-mntp-unsup-simcse

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
32.8K 4.1K 7.1B 26.5 GB 2024-04-09 eng-Latn
Citation
@misc{behnamghader2024llm2veclargelanguagemodels,
  author        = {Parishad BehnamGhader and Vaibhav Adlakha and Marius Mosbach and Dzmitry Bahdanau and Nicolas Chapados and Siva Reddy},
  title         = {LLM2Vec: Large Language Models Are Secretly Powerful Text Encoders},
  year          = {2024},
  eprint        = {2404.05961},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2404.05961},
}

McGill-NLP/LLM2Vec-Meta-Llama-3-8B-Instruct-mntp-supervised

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
8.2K 4.1K 7.5B 28.0 GB 2024-04-09 eng-Latn
Citation
@misc{behnamghader2024llm2veclargelanguagemodels,
  author        = {Parishad BehnamGhader and Vaibhav Adlakha and Marius Mosbach and Dzmitry Bahdanau and Nicolas Chapados and Siva Reddy},
  title         = {LLM2Vec: Large Language Models Are Secretly Powerful Text Encoders},
  year          = {2024},
  eprint        = {2404.05961},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2404.05961},
}

McGill-NLP/LLM2Vec-Meta-Llama-3-8B-Instruct-mntp-unsup-simcse

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
8.2K 4.1K 7.5B 28.0 GB 2024-04-09 eng-Latn
Citation
@misc{behnamghader2024llm2veclargelanguagemodels,
  author        = {Parishad BehnamGhader and Vaibhav Adlakha and Marius Mosbach and Dzmitry Bahdanau and Nicolas Chapados and Siva Reddy},
  title         = {LLM2Vec: Large Language Models Are Secretly Powerful Text Encoders},
  year          = {2024},
  eprint        = {2404.05961},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2404.05961},
}

McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-supervised

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
32.8K 4.1K 7.1B 26.5 GB 2024-04-09 eng-Latn
Citation
@misc{behnamghader2024llm2veclargelanguagemodels,
  author        = {Parishad BehnamGhader and Vaibhav Adlakha and Marius Mosbach and Dzmitry Bahdanau and Nicolas Chapados and Siva Reddy},
  title         = {LLM2Vec: Large Language Models Are Secretly Powerful Text Encoders},
  year          = {2024},
  eprint        = {2404.05961},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2404.05961},
}

McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-unsup-simcse

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
32.8K 4.1K 7.1B 26.5 GB 2024-04-09 eng-Latn
Citation
@misc{behnamghader2024llm2veclargelanguagemodels,
  author        = {Parishad BehnamGhader and Vaibhav Adlakha and Marius Mosbach and Dzmitry Bahdanau and Nicolas Chapados and Siva Reddy},
  title         = {LLM2Vec: Large Language Models Are Secretly Powerful Text Encoders},
  year          = {2024},
  eprint        = {2404.05961},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2404.05961},
}

McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp-supervised

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
32.8K 4.1K 7.1B 26.5 GB 2024-04-09 eng-Latn
Citation
@misc{behnamghader2024llm2veclargelanguagemodels,
  author        = {Parishad BehnamGhader and Vaibhav Adlakha and Marius Mosbach and Dzmitry Bahdanau and Nicolas Chapados and Siva Reddy},
  title         = {LLM2Vec: Large Language Models Are Secretly Powerful Text Encoders},
  year          = {2024},
  eprint        = {2404.05961},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2404.05961},
}

McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp-unsup-simcse

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
32.8K 4.1K 7.1B 26.5 GB 2024-04-09 eng-Latn
Citation
@misc{behnamghader2024llm2veclargelanguagemodels,
  author        = {Parishad BehnamGhader and Vaibhav Adlakha and Marius Mosbach and Dzmitry Bahdanau and Nicolas Chapados and Siva Reddy},
  title         = {LLM2Vec: Large Language Models Are Secretly Powerful Text Encoders},
  year          = {2024},
  eprint        = {2404.05961},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2404.05961},
}

MongoDB/mdbr-leaf-ir

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 22.9M 86.0 MB 2025-08-27 eng-Latn

MongoDB/mdbr-leaf-mt

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 1.0K 23.0M 86.0 MB 2025-08-27 eng-Latn

NovaSearch/jasper_en_vision_language_v1

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
131.1K 9.0K 2.0B 3.7 GB 2024-12-11 eng-Latn

NovaSearch/stella_en_1.5B_v5

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
131.1K 9.0K 1.5B 5.7 GB 2024-07-12 eng-Latn

NovaSearch/stella_en_400M_v5

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
8.2K 4.1K 435.0M 1.6 GB 2024-07-12 eng-Latn

Qwen/Qwen3-Embedding-0.6B

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
32.8K 1.0K 595.8M 2.2 GB 2025-06-05 afr-Latn, ara-Arab, aze-Latn, bel-Cyrl, ben-Beng, ... (71)

Qwen/Qwen3-Embedding-4B

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
32.8K 2.6K 4.0B 15.0 GB 2025-06-05 afr-Latn, ara-Arab, aze-Latn, bel-Cyrl, ben-Beng, ... (71)

Qwen/Qwen3-Embedding-8B

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
32.8K 4.1K 7.6B 28.2 GB 2025-06-05 afr-Latn, ara-Arab, aze-Latn, bel-Cyrl, ben-Beng, ... (71)

Sailesh97/Hinvec

License: cc-by-nc-4.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
2.0K 2.0K 939.6M 3.6 GB 2025-06-19 eng-Latn, hin-Deva

Salesforce/SFR-Embedding-2_R

License: cc-by-nc-4.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
32.8K 4.1K 7.1B 13.2 GB 2024-06-14 eng-Latn
Citation
@misc{SFR-embedding-2,
  author = {Meng, Rui and Liu, Ye and Joty, Shafiq Rayhan and Xiong, Caiming and Zhou, Yingbo and Yavuz, Semih},
  title  = {SFR-Embedding-2: Advanced Text Embedding with Multi-stage Training},
  year   = {2024},
  url    = {https://huggingface.co/Salesforce/SFR-Embedding-2_R},
}

Salesforce/SFR-Embedding-Code-2B_R

License: cc-by-nc-4.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
8.2K 2.3K 2.6B 4.9 GB 2025-01-17 eng-Latn

Salesforce/SFR-Embedding-Mistral

License: cc-by-nc-4.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
32.8K 4.1K 7.1B 13.2 GB 2024-01-24 eng-Latn

SamilPwC-AXNode-GenAI/PwC-Embedding_expr

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
514.0 1.0K 560.0M 2.1 GB 2025-08-12 kor-Hang

Snowflake/snowflake-arctic-embed-l

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 1.0K 335.0M 1.2 GB 2024-04-12 eng-Latn

Snowflake/snowflake-arctic-embed-l-v2.0

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
8.2K 1.0K 568.0M 2.1 GB 2024-12-04 afr-Latn, ara-Arab, aze-Latn, bel-Cyrl, ben-Beng, ... (74)

Snowflake/snowflake-arctic-embed-m

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 109.0M 415.0 MB 2024-04-12 eng-Latn

Snowflake/snowflake-arctic-embed-m-long

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
2.0K 768 137.0M 522.0 MB 2024-04-12 eng-Latn

Snowflake/snowflake-arctic-embed-m-v1.5

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 109.0M 415.0 MB 2024-07-08 eng-Latn

Snowflake/snowflake-arctic-embed-m-v2.0

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
8.2K 768 305.0M 1.1 GB 2024-12-04 afr-Latn, ara-Arab, aze-Latn, bel-Cyrl, ben-Beng, ... (74)

Snowflake/snowflake-arctic-embed-s

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 384 32.2M 127.0 MB 2024-04-12 eng-Latn

Snowflake/snowflake-arctic-embed-xs

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 384 22.6M 86.0 MB 2024-07-08 eng-Latn

TencentBAC/Conan-embedding-v2

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
32.8K 3.6K not specified not specified 2025-04-10 eng-Latn, zho-Hans

VPLabs/SearchMap_Preview

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
8.2K 4.1K 435.0M 1.6 GB 2025-03-05 eng-Latn

WhereIsAI/UAE-Large-V1

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 1.0K 335.0M 1.2 GB 2023-12-04 eng-Latn
Citation
    @article{li2023angle,
      author  = {Li, Xianming and Li, Jing},
      title   = {AnglE-optimized Text Embeddings},
      journal = {arXiv preprint arXiv:2309.12871},
      year    = {2023},
    }

Youtu-RAG/CoDi-Embedding-V1

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
4.1K 2.3K 2.7B not specified 2025-08-20 zho-Hans

ai-forever/FRIDA

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 1.5K 823.0M 3.1 GB 2024-12-29 rus-Cyrl

ai-forever/ru-en-RoSBERTa

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 1.0K 404.0M 1.5 GB 2024-07-29 rus-Cyrl
Citation
@misc{snegirev2024russianfocusedembeddersexplorationrumteb,
  author        = {Artem Snegirev and Maria Tikhonova and Anna Maksimova and Alena Fenogenova and Alexander Abramov},
  title         = {The Russian-focused embedders' exploration: ruMTEB benchmark and Russian embedding model design},
  year          = {2024},
  eprint        = {2408.12503},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2408.12503},
}

ai-sage/Giga-Embeddings-instruct

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
32.8K 2.0K 2.5B 9.4 GB 2024-12-13 eng-Latn, rus-Cyrl

annamodels/LGAI-Embedding-Preview

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
32.8K 4.1K 7.1B 26.5 GB 2025-06-11 eng-Latn

bchoiced/CHAIN19

License: cc-by-sa-4.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
32.8K 4.1K 7.1B 26.5 GB 2025-05-07 eng-Latn

bedrock/cohere-embed-english-v3

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 1.0K not specified not specified 2023-11-02 eng-Latn

bedrock/cohere-embed-multilingual-v3

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 1.0K not specified not specified 2023-11-02 afr-Latn, amh-Ethi, ara-Arab, asm-Beng, aze-Latn, ... (109)

castorini/repllama-v1-7b-lora-passage

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
4.1K 4.1K 7.0M 27.0 MB 2023-10-11 eng-Latn
Citation
@article{rankllama,
  author  = {Xueguang Ma and Liang Wang and Nan Yang and Furu Wei and Jimmy Lin},
  title   = {Fine-Tuning LLaMA for Multi-Stage Text Retrieval},
  journal = {arXiv:2310.08319},
  year    = {2023},
}

deepvk/USER-base

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 427.0M 473.0 MB 2024-06-10 rus-Cyrl
Citation
@misc{deepvk2024user,
  author    = {Malashenko, Boris and Zemerov, Anton and Spirin, Egor},
  title     = {USER: Universal Sentence Encoder for Russian},
  year      = {2024},
  publisher = {Hugging Face},
  url       = {https://huggingface.co/datasets/deepvk/USER-base},
}

deepvk/USER2-base

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
8.2K 768 149.0M 568.0 MB 2025-04-19 rus-Cyrl

deepvk/USER2-small

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
8.2K 384 34.4M 131.0 MB 2025-04-19 rus-Cyrl

google/gemini-embedding-001

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
2.0K 3.1K not specified not specified 2025-03-07 arb-Arab, ben-Beng, deu-Latn, eng-Latn, fin-Latn, ... (19)

google/text-embedding-004

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
2.0K 768 not specified not specified 2024-05-14 eng-Latn

google/text-embedding-005

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
2.0K 768 not specified not specified 2024-11-18 eng-Latn

google/text-multilingual-embedding-002

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
2.0K 768 not specified not specified 2024-05-14 arb-Arab, ben-Beng, deu-Latn, eng-Latn, fin-Latn, ... (19)

infly/inf-retriever-v1

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
32.8K 3.6K 7.1B 13.2 GB 2024-12-24 eng-Latn, zho-Hans

infly/inf-retriever-v1-1.5b

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
32.8K 1.5K 1.5B 2.9 GB 2025-02-08 eng-Latn, zho-Hans

intfloat/e5-base

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 109.0M 418.0 MB 2022-12-26 eng-Latn
Citation
@article{wang2022text,
  author  = {Wang, Liang and Yang, Nan and Huang, Xiaolong and Jiao, Binxing and Yang, Linjun and Jiang, Daxin and Majumder, Rangan and Wei, Furu},
  title   = {Text Embeddings by Weakly-Supervised Contrastive Pre-training},
  journal = {arXiv preprint arXiv:2212.03533},
  year    = {2022},
}

intfloat/e5-base-v2

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 109.0M 418.0 MB 2024-02-08 eng-Latn
Citation
@article{wang2022text,
  author  = {Wang, Liang and Yang, Nan and Huang, Xiaolong and Jiao, Binxing and Yang, Linjun and Jiang, Daxin and Majumder, Rangan and Wei, Furu},
  title   = {Text Embeddings by Weakly-Supervised Contrastive Pre-training},
  journal = {arXiv preprint arXiv:2212.03533},
  year    = {2022},
}

intfloat/e5-large

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 1.0K 335.0M 1.2 GB 2022-12-26 eng-Latn
Citation
@article{wang2022text,
  author  = {Wang, Liang and Yang, Nan and Huang, Xiaolong and Jiao, Binxing and Yang, Linjun and Jiang, Daxin and Majumder, Rangan and Wei, Furu},
  title   = {Text Embeddings by Weakly-Supervised Contrastive Pre-training},
  journal = {arXiv preprint arXiv:2212.03533},
  year    = {2022},
}

intfloat/e5-large-v2

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
514.0 1.0K 335.0M 1.2 GB 2024-02-08 eng-Latn
Citation
@article{wang2022text,
  author  = {Wang, Liang and Yang, Nan and Huang, Xiaolong and Jiao, Binxing and Yang, Linjun and Jiang, Daxin and Majumder, Rangan and Wei, Furu},
  title   = {Text Embeddings by Weakly-Supervised Contrastive Pre-training},
  journal = {arXiv preprint arXiv:2212.03533},
  year    = {2022},
}

intfloat/e5-mistral-7b-instruct

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
32.8K 4.1K 7.1B 13.2 GB 2024-02-08 deu-Latn, eng-Latn, fra-Latn, ita-Latn, spa-Latn
Citation
    @article{wang2023improving,
      author  = {Wang, Liang and Yang, Nan and Huang, Xiaolong and Yang, Linjun and Majumder, Rangan and Wei, Furu},
      title   = {Improving Text Embeddings with Large Language Models},
      journal = {arXiv preprint arXiv:2401.00368},
      year    = {2023},
    }

    @article{wang2022text,
      author  = {Wang, Liang and Yang, Nan and Huang, Xiaolong and Jiao, Binxing and Yang, Linjun and Jiang, Daxin and Majumder, Rangan and Wei, Furu},
      title   = {Text Embeddings by Weakly-Supervised Contrastive Pre-training},
      journal = {arXiv preprint arXiv:2212.03533},
      year    = {2022},
    }

intfloat/e5-small

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 384 33.0M 127.0 MB 2024-02-08 eng-Latn
Citation
@article{wang2022text,
  author  = {Wang, Liang and Yang, Nan and Huang, Xiaolong and Jiao, Binxing and Yang, Linjun and Jiang, Daxin and Majumder, Rangan and Wei, Furu},
  title   = {Text Embeddings by Weakly-Supervised Contrastive Pre-training},
  journal = {arXiv preprint arXiv:2212.03533},
  year    = {2022},
}

intfloat/e5-small-v2

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 384 33.0M 127.0 MB 2024-02-08 eng-Latn
Citation
@article{wang2022text,
  author  = {Wang, Liang and Yang, Nan and Huang, Xiaolong and Jiao, Binxing and Yang, Linjun and Jiang, Daxin and Majumder, Rangan and Wei, Furu},
  title   = {Text Embeddings by Weakly-Supervised Contrastive Pre-training},
  journal = {arXiv preprint arXiv:2212.03533},
  year    = {2022},
}

intfloat/multilingual-e5-base

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
514.0 768 278.0M 1.0 GB 2024-02-08 afr-Latn, amh-Latn, ara-Latn, asm-Latn, aze-Latn, ... (99)
Citation
@article{wang2024multilingual,
  author  = {Wang, Liang and Yang, Nan and Huang, Xiaolong and Yang, Linjun and Majumder, Rangan and Wei, Furu},
  title   = {Multilingual E5 Text Embeddings: A Technical Report},
  journal = {arXiv preprint arXiv:2402.05672},
  year    = {2024},
}

intfloat/multilingual-e5-large

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
514.0 1.0K 560.0M 2.1 GB 2024-02-08 afr-Latn, amh-Latn, ara-Latn, asm-Latn, aze-Latn, ... (99)
Citation
@article{wang2024multilingual,
  author  = {Wang, Liang and Yang, Nan and Huang, Xiaolong and Yang, Linjun and Majumder, Rangan and Wei, Furu},
  title   = {Multilingual E5 Text Embeddings: A Technical Report},
  journal = {arXiv preprint arXiv:2402.05672},
  year    = {2024},
}

intfloat/multilingual-e5-large-instruct

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
514.0 1.0K 560.0M 1.0 GB 2024-02-08 afr-Latn, amh-Latn, ara-Latn, asm-Latn, aze-Latn, ... (99)
Citation
@article{wang2024multilingual,
  author  = {Wang, Liang and Yang, Nan and Huang, Xiaolong and Yang, Linjun and Majumder, Rangan and Wei, Furu},
  title   = {Multilingual E5 Text Embeddings: A Technical Report},
  journal = {arXiv preprint arXiv:2402.05672},
  year    = {2024},
}

intfloat/multilingual-e5-small

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 384 118.0M 449.0 MB 2024-02-08 afr-Latn, amh-Latn, ara-Latn, asm-Latn, aze-Latn, ... (99)
Citation
@article{wang2024multilingual,
  author  = {Wang, Liang and Yang, Nan and Huang, Xiaolong and Yang, Linjun and Majumder, Rangan and Wei, Furu},
  title   = {Multilingual E5 Text Embeddings: A Technical Report},
  journal = {arXiv preprint arXiv:2402.05672},
  year    = {2024},
}

jinaai/jina-embeddings-v3

License: cc-by-nc-4.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
8.2K 1.0K 572.0M 1.1 GB 2024-09-18 afr-Latn, amh-Latn, ara-Latn, asm-Latn, aze-Latn, ... (99)
Citation
    @misc{sturua2024jinaembeddingsv3multilingualembeddingstask,
      author        = {Saba Sturua and Isabelle Mohr and Mohammad Kalim Akram and Michael Günther and Bo Wang and Markus Krimmel and Feng Wang and Georgios Mastrapas and Andreas Koukounas and Nan Wang and Han Xiao},
      title         = {jina-embeddings-v3: Multilingual Embeddings With Task LoRA},
      year          = {2024},
      eprint        = {2409.10173},
      archivePrefix = {arXiv},
      primaryClass  = {cs.CL},
      url           = {https://arxiv.org/abs/2409.10173},
    }

jxm/cde-small-v1

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 281.0M 1.0 GB 2024-09-24 eng-Latn

jxm/cde-small-v2

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 306.0M 1.1 GB 2025-01-13 eng-Latn

llamaindex/vdr-2b-multi-v1

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
32.8K 1.5K 2.0B 4.1 GB 2024-01-08 deu-Latn, eng-Latn, fra-Latn, ita-Latn, spa-Latn

manveertamber/cadet-embed-base-v1

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 109.0M 418.0 MB 2025-05-11 eng-Latn

mixedbread-ai/mxbai-embed-2d-large-v1

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 335.0M not specified 2024-03-04 eng-Latn

mixedbread-ai/mxbai-embed-large-v1

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 1.0K 335.0M 639.0 MB 2024-03-07 eng-Latn
Citation
    @online{emb2024mxbai,
      author = {Sean Lee and Aamir Shakir and Darius Koenig and Julius Lipp},
      title  = {Open Source Strikes Bread - New Fluffy Embeddings Model},
      year   = {2024},
      url    = {https://www.mixedbread.ai/blog/mxbai-embed-large-v1},
    }

    @article{li2023angle,
      author  = {Li, Xianming and Li, Jing},
      title   = {AnglE-optimized Text Embeddings},
      journal = {arXiv preprint arXiv:2309.12871},
      year    = {2023},
    }

mixedbread-ai/mxbai-embed-xsmall-v1

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 384 24.1M not specified 2024-08-13 eng-Latn

nomic-ai/modernbert-embed-base

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
8.2K 768 149.0M 568.0 MB 2024-12-29 eng-Latn

nomic-ai/nomic-embed-text-v1

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
8.2K 768 not specified 522.0 MB 2024-01-31 eng-Latn
Citation
@misc{nussbaum2024nomic,
  author        = {Zach Nussbaum and John X. Morris and Brandon Duderstadt and Andriy Mulyar},
  title         = {Nomic Embed: Training a Reproducible Long Context Text Embedder},
  year          = {2024},
  eprint        = {2402.01613},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
}

nomic-ai/nomic-embed-text-v1-ablated

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
8.2K 768 not specified not specified 2024-01-15 eng-Latn

nomic-ai/nomic-embed-text-v1-unsupervised

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
8.2K 768 not specified not specified 2024-01-15 eng-Latn

nomic-ai/nomic-embed-text-v1.5

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
8.2K 768 137.0M 522.0 MB 2024-02-10 eng-Latn
Citation
% arXiv preprint reference (eprint 2402.01613, class cs.CL).
@misc{nussbaum2024nomic,
  author        = {Zach Nussbaum and John X. Morris and Brandon Duderstadt and Andriy Mulyar},
  title         = {Nomic Embed: Training a Reproducible Long Context Text Embedder},
  year          = {2024},
  eprint        = {2402.01613},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
}

nvidia/NV-Embed-v1

License: cc-by-nc-4.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
32.8K 4.1K 7.8B 29.2 GB 2024-09-13 eng-Latn

nvidia/NV-Embed-v2

License: cc-by-nc-4.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
32.8K 4.1K 7.8B 14.6 GB 2024-09-09 eng-Latn

opensearch-project/opensearch-neural-sparse-encoding-doc-v1

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 30.5K 133.0M 507.0 MB 2024-03-07 eng-Latn

opensearch-project/opensearch-neural-sparse-encoding-doc-v2-distill

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 30.5K 67.0M 267.0 MB 2024-07-17 eng-Latn

opensearch-project/opensearch-neural-sparse-encoding-doc-v2-mini

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 30.5K 22.7M 86.0 MB 2024-07-18 eng-Latn

opensearch-project/opensearch-neural-sparse-encoding-doc-v3-distill

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 30.5K 67.0M 267.0 MB 2025-03-28 eng-Latn

opensearch-project/opensearch-neural-sparse-encoding-doc-v3-gte

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
8.2K 30.5K 137.4M 549.0 MB 2025-06-18 eng-Latn

samaya-ai/RepLLaMA-reproduced

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
4.1K 4.1K 7.0M 27.0 MB 2024-09-15 eng-Latn
Citation
% arXiv preprint. The identifier is carried by the eprint fields (not a
% journal field) so bibliography styles can format and link it.
@misc{rankllama,
  author        = {Ma, Xueguang and Wang, Liang and Yang, Nan and Wei, Furu and Lin, Jimmy},
  title         = {Fine-Tuning {LLaMA} for Multi-Stage Text Retrieval},
  year          = {2023},
  eprint        = {2310.08319},
  archivePrefix = {arXiv},
  primaryClass  = {cs.IR},
}

samaya-ai/promptriever-llama2-7b-v1

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
4.1K 4.1K 7.0B 27.0 MB 2024-09-15 eng-Latn
Citation
% arXiv preprint (eprint 2409.11136, class cs.IR).
% Names use "Last, First" form: "Van Durme" is a two-word surname, and in
% "First Last" form BibTeX would treat the capitalised "Van" as a given name.
@misc{weller2024promptriever,
  author        = {Weller, Orion and Van Durme, Benjamin and Lawrie, Dawn and Paranjape, Ashwin and Zhang, Yuhao and Hessel, Jack},
  title         = {Promptriever: Instruction-Trained Retrievers Can Be Prompted Like Language Models},
  year          = {2024},
  eprint        = {2409.11136},
  archivePrefix = {arXiv},
  primaryClass  = {cs.IR},
  url           = {https://arxiv.org/abs/2409.11136},
}

samaya-ai/promptriever-llama3.1-8b-instruct-v1

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
8.2K 4.1K 8.0B 31.0 MB 2024-09-15 eng-Latn
Citation
% arXiv preprint (eprint 2409.11136, class cs.IR).
% Names use "Last, First" form: "Van Durme" is a two-word surname, and in
% "First Last" form BibTeX would treat the capitalised "Van" as a given name.
@misc{weller2024promptriever,
  author        = {Weller, Orion and Van Durme, Benjamin and Lawrie, Dawn and Paranjape, Ashwin and Zhang, Yuhao and Hessel, Jack},
  title         = {Promptriever: Instruction-Trained Retrievers Can Be Prompted Like Language Models},
  year          = {2024},
  eprint        = {2409.11136},
  archivePrefix = {arXiv},
  primaryClass  = {cs.IR},
  url           = {https://arxiv.org/abs/2409.11136},
}

samaya-ai/promptriever-llama3.1-8b-v1

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
8.2K 4.1K 8.0B 31.0 MB 2024-09-15 eng-Latn
Citation
% arXiv preprint (eprint 2409.11136, class cs.IR).
% Names use "Last, First" form: "Van Durme" is a two-word surname, and in
% "First Last" form BibTeX would treat the capitalised "Van" as a given name.
@misc{weller2024promptriever,
  author        = {Weller, Orion and Van Durme, Benjamin and Lawrie, Dawn and Paranjape, Ashwin and Zhang, Yuhao and Hessel, Jack},
  title         = {Promptriever: Instruction-Trained Retrievers Can Be Prompted Like Language Models},
  year          = {2024},
  eprint        = {2409.11136},
  archivePrefix = {arXiv},
  primaryClass  = {cs.IR},
  url           = {https://arxiv.org/abs/2409.11136},
}

samaya-ai/promptriever-mistral-v0.1-7b-v1

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
4.1K 4.1K 7.0B 27.0 MB 2024-09-15 eng-Latn
Citation
% arXiv preprint (eprint 2409.11136, class cs.IR).
% Names use "Last, First" form: "Van Durme" is a two-word surname, and in
% "First Last" form BibTeX would treat the capitalised "Van" as a given name.
@misc{weller2024promptriever,
  author        = {Weller, Orion and Van Durme, Benjamin and Lawrie, Dawn and Paranjape, Ashwin and Zhang, Yuhao and Hessel, Jack},
  title         = {Promptriever: Instruction-Trained Retrievers Can Be Prompted Like Language Models},
  year          = {2024},
  eprint        = {2409.11136},
  archivePrefix = {arXiv},
  primaryClass  = {cs.IR},
  url           = {https://arxiv.org/abs/2409.11136},
}

sergeyzh/BERTA

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 128.0M 489.0 MB 2025-03-10 rus-Cyrl

sergeyzh/rubert-mini-frida

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
2.0K 312 32.3M 123.0 MB 2025-03-02 rus-Cyrl

voyageai/voyage-2

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
4.0K 1.0K not specified not specified 2023-10-29 not specified

voyageai/voyage-3

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
32.0K 1.0K not specified not specified 2024-09-18 not specified

voyageai/voyage-3-lite

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
32.0K 512 not specified not specified 2024-09-18 not specified

voyageai/voyage-3-m-exp

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
32.0K 2.0K 6.9B not specified 2025-01-08 eng-Latn

voyageai/voyage-3.5

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
32.0K 1.0K not specified not specified 2025-01-21 not specified

voyageai/voyage-code-2

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
16.0K 1.5K not specified not specified 2024-01-23 not specified

voyageai/voyage-code-3

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
32.0K 1.0K not specified not specified 2024-12-04 not specified

voyageai/voyage-finance-2

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
32.0K 1.0K not specified not specified 2024-05-30 not specified

voyageai/voyage-large-2

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
16.0K 1.5K not specified not specified 2023-10-29 not specified

voyageai/voyage-large-2-instruct

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
16.0K 1.0K not specified not specified 2024-05-05 not specified

voyageai/voyage-law-2

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
16.0K 1.0K not specified not specified 2024-04-15 not specified

voyageai/voyage-multilingual-2

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
32.0K 1.0K not specified not specified 2024-06-10 not specified

yibinlei/LENS-d4000

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
32.8K 4.0K 7.1B 26.5 GB 2025-01-17 not specified

yibinlei/LENS-d8000

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
32.8K 8.0K 7.1B 26.5 GB 2025-01-17 not specified

zeta-alpha-ai/Zeta-Alpha-E5-Mistral

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
32.8K 4.1K 7.1B 13.2 GB 2024-08-30 eng-Latn

Non-instruction Model

AITeamVN/Vietnamese_Embedding

License: cc-by-4.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
8.2K 1.0K 568.0M 2.1 GB 2024-03-17 vie-Latn

Alibaba-NLP/gte-base-en-v1.5

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
8.2K 768 137.0M not specified 2024-06-20 eng-Latn

Alibaba-NLP/gte-modernbert-base

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
8.2K 768 149.0M 284.0 MB 2025-01-21 eng-Latn

Alibaba-NLP/gte-multilingual-base

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
8.2K 768 305.0M 582.0 MB 2024-07-20 afr-Latn, ara-Arab, aze-Latn, bel-Cyrl, ben-Beng, ... (71)

BAAI/bge-en-icl

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
32.8K 4.1K 7.1B 26.5 GB 2024-07-25 eng-Latn

BAAI/bge-m3

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
8.2K 1.0K 568.0M 2.1 GB 2024-06-28 afr-Latn, amh-Ethi, ast-Latn, azj-Latn, azj-Latn, ... (29)

BAAI/bge-m3-unsupervised

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
8.2K 1.0K 568.0M 2.1 GB 2024-01-30 afr-Latn, amh-Ethi, ast-Latn, azj-Latn, azj-Latn, ... (29)

BAAI/bge-multilingual-gemma2

License: https://ai.google.dev/gemma/terms

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
8.2K 3.6K 9.2B 34.4 GB 2024-07-25 eng-Latn, fra-Latn, jpn-Jpan, jpn-Latn, kor-Hang, ... (7)

BAAI/bge-reranker-v2-m3

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
not specified not specified not specified 2.1 GB 2024-06-24 ara-Arab, ben-Beng, dan-Latn, deu-Latn, eng-Latn, ... (32)
Citation
    % arXiv preprint reference (eprint 2312.15503, class cs.CL).
    @misc{li2023making,
        author        = {Chaofan Li and Zheng Liu and Shitao Xiao and Yingxia Shao},
        title         = {Making Large Language Models A Better Foundation For Dense Retrieval},
        year          = {2023},
        eprint        = {2312.15503},
        archivePrefix = {arXiv},
        primaryClass  = {cs.CL},
    }
    % arXiv preprint reference (eprint 2402.03216, class cs.CL).
    @misc{chen2024bge,
        author        = {Jianlv Chen and Shitao Xiao and Peitian Zhang and Kun Luo and Defu Lian and Zheng Liu},
        title         = {BGE M3-Embedding: Multi-Lingual, Multi-Functionality, Multi-Granularity Text Embeddings Through Self-Knowledge Distillation},
        year          = {2024},
        eprint        = {2402.03216},
        archivePrefix = {arXiv},
        primaryClass  = {cs.CL},
    }

ByteDance/ListConRanker

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 1.0K 401.0M 1.2 GB 2024-12-11 zho-Hans

Classical/Yinka

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 1.0K 326.0M 1.2 GB 2024-01-09 zho-Hans

DMetaSoul/Dmeta-embedding-zh-small

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
1.0K 768 74.2M 283.0 MB 2024-03-25 zho-Hans

DMetaSoul/sbert-chinese-general-v1

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 128 not specified not specified 2022-03-25 zho-Hans

DeepPavlov/distilrubert-small-cased-conversational

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 107.0M 408.0 MB 2022-06-28 rus-Cyrl
Citation
% Reference published on arXiv (see the doi, url and publisher fields).
% "Russian" is braced so sentence-casing styles keep its capital letter;
% each keyword is listed once.
@misc{https://doi.org/10.48550/arxiv.2205.02340,
  author    = {Kolesnikova, Alina and Kuratov, Yuri and Konovalov, Vasily and Burtsev, Mikhail},
  title     = {Knowledge Distillation of {Russian} Language Models with Reduction of Vocabulary},
  publisher = {arXiv},
  year      = {2022},
  doi       = {10.48550/ARXIV.2205.02340},
  url       = {https://arxiv.org/abs/2205.02340},
  keywords  = {Computation and Language (cs.CL), Machine Learning (cs.LG), FOS: Computer and information sciences},
  copyright = {arXiv.org perpetual, non-exclusive license}
}

DeepPavlov/rubert-base-cased

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 1.3B 4.8 GB 2020-03-04 rus-Cyrl
Citation
% arXiv preprint reference (eprint 1905.07213, class cs.CL).
@misc{kuratov2019adaptationdeepbidirectionalmultilingual,
  author        = {Yuri Kuratov and Mikhail Arkhipov},
  title         = {Adaptation of Deep Bidirectional Multilingual Transformers for Russian Language},
  year          = {2019},
  eprint        = {1905.07213},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/1905.07213},
}

DeepPavlov/rubert-base-cased-sentence

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 107.0M 408.0 MB 2020-03-04 rus-Cyrl

Gameselo/STS-multilingual-mpnet-base-v2

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
514.0 768 278.0M 1.0 GB 2024-06-07 not specified

GreenNode/GreenNode-Embedding-Large-VN-Mixed-V1

License: cc-by-4.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
8.2K 1.0K 568.0M 2.1 GB 2024-04-11 vie-Latn

GreenNode/GreenNode-Embedding-Large-VN-V1

License: cc-by-4.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
8.2K 1.0K 568.0M 2.1 GB 2024-04-11 vie-Latn

HIT-TMG/KaLM-embedding-multilingual-mini-v1

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 896 494.0M 1.8 GB 2024-08-27 eng-Latn, zho-Hans

Haon-Chen/speed-embedding-7b-instruct

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
32.8K not specified 7.1B 13.2 GB 2024-10-31 eng-Latn

HooshvareLab/bert-base-parsbert-uncased

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 162.8M 621.0 MB 2021-05-19 fas-Arab

Hum-Works/lodestone-base-4096-v1

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
not specified 768 not specified not specified 2023-08-25 eng-Latn

Jaume/gemma-2b-embeddings

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
8.2K 2.0K 2.5B 9.3 GB 2024-06-29 not specified

Lajavaness/bilingual-embedding-base

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
514.0 768 278.0M 1.0 GB 2024-06-26 not specified

Lajavaness/bilingual-embedding-large

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
514.0 1.0K 559.9M 2.1 GB 2024-06-24 eng-Latn, fra-Latn

Lajavaness/bilingual-embedding-small

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 384 117.7M 449.0 MB 2024-07-17 eng-Latn, fra-Latn

MCINext/Hakim

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 124.4M 475.0 MB 2025-05-10 fas-Arab

MCINext/Hakim-small

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 512 38.7M 148.0 MB 2025-05-10 fas-Arab

MCINext/Hakim-unsup

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 124.4M 475.0 MB 2025-05-10 fas-Arab

Mihaiii/Bulbasaur

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 384 17.4M 66.0 MB 2024-04-27 not specified

Mihaiii/Ivysaur

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 384 22.7M 87.0 MB 2024-04-27 not specified

Mihaiii/Squirtle

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 384 15.6M 60.0 MB 2024-04-30 not specified

Mihaiii/Venusaur

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 384 15.6M 60.0 MB 2024-04-29 not specified

Mihaiii/Wartortle

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 384 17.4M 66.0 MB 2024-04-30 not specified

Mihaiii/gte-micro

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 384 17.4M 66.0 MB 2024-04-21 not specified

Mihaiii/gte-micro-v4

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 384 19.2M 73.0 MB 2024-04-22 not specified

NbAiLab/nb-sbert-base

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
75.0 4.1K 1.8B 197.0 MB 2022-11-23 dan-Latn, nno-Latn, nob-Latn, swe-Latn

NeuML/pubmedbert-base-embeddings-100K

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
infP 64 100.0K 0.0 MB 2025-01-03 eng-Latn

NeuML/pubmedbert-base-embeddings-1M

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
infP 64 1.0M 2.0 MB 2025-01-03 eng-Latn

NeuML/pubmedbert-base-embeddings-2M

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
infP 64 1.9M 7.0 MB 2025-01-03 eng-Latn

NeuML/pubmedbert-base-embeddings-500K

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
infP 64 500.0K 2.0 MB 2025-01-03 eng-Latn

NeuML/pubmedbert-base-embeddings-8M

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
infP 256 7.8M 30.0 MB 2025-01-03 eng-Latn

Omartificial-Intelligence-Space/Arabert-all-nli-triplet-Matryoshka

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 135.2M 516.0 MB 2024-06-16 ara-Arab

Omartificial-Intelligence-Space/Arabic-MiniLM-L12-v2-all-nli-triplet

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 384 117.7M 449.0 MB 2024-06-25 ara-Arab

Omartificial-Intelligence-Space/Arabic-Triplet-Matryoshka-V2

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
768.0 768 135.0M 516.0 MB 2024-07-28 ara-Arab

Omartificial-Intelligence-Space/Arabic-all-nli-triplet-Matryoshka

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
514.0 768 278.0M 1.0 GB 2024-06-14 ara-Arab

Omartificial-Intelligence-Space/Arabic-labse-Matryoshka

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 470.9M 1.8 GB 2024-06-16 ara-Arab

Omartificial-Intelligence-Space/Arabic-mpnet-base-all-nli-triplet

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
514.0 768 109.5M 418.0 MB 2024-06-15 ara-Arab

Omartificial-Intelligence-Space/Marbert-all-nli-triplet-Matryoshka

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 162.8M 621.0 MB 2024-06-17 ara-Arab

OpenSearch-AI/Ops-MoA-Conan-embedding-v1

License: cc-by-nc-4.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 1.5K 343.0M 2.0 GB 2025-03-26 zho-Hans

OpenSearch-AI/Ops-MoA-Yuan-embedding-1.0

License: cc-by-nc-4.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 1.5K 343.0M 2.0 GB 2025-03-26 zho-Hans

OrdalieTech/Solon-embeddings-large-0.1

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
514.0 1.0K 559.9M 2.1 GB 2023-12-09 fra-Latn

OrlikB/KartonBERT-USE-base-v1

License: gpl-3.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 103.7M 396.0 MB 2024-09-30 pol-Latn

OrlikB/st-polish-kartonberta-base-alpha-v1

License: lgpl

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
514.0 768 not specified not specified 2023-11-12 pol-Latn

PartAI/Tooka-SBERT

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 1.0K 353.0M 1.3 GB 2024-12-07 fas-Arab

PartAI/Tooka-SBERT-V2-Large

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 1.0K 353.0M 1.3 GB 2025-05-01 fas-Arab

PartAI/Tooka-SBERT-V2-Small

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 122.9M 496.0 MB 2025-05-01 fas-Arab

PartAI/TookaBERT-Base

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 122.9M 469.0 MB 2024-12-08 fas-Arab

Qodo/Qodo-Embed-1-1.5B

License: https://huggingface.co/Qodo/Qodo-Embed-1-1.5B/blob/main/LICENSE

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
32.8K 1.5K 1.8B 6.6 GB 2025-02-19 c#-Code, c++-Code, go-Code, java-Code, javascript-Code, ... (9)

Qodo/Qodo-Embed-1-7B

License: https://huggingface.co/Qodo/Qodo-Embed-1-1.5B/blob/main/LICENSE

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
32.8K 3.6K 7.6B 28.4 GB 2025-02-24 c#-Code, c++-Code, go-Code, java-Code, javascript-Code, ... (9)

Shuu12121/CodeSearch-ModernBERT-Crow-Plus

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
1.0K 768 151.7M 607.0 MB 2025-04-21 eng-Latn

TencentBAC/Conan-embedding-v1

License: cc-by-nc-4.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 326.0M 1.2 GB 2024-08-22 zho-Hans

VoVanPhuc/sup-SimCSE-VietNamese-phobert-base

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
256.0 768 135.0M 517.0 MB 2021-05-26 vie-Latn

aari1995/German_Semantic_STS_V2

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 1.0K 335.7M 1.3 GB 2022-11-17 deu-Latn

abhinand/MedEmbed-small-v0.1

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 384 33.4M 127.0 MB 2024-10-20 eng-Latn

ai-forever/sbert_large_mt_nlu_ru

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 1.0K 427.0M 1.6 GB 2021-05-18 rus-Cyrl

ai-forever/sbert_large_nlu_ru

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 1.0K 427.0M 1.6 GB 2020-11-20 rus-Cyrl

amazon/Titan-text-embeddings-v2

License: https://aws.amazon.com/service-terms/

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
not specified not specified not specified not specified 2024-04-30 eng-Latn

avsolatorio/GIST-Embedding-v0

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 109.5M 418.0 MB 2024-01-31 eng-Latn

avsolatorio/GIST-all-MiniLM-L6-v2

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 384 22.7M 87.0 MB 2024-02-03 eng-Latn

avsolatorio/GIST-large-Embedding-v0

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 1.0K 335.1M 1.2 GB 2024-02-14 eng-Latn

avsolatorio/GIST-small-Embedding-v0

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 384 33.4M 127.0 MB 2024-02-03 eng-Latn

avsolatorio/NoInstruct-small-Embedding-v0

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 384 33.4M 127.0 MB 2024-05-01 eng-Latn

bedrock/amazon-titan-embed-text-v1

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
8.2K 1.5K not specified not specified 2023-09-27 not specified

bedrock/amazon-titan-embed-text-v2

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
8.2K 1.0K not specified not specified 2024-04-30 not specified

bigscience/sgpt-bloom-7b1-msmarco

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
not specified 4.1K not specified not specified 2022-08-26 not specified

bkai-foundation-models/vietnamese-bi-encoder

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
256.0 768 135.0M 515.0 MB 2023-09-09 vie-Latn

bm25s

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
not specified not specified not specified not specified 2024-07-10 eng-Latn

brahmairesearch/slx-v0.1

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 384 22.7M 87.0 MB 2024-08-13 eng-Latn

castorini/monobert-large-msmarco

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
not specified not specified not specified not specified 2020-05-28 eng-Latn

castorini/monot5-3b-msmarco-10k

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
not specified not specified not specified not specified 2022-03-28 eng-Latn
Citation
% arXiv preprint reference (eprint 2206.02873, class cs.IR).
@misc{rosa2022parameterleftbehinddistillation,
  author        = {Guilherme Moraes Rosa and Luiz Bonifacio and Vitor Jeronymo and Hugo Abonizio and Marzieh Fadaee and Roberto Lotufo and Rodrigo Nogueira},
  title         = {No Parameter Left Behind: How Distillation and Model Size Affect Zero-Shot Retrieval},
  year          = {2022},
  eprint        = {2206.02873},
  archivePrefix = {arXiv},
  primaryClass  = {cs.IR},
  url           = {https://arxiv.org/abs/2206.02873},
}

castorini/monot5-base-msmarco-10k

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
not specified not specified not specified not specified 2022-03-28 eng-Latn
Citation
% arXiv preprint reference (eprint 2206.02873, class cs.IR).
@misc{rosa2022parameterleftbehinddistillation,
  author        = {Guilherme Moraes Rosa and Luiz Bonifacio and Vitor Jeronymo and Hugo Abonizio and Marzieh Fadaee and Roberto Lotufo and Rodrigo Nogueira},
  title         = {No Parameter Left Behind: How Distillation and Model Size Affect Zero-Shot Retrieval},
  year          = {2022},
  eprint        = {2206.02873},
  archivePrefix = {arXiv},
  primaryClass  = {cs.IR},
  url           = {https://arxiv.org/abs/2206.02873},
}

castorini/monot5-large-msmarco-10k

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
not specified not specified not specified not specified 2022-03-28 eng-Latn
Citation
% arXiv preprint reference (eprint 2206.02873, class cs.IR).
@misc{rosa2022parameterleftbehinddistillation,
  author        = {Guilherme Moraes Rosa and Luiz Bonifacio and Vitor Jeronymo and Hugo Abonizio and Marzieh Fadaee and Roberto Lotufo and Rodrigo Nogueira},
  title         = {No Parameter Left Behind: How Distillation and Model Size Affect Zero-Shot Retrieval},
  year          = {2022},
  eprint        = {2206.02873},
  archivePrefix = {arXiv},
  primaryClass  = {cs.IR},
  url           = {https://arxiv.org/abs/2206.02873},
}

castorini/monot5-small-msmarco-10k

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
not specified not specified not specified not specified 2022-03-28 eng-Latn
Citation
% arXiv preprint reference (eprint 2206.02873, class cs.IR).
@misc{rosa2022parameterleftbehinddistillation,
  author        = {Guilherme Moraes Rosa and Luiz Bonifacio and Vitor Jeronymo and Hugo Abonizio and Marzieh Fadaee and Roberto Lotufo and Rodrigo Nogueira},
  title         = {No Parameter Left Behind: How Distillation and Model Size Affect Zero-Shot Retrieval},
  year          = {2022},
  eprint        = {2206.02873},
  archivePrefix = {arXiv},
  primaryClass  = {cs.IR},
  url           = {https://arxiv.org/abs/2206.02873},
}

codesage/codesage-base-v2

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
2.0K 1.0K 356.0M 1.3 GB 2024-02-03 go-Code, java-Code, javascript-Code, php-Code, python-Code, ... (6)

codesage/codesage-large-v2

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
2.0K 2.0K 1.3B 4.8 GB 2024-02-03 go-Code, java-Code, javascript-Code, php-Code, python-Code, ... (6)

codesage/codesage-small-v2

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
2.0K 1.0K 130.0M 496.0 MB 2024-02-03 go-Code, java-Code, javascript-Code, php-Code, python-Code, ... (6)

cointegrated/LaBSE-en-ru

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 129.0M 492.0 MB 2021-06-10 rus-Cyrl

cointegrated/rubert-tiny

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 312 11.9M 45.0 MB 2021-05-24 rus-Cyrl

cointegrated/rubert-tiny2

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
2.0K 312 29.4M 112.0 MB 2021-10-28 rus-Cyrl

colbert-ir/colbertv2.0

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
180.0 not specified 110.0M 418.0 MB 2024-09-21 eng-Latn

consciousAI/cai-lunaris-text-embeddings

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 1.0K not specified not specified 2023-06-22 not specified

consciousAI/cai-stellaris-text-embeddings

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
514.0 768 not specified not specified 2023-06-23 not specified

deepfile/embedder-100p

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
514.0 768 not specified 1.0 GB 2023-07-24 not specified

deepvk/USER-bge-m3

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
8.2K 1.0K 359.0M 1.3 GB 2024-07-05 rus-Cyrl

deepvk/deberta-v1-base

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 124.0M 473.0 MB 2023-02-07 rus-Cyrl

dunzhang/stella-large-zh-v3-1792d

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 1.8K not specified not specified 2024-02-17 zho-Hans

dunzhang/stella-mrl-large-zh-v3.5-1792d

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 1.8K 326.0M 1.2 GB 2024-02-27 zho-Hans

dwzhu/e5-base-4k

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
4.1K not specified not specified not specified 2024-03-28 eng-Latn

facebook/SONAR

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 1.0K not specified not specified 2021-05-21 ace-Arab, ace-Latn, acm-Arab, acq-Arab, aeb-Arab, ... (204)

facebook/contriever-msmarco

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 150.0M 572.0 MB 2022-06-25 eng-Latn
Citation
    % Reference located through its url and doi fields (both point at arXiv 2112.09118).
    @misc{izacard2021contriever,
        author = {Gautier Izacard and Mathilde Caron and Lucas Hosseini and Sebastian Riedel and Piotr Bojanowski and Armand Joulin and Edouard Grave},
        title  = {Unsupervised Dense Information Retrieval with Contrastive Learning},
        year   = {2021},
        doi    = {10.48550/ARXIV.2112.09118},
        url    = {https://arxiv.org/abs/2112.09118}
    }

fangxq/XYZ-embedding

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 326.0M 1.2 GB 2024-09-13 zho-Hans

google/flan-t5-base

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
not specified not specified not specified 944.0 MB 2022-10-21 eng-Latn
Citation
@misc{10.48550/arxiv.2210.11416,
      author = {Chung, Hyung Won and Hou, Le and Longpre, Shayne and Zoph, Barret and Tay, Yi and Fedus, William and Li, Eric and Wang, Xuezhi and Dehghani, Mostafa and Brahma, Siddhartha and Webson, Albert and Gu, Shixiang Shane and Dai, Zhuyun and Suzgun, Mirac and Chen, Xinyun and Chowdhery, Aakanksha and Narang, Sharan and Mishra, Gaurav and Yu, Adams and Zhao, Vincent and Huang, Yanping and Dai, Andrew and Yu, Hongkun and Petrov, Slav and Chi, Ed H. and Dean, Jeff and Devlin, Jacob and Roberts, Adam and Zhou, Denny and Le, Quoc V. and Wei, Jason},
      title = {Scaling Instruction-Finetuned Language Models},
      year = {2022},
      publisher = {arXiv},
      doi = {10.48550/ARXIV.2210.11416},
      url = {https://arxiv.org/abs/2210.11416},
      keywords = {Machine Learning (cs.LG), Computation and Language (cs.CL), FOS: Computer and information sciences},
      copyright = {Creative Commons Attribution 4.0 International}
}

google/flan-t5-large

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
not specified not specified not specified 2.9 GB 2022-10-21 eng-Latn
Citation
@misc{10.48550/arxiv.2210.11416,
      author = {Chung, Hyung Won and Hou, Le and Longpre, Shayne and Zoph, Barret and Tay, Yi and Fedus, William and Li, Eric and Wang, Xuezhi and Dehghani, Mostafa and Brahma, Siddhartha and Webson, Albert and Gu, Shixiang Shane and Dai, Zhuyun and Suzgun, Mirac and Chen, Xinyun and Chowdhery, Aakanksha and Narang, Sharan and Mishra, Gaurav and Yu, Adams and Zhao, Vincent and Huang, Yanping and Dai, Andrew and Yu, Hongkun and Petrov, Slav and Chi, Ed H. and Dean, Jeff and Devlin, Jacob and Roberts, Adam and Zhou, Denny and Le, Quoc V. and Wei, Jason},
      title = {Scaling Instruction-Finetuned Language Models},
      year = {2022},
      publisher = {arXiv},
      doi = {10.48550/ARXIV.2210.11416},
      url = {https://arxiv.org/abs/2210.11416},
      keywords = {Machine Learning (cs.LG), Computation and Language (cs.CL), FOS: Computer and information sciences},
      copyright = {Creative Commons Attribution 4.0 International}
}

google/flan-t5-xl

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
not specified not specified not specified 10.6 GB 2022-10-21 eng-Latn
Citation
@misc{10.48550/arxiv.2210.11416,
      author = {Chung, Hyung Won and Hou, Le and Longpre, Shayne and Zoph, Barret and Tay, Yi and Fedus, William and Li, Eric and Wang, Xuezhi and Dehghani, Mostafa and Brahma, Siddhartha and Webson, Albert and Gu, Shixiang Shane and Dai, Zhuyun and Suzgun, Mirac and Chen, Xinyun and Chowdhery, Aakanksha and Narang, Sharan and Mishra, Gaurav and Yu, Adams and Zhao, Vincent and Huang, Yanping and Dai, Andrew and Yu, Hongkun and Petrov, Slav and Chi, Ed H. and Dean, Jeff and Devlin, Jacob and Roberts, Adam and Zhou, Denny and Le, Quoc V. and Wei, Jason},
      title = {Scaling Instruction-Finetuned Language Models},
      year = {2022},
      publisher = {arXiv},
      doi = {10.48550/ARXIV.2210.11416},
      url = {https://arxiv.org/abs/2210.11416},
      keywords = {Machine Learning (cs.LG), Computation and Language (cs.CL), FOS: Computer and information sciences},
      copyright = {Creative Commons Attribution 4.0 International}
}

google/flan-t5-xxl

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
not specified not specified not specified 42.0 GB 2022-10-21 eng-Latn
Citation
@misc{10.48550/arxiv.2210.11416,
      author = {Chung, Hyung Won and Hou, Le and Longpre, Shayne and Zoph, Barret and Tay, Yi and Fedus, William and Li, Eric and Wang, Xuezhi and Dehghani, Mostafa and Brahma, Siddhartha and Webson, Albert and Gu, Shixiang Shane and Dai, Zhuyun and Suzgun, Mirac and Chen, Xinyun and Chowdhery, Aakanksha and Narang, Sharan and Mishra, Gaurav and Yu, Adams and Zhao, Vincent and Huang, Yanping and Dai, Andrew and Yu, Hongkun and Petrov, Slav and Chi, Ed H. and Dean, Jeff and Devlin, Jacob and Roberts, Adam and Zhou, Denny and Le, Quoc V. and Wei, Jason},
      title = {Scaling Instruction-Finetuned Language Models},
      year = {2022},
      publisher = {arXiv},
      doi = {10.48550/ARXIV.2210.11416},
      url = {https://arxiv.org/abs/2210.11416},
      keywords = {Machine Learning (cs.LG), Computation and Language (cs.CL), FOS: Computer and information sciences},
      copyright = {Creative Commons Attribution 4.0 International}
}

hiieu/halong_embedding

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
514.0 768 278.0M 1.0 GB 2024-07-06 vie-Latn

iampanda/zpoint_large_embedding_zh

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 1.8K 326.0M 1.2 GB 2024-06-04 zho-Hans

ibm-granite/granite-embedding-107m-multilingual

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 384 107.0M 204.0 MB 2024-12-18 ara-Latn, ces-Latn, deu-Latn, eng-Latn, fra-Latn, ... (13)

ibm-granite/granite-embedding-125m-english

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 125.0M 238.0 MB 2024-12-18 eng-Latn

ibm-granite/granite-embedding-278m-multilingual

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 278.0M 530.0 MB 2024-12-18 ara-Latn, ces-Latn, deu-Latn, eng-Latn, fra-Latn, ... (13)

ibm-granite/granite-embedding-30m-english

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 384 30.0M 58.0 MB 2024-12-18 eng-Latn

ibm-granite/granite-embedding-english-r2

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
8.2K 768 149.0M 284.0 MB 2025-08-15 eng-Latn

ibm-granite/granite-embedding-small-english-r2

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
8.2K 384 47.0M 91.0 MB 2025-08-15 eng-Latn

infgrad/stella-base-en-v2

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 not specified not specified not specified 2023-10-19 eng-Latn

infgrad/stella-base-zh-v3-1792d

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 1.8K not specified not specified 2024-02-17 zho-Hans

izhx/udever-bloom-1b1

License: https://huggingface.co/spaces/bigscience/license

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
not specified not specified not specified not specified 2023-10-24 aka-Latn, ara-Arab, asm-Beng, bam-Latn, ben-Beng, ... (45)

izhx/udever-bloom-3b

License: https://huggingface.co/spaces/bigscience/license

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
not specified not specified not specified not specified 2023-10-24 aka-Latn, ara-Arab, asm-Beng, bam-Latn, ben-Beng, ... (45)

izhx/udever-bloom-560m

License: https://huggingface.co/spaces/bigscience/license

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
not specified not specified not specified not specified 2023-10-24 aka-Latn, ara-Arab, asm-Beng, bam-Latn, ben-Beng, ... (45)

izhx/udever-bloom-7b1

License: https://huggingface.co/spaces/bigscience/license

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
not specified not specified not specified not specified 2023-10-24 aka-Latn, ara-Arab, asm-Beng, bam-Latn, ben-Beng, ... (45)

jhu-clsp/FollowIR-7B

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
not specified not specified not specified 13.5 GB 2024-04-29 eng-Latn
Citation
    @misc{weller2024followir,
      title={{FollowIR}: Evaluating and Teaching Information Retrieval Models to Follow Instructions},
      author={Orion Weller and Benjamin Chang and Sean MacAvaney and Kyle Lo and Arman Cohan and Van Durme, Benjamin and Dawn Lawrie and Luca Soldaini},
      year={2024},
      eprint={2403.15246},
      archivePrefix={arXiv},
      primaryClass={cs.IR}
    }

jinaai/jina-colbert-v2

License: cc-by-nc-4.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
8.2K not specified 559.0M 1.0 GB 2024-08-16 ara-Arab, ben-Beng, deu-Latn, eng-Latn, fas-Arab, ... (22)

jinaai/jina-embedding-b-en-v1

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 110.0M 420.0 MB 2023-07-07 eng-Latn

jinaai/jina-embedding-s-en-v1

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 512 35.0M 134.0 MB 2023-07-07 eng-Latn

jinaai/jina-embeddings-v2-base-en

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
8.2K 768 137.0M 262.0 MB 2023-09-27 eng-Latn

jinaai/jina-embeddings-v2-small-en

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
8.2K 512 32.7M 62.0 MB 2023-09-27 eng-Latn

jinaai/jina-reranker-v2-base-multilingual

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
not specified not specified not specified 531.0 MB 2024-09-26 eng-Latn

keeeeenw/MicroLlama-text-embedding

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
2.0K 1.0K 272.0M 1.0 GB 2024-11-10 eng-Latn

lier007/xiaobu-embedding

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 1.0K 326.0M 1.2 GB 2024-01-09 zho-Hans

lier007/xiaobu-embedding-v2

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 326.0M 1.2 GB 2024-06-30 zho-Hans

lightonai/GTE-ModernColBERT-v1

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
8.2K not specified 149.0M not specified 2025-04-30 eng-Latn

llmrails/ember-v1

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 1.0K 335.0M 1.2 GB 2023-10-10 eng-Latn

m3hrdadfi/bert-zwnj-wnli-mean-tokens

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 118.3M 451.0 MB 2021-06-28 fas-Arab

m3hrdadfi/roberta-zwnj-wnli-mean-tokens

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
514.0 768 118.3M 451.0 MB 2021-06-28 fas-Arab

malenia1/ternary-weight-embedding

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 1.0K 98.7M 158.0 MB 2024-10-23 not specified

manu/bge-m3-custom-fr

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
8.2K 1.0K 567.8M 2.1 GB 2024-04-11 not specified

manu/sentence_croissant_alpha_v0.2

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
2.0K 2.0K 1.3B 2.4 GB 2024-03-15 not specified

manu/sentence_croissant_alpha_v0.3

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
2.0K 2.0K 1.3B 2.4 GB 2024-04-26 not specified

manu/sentence_croissant_alpha_v0.4

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
2.0K 2.0K 1.3B 2.4 GB 2024-04-27 eng-Latn, fra-Latn

meta-llama/Llama-2-7b-chat-hf

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
not specified not specified not specified not specified 2023-07-18 eng-Latn
Citation
@misc{touvron2023llama2openfoundation,
      title={{Llama} 2: Open Foundation and Fine-Tuned Chat Models},
      author={Hugo Touvron and Louis Martin and Kevin Stone and Peter Albert and Amjad Almahairi and Yasmine Babaei and Nikolay Bashlykov and Soumya Batra and Prajjwal Bhargava and Shruti Bhosale and Dan Bikel and Lukas Blecher and Canton Ferrer, Cristian and Moya Chen and Guillem Cucurull and David Esiobu and Jude Fernandes and Jeremy Fu and Wenyin Fu and Brian Fuller and Cynthia Gao and Vedanuj Goswami and Naman Goyal and Anthony Hartshorn and Saghar Hosseini and Rui Hou and Hakan Inan and Marcin Kardas and Viktor Kerkez and Madian Khabsa and Isabel Kloumann and Artem Korenev and Punit Singh Koura and Marie-Anne Lachaux and Thibaut Lavril and Jenya Lee and Diana Liskovich and Yinghai Lu and Yuning Mao and Xavier Martinet and Todor Mihaylov and Pushkar Mishra and Igor Molybog and Yixin Nie and Andrew Poulton and Jeremy Reizenstein and Rashi Rungta and Kalyan Saladi and Alan Schelten and Ruan Silva and Eric Michael Smith and Ranjan Subramanian and Xiaoqing Ellen Tan and Binh Tang and Ross Taylor and Adina Williams and Jian Xiang Kuan and Puxin Xu and Zheng Yan and Iliyan Zarov and Yuchen Zhang and Angela Fan and Melanie Kambadur and Sharan Narang and Aurelien Rodriguez and Robert Stojnic and Sergey Edunov and Thomas Scialom},
      year={2023},
      eprint={2307.09288},
      archivePrefix={arXiv},
      primaryClass={cs.CL},
      url={https://arxiv.org/abs/2307.09288},
}

meta-llama/Llama-2-7b-hf

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
not specified not specified not specified not specified 2023-07-18 eng-Latn
Citation
@misc{touvron2023llama2openfoundation,
      title={{Llama} 2: Open Foundation and Fine-Tuned Chat Models},
      author={Hugo Touvron and Louis Martin and Kevin Stone and Peter Albert and Amjad Almahairi and Yasmine Babaei and Nikolay Bashlykov and Soumya Batra and Prajjwal Bhargava and Shruti Bhosale and Dan Bikel and Lukas Blecher and Canton Ferrer, Cristian and Moya Chen and Guillem Cucurull and David Esiobu and Jude Fernandes and Jeremy Fu and Wenyin Fu and Brian Fuller and Cynthia Gao and Vedanuj Goswami and Naman Goyal and Anthony Hartshorn and Saghar Hosseini and Rui Hou and Hakan Inan and Marcin Kardas and Viktor Kerkez and Madian Khabsa and Isabel Kloumann and Artem Korenev and Punit Singh Koura and Marie-Anne Lachaux and Thibaut Lavril and Jenya Lee and Diana Liskovich and Yinghai Lu and Yuning Mao and Xavier Martinet and Todor Mihaylov and Pushkar Mishra and Igor Molybog and Yixin Nie and Andrew Poulton and Jeremy Reizenstein and Rashi Rungta and Kalyan Saladi and Alan Schelten and Ruan Silva and Eric Michael Smith and Ranjan Subramanian and Xiaoqing Ellen Tan and Binh Tang and Ross Taylor and Adina Williams and Jian Xiang Kuan and Puxin Xu and Zheng Yan and Iliyan Zarov and Yuchen Zhang and Angela Fan and Melanie Kambadur and Sharan Narang and Aurelien Rodriguez and Robert Stojnic and Sergey Edunov and Thomas Scialom},
      year={2023},
      eprint={2307.09288},
      archivePrefix={arXiv},
      primaryClass={cs.CL},
      url={https://arxiv.org/abs/2307.09288},
}

minishlab/M2V_base_glove

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
Infinite 256 102.0M 391.0 MB 2024-09-21 eng-Latn

minishlab/M2V_base_glove_subword

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
Infinite 256 103.0M 391.0 MB 2024-09-21 eng-Latn

minishlab/M2V_base_output

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
Infinite 256 7.6M 29.0 MB 2024-09-21 eng-Latn

minishlab/M2V_multilingual_output

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
Infinite 256 128.0M 489.0 MB 2024-09-21 eng-Latn

minishlab/potion-base-2M

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
Infinite 64 2.0M 7.0 MB 2024-10-29 eng-Latn

minishlab/potion-base-4M

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
Infinite 128 3.8M 14.0 MB 2024-10-29 eng-Latn

minishlab/potion-base-8M

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
Infinite 256 7.6M 29.0 MB 2024-10-29 eng-Latn

minishlab/potion-multilingual-128M

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
Infinite 256 128.0M 489.0 MB 2025-05-23 afr-Latn, amh-Ethi, ara-Arab, aze-Latn, bel-Cyrl, ... (101)

mistralai/Mistral-7B-Instruct-v0.2

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
not specified not specified not specified not specified 2023-12-11 eng-Latn
Citation
@misc{jiang2023mistral7b,
      title={{Mistral} {7B}},
      author={Albert Q. Jiang and Alexandre Sablayrolles and Arthur Mensch and Chris Bamford and Devendra Singh Chaplot and Diego de las Casas and Florian Bressand and Gianna Lengyel and Guillaume Lample and Lucile Saulnier and Lélio Renard Lavaud and Marie-Anne Lachaux and Pierre Stock and Le Scao, Teven and Thibaut Lavril and Thomas Wang and Timothée Lacroix and El Sayed, William},
      year={2023},
      eprint={2310.06825},
      archivePrefix={arXiv},
      primaryClass={cs.CL},
      url={https://arxiv.org/abs/2310.06825},
}

moka-ai/m3e-base

License: https://huggingface.co/moka-ai/m3e-base#%F0%9F%93%9C-license

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 102.0M 390.0 MB 2023-06-06 eng-Latn, zho-Hans

moka-ai/m3e-large

License: https://huggingface.co/moka-ai/m3e-base#%F0%9F%93%9C-license

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 not specified not specified 2023-06-21 eng-Latn, zho-Hans

moka-ai/m3e-small

License: https://huggingface.co/moka-ai/m3e-base#%F0%9F%93%9C-license

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 512 not specified not specified 2023-06-02 eng-Latn, zho-Hans

myrkur/sentence-transformer-parsbert-fa

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 162.8M 621.0 MB 2024-12-10 fas-Arab

omarelshehy/arabic-english-sts-matryoshka

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
514.0 1.0K 559.9M 2.1 GB 2024-10-13 ara-Arab, eng-Latn

openai/text-embedding-3-large

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
8.2K 3.1K not specified not specified 2024-01-25 not specified

openai/text-embedding-3-large (embed_dim=512)

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
8.2K 512 not specified not specified 2024-01-25 not specified

openai/text-embedding-3-small

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
8.2K 1.5K not specified not specified 2024-01-25 not specified

openai/text-embedding-3-small (embed_dim=512)

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
8.2K 512 not specified not specified 2024-01-25 not specified

openai/text-embedding-ada-002

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
8.2K 1.5K not specified not specified 2022-12-15 not specified

openbmb/MiniCPM-Embedding

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 2.3K 2.7B 5.1 GB 2024-09-04 eng-Latn, zho-Hans

panalexeu/xlm-roberta-ua-distilled

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 278.0M 1.0 GB 2025-04-15 eng-Latn, ukr-Cyrl

prdev/mini-gte

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 66.3M 253.0 MB 2025-01-28 eng-Latn

richinfoai/ritrieve_zh_v1

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 1.8K 326.0M 1.2 GB 2025-03-25 zho-Hans

sbunlp/fabert

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 124.4M 475.0 MB 2024-10-07 fas-Arab

sdadas/mmlw-e5-base

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
514.0 768 278.0M 1.0 GB 2023-11-17 pol-Latn

sdadas/mmlw-e5-large

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
514.0 1.0K 559.9M 2.1 GB 2023-11-17 pol-Latn

sdadas/mmlw-e5-small

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 384 117.7M 449.0 MB 2023-11-17 pol-Latn

sdadas/mmlw-roberta-base

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
514.0 768 124.4M 475.0 MB 2023-11-17 pol-Latn

sdadas/mmlw-roberta-large

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
514.0 1.0K 435.0M 1.6 GB 2023-11-17 pol-Latn

sensenova/piccolo-base-zh

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 not specified not specified 2023-09-04 zho-Hans

sensenova/piccolo-large-zh-v2

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 1.0K not specified not specified 2024-04-22 zho-Hans

sentence-transformers/LaBSE

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 471.0M 1.8 GB 2019-11-01 ara-Arab, bul-Cyrl, cat-Latn, ces-Latn, dan-Latn, ... (53)
Citation
@misc{feng2022languageagnosticbertsentenceembedding,
      title={Language-agnostic {BERT} Sentence Embedding},
      author={Fangxiaoyu Feng and Yinfei Yang and Daniel Cer and Naveen Arivazhagan and Wei Wang},
      year={2022},
      eprint={2007.01852},
      archivePrefix={arXiv},
      primaryClass={cs.CL},
      url={https://arxiv.org/abs/2007.01852},
}

sentence-transformers/all-MiniLM-L12-v2

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
256.0 384 33.4M 127.0 MB 2021-08-30 eng-Latn
Citation
@inproceedings{reimers-2019-sentence-bert,
    author = {Reimers, Nils and Gurevych, Iryna},
    title = {{Sentence-BERT}: Sentence Embeddings using {Siamese} {BERT-Networks}},
    booktitle = {Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing},
    month = nov,
    year = {2019},
    publisher = {Association for Computational Linguistics},
    url = {http://arxiv.org/abs/1908.10084},
}

sentence-transformers/all-MiniLM-L6-v2

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
256.0 384 22.7M 87.0 MB 2021-08-30 eng-Latn
Citation
@inproceedings{reimers-2019-sentence-bert,
    author = {Reimers, Nils and Gurevych, Iryna},
    title = {{Sentence-BERT}: Sentence Embeddings using {Siamese} {BERT-Networks}},
    booktitle = {Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing},
    month = nov,
    year = {2019},
    publisher = {Association for Computational Linguistics},
    url = {http://arxiv.org/abs/1908.10084},
}

sentence-transformers/all-mpnet-base-v2

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
384.0 768 109.0M 418.0 MB 2021-08-30 eng-Latn
Citation
@inproceedings{reimers-2019-sentence-bert,
    author = {Reimers, Nils and Gurevych, Iryna},
    title = {{Sentence-BERT}: Sentence Embeddings using {Siamese} {BERT-Networks}},
    booktitle = {Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing},
    month = nov,
    year = {2019},
    publisher = {Association for Computational Linguistics},
    url = {http://arxiv.org/abs/1908.10084},
}

sentence-transformers/gtr-t5-base

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 110.0M 209.0 MB 2022-02-09 eng-Latn

sentence-transformers/gtr-t5-large

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 335.0M 639.0 MB 2022-02-09 eng-Latn

sentence-transformers/gtr-t5-xl

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 1.2B 2.3 GB 2022-02-09 eng-Latn

sentence-transformers/gtr-t5-xxl

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 4.9B 9.1 GB 2022-02-09 eng-Latn

sentence-transformers/multi-qa-MiniLM-L6-cos-v1

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 384 22.7M 87.0 MB 2021-08-30 eng-Latn
Citation
@inproceedings{reimers-2019-sentence-bert,
    author = {Reimers, Nils and Gurevych, Iryna},
    title = {{Sentence-BERT}: Sentence Embeddings using {Siamese} {BERT-Networks}},
    booktitle = {Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing},
    month = nov,
    year = {2019},
    publisher = {Association for Computational Linguistics},
    url = {http://arxiv.org/abs/1908.10084},
}

sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 118.0M 449.0 MB 2019-11-01 ara-Arab, bul-Cyrl, cat-Latn, ces-Latn, dan-Latn, ... (53)
Citation
@inproceedings{reimers-2019-sentence-bert,
    author = {Reimers, Nils and Gurevych, Iryna},
    title = {{Sentence-BERT}: Sentence Embeddings using {Siamese} {BERT-Networks}},
    booktitle = {Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing},
    month = nov,
    year = {2019},
    publisher = {Association for Computational Linguistics},
    url = {http://arxiv.org/abs/1908.10084},
}

sentence-transformers/paraphrase-multilingual-mpnet-base-v2

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 278.0M 1.0 GB 2019-11-01 ara-Arab, bul-Cyrl, cat-Latn, ces-Latn, dan-Latn, ... (53)
Citation
@inproceedings{reimers-2019-sentence-bert,
    author = {Reimers, Nils and Gurevych, Iryna},
    title = {{Sentence-BERT}: Sentence Embeddings using {Siamese} {BERT-Networks}},
    booktitle = {Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing},
    month = nov,
    year = {2019},
    publisher = {Association for Computational Linguistics},
    url = {http://arxiv.org/abs/1908.10084},
}

sentence-transformers/sentence-t5-base

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 110.0M 209.0 MB 2022-02-09 eng-Latn

sentence-transformers/sentence-t5-large

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 335.0M 639.0 MB 2022-02-09 eng-Latn

sentence-transformers/sentence-t5-xl

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 3.0B 2.3 GB 2024-03-27 eng-Latn

sentence-transformers/sentence-t5-xxl

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 11.0B 9.1 GB 2024-03-27 eng-Latn

sentence-transformers/static-similarity-mrl-multilingual-v1

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
not specified 1.0K 108.4M 413.0 MB 2025-01-15 ara-Arab, bul-Cyrl, cat-Latn, ces-Latn, dan-Latn, ... (49)

sergeyzh/LaBSE-ru-turbo

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 129.0M 490.0 MB 2024-06-27 rus-Cyrl

sergeyzh/rubert-tiny-turbo

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
2.0K 312 29.2M 111.0 MB 2024-06-21 rus-Cyrl

shibing624/text2vec-base-chinese

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 102.0M 390.0 MB 2022-01-23 zho-Hans

shibing624/text2vec-base-chinese-paraphrase

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 118.0M 450.0 MB 2023-06-19 zho-Hans

shibing624/text2vec-base-multilingual

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 384 117.7M 449.0 MB 2023-06-22 deu-Latn, eng-Latn, fra-Latn, ita-Latn, nld-Latn, ... (9)

silma-ai/silma-embeddding-matryoshka-v0.1

License: apache-2.0

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 135.2M 516.0 MB 2024-10-12 ara-Arab, eng-Latn

thenlper/gte-base

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 768 109.5M 209.0 MB 2023-07-27 eng-Latn

thenlper/gte-base-zh

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 1.0K 102.0M 195.0 MB 2023-11-08 zho-Hans

thenlper/gte-large

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 1.0K 335.1M 639.0 MB 2023-07-27 eng-Latn

thenlper/gte-large-zh

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 1.0K 326.0M 621.0 MB 2023-11-08 zho-Hans

thenlper/gte-small

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 384 33.4M 64.0 MB 2023-07-27 eng-Latn

thenlper/gte-small-zh

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
512.0 1.0K 30.3M 58.0 MB 2023-11-08 zho-Hans

unicamp-dl/mt5-13b-mmarco-100k

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
not specified not specified not specified not specified 2022-11-04 afr-Latn, amh-Ethi, ara-Arab, aze-Latn, bel-Cyrl, ... (101)

unicamp-dl/mt5-base-mmarco-v2

License: not specified

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
not specified not specified not specified not specified 2022-01-05 afr-Latn, amh-Ethi, ara-Arab, aze-Latn, bel-Cyrl, ... (101)
Citation
@misc{bonifacio2021mmarco,
      title={{mMARCO}: A Multilingual Version of {MS MARCO} Passage Ranking Dataset},
      author={Luiz Henrique Bonifacio and Vitor Jeronymo and Hugo Queiroz Abonizio and Israel Campiotti and Marzieh Fadaee and Roberto Lotufo and Rodrigo Nogueira},
      year={2021},
      eprint={2108.13897},
      archivePrefix={arXiv},
      primaryClass={cs.CL}
}

w601sxs/b1ade-embed

License: mit

Max Tokens Embedding dimension Parameters Required Memory (Mb) Release date Languages
4.1K 1.0K 335.0M 1.2 GB 2025-03-10 eng-Latn