Skip to content

Multimodal Model

2 Models

Instruction Model

Qwen/Qwen3-VL-Embedding-2B

License: apache-2.0 • Learn more →

Parameters Emb. Dim Max Tokens Memory Released Languages
2.1B 2048 32.8K 7.5 GB 2026-01-08 eng-Latn
Citation
% Qwen3-VL-Embedding / Qwen3-VL-Reranker paper (arXiv preprint 2601.04720).
% Model names in the title are brace-protected so sentence-casing styles keep their capitalisation.
@article{qwen3vlembedding,
  title   = {{Qwen3-VL-Embedding} and {Qwen3-VL-Reranker}: A Unified Framework for State-of-the-Art Multimodal Retrieval and Ranking},
  author  = {Li, Mingxin and Zhang, Yanzhao and Long, Dingkun and Chen, Keqin and Song, Sibo and Bai, Shuai and Yang, Zhibo and Xie, Pengjun and Yang, An and Liu, Dayiheng and Zhou, Jingren and Lin, Junyang},
  journal = {arXiv preprint arXiv:2601.04720},
  year    = {2026}
}

Qwen/Qwen3-VL-Embedding-8B

License: apache-2.0 • Learn more →

Parameters Emb. Dim Max Tokens Memory Released Languages
8.1B 4096 32.8K 29.8 GB 2026-01-08 eng-Latn
Citation
% Qwen3-VL-Embedding / Qwen3-VL-Reranker paper (arXiv preprint 2601.04720).
% Model names in the title are brace-protected so sentence-casing styles keep their capitalisation.
@article{qwen3vlembedding,
  title   = {{Qwen3-VL-Embedding} and {Qwen3-VL-Reranker}: A Unified Framework for State-of-the-Art Multimodal Retrieval and Ranking},
  author  = {Li, Mingxin and Zhang, Yanzhao and Long, Dingkun and Chen, Keqin and Song, Sibo and Bai, Shuai and Yang, Zhibo and Xie, Pengjun and Yang, An and Liu, Dayiheng and Zhou, Jingren and Lin, Junyang},
  journal = {arXiv preprint arXiv:2601.04720},
  year    = {2026}
}

zhibinlan/UME-R1-2B

License: apache-2.0 • Learn more →

Parameters Emb. Dim Max Tokens Memory Released Languages
2.2B 1536 32.8K 8.2 GB 2025-11-10 eng-Latn
Citation
% UME-R1 paper (arXiv preprint 2511.00405).
% The model acronym in the title is brace-protected so sentence-casing styles do not lowercase it.
@article{lan2025ume,
  title   = {{UME-R1}: Exploring Reasoning-Driven Generative Multimodal Embeddings},
  author  = {Lan, Zhibin and Niu, Liqiang and Meng, Fandong and Zhou, Jie and Su, Jinsong},
  journal = {arXiv preprint arXiv:2511.00405},
  year    = {2025}
}

zhibinlan/UME-R1-7B

License: apache-2.0 • Learn more →

Parameters Emb. Dim Max Tokens Memory Released Languages
8.3B 3584 32.8K 30.9 GB 2025-11-10 eng-Latn
Citation
% UME-R1 paper (arXiv preprint 2511.00405).
% The model acronym in the title is brace-protected so sentence-casing styles do not lowercase it.
@article{lan2025ume,
  title   = {{UME-R1}: Exploring Reasoning-Driven Generative Multimodal Embeddings},
  author  = {Lan, Zhibin and Niu, Liqiang and Meng, Fandong and Zhou, Jie and Su, Jinsong},
  journal = {arXiv preprint arXiv:2511.00405},
  year    = {2025}
}

Non-instruction Model

VLM2Vec/VLM2Vec-V2.0

License: apache-2.0 • Learn more →

Parameters Emb. Dim Max Tokens Memory Released Languages
2.2B 1536 32.8K 4.1 GB 2025-04-30 eng-Latn
Citation
% VLM2Vec-V2 paper (arXiv 2507.04590, primary class cs.CV).
% Authors are given in the unambiguous "Last, First" form; the model name is brace-protected in the title.
@misc{meng2025vlm2vecv2advancingmultimodalembedding,
    title         = {{VLM2Vec-V2}: Advancing Multimodal Embedding for Videos, Images, and Visual Documents},
    author        = {Meng, Rui and Jiang, Ziyan and Liu, Ye and Su, Mingyi and Yang, Xinyi and Fu, Yuepeng and Qin, Can and Chen, Zeyuan and Xu, Ran and Xiong, Caiming and Zhou, Yingbo and Chen, Wenhu and Yavuz, Semih},
    year          = {2025},
    eprint        = {2507.04590},
    archivePrefix = {arXiv},
    primaryClass  = {cs.CV},
    url           = {https://arxiv.org/abs/2507.04590},
}

encord-team/ebind-points-vision

License: cc-by-nc-sa-4.0 • Learn more →

Parameters Emb. Dim Max Tokens Memory Released Languages
1.7B 1024 512 6.3 GB 2025-11-19 eng-Latn
Citation
% EBind paper (arXiv 2511.14229, primary class cs.LG).
% The non-ASCII letter in the third author's surname is written as the BibTeX special character {\o},
% so the entry sorts and renders correctly under classic 8-bit BibTeX as well as Biber.
@misc{broadbent2025ebindpracticalapproachspace,
      title         = {{EBind}: a practical approach to space binding},
      author        = {Broadbent, Jim and Cohen, Felix and Hvilsh{\o}j, Frederik and Landau, Eric and Sasoglu, Eren},
      year          = {2025},
      eprint        = {2511.14229},
      archivePrefix = {arXiv},
      primaryClass  = {cs.LG},
      url           = {https://arxiv.org/abs/2511.14229},
}