Image Model¶
21 Models
Non-instruction Model¶
facebook/dinov2-base¶
License: apache-2.0 • Learn more →
| Parameters | Emb. Dim | Max Tokens | Memory | Released | Languages |
|---|---|---|---|---|---|
| 86.6M | 768 | not specified | 330.0 MB | 2023-07-18 | eng-Latn |
Citation
@misc{oquab2023dinov2,
  title         = {{DINOv2}: Learning Robust Visual Features without Supervision},
  author        = {Maxime Oquab and Timothée Darcet and Théo Moutakanni and Huy Vo and Marc Szafraniec and Vasil Khalidov and Pierre Fernandez and Daniel Haziza and Francisco Massa and Alaaeldin El-Nouby and Mahmoud Assran and Nicolas Ballas and Wojciech Galuba and Russell Howes and Po-Yao Huang and Shang-Wen Li and Ishan Misra and Michael Rabbat and Vasu Sharma and Gabriel Synnaeve and Hu Xu and Hervé Jegou and Julien Mairal and Patrick Labatut and Armand Joulin and Piotr Bojanowski},
  year          = {2023},
  eprint        = {2304.07193},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV}
}
facebook/dinov2-giant¶
License: apache-2.0 • Learn more →
| Parameters | Emb. Dim | Max Tokens | Memory | Released | Languages |
|---|---|---|---|---|---|
| 1.1B | 1536 | not specified | 4.2 GB | 2023-07-18 | eng-Latn |
Citation
@misc{oquab2023dinov2,
  title         = {{DINOv2}: Learning Robust Visual Features without Supervision},
  author        = {Maxime Oquab and Timothée Darcet and Théo Moutakanni and Huy Vo and Marc Szafraniec and Vasil Khalidov and Pierre Fernandez and Daniel Haziza and Francisco Massa and Alaaeldin El-Nouby and Mahmoud Assran and Nicolas Ballas and Wojciech Galuba and Russell Howes and Po-Yao Huang and Shang-Wen Li and Ishan Misra and Michael Rabbat and Vasu Sharma and Gabriel Synnaeve and Hu Xu and Hervé Jegou and Julien Mairal and Patrick Labatut and Armand Joulin and Piotr Bojanowski},
  year          = {2023},
  eprint        = {2304.07193},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV}
}
facebook/dinov2-large¶
License: apache-2.0 • Learn more →
| Parameters | Emb. Dim | Max Tokens | Memory | Released | Languages |
|---|---|---|---|---|---|
| 304.4M | 1024 | not specified | 1.1 GB | 2023-07-18 | eng-Latn |
Citation
@misc{oquab2023dinov2,
  title         = {{DINOv2}: Learning Robust Visual Features without Supervision},
  author        = {Maxime Oquab and Timothée Darcet and Théo Moutakanni and Huy Vo and Marc Szafraniec and Vasil Khalidov and Pierre Fernandez and Daniel Haziza and Francisco Massa and Alaaeldin El-Nouby and Mahmoud Assran and Nicolas Ballas and Wojciech Galuba and Russell Howes and Po-Yao Huang and Shang-Wen Li and Ishan Misra and Michael Rabbat and Vasu Sharma and Gabriel Synnaeve and Hu Xu and Hervé Jegou and Julien Mairal and Patrick Labatut and Armand Joulin and Piotr Bojanowski},
  year          = {2023},
  eprint        = {2304.07193},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV}
}
facebook/dinov2-small¶
License: apache-2.0 • Learn more →
| Parameters | Emb. Dim | Max Tokens | Memory | Released | Languages |
|---|---|---|---|---|---|
| 22.1M | 384 | not specified | 84.0 MB | 2023-07-18 | eng-Latn |
Citation
@misc{oquab2023dinov2,
  title         = {{DINOv2}: Learning Robust Visual Features without Supervision},
  author        = {Maxime Oquab and Timothée Darcet and Théo Moutakanni and Huy Vo and Marc Szafraniec and Vasil Khalidov and Pierre Fernandez and Daniel Haziza and Francisco Massa and Alaaeldin El-Nouby and Mahmoud Assran and Nicolas Ballas and Wojciech Galuba and Russell Howes and Po-Yao Huang and Shang-Wen Li and Ishan Misra and Michael Rabbat and Vasu Sharma and Gabriel Synnaeve and Hu Xu and Hervé Jegou and Julien Mairal and Patrick Labatut and Armand Joulin and Piotr Bojanowski},
  year          = {2023},
  eprint        = {2304.07193},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV}
}
facebook/webssl-dino1b-full2b-224¶
License: cc-by-nc-4.0 • Learn more →
| Parameters | Emb. Dim | Max Tokens | Memory | Released | Languages |
|---|---|---|---|---|---|
| 1.1B | 1536 | not specified | 4.2 GB | 2025-04-24 | eng-Latn |
Citation
@misc{fan2025scaling,
  title         = {Scaling Language-Free Visual Representation Learning},
  author        = {David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
  year          = {2025},
  eprint        = {2504.01017},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV}
}
facebook/webssl-dino2b-full2b-224¶
License: cc-by-nc-4.0 • Learn more →
| Parameters | Emb. Dim | Max Tokens | Memory | Released | Languages |
|---|---|---|---|---|---|
| 2.1B | 2688 | not specified | 7.8 GB | 2025-04-24 | eng-Latn |
Citation
@misc{fan2025scaling,
  title         = {Scaling Language-Free Visual Representation Learning},
  author        = {David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
  year          = {2025},
  eprint        = {2504.01017},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV}
}
facebook/webssl-dino2b-heavy2b-224¶
License: cc-by-nc-4.0 • Learn more →
| Parameters | Emb. Dim | Max Tokens | Memory | Released | Languages |
|---|---|---|---|---|---|
| 2.1B | 2688 | not specified | 7.8 GB | 2025-04-24 | eng-Latn |
Citation
@misc{fan2025scaling,
  title         = {Scaling Language-Free Visual Representation Learning},
  author        = {David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
  year          = {2025},
  eprint        = {2504.01017},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV}
}
facebook/webssl-dino2b-light2b-224¶
License: cc-by-nc-4.0 • Learn more →
| Parameters | Emb. Dim | Max Tokens | Memory | Released | Languages |
|---|---|---|---|---|---|
| 2.1B | 2688 | not specified | 7.8 GB | 2025-04-24 | eng-Latn |
Citation
@misc{fan2025scaling,
  title         = {Scaling Language-Free Visual Representation Learning},
  author        = {David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
  year          = {2025},
  eprint        = {2504.01017},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV}
}
facebook/webssl-dino300m-full2b-224¶
License: cc-by-nc-4.0 • Learn more →
| Parameters | Emb. Dim | Max Tokens | Memory | Released | Languages |
|---|---|---|---|---|---|
| 303.7M | 1024 | not specified | 1.1 GB | 2025-04-24 | eng-Latn |
Citation
@misc{fan2025scaling,
  title         = {Scaling Language-Free Visual Representation Learning},
  author        = {David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
  year          = {2025},
  eprint        = {2504.01017},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV}
}
facebook/webssl-dino3b-full2b-224¶
License: cc-by-nc-4.0 • Learn more →
| Parameters | Emb. Dim | Max Tokens | Memory | Released | Languages |
|---|---|---|---|---|---|
| 2.9B | 3072 | not specified | 11.0 GB | 2025-04-24 | eng-Latn |
Citation
@misc{fan2025scaling,
  title         = {Scaling Language-Free Visual Representation Learning},
  author        = {David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
  year          = {2025},
  eprint        = {2504.01017},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV}
}
facebook/webssl-dino3b-heavy2b-224¶
License: cc-by-nc-4.0 • Learn more →
| Parameters | Emb. Dim | Max Tokens | Memory | Released | Languages |
|---|---|---|---|---|---|
| 2.9B | 3072 | not specified | 11.0 GB | 2025-04-24 | eng-Latn |
Citation
@misc{fan2025scaling,
  title         = {Scaling Language-Free Visual Representation Learning},
  author        = {David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
  year          = {2025},
  eprint        = {2504.01017},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV}
}
facebook/webssl-dino3b-light2b-224¶
License: cc-by-nc-4.0 • Learn more →
| Parameters | Emb. Dim | Max Tokens | Memory | Released | Languages |
|---|---|---|---|---|---|
| 2.9B | 3072 | not specified | 11.0 GB | 2025-04-24 | eng-Latn |
Citation
@misc{fan2025scaling,
  title         = {Scaling Language-Free Visual Representation Learning},
  author        = {David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
  year          = {2025},
  eprint        = {2504.01017},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV}
}
facebook/webssl-dino5b-full2b-224¶
License: cc-by-nc-4.0 • Learn more →
| Parameters | Emb. Dim | Max Tokens | Memory | Released | Languages |
|---|---|---|---|---|---|
| 4.9B | 3584 | not specified | 18.4 GB | 2025-04-24 | eng-Latn |
Citation
@misc{fan2025scaling,
  title         = {Scaling Language-Free Visual Representation Learning},
  author        = {David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
  year          = {2025},
  eprint        = {2504.01017},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV}
}
facebook/webssl-dino7b-full8b-224¶
License: cc-by-nc-4.0 • Learn more →
| Parameters | Emb. Dim | Max Tokens | Memory | Released | Languages |
|---|---|---|---|---|---|
| 6.5B | 4096 | not specified | 24.0 GB | 2025-04-24 | eng-Latn |
Citation
@misc{fan2025scaling,
  title         = {Scaling Language-Free Visual Representation Learning},
  author        = {David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
  year          = {2025},
  eprint        = {2504.01017},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV}
}
facebook/webssl-dino7b-full8b-378¶
License: cc-by-nc-4.0 • Learn more →
| Parameters | Emb. Dim | Max Tokens | Memory | Released | Languages |
|---|---|---|---|---|---|
| 6.5B | 4096 | not specified | 24.0 GB | 2025-04-24 | eng-Latn |
Citation
@misc{fan2025scaling,
  title         = {Scaling Language-Free Visual Representation Learning},
  author        = {David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
  year          = {2025},
  eprint        = {2504.01017},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV}
}
facebook/webssl-dino7b-full8b-518¶
License: cc-by-nc-4.0 • Learn more →
| Parameters | Emb. Dim | Max Tokens | Memory | Released | Languages |
|---|---|---|---|---|---|
| 6.5B | 4096 | not specified | 24.0 GB | 2025-04-24 | eng-Latn |
Citation
@misc{fan2025scaling,
  title         = {Scaling Language-Free Visual Representation Learning},
  author        = {David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
  year          = {2025},
  eprint        = {2504.01017},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV}
}
facebook/webssl-mae1b-full2b-224¶
License: cc-by-nc-4.0 • Learn more →
| Parameters | Emb. Dim | Max Tokens | Memory | Released | Languages |
|---|---|---|---|---|---|
| 1.1B | 1536 | not specified | 4.2 GB | 2025-04-24 | eng-Latn |
Citation
@misc{fan2025scaling,
  title         = {Scaling Language-Free Visual Representation Learning},
  author        = {David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
  year          = {2025},
  eprint        = {2504.01017},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV}
}
facebook/webssl-mae300m-full2b-224¶
License: cc-by-nc-4.0 • Learn more →
| Parameters | Emb. Dim | Max Tokens | Memory | Released | Languages |
|---|---|---|---|---|---|
| 304.4M | 1024 | not specified | 1.1 GB | 2025-04-24 | eng-Latn |
Citation
@misc{fan2025scaling,
  title         = {Scaling Language-Free Visual Representation Learning},
  author        = {David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
  year          = {2025},
  eprint        = {2504.01017},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV}
}
facebook/webssl-mae700m-full2b-224¶
License: cc-by-nc-4.0 • Learn more →
| Parameters | Emb. Dim | Max Tokens | Memory | Released | Languages |
|---|---|---|---|---|---|
| 632.4M | 1280 | not specified | 2.4 GB | 2025-04-24 | eng-Latn |
Citation
@misc{fan2025scaling,
  title         = {Scaling Language-Free Visual Representation Learning},
  author        = {David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
  year          = {2025},
  eprint        = {2504.01017},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV}
}
nyu-visionx/moco-v3-vit-b¶
License: cc-by-nc-4.0 • Learn more →
| Parameters | Emb. Dim | Max Tokens | Memory | Released | Languages |
|---|---|---|---|---|---|
| 86.6M | 768 | not specified | 330.0 MB | 2024-06-03 | eng-Latn |
Citation
@misc{chen2021mocov3,
  title         = {An Empirical Study of Training Self-Supervised Vision Transformers},
  author        = {Xinlei Chen and Saining Xie and Kaiming He},
  year          = {2021},
  eprint        = {2104.02057},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV}
}
nyu-visionx/moco-v3-vit-l¶
License: cc-by-nc-4.0 • Learn more →
| Parameters | Emb. Dim | Max Tokens | Memory | Released | Languages |
|---|---|---|---|---|---|
| 304.0M | 1024 | not specified | 1.1 GB | 2024-06-03 | eng-Latn |
Citation
@misc{chen2021mocov3,
  title         = {An Empirical Study of Training Self-Supervised Vision Transformers},
  author        = {Xinlei Chen and Saining Xie and Kaiming He},
  year          = {2021},
  eprint        = {2104.02057},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV}
}