% Conference paper: BirdSAT, in the WACV 2024 proceedings (pp. 7121--7130).
@inproceedings{7f8ba5f6920d4004a88d367c311ee534,
  author    = {Sastry, Srikumar and Khanal, Subash and Dhakal, Aayush and Huang, Di and Jacobs, Nathan},
  title     = {{BirdSAT}: Cross-View Contrastive Masked Autoencoders for Bird Species Classification and Mapping},
  booktitle = {Proceedings - 2024 {IEEE} Winter Conference on Applications of Computer Vision, {WACV} 2024},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  year      = {2024},
  month     = jan,
  day       = {3},
  pages     = {7121--7130},
  doi       = {10.1109/WACV57701.2024.00698},
  language  = {English},
  keywords  = {Algorithms, Animals / Insects, Applications, Image recognition and understanding, Remote Sensing},
  note      = {Publisher Copyright: {\textcopyright} 2024 IEEE.; 2024 IEEE Winter Conference on Applications of Computer Vision, WACV 2024 ; Conference date: 04-01-2024 Through 08-01-2024},
  abstract  = {We propose a metadata-aware self-supervised learning (SSL) framework useful for fine-grained classification and ecological mapping of bird species around the world. Our framework unifies two SSL strategies: Contrastive Learning (CL) and Masked Image Modeling (MIM), while also enriching the embedding space with metadata available with ground-level imagery of birds. We separately train uni-modal and cross-modal ViT on a novel cross-view global bird species dataset containing ground-level imagery, metadata (location, time), and corresponding satellite imagery. We demonstrate that our models learn fine-grained and geographically conditioned features of birds, by evaluating on two downstream tasks: fine-grained visual classification (FGVC) and cross-modal retrieval. Pre-trained models learned using our framework achieve SotA performance on FGVC of iNAT-2021 birds and in transfer learning settings for CUB-200-2011 and NABirds datasets. Moreover, the impressive cross-modal retrieval performance of our model enables the creation of species distribution maps across any geographic region. The dataset and source code will be released at https://github.com/mvrl/BirdSAT.},
}