@article{bb250700,
  author    = {Li, F.L. and Wang, Z.Q. and Wang, T.S. and Zhu, L. and Chang, X.J.},
  title     = {Generative Augmentation Hashing for Few-Shot Cross-Modal Retrieval},
  journal   = CirSysVideo,
  year      = {2025},
  volume    = {35},
  number    = {12},
  month     = {December},
  pages     = {12861-12873},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245535},
}
@article{bb250701,
  author    = {Gu, L.C. and Shen, X.J. and Sun, J. and Liu, Y. and Li, J. and Li, Z.H. and Cheung, S.C.S. and Wan, W.B.},
  title     = {Dual Prototypes-Based Personalized Federated Adversarial Cross-Modal Hashing},
  journal   = CirSysVideo,
  year      = {2025},
  volume    = {35},
  number    = {12},
  month     = {December},
  pages     = {12846-12860},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245536},
}
@article{bb250702,
  author    = {Ma, Q. and Jiang, Y. and Bai, C.},
  title     = {Boundary mutual information hashing for cross-modal retrieval},
  journal   = PR,
  year      = {2026},
  volume    = {172},
  pages     = {112658},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245537},
}
@article{bb250703,
  author    = {Wei, Y.H. and An, J.F.},
  title     = {Flexible Dual Multi-Modal Hashing for Incomplete Multi-Modal Retrieval},
  journal   = IJIG,
  year      = {2026},
  volume    = {26},
  number    = {3},
  month     = {May},
  pages     = {2650021},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245538},
}
@article{bb250704,
  author    = {Debnath, A. and Rao, K.S. and Das, P.P.},
  title     = {Multi-Similarity Checking-Based Spoken Content Video Retrieval Using Enhanced Mayfly Optimization-Based Weighted Feature Selection},
  journal   = IJIG,
  year      = {2026},
  volume    = {26},
  number    = {3},
  month     = {May},
  pages     = {2650017},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245539},
}
@article{bb250705,
  author    = {Tang, L. and Wang, L. and Wang, G. and Sun, M.Y. and Yan, F.},
  title     = {Feature Fusion Mamba Hashing via Decoupling for Cross-Modal Retrieval},
  journal   = SPLetters,
  year      = {2026},
  volume    = {33},
  pages     = {609-613},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245540},
}
@article{bb250706,
  author    = {Tan, W.T. and Li, F.L. and Zhu, L. and Guan, W. and Li, J.J. and Cheng, Z.Y. and Shen, H.T.},
  title     = {Dynamic Bit-Wise Semantic Transformer Hashing for Multi-Modal Retrieval},
  journal   = PAMI,
  year      = {2026},
  volume    = {48},
  number    = {3},
  month     = {March},
  pages     = {2954-2969},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245541},
}
@article{bb250707,
  author    = {Li, H.F. and Zhao, J. and Zhang, Y.F. and Wen, J.},
  title     = {Bidirectional Cross-Modal Collaborative Alignment via Semantic-Guided Visual Embeddings for Partially Relevant Video Retrieval},
  journal   = IP,
  year      = {2026},
  volume    = {35},
  pages     = {1423-1435},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245542},
}
@article{bb250708,
  author    = {Jin, M. and Hong, R.C.},
  title     = {MDA-MAA: A Collaborative Augmentation Approach for Generalizing Cross-Domain Retrieval},
  journal   = IP,
  year      = {2026},
  volume    = {35},
  pages     = {1595-1606},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245543},
}
@article{bb250709,
  author    = {Ibrahim, M.I.M.A. and Niu, D. and Cai, T. and Li, L. and Ahmad, B.},
  title     = {DSAC-Hash: Distribution-Similarity-Aware Cross-modal Hashing},
  journal   = IVC,
  year      = {2026},
  volume    = {168},
  pages     = {105926},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245544},
}
@article{bb250710,
  author    = {Shu, X. and Guo, Y.K. and Ren, S.G.},
  title     = {Adversarial supervised contrastive feature learning for cross-modal retrieval},
  journal   = PR,
  year      = {2026},
  volume    = {176},
  pages     = {113256},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245545},
}
@article{bb250711,
  author    = {Shu, Z.Q. and Zhang, J. and Yu, Z.T.},
  title     = {Adaptive centroid guided hashing for cross-modal retrieval},
  journal   = PR,
  year      = {2026},
  volume    = {176},
  pages     = {113186},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245546},
}
@inproceedings{bb250712,
  author    = {Gizdov, A. and Ullman, S. and Harari, D.},
  title     = {Seeing more with less: human-like representations in vision models},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {4408-4417},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245547},
}
@inproceedings{bb250713,
  author    = {Liu, Y.K. and Zhang, Y.J. and Cai, J.Y. and Jiang, X.L. and Hu, Y. and Yao, J.C. and Wang, Y.F. and Xie, W.},
  title     = {LamRA: Large Multimodal Model as Your Advanced Retrieval Assistant},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {4015-4025},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245548},
}
@inproceedings{bb250714,
  author    = {Chen, W. and Li, L. and Yang, Y.Q. and Wen, B. and Yang, F. and Gao, T.T. and Wu, Y. and Chen, L.},
  title     = {CoMM: A Coherent Interleaved Image-Text Dataset for Multimodal Understanding and Generation},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {8073-8082},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245549},
}
@inproceedings{bb250715,
  author    = {Kim, S. and Zhu, X.L. and Lin, X.F. and Bastan, M. and Gray, D. and Kwak, S.},
  title     = {GENIUS: A Generative Framework for Universal Multimodal Search},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {19659-19669},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245550},
}
@inproceedings{bb250716,
  author    = {Duan, S.Y. and Sun, Y. and Peng, D.Z. and Liu, Z. and Song, X.M. and Hu, P.},
  title     = {Fuzzy Multimodal Learning for Trusted Cross-modal Retrieval},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {20747-20756},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245551},
}
@inproceedings{bb250717,
  author    = {Hur, C. and Hong, J.H. and Lee, D.H. and Kang, D. and Myeong, S. and Park, S.H. and Park, H.},
  title     = {Narrating the Video: Boosting Text-Video Retrieval via Comprehensive Utilization of Frame-Level Captions},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {24077-24086},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245552},
}
@inproceedings{bb250718,
  author    = {Zha, Q.X. and Liu, X. and Peng, S.J. and Cheung, Y.M. and Xu, X. and Wang, N.N.},
  title     = {ReCon: Enhancing True Correspondence Discrimination through Relation Consistency for Robust Noisy Correspondence Learning},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {29680-29689},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245553},
}
@inproceedings{bb250719,
  author    = {Cui, Y.H. and Zu, X.X. and Zhang, W.H. and Zhao, Z.Z. and Gao, J.Y.},
  title     = {Incorporating Dense Knowledge Alignment into Unified Multimodal Representation Models},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {29733-29743},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245554},
}
@inproceedings{bb250720,
  author    = {Lin, Z.R. and Wang, Z. and Qian, T.W. and Mu, P. and Chan, S. and Bai, C.},
  title     = {NeighborRetr: Balancing Hub Centrality in Cross-Modal Retrieval},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {9263-9273},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245555},
}
@inproceedings{bb250721,
  author    = {Zhang, X. and Zhang, Y.Z. and Xie, W. and Li, M.X. and Dai, Z.Q. and Long, D.K. and Xie, P.J. and Zhang, M. and Li, W.J. and Zhang, M.},
  title     = {Bridging Modalities: Improving Universal Multimodal Retrieval by Multimodal Large Language Models},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {9274-9285},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245556},
}
@inproceedings{bb250722,
  author    = {Zhao, S. and Xia, Q.M. and Guo, X. and Zou, P. and Zheng, M. and Wu, H. and Wen, C. and Wang, C.},
  title     = {SP3D: Boosting Sparsely-Supervised 3D Object Detection via Accurate Cross-Modal Semantic Prompts},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {29374-29384},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245557},
}
@inproceedings{bb250723,
  author    = {Wei, C. and Chen, Y. and Chen, H.N. and Hu, H.X. and Zhang, G. and Fu, J. and Ritter, A. and Chen, W.},
  title     = {UNIIR: Training and Benchmarking Universal Multimodal Information Retrievers},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {LXXXVII: 387-404},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245558},
}
@inproceedings{bb250724,
  author    = {Chen, S.J. and Chen, X. and Zhang, C. and Li, M.S. and Yu, G. and Fei, H. and Zhu, H.Y. and Fan, J.Y. and Chen, T.},
  title     = {LL3DA: Visual Interactive Instruction Tuning for Omni-3D Understanding, Reasoning, and Planning},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {26418-26428},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245559},
}
@inproceedings{bb250725,
  author    = {Xu, H.R. and Peng, P.X. and Tan, G. and Li, Y. and Xu, X.H. and Tian, Y.H.},
  title     = {DMR: Decomposed Multi-Modality Representations for Frames and Events Fusion in Visual Reinforcement Learning},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {26498-26508},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245560},
}
@comment{bb250726: second author surname changed from "Mint" to "Min" (believed to be Yifei Min); TODO confirm against the CVPR 2024 proceedings.}
@inproceedings{bb250726,
  author    = {You, C.Y. and Min, Y.F. and Dai, W.C. and Sekhon, J.S. and Staib, L. and Duncan, J.S.},
  title     = {Calibrating Multi-modal Representations: A Pursuit of Group Robustness without Annotations},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {26140-26150},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245561},
}
@inproceedings{bb250727,
  author    = {Zhang, Z.H. and Cao, S.C. and Wang, Y.X.},
  title     = {TAMM: TriAdapter Multi-Modal Learning for 3D Shape Understanding},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {21413-21423},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245562},
}
@inproceedings{bb250728,
  author    = {Zhao, Z. and Chen, M.X. and Dai, T.J. and Yao, J.C. and Han, B. and Zhang, Y. and Wang, Y.F.},
  title     = {Mitigating Noisy Correspondence by Geometrical Structure Consistency Learning},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {27371-27380},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245563},
}
@inproceedings{bb250729,
  author    = {Tuzcuoglu, O. and Koksal, A. and Sofu, B. and Kalkan, S. and Alatan, A.A.},
  title     = {XoFTR: Cross-modal Feature Matching Transformer},
  booktitle = IMW24,
  year      = {2024},
  pages     = {4275-4286},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245564},
}
@inproceedings{bb250730,
  author    = {Wu, J.L. and Hu, X. and Wang, Y.Q. and Pang, B. and Soricut, R.},
  title     = {Omni-SMoLA: Boosting Generalist Multimodal Models with Soft Mixture of Low-Rank Experts},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {14205-14215},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245565},
}
@inproceedings{bb250731,
  author    = {Sun, Q. and Cui, Y.F. and Zhang, X.S. and Zhang, F. and Yu, Q. and Wang, Y.Z. and Rao, Y.M. and Liu, J.J. and Huang, T.J. and Wang, X.L.},
  title     = {Generative Multimodal Models are In-Context Learners},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {14398-14409},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245566},
}
@inproceedings{bb250732,
  author    = {Zhao, S.T. and Li, Z.W. and Lu, Y.D. and Yuille, A.L. and Wang, Y.},
  title     = {Causal-CoG: A Causal-Effect Look at Context Generation for Boosting Multi-Modal Language Models},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {13342-13351},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245567},
}
@inproceedings{bb250733,
  author    = {Li, Z. and Yang, B. and Liu, Q. and Ma, Z.Y. and Zhang, S. and Yang, J.X. and Sun, Y. and Liu, Y.L. and Bai, X.},
  title     = {Monkey: Image Resolution and Text Label are Important Things for Large Multi-Modal Models},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {26753-26763},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245568},
}
@inproceedings{bb250734,
  author    = {Han, H.C. and Zheng, Q.H. and Dai, G. and Luo, M. and Wang, J.D.},
  title     = {Learning to Rematch Mismatched Pairs for Robust Cross-Modal Retrieval},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {26669-26678},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245569},
}
@inproceedings{bb250735,
  author    = {Yuan, J.L. and Yu, Y. and Mittal, G. and Hall, M. and Sajeev, S. and Chen, M.},
  title     = {Rethinking Multimodal Content Moderation from an Asymmetric Angle with Mixed-modality},
  booktitle = WACV24,
  year      = {2024},
  pages     = {8517-8527},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245570},
}
@inproceedings{bb250736,
  author    = {Shoshan, A. and Linial, O. and Bhonker, N. and Hirsch, E. and Zamir, L. and Kviatkovsky, I. and Medioni, G.},
  title     = {Asymmetric Image Retrieval with Cross Model Compatible Ensembles},
  booktitle = WACV24,
  year      = {2024},
  pages     = {1-11},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245571},
}
@inproceedings{bb250737,
  author    = {Honig, R. and Ackermann, J. and Chi, M.Y.},
  title     = {Bi-Encoder Cascades for Efficient Image Search},
  booktitle = REDLCV23,
  year      = {2023},
  pages     = {1350-1355},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245572},
}
@inproceedings{bb250738,
  author    = {Cao, Y.C. and Tang, Q.F. and Yang, F. and Su, X. and You, S. and Lu, X.B. and Xu, C.},
  title     = {Re-mine, Learn and Reason: Exploring the Cross-modal Semantic Correlations for Language-guided HOI detection},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {23435-23446},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245573},
}
@inproceedings{bb250739,
  author    = {Trinci, T. and Bianconcini, T. and Sarti, L. and Taccari, L. and Sambo, F.},
  title     = {Cross-model temporal cooperation via saliency maps for efficient frame classification},
  booktitle = REDLCV23,
  year      = {2023},
  pages     = {1156-1160},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245574},
}
@inproceedings{bb250740,
  author    = {Long, T. and van Noord, N.},
  title     = {Cross-modal Scalable Hyperbolic Hierarchical Clustering},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {16609-16618},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245575},
}
@inproceedings{bb250741,
  author    = {Li, H. and Li, X.Y. and Hu, P.B. and Lei, Y. and Li, C.X. and Zhou, Y.},
  title     = {Boosting Multi-modal Model Performance with Adaptive Gradient Modulation},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {22157-22167},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245576},
}
@inproceedings{bb250742,
  author    = {Zhao, L.J. and Wang, Y. and Kato, J.},
  title     = {Using Classifier Discrepancy for Cross-Domain Image Retrieval},
  booktitle = ICIP23,
  year      = {2023},
  pages     = {3314-3318},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245577},
}
@inproceedings{bb250743,
  author    = {Era, Y. and Togo, R. and Maeda, K. and Ogawa, T. and Haseyama, M.},
  title     = {Video-Music Retrieval with Fine-Grained Cross-Modal Alignment},
  booktitle = ICIP23,
  year      = {2023},
  pages     = {2005-2009},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245578},
}
@inproceedings{bb250744,
  author    = {Yu, Y. and Chung, J. and Yun, H. and Hessel, J. and Park, J.S. and Lu, X.M. and Zellers, R. and Ammanabrolu, P. and Le Bras, R. and Kim, G. and Choi, Y.},
  title     = {Fusing Pre-Trained Language Models with Multimodal Prompts through Reinforcement Learning},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {10845-10856},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245579},
}
@inproceedings{bb250745,
  author    = {Huang, S. and Gong, B. and Pan, Y.L. and Jiang, J.W. and Lv, Y.L. and Li, Y.Y. and Wang, D.L.},
  title     = {VoP: Text-Video Co-Operative Prompt Tuning for Cross-Modal Retrieval},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {6565-6574},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245580},
}
@inproceedings{bb250746,
  author    = {Chen, M.X. and Xing, L.Y. and Wang, Y. and Zhang, X.},
  title     = {Enhanced Multimodal Representation Learning with Cross-Modal KD},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {11766-11775},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245581},
}
@inproceedings{bb250747,
  author    = {Yang, S. and Xu, Z. and Wang, K. and You, Y. and Yao, H.X. and Liu, T.L. and Xu, M.},
  title     = {BiCro: Noisy Correspondence Rectification for Multi-modality Data via Bi-directional Cross-modal Similarity Consistency},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {19883-19892},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245582},
}
@inproceedings{bb250748,
  author    = {Kim, D. and Kim, N. and Kwak, S.},
  title     = {Improving Cross-Modal Retrieval with Set of Diverse Embeddings},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {23422-23431},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245583},
}
@inproceedings{bb250749,
  author    = {Kim, J.M. and Koepke, A.S. and Schmid, C. and Akata, Z.},
  title     = {Exposing and Mitigating Spurious Correlations for Cross-Modal Retrieval},
  booktitle = MULA23,
  year      = {2023},
  pages     = {2585-2595},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245584},
}
@inproceedings{bb250750,
  author    = {Tran, V. and Balasubramanian, N. and Hoai, M.},
  title     = {From Within to Between: Knowledge Distillation for Cross Modality Retrieval},
  booktitle = ACCV22,
  year      = {2022},
  pages     = {IV:605-622},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245585},
}
@inproceedings{bb250751,
  author    = {Fragomeni, A. and Wray, M. and Damen, D.},
  title     = {Contra: (con)text (tra)nsformer for Cross-modal Video Retrieval},
  booktitle = ACCV22,
  year      = {2022},
  pages     = {IV:451-468},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245586},
}
@inproceedings{bb250752,
  author    = {Zheng, Y.C. and Zhang, X.W.},
  title     = {Heterogeneous Interactive Learning Network for Unsupervised Cross-modal Retrieval},
  booktitle = ACCV22,
  year      = {2022},
  pages     = {IV:692-707},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245587},
}
@inproceedings{bb250753,
  author    = {Arnold, R. and Sauter, L. and Schuldt, H.},
  title     = {Free-Form Multi-Modal Multimedia Retrieval (4MR)},
  booktitle = MMMod23,
  year      = {2023},
  pages     = {I: 678-683},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245588},
}
@inproceedings{bb250754,
  author    = {Xuan, H. and Chen, X.S.},
  title     = {Dissecting Deep Metric Learning Losses for Image-Text Retrieval},
  booktitle = WACV23,
  year      = {2023},
  pages     = {2163-2172},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245589},
}
@inproceedings{bb250755,
  author    = {Ge, X. and Chen, F. and Xu, S. and Tao, F. and Jose, J.M.},
  title     = {Cross-modal Semantic Enhanced Interaction for Image-Sentence Retrieval},
  booktitle = WACV23,
  year      = {2023},
  pages     = {1022-1031},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245590},
}
@inproceedings{bb250756,
  author    = {Jawade, B. and Mohan, D.D. and Ali, N.M. and Setlur, S. and Govindaraju, V.},
  title     = {NAPReg: Nouns As Proxies Regularization for Semantically Aware Cross-Modal Embeddings},
  booktitle = WACV23,
  year      = {2023},
  pages     = {1135-1144},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245591},
}
@inproceedings{bb250757,
  author    = {Nakatsuka, T. and Hamasaki, M. and Goto, M.},
  title     = {Content-Based Music-Image Retrieval Using Self- and Cross-Modal Feature Embedding Memory},
  booktitle = WACV23,
  year      = {2023},
  pages     = {2173-2183},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245592},
}
@inproceedings{bb250758,
  author    = {Chen, Y.X. and Yuan, J.B. and Zhao, L. and Chen, T.L. and Luo, R. and Davis, L. and Metaxas, D.N.},
  title     = {More Than Just Attention: Improving Cross-Modal Attentions with Contrastive Constraints for Image-Text Matching},
  booktitle = WACV23,
  year      = {2023},
  pages     = {4421-4429},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245593},
}
@inproceedings{bb250759,
  author    = {Agarwal, A. and Karanam, S. and Srinivasan, B.V. and Banerjee, B.},
  title     = {Contrastive Learning of Semantic Concepts for Open-set Cross-domain Retrieval},
  booktitle = WACV23,
  year      = {2023},
  pages     = {4104-4113},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245594},
}
@inproceedings{bb250760,
  author    = {Yang, Y. and Shen, H. and Yang, M.},
  title     = {Relation-Guided Network for Image-Text Retrieval},
  booktitle = ICIP22,
  year      = {2022},
  pages     = {1856-1860},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245595},
}
@inproceedings{bb250761,
  author    = {Sumbul, G. and Muller, M. and Demir, B.},
  title     = {A Novel Self-Supervised Cross-Modal Image Retrieval Method in Remote Sensing},
  booktitle = ICIP22,
  year      = {2022},
  pages     = {2426-2430},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245596},
}
@inproceedings{bb250762,
  author    = {Wang, H. and Zhang, J.P. and Chen, Y.H. and Ma, C.B. and Avery, J. and Hull, L. and Carneiro, G.},
  title     = {Uncertainty-Aware Multi-modal Learning via Cross-Modal Random Network Prediction},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {XXXVII:200-217},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245597},
}
@inproceedings{bb250763,
  author    = {de Almeida, L.B. and Valem, L.P. and Pedronette, D.C.G.},
  title     = {Graph Convolutional Networks and Manifold Ranking for Multimodal Video Retrieval},
  booktitle = ICIP22,
  year      = {2022},
  pages     = {2811-2815},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245598},
}
@inproceedings{bb250764,
  author    = {Liang, T. and Lin, G.S. and Wan, M.Y. and Li, T.R. and Ma, G.J. and Lv, F.M.},
  title     = {Expanding Large Pre-trained Unimodal Models with Multimodal Information Injection for Image-Text Multimodal Classification},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {15471-15480},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245599},
}
@inproceedings{bb250765,
  author    = {Yang, J.H. and Chen, X.Y. and Jiang, M. and Chen, S. and Wang, L. and Zhao, Q.},
  title     = {VisualHow: Multimodal Problem Solving},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {15606-15616},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245600},
}
@inproceedings{bb250766,
  author    = {Girdhar, R. and Singh, M. and Ravi, N. and van der Maaten, L. and Joulin, A. and Misra, I.},
  title     = {Omnivore: A Single Model for Many Visual Modalities},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {16081-16091},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245601},
}
@inproceedings{bb250767,
  author    = {Ma, M.M. and Ren, J. and Zhao, L. and Testuggine, D. and Peng, X.},
  title     = {Are Multimodal Transformers Robust to Missing Modality?},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {18156-18165},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245602},
}
@inproceedings{bb250768,
  author    = {Han, Z.B. and Yang, F. and Huang, J.Z. and Zhang, C.Q. and Yao, J.H.},
  title     = {Multimodal Dynamics: Dynamical Fusion for Trustworthy Multimodal Classification},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {20675-20685},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245603},
}
@inproceedings{bb250769,
  author    = {Gupta, V. and Mittal, T. and Mathur, P. and Mishra, V. and Maheshwari, M. and Bera, A. and Mukherjee, D. and Manocha, D.},
  title     = {3MASSIV: Multilingual, Multimodal and Multi-Aspect dataset of Social Media Short Videos},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {21032-21043},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245604},
}
@inproceedings{bb250770,
  author    = {Bogolin, S.V. and Croitoru, I. and Jin, H.L. and Liu, Y. and Albanie, S.},
  title     = {Cross Modal Retrieval with Querybank Normalisation},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {5184-5195},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245605},
}
@inproceedings{bb250771,
  author    = {Yang, E. and Yao, D.R. and Liu, T.L. and Deng, C.},
  title     = {Mutual Quantization for Cross-Modal Search with Noisy Labels},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {7541-7550},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245606},
}
@inproceedings{bb250772,
  author    = {Neculai, A. and Chen, Y.B. and Akata, Z.},
  title     = {Probabilistic Compositional Embeddings for Multimodal Image Retrieval},
  booktitle = MULA22,
  year      = {2022},
  pages     = {4546-4556},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245607},
}
@inproceedings{bb250773,
  author    = {Couairon, G. and Douze, M. and Cord, M. and Schwenk, H.},
  title     = {Embedding Arithmetic of Multimodal Queries for Image Retrieval},
  booktitle = ODRUM22,
  year      = {2022},
  pages     = {4946-4954},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245608},
}
@inproceedings{bb250774,
  author    = {Li, Y.H. and Yu, J. and Cai, Z.P. and Pan, Y.},
  title     = {Cross-modal Target Retrieval for Tracking by Natural Language},
  booktitle = ODRUM22,
  year      = {2022},
  pages     = {4927-4936},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245609},
}
@inproceedings{bb250775,
  author    = {Thomas, C. and Kovashka, A.},
  title     = {Emphasizing Complementary Samples for Non-literal Cross-modal Retrieval},
  booktitle = MULA22,
  year      = {2022},
  pages     = {4631-4640},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245610},
}
@inproceedings{bb250776,
  author    = {Xu, B. and Xiong, Y.H. and Zhang, R. and Feng, Y. and Wu, H.F.},
  title     = {Natural Language-Based Vehicle Retrieval with Explicit Cross-Modal Representation Learning},
  booktitle = AICity22,
  year      = {2022},
  pages     = {3141-3148},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245611},
}
@inproceedings{bb250777,
  author    = {Shvetsova, N. and Chen, B. and Rouditchenko, A. and Thomas, S. and Kingsbury, B. and Feris, R.S. and Harwath, D. and Glass, J. and Kuehne, H.},
  title     = {Everything at Once - Multi-modal Fusion Transformer for Video Retrieval},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {19988-19997},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245612},
}
@inproceedings{bb250778,
  author    = {Andonian, A. and Chen, S.X. and Hamid, R.},
  title     = {Robust Cross-Modal Representation Learning with Progressive Self-Distillation},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {16409-16420},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245613},
}
@inproceedings{bb250779,
  author    = {Lu, H.Y. and Fei, N. and Huo, Y.Q. and Gao, Y.Z. and Lu, Z.W. and Wen, J.R.},
  title     = {COTS: Collaborative Two-Stream Vision-Language Pre-Training Model for Cross-Modal Retrieval},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {15671-15680},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245614},
}
@inproceedings{bb250780,
  author    = {Abdelnabi, S. and Hasan, R. and Fritz, M.},
  title     = {Open-Domain, Content-based, Multi-modal Fact-checking of Out-of-Context Images via Online Resources},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {14920-14929},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245615},
}
@inproceedings{bb250781,
  author    = {Wang, Y. and Zhang, T. and Zhang, X. and Cui, Z. and Huang, Y. and Shen, P.C. and Li, S.X. and Yang, J.},
  title     = {Wasserstein Coupled Graph Learning for Cross-Modal Retrieval},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {1793-1802},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245616},
}
@comment{bb250782: the title previously read "Ask amp;Confirm", the residue of a garbled HTML ampersand entity; restored as an escaped LaTeX ampersand.}
@inproceedings{bb250782,
  author    = {Cai, G.Y. and Zhang, J. and Jiang, X.Y. and Gong, Y.F. and He, L.H. and Yu, F. and Peng, P. and Guo, X.W. and Huang, F.Y. and Sun, X.},
  title     = {Ask\&Confirm: Active Detail Enriching for Cross-Modal Retrieval with Partial Query},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {1815-1824},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245617},
}
@inproceedings{bb250783,
  author    = {Wen, K.Y. and Xia, J. and Huang, Y.Y. and Li, L.Y. and Xu, J.Y. and Shao, J.},
  title     = {COOKIE: Contrastive Cross-Modal Knowledge Sharing Pre-training for Vision-Language Representation},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {2188-2197},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245618},
}
@comment{bb250784: the ampersand in the title is escaped because a bare one is a LaTeX alignment character and aborts typesetting of the bibliography.}
@inproceedings{bb250784,
  author    = {Patrick, M. and Huang, P.Y. and Misra, I. and Metze, F. and Vedaldi, A. and Asano, Y.M. and Henriques, J.},
  title     = {Space-Time Crop \& Attend: Improving Cross-modal Video Representation Learning},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {10540-10552},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245619},
}
@inproceedings{bb250785,
  author    = {Lin, M.X. and Yang, J. and Wang, H. and Lai, Y.K. and Jia, R.F. and Zhao, B.Q. and Gao, L.},
  title     = {Single Image 3D Shape Retrieval via Cross-Modal Instance and Category Contrastive Learning},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {11385-11395},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245620},
}
@inproceedings{bb250786,
  author    = {Changpinyo, S. and Pont Tuset, J. and Ferrari, V. and Soricut, R.},
  title     = {Telling the What while Pointing to the Where: Multimodal Queries for Image Retrieval},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {12116-12126},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245621},
}
@inproceedings{bb250787,
  author    = {Gabeur, V. and Nagrani, A. and Sun, C. and Alahari, K. and Schmid, C.},
  title     = {Masking Modalities for Cross-modal Video Retrieval},
  booktitle = WACV22,
  year      = {2022},
  pages     = {2111-2120},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245622},
}
@inproceedings{bb250788,
  author    = {Galanopoulos, D. and Mezaris, V.},
  title     = {Hard-Negatives or Non-Negatives? A Hard-Negative Selection Strategy for Cross-Modal Retrieval Using the Improved Marginal Ranking Loss},
  booktitle = ViRaL21,
  year      = {2021},
  pages     = {2312-2316},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245623},
}
@inproceedings{bb250789,
  author    = {Jing, L.L. and Vahdani, E. and Tan, J.X. and Tian, Y.L.},
  title     = {Cross-Modal Center Loss for 3D Cross-Modal Retrieval},
  booktitle = CVPR21,
  year      = {2021},
  pages     = {3141-3150},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245624},
}
@inproceedings{bb250790,
  author    = {Almazan, J. and Ko, B. and Gu, G. and Larlus, D. and Kalantidis, Y.},
  title     = {Granularity-Aware Adaptation for Image Retrieval Over Multiple Tasks},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {XIV:389-406},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245625},
}
@inproceedings{bb250791,
  author    = {Chun, S. and Oh, S.J. and Sampaio de Rezende, R. and Kalantidis, Y. and Larlus, D.},
  title     = {Probabilistic Embeddings for Cross-Modal Retrieval},
  booktitle = CVPR21,
  year      = {2021},
  pages     = {8411-8420},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245626},
}
@inproceedings{bb250792,
  author    = {Liu, Y. and Chen, Q.C. and Albanie, S.},
  title     = {Adaptive Cross-Modal Prototypes for Cross-Domain Visual-Language Retrieval},
  booktitle = CVPR21,
  year      = {2021},
  pages     = {14949-14959},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245627},
}
@inproceedings{bb250793,
  author    = {Salvador, A. and Gundogdu, E. and Bazzani, L. and Donoser, M.},
  title     = {Revamping Cross-Modal Recipe Retrieval with Hierarchical Transformers and Self-supervised Learning},
  booktitle = CVPR21,
  year      = {2021},
  pages     = {15470-15479},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245628},
}
@inproceedings{bb250794,
  author    = {Dzabraev, M. and Kalashnikov, M. and Komkov, S. and Petiushko, A.},
  title     = {MDMMT: Multidomain Multimodal Transformer for Video Retrieval},
  booktitle = HVU21,
  year      = {2021},
  pages     = {3349-3358},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245629},
}
@inproceedings{bb250795,
  author    = {Wang, K. and Herranz, L. and van de Weijer, J.},
  title     = {Continual learning in cross-modal retrieval},
  booktitle = OmniCV21,
  year      = {2021},
  pages     = {3623-3633},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245630},
}
@inproceedings{bb250796,
  author    = {Mafla, A. and Rezende, R.S. and Gomez, L. and Larlus, D. and Karatzas, D.},
  title     = {StacMR: Scene-Text Aware Cross-Modal Retrieval},
  booktitle = WACV21,
  year      = {2021},
  pages     = {2219-2229},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245631},
}
@inproceedings{bb250797,
  author    = {Feng, C.T. and Li, D.G. and Zheng, J.W.},
  title     = {Improving Supervised Cross-modal Retrieval with Semantic Graph Embedding},
  booktitle = MMMod21,
  year      = {2021},
  pages     = {I:187-199},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245632},
}
@inproceedings{bb250798,
  author    = {Wen, Z.Y. and Feng, A.},
  title     = {Deep Centralized Cross-modal Retrieval},
  booktitle = MMMod21,
  year      = {2021},
  pages     = {I:443-455},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245633},
}
@inproceedings{bb250799,
  author    = {Jin, C. and Zhang, T. and Liu, S.X. and Tie, Y. and Lv, X. and Li, J.G. and Yan, W.C. and Yan, M. and Xu, Q. and Guan, Y.C. and Yang, Z.G.},
  title     = {Cross-modal Deep Learning Applications: Audio-visual Retrieval},
  booktitle = MMDLCA20,
  year      = {2020},
  pages     = {301-313},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT245634},
}
Last update: Mar 28, 2026 at 17:09:41