@article{bb249800,
  author    = {Parida, K.K. and Sharma, G.},
  title     = {Discriminative semantic transitive consistency for cross-modal learning},
  journal   = CVIU,
  volume    = {219},
  year      = {2022},
  pages     = {103404},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244636},
}
@article{bb249801,
  author    = {Song, X. and Chen, J.J. and Wu, Z.X. and Jiang, Y.G.},
  title     = {Spatial-Temporal Graphs for Cross-Modal {Text2Video} Retrieval},
  journal   = MultMed,
  volume    = {24},
  year      = {2022},
  pages     = {2914--2923},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244637},
}
@article{bb249802,
  author    = {Ma, X.H. and Yang, X.S. and Gao, J.Y. and Xu, C.S.},
  title     = {The Model May Fit You: User-Generalized Cross-Modal Retrieval},
  journal   = MultMed,
  volume    = {24},
  year      = {2022},
  pages     = {2998--3012},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244638},
}
@article{bb249803,
  author    = {Shan, W. and Huang, D. and Wang, J.T. and Zou, F. and Li, S.},
  title     = {Self-Attention based fine-grained cross-media hybrid network},
  journal   = PR,
  volume    = {130},
  year      = {2022},
  pages     = {108748},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244639},
}
@article{bb249804,
  author    = {Qian, S.S. and Xue, D.Z. and Fang, Q. and Xu, C.S.},
  title     = {Adaptive Label-Aware Graph Convolutional Networks for Cross-Modal Retrieval},
  journal   = MultMed,
  volume    = {24},
  year      = {2022},
  pages     = {3520--3532},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244640},
}
@article{bb249805,
  author    = {Wang, Y. and Peng, Y.X.},
  title     = {{MARS}: Learning Modality-Agnostic Representation for Scalable Cross-Media Retrieval},
  journal   = CirSysVideo,
  volume    = {32},
  number    = {7},
  month     = jul,
  year      = {2022},
  pages     = {4765--4777},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244641},
}
@article{bb249806,
  author    = {Liu, G.H. and Li, Z.Y. and Yang, J.Y. and Zhang, D.},
  title     = {Exploiting sublimated deep features for image retrieval},
  journal   = PR,
  volume    = {147},
  year      = {2024},
  pages     = {110076},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244642},
}
@article{bb249807,
  author    = {Liu, G.H. and Li, Z.Y. and Zhang, D.},
  title     = {Exploiting {Hu} invariant moments and deep features for image retrieval},
  journal   = PR,
  volume    = {173},
  year      = {2026},
  pages     = {112801},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244643},
}
@article{bb249808,
  author    = {Liu, Z. and Zhao, F.Y. and Zhang, M.M.},
  title     = {An Efficient Multimodal Aggregation Network for Video-Text Retrieval},
  journal   = IEICE,
  volume    = {E105-D},
  number    = {10},
  month     = oct,
  year      = {2022},
  pages     = {1825--1828},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244644},
}
@article{bb249809,
  author    = {Guo, D.J. and Su, X.M. and Lian, Y. and Liu, L.M. and Wang, H.B.},
  title     = {Two-stage partial image-text clustering ({TPIT-C})},
  journal   = IET-CV,
  volume    = {16},
  number    = {8},
  year      = {2022},
  pages     = {694--708},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244645},
}
@article{bb249810,
  author    = {Jin, M. and Zhang, H.X. and Zhu, L. and Sun, J.D. and Liu, L.},
  title     = {Video Sampled Frame Category Aggregation and Consistent Representation for Cross-Modal Retrieval},
  journal   = CirSysVideo,
  volume    = {33},
  number    = {2},
  month     = feb,
  year      = {2023},
  pages     = {909--919},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244646},
}
@article{bb249811,
  author    = {Liao, L. and Yang, M. and Zhang, B.},
  title     = {Deep Supervised Dual Cycle Adversarial Network for Cross-Modal Retrieval},
  journal   = CirSysVideo,
  volume    = {33},
  number    = {2},
  month     = feb,
  year      = {2023},
  pages     = {920--934},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244647},
}
@article{bb249812,
  author    = {Gong, Y. and Cosma, G.},
  title     = {Improving visual-semantic embeddings by learning semantically-enhanced hard negatives for cross-modal information retrieval},
  journal   = PR,
  volume    = {137},
  year      = {2023},
  pages     = {109272},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244648},
}
@article{bb249813,
  author    = {Li, W.H. and Wang, Y. and Su, Y.T. and Li, X.Y. and Liu, A.A. and Zhang, Y.D.},
  title     = {Multi-Scale Fine-Grained Alignments for Image and Sentence Matching},
  journal   = MultMed,
  volume    = {25},
  year      = {2023},
  pages     = {543--556},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244649},
}
@article{bb249814,
  author    = {Hu, P. and Huang, Z.Y. and Peng, D.Z. and Wang, X. and Peng, X.},
  title     = {Cross-Modal Retrieval With Partially Mismatched Pairs},
  journal   = PAMI,
  volume    = {45},
  number    = {8},
  month     = aug,
  year      = {2023},
  pages     = {9595--9610},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244650},
}
@article{bb249815,
  author    = {Liu, Y.X. and Wu, J.L. and Qu, L. and Gan, T. and Yin, J.H. and Nie, L.Q.},
  title     = {Self-Supervised Correlation Learning for Cross-Modal Retrieval},
  journal   = MultMed,
  volume    = {25},
  year      = {2023},
  pages     = {2851--2863},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244651},
}
@article{bb249816,
  author    = {Sun, C. and Zhang, H.X. and Liu, L. and Liu, D.M. and Wang, L.},
  title     = {Multi-Label Adversarial Fine-Grained Cross-Modal Retrieval},
  journal   = SP:IC,
  volume    = {117},
  year      = {2023},
  pages     = {117018},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244652},
}
@article{bb249817,
  author    = {Guo, S.T. and Zhang, H.X. and Liu, L. and Liu, D.M. and Lu, X. and Li, L.J.},
  title     = {Hypergraph clustering based multi-label cross-modal retrieval},
  journal   = JVCIR,
  volume    = {103},
  year      = {2024},
  pages     = {104258},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244653},
}
@article{bb249818,
  author    = {Song, D. and Ling, Y.T. and Li, T. and Wang, T. and Li, X.Y.},
  title     = {Hierarchical deep semantic alignment for cross-domain {3D} model retrieval},
  journal   = JVCIR,
  volume    = {95},
  year      = {2023},
  pages     = {103895},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244654},
}
@article{bb249819,
  author    = {Li, T.B. and Liu, A.A. and Song, D. and Li, W.H. and Li, X.Y. and Su, Y.T.},
  title     = {Focus on Hard Samples: Hierarchical Unbiased Constraints for Cross-Domain {3D} Model Retrieval},
  journal   = CirSysVideo,
  volume    = {33},
  number    = {11},
  month     = nov,
  year      = {2023},
  pages     = {7036--7049},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244655},
}
@article{bb249820,
  author    = {Dong, X. and Zhan, X.L. and Wei, Y.C. and Wei, X.Y. and Wang, Y.W. and Lu, M.L. and Cao, X.C. and Liang, X.D.},
  title     = {Entity-Graph Enhanced Cross-Modal Pretraining for Instance-Level Product Retrieval},
  journal   = PAMI,
  volume    = {45},
  number    = {11},
  month     = nov,
  year      = {2023},
  pages     = {13117--13133},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244656},
}
@inproceedings{bb249821,
  author    = {Zhan, X.L. and Wu, Y.X. and Dong, X. and Wei, Y.C. and Lu, M.L. and Zhang, Y.C. and Xu, H. and Liang, X.D.},
  title     = {{Product1M}: Towards Weakly Supervised Instance-Level Product Retrieval via Cross-Modal Pretraining},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {11762--11771},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244657},
}
@article{bb249822,
  author    = {Zhang, X. and Li, W.P. and Wang, X. and Wang, L. and Zheng, F.Z. and Wang, L. and Zhang, H.},
  title     = {A Fusion Encoder with Multi-Task Guidance for Cross-Modal Text-Image Retrieval in Remote Sensing},
  journal   = RS,
  volume    = {15},
  number    = {18},
  year      = {2023},
  pages     = {4637},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244658},
}
@article{bb249823,
  author    = {Xu, L.X. and Wang, L. and Zhang, J.Z. and Ha, D. and Zhang, H.},
  title     = {A Review of Cross-Modal Image-Text Retrieval in Remote Sensing},
  journal   = RS,
  volume    = {17},
  number    = {24},
  year      = {2025},
  pages     = {3995},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244659},
}
@article{bb249824,
  author    = {Ye, Z.S. and Yao, L. and Zhang, Y. and Gustin, S.},
  title     = {Self-supervised cross-modal visual retrieval from brain activities},
  journal   = PR,
  volume    = {145},
  year      = {2024},
  pages     = {109915},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244660},
}
@article{bb249825,
  author    = {Chen, Z.J. and Zhang, Y. and Mi, S.},
  title     = {Assisting Multimodal Named Entity Recognition by cross-modal auxiliary tasks},
  journal   = PRL,
  volume    = {175},
  year      = {2023},
  pages     = {52--58},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244661},
}
% NOTE(review): the source carried the placeholder pages "xx-yy", which would be
% printed literally in a bibliography. The pages field is omitted until the real
% article number is confirmed with the publisher (possibly 196 -- TODO confirm).
@article{bb249826,
  author    = {Li, Z.X. and Zhao, W.Z. and Du, X.Y. and Zhou, G.Y. and Zhang, S.L.},
  title     = {Cross-Modal Retrieval and Semantic Refinement for Remote Sensing Image Captioning},
  journal   = RS,
  volume    = {16},
  number    = {1},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244662},
}
@article{bb249827,
  author    = {Xu, R.Q. and Mayer, W. and Chu, H.L. and Zhang, Y. and Zhang, H.Y. and Wang, Y.L. and Liu, Y. and Feng, Z.},
  title     = {Automatic semantic modeling of structured data sources with cross-modal retrieval},
  journal   = PRL,
  volume    = {177},
  year      = {2024},
  pages     = {7--14},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244663},
}
@article{bb249828,
  author    = {Okamura, D. and Harakawa, R. and Iwahashi, M.},
  title     = {{LCNME}: Label Correction Using Network Prediction Based on Memorization Effects for Cross-Modal Retrieval With Noisy Labels},
  journal   = CirSysVideo,
  volume    = {34},
  number    = {1},
  month     = jan,
  year      = {2024},
  pages     = {590--602},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244664},
}
@article{bb249829,
  author    = {Zhang, L. and Chen, L. and Zhou, C. and Li, X. and Yang, F. and Yi, Z.},
  title     = {Weighted Graph-Structured Semantics Constraint Network for Cross-Modal Retrieval},
  journal   = MultMed,
  volume    = {26},
  year      = {2024},
  pages     = {1551--1564},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244665},
}
@article{bb249830,
  author    = {Wang, Y.B. and Wang, S.H. and Luo, H. and Dong, J.F. and Wang, F. and Han, M. and Wang, X. and Wang, M.},
  title     = {Dual-View Curricular Optimal Transport for Cross-Lingual Cross-Modal Retrieval},
  journal   = IP,
  volume    = {33},
  year      = {2024},
  pages     = {1522--1533},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244666},
}
@article{bb249831,
  author    = {Zhang, H. and Li, Y.D. and Li, X.L.},
  title     = {Constrained Bipartite Graph Learning for Imbalanced Multi-Modal Retrieval},
  journal   = MultMed,
  volume    = {26},
  year      = {2024},
  pages     = {4502--4514},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244667},
}
@article{bb249832,
  author    = {Wang, Z. and Xu, X. and Wei, J. and Xie, N. and Yang, Y. and Shen, H.T.},
  title     = {Semantics Disentangling for Cross-Modal Retrieval},
  journal   = IP,
  volume    = {33},
  year      = {2024},
  pages     = {2226--2237},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244668},
}
@article{bb249833,
  author    = {Ma, X.R. and Yang, M.X. and Li, Y.F. and Hu, P. and Lv, J.C. and Peng, X.},
  title     = {Cross-Modal Retrieval With Noisy Correspondence via Consistency Refining and Mining},
  journal   = IP,
  volume    = {33},
  year      = {2024},
  pages     = {2587--2598},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244669},
}
@inproceedings{bb249834,
  author    = {Feng, Y.L. and Zhu, H.Y. and Peng, D.Z. and Peng, X. and Hu, P.},
  title     = {{RONO}: Robust Discriminative Learning with Noisy Labels for {2D-3D} Cross-Modal Retrieval},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {11610--11619},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244670},
}
@inproceedings{bb249835,
  author    = {Hu, P. and Peng, X. and Zhu, H.Y. and Zhen, L.L. and Lin, J.},
  title     = {Learning Cross-Modal Retrieval with Noisy Labels},
  booktitle = CVPR21,
  year      = {2021},
  pages     = {5399--5409},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244671},
}
@article{bb249836,
  author    = {Ji, Z. and Lin, Z.G. and Wang, H.R. and Pang, Y.W. and Li, X.L.},
  title     = {Multi-task hierarchical convolutional network for visual-semantic cross-modal retrieval},
  journal   = PR,
  volume    = {151},
  year      = {2024},
  pages     = {110398},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244672},
}
@article{bb249837,
  author    = {Ji, Z. and Li, Z.H. and Zhang, Y. and Pang, Y.W. and Li, X.L.},
  title     = {Visual Semantic Contextualization Network for Multi-Query Image Retrieval},
  journal   = MultMed,
  volume    = {27},
  year      = {2025},
  pages     = {7067--7080},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244673},
}
@article{bb249838,
  author    = {Pang, S.M. and Zeng, Y.Y. and Zhao, J.W. and Xue, J.R.},
  title     = {A Mutually Textual and Visual Refinement Network for Image-Text Matching},
  journal   = MultMed,
  volume    = {26},
  year      = {2024},
  pages     = {7555--7566},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244674},
}
@article{bb249839,
  author    = {Yang, D.K. and Kuang, H.P. and Yang, K. and Li, M.C. and Zhang, L.H.},
  title     = {Towards Asynchronous Multimodal Signal Interaction and Fusion via Tailored Transformers},
  journal   = SPLetters,
  volume    = {31},
  year      = {2024},
  pages     = {1550--1554},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244675},
}
@article{bb249840,
  author    = {Hou, Y.L. and Zhong, X.J. and Cao, H. and Zhu, Z. and Zhou, Y.F. and Zhang, J.},
  title     = {A shared-private sentiment analysis approach based on cross-modal information interaction},
  journal   = PRL,
  volume    = {183},
  year      = {2024},
  pages     = {140--146},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244676},
}
@article{bb249841,
  author    = {Chen, S.W. and Liu, S. and Liu, J.},
  title     = {Type-Specific Modality Alignment for Multi-Modal Information Extraction},
  journal   = SPLetters,
  volume    = {31},
  year      = {2024},
  pages     = {1525--1529},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244677},
}
@article{bb249842,
  author    = {Zheng, Z.Q. and Ren, H. and Wu, Y. and Zhang, W.C. and Lu, H. and Yang, Y. and Shen, H.T.},
  title     = {Fully Unsupervised Domain-Agnostic Image Retrieval},
  journal   = CirSysVideo,
  volume    = {34},
  number    = {6},
  month     = jun,
  year      = {2024},
  pages     = {5077--5090},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244678},
}
@article{bb249843,
  author    = {Zhang, J.Z. and Wang, L. and Zheng, F.Z. and Wang, X. and Zhang, H.},
  title     = {An Enhanced Feature Extraction Framework for Cross-Modal Image-Text Retrieval},
  journal   = RS,
  volume    = {16},
  number    = {12},
  year      = {2024},
  pages     = {2201},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244679},
}
@article{bb249844,
  author    = {Cheng, Q.R. and Tan, Z.S. and Wen, K.Y. and Chen, C. and Gu, X.D.},
  title     = {Semantic Pre-Alignment and Ranking Learning With Unified Framework for Cross-Modal Retrieval},
  journal   = CirSysVideo,
  volume    = {34},
  number    = {7},
  month     = jul,
  year      = {2024},
  pages     = {6503--6516},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244680},
}
@article{bb249845,
  author    = {Xue, P. and Niu, S.},
  title     = {A novel active contour model based on features for image segmentation},
  journal   = PR,
  volume    = {155},
  year      = {2024},
  pages     = {110673},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244681},
}
@article{bb249846,
  author    = {Yan, J. and Deng, C. and Huang, H. and Liu, W.},
  title     = {Causality-Invariant Interactive Mining for Cross-Modal Similarity Learning},
  journal   = PAMI,
  volume    = {46},
  number    = {9},
  month     = sep,
  year      = {2024},
  pages     = {6216--6230},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244682},
}
@article{bb249847,
  author    = {Wu, W.J. and Zhao, Y.Z. and Li, Z. and Li, J.H. and Zhou, H. and Shou, M.Z. and Bai, X.},
  title     = {A large cross-modal video retrieval dataset with reading comprehension},
  journal   = PR,
  volume    = {157},
  year      = {2025},
  pages     = {110818},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244683},
}
@article{bb249848,
  author    = {Yuan, Z. and Wu, D. and Zhou, L.},
  title     = {Achieving the Optimum Rate for Cross-Modal Source Coding},
  journal   = MultMed,
  volume    = {26},
  year      = {2024},
  pages     = {9722--9735},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244684},
}
@article{bb249849,
  author    = {Chen, R. and Tan, J.P. and Yang, Z.J. and Yang, X.J. and Dai, Q.Y. and Cheng, Y.Q. and Lin, L.},
  title     = {{DPHANet}: Discriminative Parallel and Hierarchical Attention Network for Natural Language Video Localization},
  journal   = MultMed,
  volume    = {26},
  year      = {2024},
  pages     = {9575--9590},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244685},
}
@article{bb249850,
  author    = {Zheng, A. and Yuan, F. and Zhang, H.C. and Wang, J.X. and Tang, C. and Li, C.L.},
  title     = {Public-Private Attributes-Based Variational Adversarial Network for Audio-Visual Cross-Modal Matching},
  journal   = CirSysVideo,
  volume    = {34},
  number    = {9},
  month     = sep,
  year      = {2024},
  pages     = {8698--8709},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244686},
}
@article{bb249851,
  author    = {Li, D. and Du, S.L.},
  title     = {{ContextMatcher}: Detector-Free Feature Matching With Cross-Modality Context},
  journal   = CirSysVideo,
  volume    = {34},
  number    = {9},
  month     = sep,
  year      = {2024},
  pages     = {7922--7934},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244687},
}
@article{bb249852,
  author    = {Zhang, F. and Zhou, H. and Hua, X.S. and Chen, C. and Luo, X.},
  title     = {{HOPE}: A Hierarchical Perspective for Semi-Supervised {2D-3D} Cross-Modal Retrieval},
  journal   = PAMI,
  volume    = {46},
  number    = {12},
  month     = dec,
  year      = {2024},
  pages     = {8976--8993},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244688},
}
@article{bb249853,
  author    = {Zhu, Y. and Wu, Y. and Sebe, N. and Yan, Y.},
  title     = {Vision + {X}: A Survey on Multimodal Learning in the Light of Data},
  journal   = PAMI,
  volume    = {46},
  number    = {12},
  month     = dec,
  year      = {2024},
  pages     = {9102--9122},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244689},
}
@article{bb249854,
  author    = {Li, Z. and Guo, C. and Wang, X. and Zhang, H. and Hu, L.},
  title     = {Multi-View Visual Semantic Embedding for Cross-Modal Image-Text Retrieval},
  journal   = PR,
  volume    = {159},
  year      = {2025},
  pages     = {111088},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244690},
}
@article{bb249855,
  author    = {Jin, M. and Hu, W.B. and Zhu, L. and Wang, X. and Hong, R.C.},
  title     = {Based on Spatial and Temporal Implicit Semantic Relational Inference for Cross-Modal Retrieval},
  journal   = CirSysVideo,
  volume    = {34},
  number    = {11},
  month     = nov,
  year      = {2024},
  pages     = {11286--11298},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244691},
}
@article{bb249856,
  author    = {Croitoru, I. and Bogolin, S.V. and Leordeanu, M. and Jin, H.L. and Zisserman, A. and Liu, Y. and Albanie, S.},
  title     = {{TeachText}: {CrossModal} text-video retrieval through generalized distillation},
  journal   = AI,
  volume    = {338},
  year      = {2025},
  pages     = {104235},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244692},
}
@inproceedings{bb249857,
  author    = {Croitoru, I. and Bogolin, S.V. and Leordeanu, M. and Jin, H.L. and Zisserman, A. and Albanie, S. and Liu, Y.},
  title     = {{TeachText}: {CrossModal} Generalized Distillation for Text-Video Retrieval},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {11563--11573},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244693},
}
@article{bb249858,
  author    = {Wang, T.S. and Li, F.L. and Zhu, L. and Li, J.J. and Zhang, Z. and Shen, H.T.},
  title     = {Cross-Modal Retrieval: A Systematic Review of Methods and Future Directions},
  journal   = PIEEE,
  volume    = {112},
  number    = {11},
  month     = nov,
  year      = {2024},
  pages     = {1716--1754},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244694},
}
@article{bb249859,
  author    = {Luo, J.Y. and Zhao, Y.S. and Luo, X. and Xiao, Z.P. and Ju, W. and Shen, L. and Tao, D.C. and Zhang, M.},
  title     = {Cross-Domain Diffusion With Progressive Alignment for Efficient Adaptive Retrieval},
  journal   = IP,
  volume    = {34},
  year      = {2025},
  pages     = {1820--1834},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244695},
}
@article{bb249860,
  author    = {Zhang, H.W. and Yang, Y. and Qi, F. and Qian, S.S. and Xu, C.S.},
  title     = {Active Supervised Cross-Modal Retrieval},
  journal   = PAMI,
  volume    = {47},
  number    = {6},
  month     = jun,
  year      = {2025},
  pages     = {5112--5126},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244696},
}
@article{bb249861,
  author    = {Dang, Z.H. and Luo, M. and Wang, J.H. and Jia, C.Y. and Han, H.C. and Wan, H. and Dai, G. and Chang, X.J. and Wang, J.D.},
  title     = {Disentangled Noisy Correspondence Learning},
  journal   = IP,
  volume    = {34},
  year      = {2025},
  pages     = {2602--2615},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244697},
}
@article{bb249862,
  author    = {Si, L. and Guo, C. and Li, Z. and Yang, Y.},
  title     = {A unified framework of data augmentation using large language models for text-based cross-modal retrieval},
  journal   = PR,
  volume    = {167},
  year      = {2025},
  pages     = {111755},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244698},
}
@article{bb249863,
  author    = {Jin, M. and Hu, W.B. and Hong, R.C. and Zhu, L.},
  title     = {Revealing Security Flaws in Cross-Modal Retrieval Models Through Video Poisoning},
  journal   = CirSysVideo,
  volume    = {35},
  number    = {6},
  month     = jun,
  year      = {2025},
  pages     = {6184--6194},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244699},
}
@article{bb249864,
  author    = {Li, Y. and Deng, S. and Guan, C.M. and Gao, J.},
  title     = {Complementary two-branch Transformer for multi-label image retrieval},
  journal   = PR,
  volume    = {168},
  year      = {2025},
  pages     = {111806},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244700},
}
@article{bb249865,
  author    = {Zheng, C.Y. and Li, X. and Liang, X.Y. and Huang, L. and Du, S. and Nie, J. and Dong, J.Y.},
  title     = {Cross-Modal Progressive Perspective Matching Network for Remote Sensing Image-Text Retrieval},
  journal   = MultMed,
  volume    = {27},
  year      = {2025},
  pages     = {3966--3978},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244701},
}
@article{bb249866,
  author    = {Pu, R. and Qin, Y. and Peng, D.Z. and Song, X.M. and Zheng, H.M.},
  title     = {Deep Reversible Consistency Learning for Cross-Modal Retrieval},
  journal   = MultMed,
  volume    = {27},
  year      = {2025},
  pages     = {4095--4106},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244702},
}
@article{bb249867,
  author    = {Xu, Y. and Feng, Y.F. and Zhong, X. and Gao, Y. and Wu, Z.Z.},
  title     = {Hypergraph-Based Remaining Prototype Alignment for Open-Set Cross-Domain Image Retrieval},
  journal   = MultMed,
  volume    = {27},
  year      = {2025},
  pages     = {4627--4642},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244703},
}
@article{bb249868,
  author    = {Jiang, C. and Wang, Y.P. and Xiong, B.P.},
  title     = {Dual similarity enhanced hybrid orthogonal fusion for multimodal named entity recognition},
  journal   = PR,
  volume    = {169},
  year      = {2026},
  pages     = {111940},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244704},
}
@article{bb249869,
  author    = {Wang, Z. and Zhu, X.Z. and Yang, X. and Luo, G. and Li, H. and Tian, C.Y. and Dou, W.H. and Ge, J.Q. and Lu, L.W. and Qiao, Y. and Dai, J.F.},
  title     = {Parameter-Inverted Image Pyramid Networks for Visual Perception and Multimodal Understanding},
  journal   = PAMI,
  volume    = {47},
  number    = {11},
  month     = nov,
  year      = {2025},
  pages     = {10142--10159},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244705},
}
@article{bb249870,
  author    = {Jin, M. and Zhu, L. and Hong, R.C.},
  title     = {{BiSeR-LMA}: A Bidirectional Semantic Reasoning and Large Model Enhancement Approach for Text-Video Cross-Modal Retrieval},
  journal   = CirSysVideo,
  volume    = {35},
  number    = {11},
  month     = nov,
  year      = {2025},
  pages     = {11655--11666},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244706},
}
@article{bb249871,
  author    = {Jin, M. and Zhang, H.X. and Zhu, L. and Sun, J. and Liu, L.},
  title     = {Video and text semantic center alignment for text-video cross-modal retrieval},
  journal   = SP:IC,
  volume    = {140},
  year      = {2026},
  pages     = {117413},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244707},
}
@article{bb249872,
  author    = {Li, F.L. and Wang, Z.Q. and Wang, T.S. and Zhu, L. and Chang, X.J.},
  title     = {Generative Augmentation Hashing for Few-Shot Cross-Modal Retrieval},
  journal   = CirSysVideo,
  volume    = {35},
  number    = {12},
  month     = dec,
  year      = {2025},
  pages     = {12861--12873},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244708},
}
@article{bb249873,
  author    = {Gu, L.C. and Shen, X.J. and Sun, J. and Liu, Y. and Li, J. and Li, Z.H. and Cheung, S.C.S. and Wan, W.B.},
  title     = {Dual Prototypes-Based Personalized Federated Adversarial Cross-Modal Hashing},
  journal   = CirSysVideo,
  volume    = {35},
  number    = {12},
  month     = dec,
  year      = {2025},
  pages     = {12846--12860},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244709},
}
@article{bb249874,
  author    = {Ma, Q. and Jiang, Y. and Bai, C.},
  title     = {Boundary mutual information hashing for cross-modal retrieval},
  journal   = PR,
  volume    = {172},
  year      = {2026},
  pages     = {112658},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244710},
}
@article{bb249875,
  author    = {Wei, Y.H. and An, J.F.},
  title     = {Flexible Dual Multi-Modal Hashing for Incomplete Multi-Modal Retrieval},
  journal   = IJIG,
  volume    = {26},
  number    = {3},
  month     = may,
  year      = {2026},
  pages     = {2650021},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244711},
}
@article{bb249876,
  author    = {Debnath, A. and Rao, K.S. and Das, P.P.},
  title     = {Multi-Similarity Checking-Based Spoken Content Video Retrieval Using Enhanced Mayfly Optimization-Based Weighted Feature Selection},
  journal   = IJIG,
  volume    = {26},
  number    = {3},
  month     = may,
  year      = {2026},
  pages     = {2650017},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244712},
}
@article{bb249877,
  author    = {Tang, L. and Wang, L. and Wang, G. and Sun, M.Y. and Yan, F.},
  title     = {Feature Fusion {Mamba} Hashing via Decoupling for Cross-Modal Retrieval},
  journal   = SPLetters,
  volume    = {33},
  year      = {2026},
  pages     = {609--613},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244713},
}
@article{bb249878,
  author    = {Tan, W.T. and Li, F.L. and Zhu, L. and Guan, W. and Li, J.J. and Cheng, Z.Y. and Shen, H.T.},
  title     = {Dynamic Bit-Wise Semantic Transformer Hashing for Multi-Modal Retrieval},
  journal   = PAMI,
  volume    = {48},
  number    = {3},
  month     = mar,
  year      = {2026},
  pages     = {2954--2969},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244714},
}
@article{bb249879,
  author    = {Li, H.F. and Zhao, J. and Zhang, Y.F. and Wen, J.},
  title     = {Bidirectional Cross-Modal Collaborative Alignment via Semantic-Guided Visual Embeddings for Partially Relevant Video Retrieval},
  journal   = IP,
  volume    = {35},
  year      = {2026},
  pages     = {1423--1435},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244715},
}
@article{bb249880,
  author    = {Jin, M. and Hong, R.C.},
  title     = {{MDA-MAA}: A Collaborative Augmentation Approach for Generalizing Cross-Domain Retrieval},
  journal   = IP,
  volume    = {35},
  year      = {2026},
  pages     = {1595--1606},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244716},
}
@inproceedings{bb249881,
  author    = {Gizdov, A. and Ullman, S. and Harari, D.},
  title     = {Seeing more with less: human-like representations in vision models},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {4408--4417},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244717},
}
@inproceedings{bb249882,
  author    = {Liu, Y.K. and Zhang, Y.J. and Cai, J.Y. and Jiang, X.L. and Hu, Y. and Yao, J.C. and Wang, Y.F. and Xie, W.},
  title     = {{LamRA}: Large Multimodal Model as Your Advanced Retrieval Assistant},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {4015--4025},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244718},
}
@inproceedings{bb249883,
  author    = {Chen, W. and Li, L. and Yang, Y.Q. and Wen, B. and Yang, F. and Gao, T.T. and Wu, Y. and Chen, L.},
  title     = {{CoMM}: A Coherent Interleaved Image-Text Dataset for Multimodal Understanding and Generation},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {8073--8082},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244719},
}
@inproceedings{bb249884,
  author    = {Kim, S. and Zhu, X.L. and Lin, X.F. and Bastan, M. and Gray, D. and Kwak, S.},
  title     = {{GENIUS}: A Generative Framework for Universal Multimodal Search},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {19659--19669},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244720},
}
@inproceedings{bb249885,
  author    = {Duan, S.Y. and Sun, Y. and Peng, D.Z. and Liu, Z. and Song, X.M. and Hu, P.},
  title     = {Fuzzy Multimodal Learning for Trusted Cross-modal Retrieval},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {20747--20756},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244721},
}
@inproceedings{bb249886,
  author    = {Hur, C. and Hong, J.H. and Lee, D.H. and Kang, D. and Myeong, S. and Park, S.H. and Park, H.},
  title     = {Narrating the Video: Boosting Text-Video Retrieval via Comprehensive Utilization of Frame-Level Captions},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {24077--24086},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244722},
}
@inproceedings{bb249887,
  author    = {Zha, Q.X. and Liu, X. and Peng, S.J. and Cheung, Y.M. and Xu, X. and Wang, N.N.},
  title     = {{ReCon}: Enhancing True Correspondence Discrimination through Relation Consistency for Robust Noisy Correspondence Learning},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {29680--29689},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244723},
}
@inproceedings{bb249888,
  author    = {Cui, Y.H. and Zu, X.X. and Zhang, W.H. and Zhao, Z.Z. and Gao, J.Y.},
  title     = {Incorporating Dense Knowledge Alignment into Unified Multimodal Representation Models},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {29733--29743},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244724},
}
@inproceedings{bb249889,
  author    = {Lin, Z.R. and Wang, Z. and Qian, T.W. and Mu, P. and Chan, S. and Bai, C.},
  title     = {{NeighborRetr}: Balancing Hub Centrality in Cross-Modal Retrieval},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {9263--9273},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244725},
}
@inproceedings{bb249890,
  author    = {Zhang, X. and Zhang, Y.Z. and Xie, W. and Li, M.X. and Dai, Z.Q. and Long, D.K. and Xie, P.J. and Zhang, M. and Li, W.J. and Zhang, M.},
  title     = {Bridging Modalities: Improving Universal Multimodal Retrieval by Multimodal Large Language Models},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {9274--9285},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244726},
}
@inproceedings{bb249891,
  author    = {Zhao, S. and Xia, Q.M. and Guo, X. and Zou, P. and Zheng, M. and Wu, H. and Wen, C. and Wang, C.},
  title     = {{SP3D}: Boosting Sparsely-Supervised {3D} Object Detection via Accurate Cross-Modal Semantic Prompts},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {29374--29384},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244727},
}
% NOTE(review): the source packed the proceedings volume into the pages field
% ("LXXXVII: 387-404"); it is split into volume and pages here.
@inproceedings{bb249892,
  author    = {Wei, C. and Chen, Y. and Chen, H.N. and Hu, H.X. and Zhang, G. and Fu, J. and Ritter, A. and Chen, W.},
  title     = {{UNIIR}: Training and Benchmarking Universal Multimodal Information Retrievers},
  booktitle = ECCV24,
  volume    = {LXXXVII},
  year      = {2024},
  pages     = {387--404},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244728},
}
@inproceedings{bb249893,
  author    = {Chen, S.J. and Chen, X. and Zhang, C. and Li, M.S. and Yu, G. and Fei, H. and Zhu, H.Y. and Fan, J.Y. and Chen, T.},
  title     = {{LL3DA}: Visual Interactive Instruction Tuning for {Omni-3D} Understanding, Reasoning, and Planning},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {26418--26428},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244729},
}
@inproceedings{bb249894,
  author    = {Xu, H.R. and Peng, P.X. and Tan, G. and Li, Y. and Xu, X.H. and Tian, Y.H.},
  title     = {{DMR}: Decomposed Multi-Modality Representations for Frames and Events Fusion in Visual Reinforcement Learning},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {26498--26508},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244730},
}
@inproceedings{bb249895,
  author    = {You, C.Y. and Mint, Y.F. and Dai, W.C. and Sekhon, J.S. and Staib, L. and Duncan, J.S.},
  title     = {Calibrating Multi-modal Representations: A Pursuit of Group Robustness without Annotations},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {26140--26150},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244731},
}
@inproceedings{bb249896,
  author    = {Zhang, Z.H. and Cao, S.C. and Wang, Y.X.},
  title     = {{TAMM}: {TriAdapter} Multi-Modal Learning for {3D} Shape Understanding},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {21413--21423},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244732},
}
@inproceedings{bb249897,
  author    = {Zhao, Z. and Chen, M.X. and Dai, T.J. and Yao, J.C. and Han, B. and Zhang, Y. and Wang, Y.F.},
  title     = {Mitigating Noisy Correspondence by Geometrical Structure Consistency Learning},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {27371--27380},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244733},
}
@inproceedings{bb249898,
  author    = {Tuzcuoglu, O. and Koksal, A. and Sofu, B. and Kalkan, S. and Alatan, A.A.},
  title     = {{XoFTR}: Cross-modal Feature Matching Transformer},
  booktitle = IMW24,
  year      = {2024},
  pages     = {4275--4286},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244734},
}
@inproceedings{bb249899,
  author    = {Wu, J.L. and Hu, X. and Wang, Y.Q. and Pang, B. and Soricut, R.},
  title     = {{Omni-SMoLA}: Boosting Generalist Multimodal Models with Soft Mixture of Low-Rank Experts},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {14205--14215},
  bibsource = {http://www.visionbib.com/bibliography/applicat830cm1.html#TT244735},
}
Last update: Feb 26, 2026 at 10:58:24