@inproceedings{bb240200,
  author    = {Cocchi, F. and Moratelli, N. and Cornia, M. and Baraldi, L. and Cucchiara, R.},
  title     = {Augmenting Multimodal LLMs with Self-Reflective Tokens for Knowledge-based Visual Question Answering},
  booktitle = CVPR25,
  pages     = {9199-9209},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqallm5.html#TT235117},
}

@inproceedings{bb240201,
  author    = {Yang, Z. and Tao, Z. and Chen, Q. and Li, L. and Qi, Y.K. and van den Hengel, A.J. and Huang, Q.M.},
  title     = {Separation of powers: On segregating knowledge from observation in LLM-enabled knowledge-based visual question answering},
  booktitle = CVPR25,
  pages     = {24753-24762},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqallm5.html#TT235118},
}

@inproceedings{bb240202,
  author    = {Cai, M. and Huang, Z.Y. and Li, Y.H. and Ojha, U. and Wang, H.H. and Lee, Y.J.},
  title     = {An Investigation on LLMs' Visual Understanding Ability Using SVG for Image-Text Bridging},
  booktitle = WACV25,
  pages     = {5377-5386},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqallm5.html#TT235119},
}

% The ampersand in the title is escaped: a bare one is LaTeX's alignment-tab
% character and aborts typesetting of the bibliography. The title is believed
% to read "Perceive, Query ..." (comma, not full stop) as published -- confirm
% against the proceedings if in doubt.
@inproceedings{bb240203,
  author    = {Amoroso, R. and Zhang, G. and Koner, R. and Baraldi, L. and Cucchiara, R. and Tresp, V.},
  title     = {Perceive, Query \& Reason: Enhancing Video QA with Question-Guided Temporal Queries},
  booktitle = WACV25,
  pages     = {8853-8862},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqallm5.html#TT235120},
}

@inproceedings{bb240204,
  author    = {Weng, W.X. and Zhang, R. and Meng, X.J. and Zhu, J. and Liu, Q. and Yuan, C.},
  title     = {Unsupervised Domain Adaptive Visual Question Answering in the Era of Multi-Modal Large Language Models},
  booktitle = WACV25,
  pages     = {6248-6258},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqallm5.html#TT235121},
}

@inproceedings{bb240205,
  author    = {Sun, G.H. and Qin, C. and Wang, J.M. and Chen, Z.Y. and Xu, R. and Tao, Z.Q.},
  title     = {SQ-LLAVA: Self-questioning for Large Vision-language Assistant},
  booktitle = ECCV24,
  pages     = {IX: 156-172},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqallm5.html#TT235122},
}

@inproceedings{bb240206,
  author    = {Ye, Q. and Yu, Z.T. and Shao, R. and Xie, X.Y. and Torr, P.H.S. and Cao, X.C.},
  title     = {CAT: Enhancing Multimodal Large Language Model to Answer Questions in Dynamic Audio-visual Scenarios},
  booktitle = ECCV24,
  pages     = {X: 146-164},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqallm5.html#TT235123},
}

@inproceedings{bb240207,
  author    = {Li, Z. and Jasani, B. and Tang, P. and Ghadar, S.},
  title     = {Synthesize Step-by-Step: Tools, Templates and LLMs as Data Generators for Reasoning-Based Chart VQA},
  booktitle = CVPR24,
  pages     = {13613-13623},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqallm5.html#TT235124},
}

@inproceedings{bb240208,
  author    = {Ozdemir, O. and Akagunduz, E.},
  title     = {Enhancing Visual Question Answering through Question-Driven Image Captions as Prompts},
  booktitle = Prompting24,
  pages     = {1562-1571},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqallm5.html#TT235125},
}

@inproceedings{bb240209,
  author    = {Ranasinghe, K. and Shukla, S.N. and Poursaeed, O. and Ryoo, M.S. and Lin, T.Y.},
  title     = {Learning to Localize Objects Improves Spatial Reasoning in Visual-LLMs},
  booktitle = CVPR24,
  pages     = {12977-12987},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqallm5.html#TT235126},
}

@inproceedings{bb240210,
  author    = {Blau, T. and Fogel, S. and Ronen, R. and Golts, A. and Tsiper, S. and Avraham, E.B. and Aberdam, A. and Ganz, R. and Litman, R.},
  title     = {GRAM: Global Reasoning for Multi-Page VQA},
  booktitle = CVPR24,
  pages     = {15598-15607},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqallm5.html#TT235127},
}

@inproceedings{bb240211,
  author    = {Li, L. and Peng, J.W. and Chen, H. and Gao, C.Y. and Yang, X.},
  title     = {How to Configure Good In-Context Sequence for Visual Question Answering},
  booktitle = CVPR24,
  pages     = {26700-26710},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqallm5.html#TT235128},
}

@inproceedings{bb240212,
  author    = {Agrawal, A. and Lezcano, C.M.S. and Heredia Marin, I.B. and Sethi, P.S.},
  title     = {Listen Then See: Video Alignment with Speaker Attention},
  booktitle = MULA24,
  pages     = {2018-2027},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqallm5.html#TT235129},
}

@inproceedings{bb240213,
  author    = {Tan, R. and Sun, X. and Hu, P. and Wang, J.H. and Deilamsalehy, H. and Plummer, B.A. and Russell, B. and Saenko, K.},
  title     = {Koala: Key Frame-Conditioned Long Video-LLM},
  booktitle = CVPR24,
  pages     = {13581-13591},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqallm5.html#TT235130},
}

@inproceedings{bb240214,
  author    = {Ganz, R. and Kittenplon, Y. and Aberdam, A. and Avraham, E.B. and Nuriel, O. and Mazor, S. and Litman, R.},
  title     = {Question Aware Vision Transformer for Multimodal Reasoning},
  booktitle = CVPR24,
  pages     = {13861-13871},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqallm5.html#TT235131},
}

@inproceedings{bb240215,
  author    = {Bansal, H. and Bitton, Y. and Szpektor, I. and Chang, K.W. and Grover, A.},
  title     = {VideoCon: Robust Video-Language Alignment via Contrast Captions},
  booktitle = CVPR24,
  pages     = {13927-13937},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqallm5.html#TT235132},
}

@inproceedings{bb240216,
  author    = {Wang, S.W. and Zhang, L.L. and Zhu, L.J. and Qin, T. and Yap, K.H. and Zhang, X.Y. and Liu, J.},
  title     = {CoG-DQA: Chain-of-Guiding Learning with Large Language Models for Diagram Question Answering},
  booktitle = CVPR24,
  pages     = {13969-13979},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqallm5.html#TT235133},
}

@inproceedings{bb240217,
  author    = {Khan, Z. and BG, V.K. and Schulter, S. and Fu, Y. and Chandraker, M.},
  title     = {Self-Training Large Language Models for Improved Visual Program Synthesis With Visual Reinforcement},
  booktitle = CVPR24,
  pages     = {14344-14353},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqallm5.html#TT235134},
}

@inproceedings{bb240218,
  author    = {Liao, Z. and Li, J.T. and Niu, L. and Zhang, L.Q.},
  title     = {Align and Aggregate: Compositional Reasoning with Video Alignment and Answer Aggregation for Video Question-Answering},
  booktitle = CVPR24,
  pages     = {13395-13404},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqallm5.html#TT235135},
}

@inproceedings{bb240219,
  author    = {Pan, J.T. and Lin, Z. and Ge, Y.Y. and Zhu, X.T. and Zhang, R.R. and Wang, Y. and Qiao, Y. and Li, H.S.},
  title     = {Retrieving-to-Answer: Zero-Shot Video Question Answering with Frozen Large Language Models},
  booktitle = MMFM23,
  pages     = {272-283},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqallm5.html#TT235136},
}

@inproceedings{bb240220,
  author    = {Guo, J.X. and Li, J. and Li, D.X. and Tiong, A.M.H. and Li, B.Y. and Tao, D.C. and Hoi, S.},
  title     = {From Images to Textual Prompts: Zero-shot Visual Question Answering with Frozen Large Language Models},
  booktitle = CVPR23,
  pages     = {10867-10877},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqallm5.html#TT235137},
}

@article{bb240221,
  author    = {Li, H.D. and Zhang, X.F. and Qu, H.},
  title     = {DDFAV: Remote Sensing Large Vision Language Models Dataset and Evaluation Benchmark},
  journal   = RS,
  volume    = {17},
  number    = {4},
  pages     = {719},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat803revllm6.html#TT235138},
}

@article{bb240222,
  author    = {Xu, P. and Shao, W.Q. and Zhang, K.P. and Gao, P. and Liu, S. and Lei, M. and Meng, F.Q. and Huang, S.Y. and Qiao, Y. and Luo, P.},
  title     = {LVLM-EHub: A Comprehensive Evaluation Benchmark for Large Vision-Language Models},
  journal   = PAMI,
  volume    = {47},
  number    = {3},
  pages     = {1877-1893},
  month     = {March},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat803revllm6.html#TT235139},
}

@inproceedings{bb240223,
  author    = {Wang, J. and Lv, C.H. and Li, X. and Dong, S.C. and Li, H.D. and Yao, K. and Li, C. and Shao, W.Q. and Luo, P.},
  title     = {Forensics-Bench: A Comprehensive Forgery Detection Benchmark Suite for Large Vision Language Models},
  booktitle = CVPR25,
  pages     = {4233-4245},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat803revllm6.html#TT235140},
}

@article{bb240224,
  author    = {Qin, Z. and Chen, D.Y. and Zhang, W.H. and Yao, L. and Huang, Y.L. and Ding, B.L. and Li, Y.L. and Deng, S.G.},
  title     = {The Synergy Between Data and Multi-Modal Large Language Models: A Survey From Co-Development Perspective},
  journal   = PAMI,
  volume    = {47},
  number    = {10},
  pages     = {8415-8434},
  month     = {October},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat803revllm6.html#TT235141},
}

@inproceedings{bb240225,
  author    = {Zhang, Y.H. and Su, Y.C. and Liu, Y.M. and Wang, X.H. and Burgess, J. and Sui, E. and Wang, C.Y. and Aklilu, J. and Lozano, A. and Wei, A. and Schmidt, L. and Yeung Levy, S.},
  title     = {Automated Generation of Challenging Multiple-Choice Questions for Vision Language Model Evaluation},
  booktitle = CVPR25,
  pages     = {29580-29590},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat803revllm6.html#TT235142},
}

@inproceedings{bb240226,
  author    = {Jiang, X. and Zheng, J.W. and Liu, R.P. and Li, J.H. and Zhang, J.M. and Matthiesen, S. and Stiefelhagen, R.},
  title     = {@BENCH: Benchmarking Vision-Language Models for Human-centered Assistive Technology},
  booktitle = WACV25,
  pages     = {3934-3943},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat803revllm6.html#TT235143},
}

% Title corrected: the model name is "LLaVA-Critic"; the stray extra leading
% "L" ("LLLaVA") was a transcription error.
@inproceedings{bb240227,
  author    = {Xiong, T.Y. and Wang, X. and Guo, D. and Ye, Q.H. and Fan, H.Q. and Gu, Q.Q. and Huang, H. and Li, C.Y.},
  title     = {LLaVA-Critic: Learning to Evaluate Multimodal Models},
  booktitle = CVPR25,
  pages     = {13618-13628},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat803revllm6.html#TT235144},
}

@inproceedings{bb240228,
  author    = {Zhang, Q.H. and Ning, M. and Liu, Z. and Huang, Y. and Yang, S. and Wang, Y.B. and Ye, J.Y. and Chen, X. and Song, Y.B. and Yuan, L.},
  title     = {UPME: An Unsupervised Peer Review Framework for Multimodal Large Language Model Evaluation},
  booktitle = CVPR25,
  pages     = {9165-9174},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat803revllm6.html#TT235145},
}

@inproceedings{bb240229,
  author    = {Fu, C. and Dai, Y.H. and Luo, Y.D. and Li, L. and Ren, S. and Zhang, R.R. and Wang, Z. and Zhou, C.Y. and Shen, Y.H. and Zhang, M.D. and Chen, P.X. and Li, Y.W. and Lin, S.H. and Zhao, S. and Li, K. and Xu, T. and Zheng, X. and Chen, E. and Shan, C.F. and He, R. and Sun, X.},
  title     = {Video-MME: The First-Ever Comprehensive Evaluation Benchmark of Multi-modal LLMs in Video Analysis},
  booktitle = CVPR25,
  pages     = {24108-24118},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat803revllm6.html#TT235146},
}

@inproceedings{bb240230,
  author    = {Zhang, J.Y. and Yang, H. and Li, A. and Guo, X. and Wang, P. and Wang, H.M. and Chen, Y.R. and Li, H.},
  title     = {MLLM-LLaVA-FL: Multimodal Large Language Model Assisted Federated Learning},
  booktitle = WACV25,
  pages     = {4066-4076},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat803revllm6.html#TT235147},
}

% The first author's surname is written with the LaTeX ligature macro in a
% brace group rather than a raw non-ASCII character, so classic 8-bit BibTeX
% sorts and labels it correctly; the rendered name is unchanged.
@inproceedings{bb240231,
  author    = {Sn{\ae}bjarnarson, V. and Du, K. and Stoehr, N. and Belongie, S. and Cotterell, R. and Lang, N. and Frank, S.},
  title     = {Taxonomy-Aware Evaluation of Vision-Language Models},
  booktitle = CVPR25,
  pages     = {9109-9120},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat803revllm6.html#TT235148},
}

% NOTE(review): the source listing dropped a word and the question mark from
% the title (it read "How Many Are in This Image A Safety ..."). Restored here
% from the preprint title as recalled -- confirm against the proceedings.
@inproceedings{bb240232,
  author    = {Tu, H.Q. and Cui, C. and Wang, Z.J. and Zhou, Y.Y. and Zhao, B.C. and Han, J.L. and Zhou, W.C.S. and Yao, H.X. and Xie, C.},
  title     = {How Many Unicorns Are in This Image? A Safety Evaluation Benchmark for Vision LLMs},
  booktitle = ECCV24,
  pages     = {LI: 37-55},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803revllm6.html#TT235149},
}

@inproceedings{bb240233,
  author    = {Liu, X. and Zhu, Y.C. and Gu, J.D. and Lan, Y. and Yang, C. and Qiao, Y.},
  title     = {MM-Safetybench: A Benchmark for Safety Evaluation of Multimodal Large Language Models},
  booktitle = ECCV24,
  pages     = {LVI: 386-403},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803revllm6.html#TT235150},
}

@inproceedings{bb240234,
  author    = {Roberts, J. and Luddecke, T. and Sheikh, R. and Han, K. and Albanie, S.},
  title     = {Charting New Territories: Exploring the Geographic and Geospatial Capabilities of Multimodal LLMs},
  booktitle = EarthVision24,
  pages     = {554-563},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803revllm6.html#TT235151},
}

@inproceedings{bb240235,
  author    = {Verma, A.A. and Saeidi, A. and Hegde, S. and Therala, A. and Bardoliya, F.D. and Machavarapu, N. and Ravindhiran, S.A.K. and Malyala, S. and Chatterjee, A. and Yang, Y.Z. and Baral, C.},
  title     = {Evaluating Multimodal Large Language Models across Distribution Shifts and Augmentations},
  booktitle = GenerativeFM24,
  pages     = {5314-5324},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803revllm6.html#TT235152},
}

@inproceedings{bb240236,
  author    = {Hu, Y.T. and Li, T. and Lu, Q. and Shao, W.Q. and He, J.J. and Qiao, Y. and Luo, P.},
  title     = {OmniMedVQA: A New Large-Scale Comprehensive Evaluation Benchmark for Medical LVLM},
  booktitle = CVPR24,
  pages     = {22170-22183},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803revllm6.html#TT235153},
}

@article{bb240237,
  author    = {Zhou, N. and Fan, J.P.},
  title     = {Automatic image-text alignment for large-scale web image indexing and retrieval},
  journal   = PR,
  volume    = {48},
  number    = {1},
  pages     = {205-219},
  year      = {2015},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235154},
}

@article{bb240238,
  author    = {Huang, F.R. and Zhang, X.M. and Zhao, Z.H. and Li, Z.J.},
  title     = {Bi-Directional Spatial-Semantic Attention Networks for Image-Text Matching},
  journal   = IP,
  volume    = {28},
  number    = {4},
  pages     = {2008-2020},
  month     = {April},
  year      = {2019},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235155},
}

@article{bb240239,
  author    = {Otto, C. and Springstein, M. and Anand, A. and Ewerth, R.},
  title     = {Characterization and classification of semantic image-text relations},
  journal   = MultInfoRetr,
  volume    = {9},
  number    = {1},
  pages     = {31-45},
  month     = {March},
  year      = {2020},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235156},
}

@article{bb240240,
  author    = {Niu, K. and Huang, Y. and Wang, L.},
  title     = {Re-ranking image-text matching by adaptive metric fusion},
  journal   = PR,
  volume    = {104},
  pages     = {107351},
  year      = {2020},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235157},
}

@article{bb240241,
  author    = {Huang, Y. and Wang, Y.M. and Zeng, Y. and Huang, J.S. and Chai, Z.H. and Wang, L.},
  title     = {Unpaired Image-Text Matching via Multimodal Aligned Conceptual Knowledge},
  journal   = PAMI,
  volume    = {47},
  number    = {7},
  pages     = {5160-5176},
  month     = {July},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235158},
}

@article{bb240242,
  author    = {Wen, K.Y. and Gu, X.D. and Cheng, Q.R.},
  title     = {Learning Dual Semantic Relations With Graph Attention for Image-Text Matching},
  journal   = CirSysVideo,
  volume    = {31},
  number    = {7},
  pages     = {2866-2879},
  month     = {July},
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235159},
}

@article{bb240243,
  author    = {Yang, S. and Li, Q. and Li, W.H. and Li, X.Y. and Liu, A.A.},
  title     = {Dual-Level Representation Enhancement on Characteristic and Context for Image-Text Retrieval},
  journal   = CirSysVideo,
  volume    = {32},
  number    = {11},
  pages     = {8037-8050},
  month     = {November},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235160},
}

@article{bb240244,
  author    = {Jing, Y. and Wang, W. and Wang, L. and Tan, T.N.},
  title     = {Learning Aligned Image-Text Representations Using Graph Attentive Relational Network},
  journal   = IP,
  volume    = {30},
  pages     = {1840-1852},
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235161},
}

@inproceedings{bb240245,
  author    = {Zhao, F. and Huang, Y.Z. and Wang, L. and Tan, T.N.},
  title     = {Deep Semantic Ranking Based Hashing for Multi-Label Image Retrieval},
  booktitle = CVPR15,
  pages     = {1556-1564},
  year      = {2015},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235162},
}

@article{bb240246,
  author    = {Lan, H. and Zhang, P.},
  title     = {Learning and Integrating Multi-Level Matching Features for Image-Text Retrieval},
  journal   = SPLetters,
  volume    = {29},
  pages     = {374-378},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235163},
}

@article{bb240247,
  author    = {Wu, J. and Wu, C.L. and Lu, J. and Wang, L.Q. and Cui, X.R.},
  title     = {Region Reinforcement Network With Topic Constraint for Image-Text Matching},
  journal   = CirSysVideo,
  volume    = {32},
  number    = {1},
  pages     = {388-397},
  month     = {January},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235164},
}

@article{bb240248,
  author    = {Malali, N. and Keller, Y.},
  title     = {Learning to Embed Semantic Similarity for Joint Image-Text Retrieval},
  journal   = PAMI,
  volume    = {44},
  number    = {12},
  pages     = {10252-10260},
  month     = {December},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235165},
}

@article{bb240249,
  author    = {Tian, M.X. and Wu, X.X. and Jia, Y.D.},
  title     = {Adaptive Latent Graph Representation Learning for Image-Text Matching},
  journal   = IP,
  volume    = {32},
  pages     = {471-482},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235166},
}

@article{bb240250,
  author    = {Li, K.P. and Zhang, Y.L. and Li, K. and Li, Y.Y. and Fu, Y.},
  title     = {Image-Text Embedding Learning via Visual and Textual Semantic Reasoning},
  journal   = PAMI,
  volume    = {45},
  number    = {1},
  pages     = {641-656},
  month     = {January},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235167},
}

@inproceedings{bb240251,
  author    = {Li, K.P. and Zhang, Y.L. and Li, K. and Li, Y.Y. and Fu, Y.},
  title     = {Visual Semantic Reasoning for Image-Text Matching},
  booktitle = ICCV19,
  pages     = {4653-4661},
  year      = {2019},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235168},
}

@article{bb240252,
  author    = {Diao, H.W. and Zhang, Y. and Liu, W. and Ruan, X. and Lu, H.C.},
  title     = {Plug-and-Play Regulators for Image-Text Matching},
  journal   = IP,
  volume    = {32},
  pages     = {2322-2334},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235169},
}

@article{bb240253,
  author    = {Tian, Y.M. and Ding, A. and Wang, D. and Luo, X.M. and Wan, B. and Wang, Y.F.},
  title     = {Bi-Attention enhanced representation learning for image-text matching},
  journal   = PR,
  volume    = {140},
  pages     = {109548},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235170},
}

@article{bb240254,
  author    = {Zhang, K. and Mao, Z.D. and Liu, A.A. and Zhang, Y.D.},
  title     = {Unified Adaptive Relevance Distinguishable Attention Network for Image-Text Matching},
  journal   = MultMed,
  volume    = {25},
  pages     = {1320-1332},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235171},
}

@article{bb240255,
  author    = {Xiong, G.X. and Meng, M. and Zhang, T.Z. and Zhang, D.M. and Zhang, Y.D.},
  title     = {Reference-Aware Adaptive Network for Image-Text Matching},
  journal   = CirSysVideo,
  volume    = {34},
  number    = {10},
  pages     = {9678-9691},
  month     = {October},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235172},
}

@article{bb240256,
  author    = {Liu, Z.J. and Chen, F.L. and Xu, J. and Pei, W.J. and Lu, G.M.},
  title     = {Image-Text Retrieval With Cross-Modal Semantic Importance Consistency},
  journal   = CirSysVideo,
  volume    = {33},
  number    = {5},
  pages     = {2465-2476},
  month     = {May},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235173},
}

@article{bb240257,
  author    = {Shang, H. and Zhao, G.S. and Shi, J. and Qian, X.M.},
  title     = {A Multiview Text Imagination Network Based on Latent Alignment for Image-Text Matching},
  journal   = IEEE_Int_Sys,
  volume    = {38},
  number    = {3},
  pages     = {41-50},
  month     = {May},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235174},
}

@article{bb240258,
  author    = {Liu, C. and Zhang, Y.Q. and Wang, H.S. and Chen, W.H. and Wang, F. and Huang, Y. and Shen, Y.D. and Wang, L.},
  title     = {Efficient Token-Guided Image-Text Retrieval With Consistent Multimodal Contrastive Training},
  journal   = IP,
  volume    = {32},
  pages     = {3622-3633},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235175},
}

@article{bb240259,
  author    = {Li, W.R. and Ma, Z.Y. and Deng, L.J. and Fan, X.P. and Tian, Y.H.},
  title     = {Neuron-Based Spiking Transmission and Reasoning Network for Robust Image-Text Retrieval},
  journal   = CirSysVideo,
  volume    = {33},
  number    = {7},
  pages     = {3516-3528},
  month     = {July},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235176},
}

@article{bb240260,
  author    = {Li, W.R. and Ma, Z.Y. and Shi, J.Q. and Fan, X.P.},
  title     = {The Style Transformer With Common Knowledge Optimization for Image-Text Retrieval},
  journal   = SPLetters,
  volume    = {30},
  pages     = {1197-1201},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235177},
}

@article{bb240261,
  author    = {Zhu, H.G. and Zhang, C.J. and Wei, Y.C. and Huang, S. and Zhao, Y.},
  title     = {ESA: External Space Attention Aggregation for Image-Text Retrieval},
  journal   = CirSysVideo,
  volume    = {33},
  number    = {10},
  pages     = {6131-6143},
  month     = {October},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235178},
}

@article{bb240262,
  author    = {Li, Z. and Guo, C. and Feng, Z. and Hwang, J.N. and Du, Z.T.},
  title     = {Integrating Language Guidance Into Image-Text Matching for Correcting False Negatives},
  journal   = MultMed,
  volume    = {26},
  pages     = {103-116},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235179},
}

@article{bb240263,
  author    = {Zhang, Y. and Ji, Z. and Wang, D. and Pang, Y.W. and Li, X.L.},
  title     = {USER: Unified Semantic Enhancement With Momentum Contrast for Image-Text Retrieval},
  journal   = IP,
  volume    = {33},
  pages     = {595-609},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235180},
}

@article{bb240264,
  author    = {Zhuang, J. and Yu, J. and Ding, Y. and Qu, X.Y. and Hu, Y.},
  title     = {Towards Fast and Accurate Image-Text Retrieval With Self-Supervised Fine-Grained Alignment},
  journal   = MultMed,
  volume    = {26},
  pages     = {1361-1372},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235181},
}

@article{bb240265,
  author    = {Liu, X. and He, Y. and Cheung, Y.M. and Xu, X. and Wang, N.N.},
  title     = {Learning Relationship-Enhanced Semantic Graph for Fine-Grained Image-Text Matching},
  journal   = Cyber,
  volume    = {54},
  number    = {2},
  pages     = {948-961},
  month     = {February},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235182},
}

@article{bb240266,
  author    = {Li, W.H. and Yang, S. and Li, Q. and Li, X.Y. and Liu, A.A.},
  title     = {Commonsense-Guided Semantic and Relational Consistencies for Image-Text Retrieval},
  journal   = MultMed,
  volume    = {26},
  pages     = {1867-1880},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235183},
}

@article{bb240267,
  author    = {Wu, D.Q. and Li, H.H. and Gu, C. and Liu, H. and Xu, C. and Hou, Y.X. and Guo, L.},
  title     = {Feature First: Advancing Image-Text Retrieval Through Improved Visual Features},
  journal   = MultMed,
  volume    = {26},
  pages     = {3827-3841},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235184},
}

@article{bb240268,
  author    = {Yang, R. and Wang, S. and Gu, Y. and Wang, J.H. and Sun, Y.Z. and Zhang, H. and Liao, Y. and Jiao, L.C.},
  title     = {Continual Learning for Cross-Modal Image-Text Retrieval Based on Domain-Selective Attention},
  journal   = PR,
  volume    = {149},
  pages     = {110273},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235185},
}

% The ampersand in the title is escaped: a bare one is LaTeX's alignment-tab
% character and aborts typesetting of the bibliography.
@article{bb240269,
  author    = {Pan, R.J. and Yang, H. and Li, C. and Yang, J.H.},
  title     = {Joint Intra \& Inter-Grained Reasoning: A New Look Into Semantic Consistency of Image-Text Retrieval},
  journal   = MultMed,
  volume    = {26},
  pages     = {4912-4925},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235186},
}

@article{bb240270,
  author    = {Zhang, K. and Hu, B. and Zhang, H. and Li, Z. and Mao, Z.D.},
  title     = {Enhanced Semantic Similarity Learning Framework for Image-Text Matching},
  journal   = CirSysVideo,
  volume    = {34},
  number    = {4},
  pages     = {2973-2988},
  month     = {April},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235187},
}

@inproceedings{bb240271,
  author    = {Fu, Z.R. and Mao, Z.D. and Song, Y. and Zhang, Y.D.},
  title     = {Learning Semantic Relationship among Instances for Image-Text Matching},
  booktitle = CVPR23,
  pages     = {15159-15168},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235188},
}

@article{bb240272,
  author    = {Diao, H. and Zhang, Y. and Gao, S. and Ruan, X. and Lu, H.C.},
  title     = {Deep Boosting Learning: A Brand-New Cooperative Approach for Image-Text Matching},
  journal   = IP,
  volume    = {33},
  pages     = {3341-3352},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235189},
}

@inproceedings{bb240273,
  author    = {Zhang, Y. and Lu, H.C.},
  title     = {Deep Cross-Modal Projection Learning for Image-Text Matching},
  booktitle = ECCV18,
  pages     = {I: 707-723},
  year      = {2018},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235190},
}

@article{bb240274,
  author    = {Cao, M. and Bai, Y. and Cao, Z.Q. and Nie, L.Q. and Zhang, M.},
  title     = {Efficient Image-Text Retrieval via Keyword-Guided Pre-Screening},
  journal   = CirSysVideo,
  volume    = {34},
  number    = {6},
  pages     = {5132-5145},
  month     = {June},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235191},
}

@article{bb240275,
  author    = {Li, Z. and Zhang, L. and Zhang, K. and Zhang, Y.D. and Mao, Z.D.},
  title     = {Improving Image-Text Matching With Bidirectional Consistency of Cross-Modal Alignment},
  journal   = CirSysVideo,
  volume    = {34},
  number    = {7},
  pages     = {6590-6607},
  month     = {July},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235192},
}

@article{bb240276,
  author    = {Li, Z. and Zhang, L. and Zhang, K. and Zhang, Y.D. and Mao, Z.D.},
  title     = {Fast, Accurate, and Lightweight Memory-Enhanced Embedding Learning Framework for Image-Text Retrieval},
  journal   = CirSysVideo,
  volume    = {34},
  number    = {7},
  pages     = {6542-6558},
  month     = {July},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235193},
}

@article{bb240277,
        AUTHOR = {Cui, Z. and Hu, Y.L. and Sun, Y.F. and Yin, B.C.},
        TITLE = {Context-aware relation enhancement and similarity reasoning for image-text retrieval},
        JOURNAL = IET-CV,
        VOLUME = {18},
        YEAR = {2024},
        NUMBER = {5},
        PAGES = {652--665},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235194},
}

@article{bb240278,
        AUTHOR = {Pan, Z.X. and Mao, Y.C. and Xiong, L. and Pang, T.F. and Ping, P.},
        TITLE = {{MFAE}: Multimodal Fusion and Alignment for Entity-level Disinformation Detection},
        JOURNAL = PRL,
        VOLUME = {184},
        YEAR = {2024},
        PAGES = {59--65},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235195},
}

@article{bb240279,
        AUTHOR = {Pu, X. and Wang, Z.W. and Yuan, L. and Wu, Y. and Jing, L.P. and Gao, X.B.},
        TITLE = {{GADNet}: Improving image-text matching via graph-based aggregation and disentanglement},
        JOURNAL = PR,
        VOLUME = {157},
        YEAR = {2025},
        PAGES = {110900},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235196},
}

@article{bb240280,
        AUTHOR = {Pu, X. and Yang, P. and Yuan, L. and Gao, X.B.},
        TITLE = {Improving Image-Text Matching by Integrating Word Sense Disambiguation},
        JOURNAL = SPLetters,
        VOLUME = {31},
        YEAR = {2024},
        PAGES = {2695--2699},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235197},
}

@article{bb240281,
        AUTHOR = {Li, W.R. and Xiong, R.Q. and Fan, X.P.},
        TITLE = {Multi-Layer Probabilistic Association Reasoning Network for Image-Text Retrieval},
        JOURNAL = CirSysVideo,
        VOLUME = {34},
        YEAR = {2024},
        NUMBER = {10},
        MONTH = oct,
        PAGES = {9706--9717},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235198},
}

@article{bb240282,
        AUTHOR = {Tian, M.X. and Yang, S. and Wu, X.X. and Jia, Y.D.},
        TITLE = {Source-Free Image-Text Matching via Uncertainty-Aware Learning},
        JOURNAL = SPLetters,
        VOLUME = {31},
        YEAR = {2024},
        PAGES = {3059--3063},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235199},
}

@article{bb240283,
        AUTHOR = {Wang, D. and Tian, J. and Liang, X. and Tian, Y.M. and He, L.H.},
        TITLE = {Global-aware Fragment Representation Aggregation Network for image-text retrieval},
        JOURNAL = PR,
        VOLUME = {159},
        YEAR = {2025},
        PAGES = {111085},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235200},
}

@article{bb240284,
        AUTHOR = {Ke, X. and Chen, B.T. and Yang, X. and Cai, Y.H. and Liu, H. and Guo, W.Z.},
        TITLE = {Cross-modal independent matching network for image-text retrieval},
        JOURNAL = PR,
        VOLUME = {159},
        YEAR = {2025},
        PAGES = {111096},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235201},
}

@article{bb240285,
        AUTHOR = {Li, Z. and Guo, C. and Wang, X. and Feng, Z. and Du, Z.T.},
        TITLE = {Selectively Hard Negative Mining for Alleviating Gradient Vanishing in Image-Text Matching},
        JOURNAL = CirSysVideo,
        VOLUME = {35},
        YEAR = {2025},
        NUMBER = {2},
        MONTH = feb,
        PAGES = {1921--1935},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235202},
}

@article{bb240286,
        AUTHOR = {Yang, X.Y. and Li, C. and Wang, Z.M. and Xie, H. and Mao, J. and Yin, G.Q.},
        TITLE = {Remote Sensing Cross-Modal Text-Image Retrieval Based on Attention Correction and Filtering},
        JOURNAL = RS,
        VOLUME = {17},
        YEAR = {2025},
        NUMBER = {3},
        PAGES = {503},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235203},
}

@article{bb240287,
        AUTHOR = {Wu, D.Q. and Li, H.H. and Gu, C. and Guo, L. and Liu, H.},
        TITLE = {Dual Stream Relation Learning Network for Image-Text Retrieval},
        JOURNAL = MultMed,
        VOLUME = {27},
        YEAR = {2025},
        PAGES = {1551--1565},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235204},
}

@article{bb240288,
        AUTHOR = {Liu, Z. and Xu, J.H. and Gao, S.S. and Chen, Z.},
        TITLE = {{CSA}: Cross-scale alignment with adaptive semantic aggregation and filter for image-text retrieval},
        JOURNAL = PR,
        VOLUME = {165},
        YEAR = {2025},
        PAGES = {111647},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235205},
}

@article{bb240289,
        AUTHOR = {Zhang, Q. and Wang, D. and Yu, X.},
        TITLE = {{RLita}: A Region-Level Image-Text Alignment Method for Remote Sensing Foundation Model},
        JOURNAL = RS,
        VOLUME = {17},
        YEAR = {2025},
        NUMBER = {10},
        PAGES = {1661},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235206},
}

@article{bb240290,
        AUTHOR = {Wen, Z.T. and Liu, J.H. and Zhang, H. and Zuo, F.Y.},
        TITLE = {Exploring Fine-Grained Visual-Text Feature Alignment With Prompt Tuning for Domain-Adaptive Object Detection},
        JOURNAL = Cyber,
        VOLUME = {55},
        YEAR = {2025},
        NUMBER = {7},
        MONTH = jul,
        PAGES = {3220--3233},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235207},
}

@article{bb240291,
        AUTHOR = {Guan, J.H. and Shu, Y. and Li, W. and Song, Z. and Zhang, Y.C.},
        TITLE = {{PR-CLIP}: Cross-Modal Positional Reconstruction for Remote Sensing Image-Text Retrieval},
        JOURNAL = RS,
        VOLUME = {17},
        YEAR = {2025},
        NUMBER = {13},
        PAGES = {2117},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235208},
}

@article{bb240292,
        AUTHOR = {Zhao, H. and Li, Z.Y. and Wang, S. and Zhang, Z.X. and Li, K.Q.},
        TITLE = {Robust Hashing With Bilinear Drift for Image-Text Retrieval},
        JOURNAL = CirSysVideo,
        VOLUME = {35},
        YEAR = {2025},
        NUMBER = {8},
        MONTH = aug,
        PAGES = {7642--7654},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235209},
}

@article{bb240293,
        AUTHOR = {Wang, P.Z. and Zhang, L. and Mao, Z.D. and Lyu, N. and Zhang, Y.D.},
        TITLE = {Matryoshka Learning With Metric Transfer for Image-Text Matching},
        JOURNAL = CirSysVideo,
        VOLUME = {35},
        YEAR = {2025},
        NUMBER = {9},
        MONTH = sep,
        PAGES = {9502--9516},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235210},
}

@article{bb240294,
        AUTHOR = {Koo, H. and Shin, J. and Kim, E.},
        TITLE = {Dual-branch scale disentanglement for text-video retrieval},
        JOURNAL = PRL,
        VOLUME = {196},
        YEAR = {2025},
        PAGES = {296--302},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235211},
}

@article{bb240295,
        AUTHOR = {Zhou, J. and Wang, M.},
        TITLE = {Unified learning for image-text alignment via multi-scale feature fusion},
        JOURNAL = CVIU,
        VOLUME = {260},
        YEAR = {2025},
        PAGES = {104468},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235212},
}

@article{bb240296,
        AUTHOR = {Wen, J. and Chen, Y.F. and Shi, R.Q. and Ji, W. and Yang, M.L. and Gao, D.F. and Yuan, J.S. and Zimmermann, R.},
        TITLE = {{HOVER}: Hyperbolic Video-Text Retrieval},
        JOURNAL = IP,
        VOLUME = {34},
        YEAR = {2025},
        PAGES = {6192--6203},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235213},
}

@article{bb240297,
        AUTHOR = {Fang, J.Y. and Zhu, B. and Yuan, J.L. and Chen, Y.Y. and Tang, M. and Wang, J.Q.},
        TITLE = {{AMITA}: Attribute-Guided Masked Image-Text Alignment for Multi-Label Image Representation},
        JOURNAL = CirSysVideo,
        VOLUME = {35},
        YEAR = {2025},
        NUMBER = {11},
        MONTH = nov,
        PAGES = {11432--11447},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235214},
}

@article{bb240298,
        AUTHOR = {Ji, L.L. and Liu, L.},
        TITLE = {Multi-Scale Feature Fusion Based on Piecewise Polynomial Activation Function for Image-Text Matching},
        JOURNAL = CirSysVideo,
        VOLUME = {35},
        YEAR = {2025},
        NUMBER = {11},
        MONTH = nov,
        PAGES = {11627--11640},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235215},
}

@article{bb240299,
        AUTHOR = {Chen, R. and Su, T. and Wang, H. and Ni, Z.K.},
        TITLE = {Similarity Shuffled Criss-Cross Transformer With Angle Loss for Image-Text Matching},
        JOURNAL = MultMed,
        VOLUME = {27},
        YEAR = {2025},
        PAGES = {9723--9734},
        BIBSOURCE = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT235216},
}

Last update: Jan 23, 2026 at 20:54:10