@inproceedings{bb212400,
        AUTHOR = "Yang, Z. C. and He, X. D. and Gao, J. F. and Deng, L. and Smola, A.",
        TITLE = "Stacked Attention Networks for Image Question Answering",
        BOOKTITLE = CVPR16,
        YEAR = "2016",
        PAGES = "21--29",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207478"}

@inproceedings{bb212401,
        AUTHOR = "Sadeghi, F. and Divvala, S. K. and Farhadi, A.",
        TITLE = "{VisKE}: Visual knowledge extraction and question answering by visual
verification of relation phrases",
        BOOKTITLE = CVPR15,
        YEAR = "2015",
        PAGES = "1456--1464",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207479"}

@inproceedings{bb212402,
        AUTHOR = "Liu, Y. and Liu, J. and Wang, D. and Cheng, J.",
        TITLE = "A robust multivariate reranking algorithm for Question Answering
enrichment",
        BOOKTITLE = ICIP12,
        YEAR = "2012",
        PAGES = "1917--1920",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207480"}

@inproceedings{bb212403,
        AUTHOR = "Varekamp, C. and van de Walle, P. and de Putter, M.",
        TITLE = "Question interface for {3D} picture creation on an autostereoscopic
digital picture frame",
        BOOKTITLE = "3DTV09",
        YEAR = "2009",
        PAGES = "1--4",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207481"}

@article{bb212404,
        AUTHOR = "Das, A. and Agrawal, H. and Zitnick, L. and Parikh, D. and Batra, D.",
        TITLE = "Human Attention in Visual Question Answering:
Do Humans and Deep Networks Look at the Same Regions?",
        JOURNAL = CVIU,
        VOLUME = "163",
        YEAR = "2017",
        NUMBER = "1",
        PAGES = "90--100",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT207482"}

@article{bb212405,
        AUTHOR = "Malinowski, M. and Rohrbach, M. and Fritz, M.",
        TITLE = "Ask Your Neurons: A Deep Learning Approach to Visual Question Answering",
        JOURNAL = IJCV,
        VOLUME = "125",
        YEAR = "2017",
        NUMBER = "1--3",
        MONTH = dec,
        PAGES = "110--135",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT207483"}

@inproceedings{bb212406,
        AUTHOR = "Malinowski, M. and Rohrbach, M. and Fritz, M.",
        TITLE = "Ask Your Neurons:
A Neural-Based Approach to Answering Questions about Images",
        BOOKTITLE = ICCV15,
        YEAR = "2015",
        PAGES = "1--9",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT207484"}

@inproceedings{bb212407,
        AUTHOR = "Dancette, C. and Whitehead, S. and Maheshwary, R. and Vedantam, R. and Scherer, S. and Chen, X. L. and Cord, M. and Rohrbach, M.",
        TITLE = "Improving Selective Visual Question Answering by Learning from Your
Peers",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "24049--24059",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT207485"}

@article{bb212408,
        AUTHOR = "Huang, Y. Z. and Zhong, T.",
        TITLE = "Multitask learning for neural generative question answering",
        JOURNAL = RealTimeIP,
        VOLUME = "14",
        YEAR = "2018",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "1009--1017",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT207486"}

@article{bb212409,
        AUTHOR = "Ruwa, N. and Mao, Q. and Song, H. P. and Jia, H. J. and Dong, M.",
        TITLE = "Triple attention network for sentimental visual question answering",
        JOURNAL = CVIU,
        VOLUME = "189",
        YEAR = "2019",
        PAGES = "102829",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT207487"}

@article{bb212410,
        AUTHOR = "Bai, Z. W. and Li, Y. and Wozniak, M. and Zhou, M. L. and Li, D.",
        TITLE = "{DecomVQANet}: Decomposing visual question answering deep network via
tensor decomposition and regression",
        JOURNAL = PR,
        VOLUME = "110",
        YEAR = "2021",
        PAGES = "107538",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT207488"}

@article{bb212411,
        AUTHOR = "Zhang, Q. S. and Wu, Y. N. and Zhang, H. and Zhu, S. C.",
        TITLE = "Mining deep {And-Or} object structures via cost-sensitive
question-answer-based active annotations",
        JOURNAL = CVIU,
        VOLUME = "176--177",
        YEAR = "2018",
        PAGES = "33--44",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT207489"}

@article{bb212412,
        AUTHOR = "Zhang, Q. S. and Ren, J. and Huang, G. and Cao, R. M. and Wu, Y. N. and Zhu, S. C.",
        TITLE = "Mining Interpretable {AOG} Representations From Convolutional Networks
via Active Question Answering",
        JOURNAL = PAMI,
        VOLUME = "43",
        YEAR = "2021",
        NUMBER = "11",
        MONTH = nov,
        PAGES = "3949--3963",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT207490"}

@inproceedings{bb212413,
        AUTHOR = "Zhang, Q. S. and Cao, R. M. and Wu, Y. N. and Zhu, S. C.",
        TITLE = "Mining Object Parts from {CNNs} via Active Question-Answering",
        BOOKTITLE = CVPR17,
        YEAR = "2017",
        PAGES = "3890--3899",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT207491"}

@inproceedings{bb212414,
        AUTHOR = "Zhang, Q. S. and Wu, Y. N. and Zhu, S. C.",
        TITLE = "Mining {And-Or} Graphs for Graph Matching and Object Discovery",
        BOOKTITLE = ICCV15,
        YEAR = "2015",
        PAGES = "55--63",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT207492"}

@article{bb212415,
        AUTHOR = "Cao, Q. X. and Liang, X. D. and Li, B. L. and Lin, L.",
        TITLE = "Interpretable Visual Question Answering by Reasoning on Dependency
Trees",
        JOURNAL = PAMI,
        VOLUME = "43",
        YEAR = "2021",
        NUMBER = "3",
        MONTH = mar,
        PAGES = "887--901",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT207493"}

@inproceedings{bb212416,
        AUTHOR = "Cao, Q. X. and Liang, X. D. and Li, B. L. and Li, G. and Lin, L.",
        TITLE = "Visual Question Reasoning on General Dependency Tree",
        BOOKTITLE = CVPR18,
        YEAR = "2018",
        PAGES = "7249--7257",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT207494"}

@article{bb212417,
        AUTHOR = "Zhong, H. S. and Chen, J. Y. and Shen, C. and Zhang, H. W. and Huang, J. Q. and Hua, X. S.",
        TITLE = "Self-Adaptive Neural Module Transformer for Visual Question Answering",
        JOURNAL = MultMed,
        VOLUME = "23",
        YEAR = "2021",
        PAGES = "1264--1273",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT207495"}

@article{bb212418,
        AUTHOR = "Zheng, W. F. and Yin, L. R. and Chen, X. B. and Ma, Z. and Liu, S. and Yang, B.",
        TITLE = "Knowledge base graph embedding module design for Visual question
answering model",
        JOURNAL = PR,
        VOLUME = "120",
        YEAR = "2021",
        PAGES = "108153",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT207496"}

@article{bb212419,
        AUTHOR = "Sharma, H. and Jalal, A. S.",
        TITLE = "Visual question answering model based on graph neural network and
contextual attention",
        JOURNAL = IVC,
        VOLUME = "110",
        YEAR = "2021",
        PAGES = "104165",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT207497"}

@article{bb212420,
        AUTHOR = "Song, L. Y. and Li, J. and Liu, J. and Yang, Y. and Shang, X. and Sun, M. X.",
        TITLE = "Answering knowledge-based visual questions via the exploration of
Question Purpose",
        JOURNAL = PR,
        VOLUME = "133",
        YEAR = "2023",
        PAGES = "109015",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT207498"}

@article{bb212421,
        author = {MeshuWelde, T. and Liao, L.},
        title = {Counting-based visual question answering with serial cascaded attention deep learning},
        journal = PR,
        volume = {144},
        year = {2023},
        pages = {109850},
        bibsource = {http://www.visionbib.com/bibliography/applicat803vqann4.html#TT207499}}

@article{bb212422,
        AUTHOR = "Liu, Y. and Li, G. B. and Lin, L.",
        TITLE = "Cross-Modal Causal Relational Reasoning for Event-Level Visual
Question Answering",
        JOURNAL = PAMI,
        VOLUME = "45",
        YEAR = "2023",
        NUMBER = "10",
        MONTH = oct,
        PAGES = "11624--11641",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT207500"}

@inproceedings{bb212423,
        AUTHOR = "Cao, Q. X. and Wan, W. T. and Wang, K. and Liang, X. D. and Lin, L.",
        TITLE = "Linguistically Routing Capsule Network for Out-of-distribution Visual
Question Answering",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "1594--1603",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT207501"}

@article{bb212424,
        AUTHOR = "Yang, S. W. and Xiao, L. and Wu, X. J. and Xu, J. J. and Wang, L. L. and He, L.",
        TITLE = "Simple contrastive learning in a self-supervised manner for robust
visual question answering",
        JOURNAL = CVIU,
        VOLUME = "241",
        YEAR = "2024",
        PAGES = "103976",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT207502"}

@inproceedings{bb212425,
        AUTHOR = "Wang, Y. and Yasunaga, M. and Ren, H. Y. and Wada, S. and Leskovec, J.",
        TITLE = "{VQA-GNN}: Reasoning with Multimodal Knowledge via Graph Neural
Networks for Visual Question Answering",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "21525--21535",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT207503"}

@inproceedings{bb212426,
        AUTHOR = "Souza, B. and Aasan, M. and Pedrini, H. and Rivera, A. R.",
        TITLE = "{SelfGraphVQA}: A Self-Supervised Graph Neural Network for Scene-based
Question Answering",
        BOOKTITLE = VLAR23,
        YEAR = "2023",
        PAGES = "4642--4647",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT207504"}

@inproceedings{bb212427,
        AUTHOR = "Haisa, G. and Altenbek, G.",
        TITLE = "Question Classification Based on Weak Supervision and Interrogative
Pronouns Attention Mechanism",
        BOOKTITLE = "ICPR22",
        YEAR = "2022",
        PAGES = "2273--2278",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT207505"}

@inproceedings{bb212428,
        AUTHOR = "Nguyen, B. X. and Do, T. and Tran, H. and Tjiputra, E. and Tran, Q. D. and Nguyen, A.",
        TITLE = "Coarse-to-Fine Reasoning for Visual Question Answering",
        BOOKTITLE = MULA22,
        YEAR = "2022",
        PAGES = "4557--4565",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT207506"}

@inproceedings{bb212429,
        AUTHOR = "Liang, Y. Y. and Wang, X. and Duan, X. G. and Zhu, W. W.",
        TITLE = "Multi-modal Contextual Graph Neural Network for Text Visual Question
Answering",
        BOOKTITLE = ICPR21,
        YEAR = "2021",
        PAGES = "3491--3498",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT207507"}

@inproceedings{bb212430,
        AUTHOR = "Patro, B. N. and Kurmi, V. K. and Kumar, S. and Namboodiri, V. P.",
        TITLE = "Deep {Bayesian} Network for Visual Question Generation",
        BOOKTITLE = WACV20,
        YEAR = "2020",
        PAGES = "1555--1565",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT207508"}

@inproceedings{bb212431,
        AUTHOR = "Singh, A. K. and Mishra, A. and Shekhar, S. and Chakraborty, A.",
        TITLE = "From Strings to Things: Knowledge-Enabled {VQA} Model That Can Read and
Reason",
        BOOKTITLE = ICCV19,
        YEAR = "2019",
        PAGES = "4601--4611",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT207509"}

@inproceedings{bb212432,
        AUTHOR = "Wilf, A. and Ma, M. Q. and Liang, P. P. and Zadeh, A. and Morency, L. P.",
        TITLE = "Face-to-Face Contrastive Learning for Social Intelligence
Question-Answering",
        BOOKTITLE = FG23,
        YEAR = "2023",
        PAGES = "1--7",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT207510"}

@inproceedings{bb212433,
        AUTHOR = "Zadeh, A. and Chan, M. and Liang, P. P. and Tong, E. and Morency, L. P.",
        TITLE = "{Social-IQ}: A Question Answering Benchmark for Artificial Social
Intelligence",
        BOOKTITLE = CVPR19,
        YEAR = "2019",
        PAGES = "8799--8809",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT207511"}

@inproceedings{bb212434,
        AUTHOR = "Ma, C. and Shen, C. and Dick, A. and Wu, Q. and Wang, P. and van den Hengel, A. J. and Reid, I. D.",
        TITLE = "Visual Question Answering with Memory-Augmented Networks",
        BOOKTITLE = CVPR18,
        YEAR = "2018",
        PAGES = "6975--6984",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT207512"}

@inproceedings{bb212435,
        AUTHOR = "Shin, A. and Ushiku, Y. and Harada, T.",
        TITLE = "Customized Image Narrative Generation via Interactive Visual Question
Generation and Answering",
        BOOKTITLE = CVPR18,
        YEAR = "2018",
        PAGES = "8925--8933",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT207513"}

@inproceedings{bb212436,
        AUTHOR = "Teney, D. and Anderson, P. and He, X. and van den Hengel, A. J.",
        TITLE = "Tips and Tricks for Visual Question Answering:
Learnings from the 2017 Challenge",
        BOOKTITLE = CVPR18,
        YEAR = "2018",
        PAGES = "4223--4232",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT207514"}

@inproceedings{bb212437,
        AUTHOR = "Bai, Y. L. and Fu, J. L. and Zhao, T. J. and Mei, T.",
        TITLE = "Deep Attention Neural Tensor Network for Visual Question Answering",
        BOOKTITLE = ECCV18,
        YEAR = "2018",
        PAGES = "XII: 21--37",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT207515"}

@inproceedings{bb212438,
        AUTHOR = "Sinha, A. and Ayush, K.",
        TITLE = "Towards Mathematical Reasoning: A Multimodal Deep Learning Approach",
        BOOKTITLE = ICIP18,
        YEAR = "2018",
        PAGES = "4028--4032",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT207516"}

@inproceedings{bb212439,
        AUTHOR = "Rosso Mateus, A. and Gonzalez, F. A. and Montes y Gomez, M.",
        TITLE = "A Two-Step Neural Network Approach to Passage Retrieval for Open Domain
Question Answering",
        BOOKTITLE = CIARP17,
        YEAR = "2017",
        PAGES = "566--574",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT207517"}

@inproceedings{bb212440,
        AUTHOR = "Zhu, C. and Zhao, Y. and Huang, S. and Tu, K. and Ma, Y.",
        TITLE = "Structured Attentions for Visual Question Answering",
        BOOKTITLE = ICCV17,
        YEAR = "2017",
        PAGES = "1300--1309",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT207518"}

@inproceedings{bb212441,
        AUTHOR = "Hu, R. and Andreas, J. and Rohrbach, M. and Darrell, T. J. and Saenko, K.",
        TITLE = "Learning to Reason:
End-to-End Module Networks for Visual Question Answering",
        BOOKTITLE = ICCV17,
        YEAR = "2017",
        PAGES = "804--813",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT207519"}

@inproceedings{bb212442,
        AUTHOR = "Peris, A. and Casacuberta, F.",
        TITLE = "Interactive-Predictive Neural Multimodal Systems",
        BOOKTITLE = IbPRIA19,
        YEAR = "2019",
        PAGES = "I:16--28",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT207520"}

@inproceedings{bb212443,
        AUTHOR = "Bolanos, M. and Peris, A. and Casacuberta, F. and Radeva, P.",
        TITLE = "{VIBIKNet}: Visual Bidirectional Kernelized Network for Visual Question
Answering",
        BOOKTITLE = IbPRIA17,
        YEAR = "2017",
        PAGES = "372--380",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT207521"}

@inproceedings{bb212444,
        AUTHOR = "Kafle, K. and Kanan, C.",
        TITLE = "An Analysis of Visual Question Answering Algorithms",
        BOOKTITLE = ICCV17,
        YEAR = "2017",
        PAGES = "1983--1991",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT207522"}

@inproceedings{bb212445,
        AUTHOR = "Kafle, K. and Kanan, C.",
        TITLE = "Answer-Type Prediction for Visual Question Answering",
        BOOKTITLE = CVPR16,
        YEAR = "2016",
        PAGES = "4976--4984",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT207523"}

@inproceedings{bb212446,
        AUTHOR = "Wang, P. and Wu, Q. and Shen, C. and van den Hengel, A. J.",
        TITLE = "The {VQA-Machine}: Learning How to Use Existing Vision Algorithms to
Answer New Questions",
        BOOKTITLE = CVPR17,
        YEAR = "2017",
        PAGES = "3909--3918",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT207524"}

@inproceedings{bb212447,
        AUTHOR = "Yu, D. and Fu, J. and Mei, T. and Rui, Y.",
        TITLE = "Multi-level Attention Networks for Visual Question Answering",
        BOOKTITLE = CVPR17,
        YEAR = "2017",
        PAGES = "4187--4195",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT207525"}

@inproceedings{bb212448,
        AUTHOR = "Ramakrishnan, S. K. and Pal, A. and Sharma, G. and Mittal, A.",
        TITLE = "An Empirical Evaluation of Visual Question Answering for Novel
Objects",
        BOOKTITLE = CVPR17,
        YEAR = "2017",
        PAGES = "7312--7321",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT207526"}

@article{bb212449,
        AUTHOR = "Tamaazousti, Y. and Le Borgne, H. and Popescu, A. and Gadeski, E. and Ginsca, A. and Hudelot, C.",
        TITLE = "Vision-language integration using constrained local semantic features",
        JOURNAL = CVIU,
        VOLUME = "163",
        YEAR = "2017",
        NUMBER = "1",
        PAGES = "41--57",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT207527"}

@article{bb212450,
        AUTHOR = "Gouthaman, K. V. and Nambiar, A. and Srinivas, K. S. and Mittal, A.",
        TITLE = "Linguistically-aware attention for reducing the semantic gap in
vision-language tasks",
        JOURNAL = PR,
        VOLUME = "112",
        YEAR = "2021",
        PAGES = "107812",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT207528"}

@article{bb212451,
        AUTHOR = "Zhou, K. Y. and Yang, J. K. and Loy, C. C. and Liu, Z. W.",
        TITLE = "Learning to Prompt for Vision-Language Models",
        JOURNAL = IJCV,
        VOLUME = "130",
        YEAR = "2022",
        NUMBER = "9",
        MONTH = sep,
        PAGES = "2337--2348",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT207529"}

@inproceedings{bb212452,
        AUTHOR = "Zhou, K. Y. and Yang, J. K. and Loy, C. C. and Liu, Z. W.",
        TITLE = "Conditional Prompt Learning for Vision-Language Models",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "16795--16804",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT207530"}

@article{bb212453,
        AUTHOR = "Ma, C. C. and Liu, Y. and Deng, J. K. and Xie, L. X. and Dong, W. M. and Xu, C. S.",
        TITLE = "Understanding and Mitigating Overfitting in Prompt Tuning for
Vision-Language Models",
        JOURNAL = CirSysVideo,
        VOLUME = "33",
        YEAR = "2023",
        NUMBER = "9",
        MONTH = sep,
        PAGES = "4616--4629",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT207531"}

@article{bb212454,
        AUTHOR = "Zhu, Y. Q. and Li, X. Y. and Zheng, M. and Yang, J. H. and Wang, Z. and Guo, X. Q. and Chai, Z. F. and Yuan, Y. C. and Jiang, S. Q.",
        TITLE = "Focus and Align: Learning Tube Tokens for Video-Language Pre-Training",
        JOURNAL = MultMed,
        VOLUME = "25",
        YEAR = "2023",
        PAGES = "8036--8050",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT207532"}

@article{bb212455,
        AUTHOR = "Chen, C. Q. and Han, D. and Chang, C. C.",
        TITLE = "{MPCCT}: Multimodal vision-language learning paradigm with
context-based compact Transformer",
        JOURNAL = PR,
        VOLUME = "147",
        YEAR = "2024",
        PAGES = "110084",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT207533"}

@article{bb212456,
        AUTHOR = "Wu, W. H. and Sun, Z. and Song, Y. X. and Wang, J. D. and Ouyang, W. L.",
        TITLE = "Transferring Vision-Language Models for Visual Recognition:
A Classifier Perspective",
        JOURNAL = IJCV,
        VOLUME = "132",
        YEAR = "2024",
        NUMBER = "2",
        MONTH = feb,
        PAGES = "392--409",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT207534"}

@article{bb212457,
        AUTHOR = "Ming, Y. F. and Li, Y. X.",
        TITLE = "How Does Fine-Tuning Impact Out-of-Distribution Detection for
Vision-Language Models?",
        JOURNAL = IJCV,
        VOLUME = "132",
        YEAR = "2024",
        NUMBER = "2",
        MONTH = feb,
        PAGES = "596--609",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT207535"}

@article{bb212458,
        AUTHOR = "Zhao, C. R. and Wang, Y. and Jiang, X. Y. and Shen, Y. F. and Song, K. and Li, D. S. and Miao, D. Q.",
        TITLE = "Learning Domain Invariant Prompt for Vision-Language Models",
        JOURNAL = IP,
        VOLUME = "33",
        YEAR = "2024",
        PAGES = "1348--1360",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT207536"}

@article{bb212459,
        AUTHOR = "Yang, X. F. and Liu, F. and Lin, G. S.",
        TITLE = "Neural Logic Vision Language Explainer",
        JOURNAL = MultMed,
        VOLUME = "26",
        YEAR = "2024",
        PAGES = "3331--3340",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT207537"}

@article{bb212460,
        AUTHOR = "Wang, Y. D. and Yu, Z. O. and Wang, J. D. and Heng, Q. and Chen, H. and Ye, W. and Xie, R. and Xie, X. and Zhang, S. K.",
        TITLE = "Exploring Vision-Language Models for Imbalanced Learning",
        JOURNAL = IJCV,
        VOLUME = "132",
        YEAR = "2024",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "224--237",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT207538"}

@article{bb212461,
        AUTHOR = "Yu, Z. T. and Zhao, J. and Guo, C. L. and Yang, Y.",
        TITLE = "{StableNet}: Distinguishing the hard samples to overcome language
priors in visual question answering",
        JOURNAL = IET-CV,
        VOLUME = "18",
        YEAR = "2024",
        NUMBER = "2",
        PAGES = "315--327",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT207539"}

@inproceedings{bb212462,
        AUTHOR = "Ganz, R. and Nuriel, O. and Aberdam, A. and Kittenplon, Y. and Mazor, S. and Litman, R.",
        TITLE = "Towards Models that Can See and Read",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "21661--21671",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT207540"}

@inproceedings{bb212463,
        AUTHOR = "Zhang, H. and Liu, D. and Lv, Z. and Su, B. and Tao, D. C.",
        TITLE = "Exploring Temporal Concurrency for Video-Language Representation
Learning",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "15522--15532",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT207541"}

@inproceedings{bb212464,
        AUTHOR = "Shukor, M. and Dancette, C. and Cord, M.",
        TITLE = "{eP-ALM}: Efficient Perceptual Augmentation of Language Models",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "21999--22012",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT207542"}

@inproceedings{bb212465,
        AUTHOR = "Schulter, S. and Kumar, B. G. V. and Suh, Y. M. and Dafnis, K. M. and Zhang, Z. X. and Zhao, S. Y. and Metaxas, D. N.",
        TITLE = "{OmniLabel}: A Challenging Benchmark for Language-Based Object
Detection",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "11919--11928",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT207543"}

@inproceedings{bb212466,
        AUTHOR = "Chen, Z. L. and Huang, X. and Guan, Q. L. and Lin, L. and Luo, W. Q.",
        TITLE = "A Retrospect to Multi-prompt Learning across Vision and Language",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "22133--22144",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT207544"}

@inproceedings{bb212467,
        AUTHOR = "Derakhshani, M. M. and Sanchez, E. and Bulat, A. and da Costa, V. G. T. and Snoek, C. G. M. and Tzimiropoulos, G. and Martinez, B.",
        TITLE = "{Bayesian} Prompt Learning for Image-Language Model Generalization",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "15191--15200",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT207545"}

@inproceedings{bb212468,
        AUTHOR = "Cascante Bonilla, P. and Shehada, K. and Smith, J. S. and Doveh, S. and Kim, D. H. and Panda, R. and Varol, G. and Oliva, A. and Ordonez, V. and Feris, R. S. and Karlinsky, L.",
        TITLE = "Going Beyond Nouns With Vision \& Language Models Using Synthetic
Data",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "20098--20108",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT207546"}

@inproceedings{bb212469,
        AUTHOR = "Zara, G. and Conti, A. and Roy, S. and Lathuiliere, S. and Rota, P. and Ricci, E.",
        TITLE = "The Unreasonable Effectiveness of Large Language-Vision Models for
Source-free Video Domain Adaptation",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "10273--10283",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT207547"}

@inproceedings{bb212470,
        AUTHOR = "Upadhyay, U. and Karthik, S. and Mancini, M. and Akata, Z.",
        TITLE = "{ProbVLM}: Probabilistic Adapter for Frozen Vison-Language Models",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "1899--1910",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT207548"}

@inproceedings{bb212471,
        AUTHOR = "Chen, Z. H. and Diao, S. Z. and Wang, B. and Li, G. B. and Wan, X.",
        TITLE = "Towards Unifying Medical Vision-and-Language Pre-training via Soft
Prompts",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "23346--23356",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT207549"}

@inproceedings{bb212472,
        AUTHOR = "Bitton Guetta, N. and Bitton, Y. and Hessel, J. and Schmidt, L. and Elovici, Y. and Stanovsky, G. and Schwartz, R.",
        TITLE = "Breaking Common Sense: {WHOOPS!} A Vision-and-Language Benchmark of
Synthetic and Compositional Images",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "2616--2627",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT207550"}

@inproceedings{bb212473,
        AUTHOR = "Hu, Z. Y. and Li, Y. and Lyu, M. R. and Wang, L. W.",
        TITLE = "{VL-PET}: Vision-and-Language Parameter-Efficient Tuning via
Granularity Control",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "2998--3008",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT207551"}

@inproceedings{bb212474,
        AUTHOR = "Slyman, E. and Kahng, M. and Lee, S.",
        TITLE = "{VLSlice}: Interactive Vision-and-Language Slice Discovery",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "15245--15255",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT207552"}

@inproceedings{bb212475,
        AUTHOR = "Najibi, M. and Ji, J. W. and Zhou, Y. and Qi, C. R. and Yan, X. C. and Ettinger, S. and Anguelov, D.",
        TITLE = "Unsupervised {3D} Perception with {2D} Vision-Language Distillation for
Autonomous Driving",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "8568--8578",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT207553"}

@inproceedings{bb212476,
        AUTHOR = "Zheng, K. and Wu, W. and Feng, R. and Zhu, K. and Liu, J. W. and Zhao, D. L. and Zha, Z. J. and Chen, W. and Shen, Y. J.",
        TITLE = "Regularized Mask Tuning: Uncovering Hidden Knowledge in Pre-trained
Vision-Language Models",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "11629--11639",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT207554"}

@inproceedings{bb212477,
        AUTHOR = "Wang, T. and Lin, K. and Li, L. J. and Lin, C. C. and Yang, Z. Y. and Zhang, H. W. and Liu, Z. C. and Wang, L. J.",
        TITLE = "Equivariant Similarity for Vision-Language Foundation Models",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "11964--11974",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT207555"}

@inproceedings{bb212478,
        AUTHOR = "Xu, H. and Xie, S. and Huang, P. Y. and Yu, L. C. and Howes, R. and Ghosh, G. and Zettlemoyer, L. and Feichtenhofer, C.",
        TITLE = "{CiT}: Curation in Training for Effective Vision-Language Data",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "15134--15143",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT207556"}

@inproceedings{bb212479,
        AUTHOR = "Trager, M. and Perera, P. and Zancato, L. and Achille, A. and Bhatia, P. and Soatto, S.",
        TITLE = "Linear Spaces of Meanings: Compositional Structures in
Vision-Language Models",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "15349--15358",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT207557"}

@inproceedings{bb212480,
        AUTHOR = "Chen, Y. S. and Song, Y. Z. and Yeo, C. Y. and Liu, B. and Fu, J. L. and Shuai, H. H.",
        TITLE = "{SINC}: Self-Supervised In-Context Learning for Vision-Language Tasks",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "15384--15396",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT207558"}

@inproceedings{bb212481,
        AUTHOR = "Wu, C. E. and Tian, Y. and Yu, H. C. and Wang, H. and Morgado, P. and Hu, Y. H. and Yang, L. J.",
        TITLE = "Why Is Prompt Tuning for Vision-Language Models Robust to Noisy
Labels?",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "15442--15451",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT207559"}

% NOTE(review): third author's surname corrected from "Matinez" to "Martinez"; confirm against the published author list.
@inproceedings{bb212482,
        AUTHOR = "Ouali, Y. and Bulat, A. and Martinez, B. and Tzimiropoulos, G.",
        TITLE = "Black Box Few-Shot Adaptation for Vision-Language models",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "15488--15500",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT207560"}

@inproceedings{bb212483,
        AUTHOR = "Kan, B. and Wang, T. and Lu, W. P. and Zhen, X. T. and Guan, W. and Zheng, F.",
        TITLE = "Knowledge-Aware Prompt Tuning for Generalizable Vision-Language
Models",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "15624--15634",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT207561"}

@inproceedings{bb212484,
        AUTHOR = "Zhai, J. T. and Zhang, Q. and Wu, T. and Chen, X. Y. and Liu, J. J. and Cheng, M. M.",
        TITLE = "{SLAN}: Self-Locator Aided Network for Vision-Language Understanding",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "21892--21901",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT207562"}

@inproceedings{bb212485,
        AUTHOR = "Long, S. and Zhao, Z. and Yuan, J. and Tan, Z. C. and Liu, J. J. and Zhou, L. P. and Wang, S. S. and Wang, J. D.",
        TITLE = "Task-Oriented Multi-Modal Mutual Learning for Vision-Language Models",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "21902--21912",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT207563"}

@inproceedings{bb212486,
        AUTHOR = "Cho, E. and Kim, J. and Kim, H. W. J.",
        TITLE = "Distribution-Aware Prompt Tuning for Vision-Language Models",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "21947--21956",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT207564"}

@inproceedings{bb212487,
        AUTHOR = "Varma, M. and Delbrouck, J. B. and Hooper, S. and Chaudhari, A. and Langlotz, C.",
        TITLE = "{ViLLA}: Fine-Grained Vision-Language Representation Learning from
Real-World Data",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "22168--22178",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT207565"}

@inproceedings{bb212488,
        AUTHOR = "Zhu, H. G. and Wei, Y. C. and Liang, X. D. and Zhang, C. J. and Zhao, Y.",
        TITLE = "{CTP}: Towards Vision-Language Continual Pretraining via Compatible
Momentum Contrast and Topology Preservation",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "22200--22210",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT207566"}

@inproceedings{bb212489,
        AUTHOR = "Salin, E. and Ayache, S. and Favre, B.",
        TITLE = "Towards an Exhaustive Evaluation of Vision-Language Foundation Models",
        BOOKTITLE = MMFM23,
        YEAR = "2023",
        PAGES = "339--352",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT207567"}

@inproceedings{bb212490,
        AUTHOR = "Hu, Z. and Zhu, X. L. and Tran, S. and Vidal, R. and Dhua, A.",
        TITLE = "{ProVLA}: Compositional Image Search with Progressive Vision-Language
Alignment and Multimodal Fusion",
        BOOKTITLE = CLVL23,
        YEAR = "2023",
        PAGES = "2764--2769",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT207568"}

@inproceedings{bb212491,
        AUTHOR = "Hall, M. and Gustafson, L. and Adcock, A. and Misra, I. and Ross, C.",
        TITLE = "Vision-Language Models Performing Zero-Shot Tasks Exhibit Disparities
Between Gender Groups",
        BOOKTITLE = CLVL23,
        YEAR = "2023",
        PAGES = "2770--2777",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT207569"}

@inproceedings{bb212492,
        AUTHOR = "Agnolucci, L. and Baldrati, A. and Todino, F. and Becattini, F. and Bertini, M. and del Bimbo, A.",
        TITLE = "{ECO}: Ensembling Context Optimization for Vision-Language Models",
        BOOKTITLE = CLVL23,
        YEAR = "2023",
        PAGES = "2803--2807",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT207570"}

@inproceedings{bb212493,
        AUTHOR = "Palit, V. and Pandey, R. and Arora, A. and Liang, P. P.",
        TITLE = "Towards Vision-Language Mechanistic Interpretability: A Causal
Tracing Tool for {BLIP}",
        BOOKTITLE = CLVL23,
        YEAR = "2023",
        PAGES = "2848--2853",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT207571"}

@inproceedings{bb212494,
        AUTHOR = "Sammani, F. and Deligiannis, N.",
        TITLE = "{Uni-NLX}: Unifying Textual Explanations for Vision and Vision-Language
Tasks",
        BOOKTITLE = VLAR23,
        YEAR = "2023",
        PAGES = "4636--4641",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT207572"}

@inproceedings{bb212495,
        AUTHOR = "Lu, D. and Wang, Z. Q. and Wang, T. and Guan, W. and Gao, H. and Zheng, F.",
        TITLE = "Set-level Guidance Attack: Boosting Adversarial Transferability of
Vision-Language Pre-training Models",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "102--111",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT207573"}

@inproceedings{bb212496,
        AUTHOR = "Lee, D. J. and Song, S. and Suh, J. and Choi, J. and Lee, S. and Kim, H. W. J.",
        TITLE = "Read-only Prompt Optimization for Vision-Language Few-shot Learning",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "1401--1411",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT207574"}

@inproceedings{bb212497,
        AUTHOR = "Li, X. and Fang, Y. H. and Liu, M. H. and Ling, Z. and Tu, Z. W. and Su, H.",
        TITLE = "Distilling Large Vision-Language Model with Out-of-Distribution
Generalizability",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "2492--2503",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT207575"}

@inproceedings{bb212498,
        AUTHOR = "Li, J. C. and Gao, M. and Wei, L. and Tang, S. L. and Zhang, W. Q. and Li, M. and Ji, W. and Tian, Q. and Chua, T. S. and Zhuang, Y. T.",
        TITLE = "Gradient-Regulated Meta-Prompt Learning for Generalizable
Vision-Language Models",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "2551--2562",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT207576"}

@inproceedings{bb212499,
        AUTHOR = "Bi, J. Y. and Cheng, D. and Yao, P. and Pang, B. and Zhan, Y. F. and Yang, C. G. and Wang, Y. J. and Sun, H. and Deng, W. W. and Zhang, Q.",
        TITLE = "{VL-Match}: Enhancing Vision-Language Pretraining with Token-Level and
Instance-Level Matching",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "2584--2593",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT207577"}

Last update: Mar 25, 2024 at 16:07:51