% Author order follows the published CVPR 2018 paper (Hu, Chao, Sha).
@inproceedings{bb211700,
        AUTHOR = "Hu, H. and Chao, W. and Sha, F.",
        TITLE = "Learning Answer Embeddings for Visual Question Answering",
        BOOKTITLE = CVPR18,
        YEAR = "2018",
        PAGES = "5428-5436",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT206781"}

@inproceedings{bb211701,
  author    = {Kafle, K. and Price, B. and Cohen, S. and Kanan, C.},
  title     = {DVQA: Understanding Data Visualizations via Question Answering},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {5648-5656},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT206782},
}

% Author order follows the published CVPR 2018 paper (Chao, Hu, Sha).
@inproceedings{bb211702,
        AUTHOR = "Chao, W. and Hu, H. and Sha, F.",
        TITLE = "Cross-Dataset Adaptation for Visual Question Answering",
        BOOKTITLE = CVPR18,
        YEAR = "2018",
        PAGES = "5716-5725",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT206783"}

@inproceedings{bb211703,
  author    = {Anderson, P. and He, X. and Buehler, C. and Teney, D. and Johnson, M. and Gould, S. and Zhang, L.},
  title     = {Bottom-Up and Top-Down Attention for Image Captioning and Visual Question Answering},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {6077-6086},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT206784},
}

@inproceedings{bb211704,
  author    = {Nguyen, D. and Okatani, T.},
  title     = {Improved Fusion of Visual and Language Representations by Dense Symmetric Co-attention for Visual Question Answering},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {6087-6096},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT206785},
}

@inproceedings{bb211705,
  author    = {Ma, C. and Shen, C. and Dick, A. and Wu, Q. and Wang, P. and van den Hengel, A.J. and Reid, I.D.},
  title     = {Visual Question Answering with Memory-Augmented Networks},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {6975-6984},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT206786},
}

@inproceedings{bb211706,
  author    = {Patro, B. and Namboodiri, V.P.},
  title     = {Differential Attention for Visual Question Answering},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {7680-7688},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT206787},
}

@inproceedings{bb211707,
  author    = {Su, Z. and Zhu, C. and Dong, Y.P. and Cai, D.Q. and Chen, Y.R. and Li, J.G.},
  title     = {Learning Visual Knowledge Memory Networks for Visual Question Answering},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {7736-7745},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT206788},
}

@inproceedings{bb211708,
  author    = {Shin, A. and Ushiku, Y. and Harada, T.},
  title     = {Customized Image Narrative Generation via Interactive Visual Question Generation and Answering},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {8925-8933},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT206789},
}

@inproceedings{bb211709,
  author    = {Das, A. and Datta, S. and Gkioxari, G. and Lee, S. and Parikh, D. and Batra, D.},
  title     = {Embodied Question Answering},
  booktitle = DeepLearnRV18,
  year      = {2018},
  pages     = {2135-213509},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT206790},
}

@inproceedings{bb211710,
  author    = {Cheng, W. and Huang, Y. and Wang, L.},
  title     = {Towards Unconstrained Pointing Problem of Visual Question Answering: A Retrieval-based Method},
  booktitle = ICPR18,
  year      = {2018},
  pages     = {3303-3308},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT206791},
}

@inproceedings{bb211711,
  author    = {Teney, D. and Anderson, P. and He, X. and van den Hengel, A.J.},
  title     = {Tips and Tricks for Visual Question Answering: Learnings from the 2017 Challenge},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {4223-4232},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT206792},
}

@inproceedings{bb211712,
  author    = {Zhou, B. and Sun, Y. and Bau, D. and Torralba, A.B.},
  title     = {Interpretable Basis Decomposition for Visual Explanation},
  booktitle = ECCV18,
  year      = {2018},
  pages     = {VIII: 122-138},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT206793},
}

@inproceedings{bb211713,
  author    = {Shi, Y. and Furlanello, T. and Zha, S. and Anandkumar, A.},
  title     = {Question Type Guided Attention in Visual Question Answering},
  booktitle = ECCV18,
  year      = {2018},
  pages     = {II: 158-175},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT206794},
}

@inproceedings{bb211714,
  author    = {Narasimhan, M. and Schwing, A.G.},
  title     = {Straight to the Facts: Learning Knowledge Base Retrieval for Factual Visual Question Answering},
  booktitle = ECCV18,
  year      = {2018},
  pages     = {VIII: 460-477},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT206795},
}

@inproceedings{bb211715,
  author    = {Malinowski, M. and Doersch, C. and Santoro, A. and Battaglia, P.},
  title     = {Learning Visual Question Answering by Bootstrapping Hard Attention},
  booktitle = ECCV18,
  year      = {2018},
  pages     = {VI: 3-20},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT206796},
}

@inproceedings{bb211716,
  author    = {Gu, J.X. and Cai, J.F. and Joty, S. and Niu, L. and Wang, G.},
  title     = {Look, Imagine and Match: Improving Textual-Visual Cross-Modal Retrieval with Generative Models},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {7181-7189},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT206797},
}

@inproceedings{bb211717,
  author    = {Li, Q. and Tao, Q.Y. and Joty, S. and Cai, J.F. and Luo, J.B.},
  title     = {VQA-E: Explaining, Elaborating, and Enhancing Your Answers for Visual Questions},
  booktitle = ECCV18,
  year      = {2018},
  pages     = {VII: 570-586},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT206798},
}

@inproceedings{bb211718,
  author    = {Bai, Y.L. and Fu, J.L. and Zhao, T.J. and Mei, T.},
  title     = {Deep Attention Neural Tensor Network for Visual Question Answering},
  booktitle = ECCV18,
  year      = {2018},
  pages     = {XII: 21-37},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT206799},
}

@inproceedings{bb211719,
  author    = {Sinha, A. and Ayush, K.},
  title     = {Towards Mathematical Reasoning: A Multimodal Deep Learning Approach},
  booktitle = ICIP18,
  year      = {2018},
  pages     = {4028-4032},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT206800},
}

@inproceedings{bb211720,
  author    = {Yu, D. and Gao, X. and Xiong, H.},
  title     = {Structured Semantic Representation for Visual Question Answering},
  booktitle = ICIP18,
  year      = {2018},
  pages     = {2286-2290},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT206801},
}

@inproceedings{bb211721,
  author    = {Huang, L. and Kulkarni, K. and Jha, A. and Lohit, S. and Jayasuriya, S. and Turaga, P.K.},
  title     = {CS-VQA: Visual Question Answering with Compressively Sensed Images},
  booktitle = ICIP18,
  year      = {2018},
  pages     = {1283-1287},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT206802},
}

@inproceedings{bb211722,
  author    = {Desta, M.T. and Chen, L. and Kornuta, T.},
  title     = {Object-Based Reasoning in VQA},
  booktitle = WACV18,
  year      = {2018},
  pages     = {1814-1823},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT206803},
}

@inproceedings{bb211723,
  author    = {Zhao, H. and Fan, Q. and Gutfreund, D. and Fu, Y.},
  title     = {Semantically Guided Visual Question Answering},
  booktitle = WACV18,
  year      = {2018},
  pages     = {1852-1860},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT206804},
}

@inproceedings{bb211724,
  author    = {Wang, Z. and Liu, X. and Wang, L. and Qiao, Y. and Xie, X. and Fowlkes, C.C.},
  title     = {Structured Triplet Learning with POS-Tag Guided Attention for Visual Question Answering},
  booktitle = WACV18,
  year      = {2018},
  pages     = {1888-1896},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT206805},
}

@inproceedings{bb211725,
  author    = {Chowdhury, I. and Nguyen, K. and Fookes, C. and Sridharan, S.},
  title     = {A cascaded long short-term memory (LSTM) driven generic visual question answering (VQA)},
  booktitle = ICIP17,
  year      = {2017},
  pages     = {1842-1846},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT206806},
}

% PAGES carries a proceedings-volume prefix; spacing matches the "VOL: first-last" form used by the ECCV entries in this file.
@inproceedings{bb211726,
        AUTHOR = "Sheng, S. and Venkitasubramanian, A.N. and Moens, M.F.",
        TITLE = "A Markov Network Based Passage Retrieval Method for Multimodal Question
Answering in the Cultural Heritage Domain",
        BOOKTITLE = MMMod18,
        YEAR = "2018",
        PAGES = "I: 3-15",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT206807"}

@inproceedings{bb211727,
  author    = {Rosso Mateus, A. and Gonzalez, F.A. and Montes y Gomez, M.},
  title     = {A Two-Step Neural Network Approach to Passage Retrieval for Open Domain Question Answering},
  booktitle = CIARP17,
  year      = {2017},
  pages     = {566-574},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT206808},
}

@inproceedings{bb211728,
  author    = {Yu, Z. and Yu, J. and Fan, J. and Tao, D.},
  title     = {Multi-modal Factorized Bilinear Pooling with Co-attention Learning for Visual Question Answering},
  booktitle = ICCV17,
  year      = {2017},
  pages     = {1839-1848},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT206809},
}

@inproceedings{bb211729,
  author    = {Ben younes, H. and Cadene, R. and Cord, M. and Thome, N.},
  title     = {MUTAN: Multimodal Tucker Fusion for Visual Question Answering},
  booktitle = ICCV17,
  year      = {2017},
  pages     = {2631-2639},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT206810},
}

@inproceedings{bb211730,
  author    = {Zhu, C. and Zhao, Y. and Huang, S. and Tu, K. and Ma, Y.},
  title     = {Structured Attentions for Visual Question Answering},
  booktitle = ICCV17,
  year      = {2017},
  pages     = {1300-1309},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT206811},
}

@inproceedings{bb211731,
  author    = {Hu, R. and Andreas, J. and Rohrbach, M. and Darrell, T.J. and Saenko, K.},
  title     = {Learning to Reason: End-to-End Module Networks for Visual Question Answering},
  booktitle = ICCV17,
  year      = {2017},
  pages     = {804-813},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT206812},
}

@inproceedings{bb211732,
  author    = {Jain, U. and Zhang, Z.Y. and Schwing, A.},
  title     = {Creativity: Generating Diverse Questions Using Variational Autoencoders},
  booktitle = CVPR17,
  year      = {2017},
  pages     = {5415-5424},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT206813},
}

@inproceedings{bb211733,
  author    = {Zhu, Y. and Lim, J.J. and Fei Fei, L.},
  title     = {Knowledge Acquisition for Visual Question Answering via Iterative Querying},
  booktitle = CVPR17,
  year      = {2017},
  pages     = {6146-6155},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT206814},
}

% Published in IbPRIA 2019 (Part I of the proceedings); BOOKTITLE is kept as a literal string because no IbPRIA19 macro is visible in this file.
@inproceedings{bb211734,
        AUTHOR = "Peris, A. and Casacuberta, F.",
        TITLE = "Interactive-Predictive Neural Multimodal Systems",
        BOOKTITLE = "IbPRIA",
        YEAR = "2019",
        PAGES = "I: 16-28",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT206815"}

@inproceedings{bb211735,
  author    = {Bolanos, M. and Peris, A. and Casacuberta, F. and Radeva, P.},
  title     = {VIBIKNet: Visual Bidirectional Kernelized Network for Visual Question Answering},
  booktitle = IbPRIA17,
  year      = {2017},
  pages     = {372-380},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT206816},
}

@inproceedings{bb211736,
  author    = {Gao, P. and Li, H.S. and Li, S. and Lu, P. and Li, Y.K. and Hoi, S.C.H. and Wang, X.G.},
  title     = {Question-Guided Hybrid Convolution for Visual Question Answering},
  booktitle = ECCV18,
  year      = {2018},
  pages     = {I: 485-501},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT206817},
}

@inproceedings{bb211737,
  author    = {Uehara, K. and Duan, N. and Harada, T.},
  title     = {Learning to Ask Informative Sub-Questions for Visual Question Answering},
  booktitle = MULA22,
  year      = {2022},
  pages     = {4680-4689},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT206818},
}

@inproceedings{bb211738,
  author    = {Li, Y.K. and Duan, N. and Zhou, B.L. and Chu, X. and Ouyang, W.L. and Wang, X.G. and Zhou, M.},
  title     = {Visual Question Generation as Dual Task of Visual Question Answering},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {6116-6124},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT206819},
}

@inproceedings{bb211739,
  author    = {Gao, P. and Jiang, Z.K. and You, H.X. and Lu, P. and Hoi, S.C.H. and Wang, X.G. and Li, H.S.},
  title     = {Dynamic Fusion With Intra- and Inter-Modality Attention Flow for Visual Question Answering},
  booktitle = CVPR19,
  year      = {2019},
  pages     = {6632-6641},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT206820},
}

@inproceedings{bb211740,
  author    = {Lin, Y.T. and Pang, Z.Y. and Li, Y. and Wang, D.H.},
  title     = {Simple and effective visual question answering in a single modality},
  booktitle = ICIP16,
  year      = {2016},
  pages     = {2276-2280},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT206821},
}

@inproceedings{bb211741,
  author    = {Kafle, K. and Kanan, C.},
  title     = {An Analysis of Visual Question Answering Algorithms},
  booktitle = ICCV17,
  year      = {2017},
  pages     = {1983-1991},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT206822},
}

@inproceedings{bb211742,
  author    = {Kafle, K. and Kanan, C.},
  title     = {Answer-Type Prediction for Visual Question Answering},
  booktitle = CVPR16,
  year      = {2016},
  pages     = {4976-4984},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT206823},
}

@inproceedings{bb211743,
  author    = {Wang, P. and Wu, Q. and Shen, C. and van den Hengel, A.J.},
  title     = {The VQA-Machine: Learning How to Use Existing Vision Algorithms to Answer New Questions},
  booktitle = CVPR17,
  year      = {2017},
  pages     = {3909-3918},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT206824},
}

@inproceedings{bb211744,
  author    = {Yu, D. and Fu, J. and Mei, T. and Rui, Y.},
  title     = {Multi-level Attention Networks for Visual Question Answering},
  booktitle = CVPR17,
  year      = {2017},
  pages     = {4187-4195},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT206825},
}

@inproceedings{bb211745,
  author    = {Kembhavi, A. and Seo, M. and Schwenk, D. and Choi, J. and Farhadi, A. and Hajishirzi, H.},
  title     = {Are You Smarter Than a Sixth Grader? Textbook Question Answering for Multimodal Machine Comprehension},
  booktitle = CVPR17,
  year      = {2017},
  pages     = {5376-5384},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT206826},
}

@inproceedings{bb211746,
  author    = {Ganju, S. and Russakovsky, O. and Gupta, A.},
  title     = {What's in a Question: Using Visual Questions as a Form of Supervision},
  booktitle = CVPR17,
  year      = {2017},
  pages     = {6422-6431},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT206827},
}

@inproceedings{bb211747,
  author    = {Ramakrishnan, S.K. and Pal, A. and Sharma, G. and Mittal, A.},
  title     = {An Empirical Evaluation of Visual Question Answering for Novel Objects},
  booktitle = CVPR17,
  year      = {2017},
  pages     = {7312-7321},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT206828},
}

@inproceedings{bb211748,
  author    = {Xu, H.J. and Saenko, K.},
  title     = {Ask, Attend and Answer: Exploring Question-Guided Spatial Attention for Visual Question Answering},
  booktitle = ECCV16,
  year      = {2016},
  pages     = {VII: 451-466},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT206829},
}

@inproceedings{bb211749,
  author    = {Jabri, A. and Joulin, A. and van der Maaten, L.},
  title     = {Revisiting Visual Question Answering Baselines},
  booktitle = ECCV16,
  year      = {2016},
  pages     = {VIII: 727-739},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT206830},
}

@inproceedings{bb211750,
  author    = {Yang, Z.C. and He, X.D. and Gao, J.F. and Deng, L. and Smola, A.},
  title     = {Stacked Attention Networks for Image Question Answering},
  booktitle = CVPR16,
  year      = {2016},
  pages     = {21-29},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT206831},
}

@inproceedings{bb211751,
  author    = {Sadeghi, F. and Divvala, S.K. and Farhadi, A.},
  title     = {VisKE: Visual knowledge extraction and question answering by visual verification of relation phrases},
  booktitle = CVPR15,
  year      = {2015},
  pages     = {1456-1464},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT206832},
}

@inproceedings{bb211752,
  author    = {Liu, Y. and Liu, J. and Wang, D. and Cheng, J.},
  title     = {A robust multivariate reranking algorithm for Question Answering enrichment},
  booktitle = ICIP12,
  year      = {2012},
  pages     = {1917-1920},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT206833},
}

@inproceedings{bb211753,
  author    = {Varekamp, C. and van de Walle, P. and de Putter, M.},
  title     = {Question interface for 3D picture creation on an autostereoscopic digital picture frame},
  booktitle = {3DTV09},
  year      = {2009},
  pages     = {1-4},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT206834},
}

@article{bb211754,
  author    = {Tamaazousti, Y. and Le Borgne, H. and Popescu, A. and Gadeski, E. and Ginsca, A. and Hudelot, C.},
  title     = {Vision-language integration using constrained local semantic features},
  journal   = CVIU,
  volume    = {163},
  year      = {2017},
  number    = {1},
  pages     = {41-57},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT206835},
}

@article{bb211755,
  author    = {Gouthaman, K.V. and Nambiar, A. and Srinivas, K.S. and Mittal, A.},
  title     = {Linguistically-aware attention for reducing the semantic gap in vision-language tasks},
  journal   = PR,
  volume    = {112},
  year      = {2021},
  pages     = {107812},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT206836},
}

@article{bb211756,
  author    = {Zhou, K.Y. and Yang, J.K. and Loy, C.C. and Liu, Z.W.},
  title     = {Learning to Prompt for Vision-Language Models},
  journal   = IJCV,
  volume    = {130},
  year      = {2022},
  number    = {9},
  month     = {September},
  pages     = {2337-2348},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT206837},
}

@inproceedings{bb211757,
  author    = {Zhou, K.Y. and Yang, J.K. and Loy, C.C. and Liu, Z.W.},
  title     = {Conditional Prompt Learning for Vision-Language Models},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {16795-16804},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT206838},
}

@article{bb211758,
  author    = {Ma, C.C. and Liu, Y. and Deng, J.K. and Xie, L.X. and Dong, W.M. and Xu, C.S.},
  title     = {Understanding and Mitigating Overfitting in Prompt Tuning for Vision-Language Models},
  journal   = CirSysVideo,
  volume    = {33},
  year      = {2023},
  number    = {9},
  month     = {September},
  pages     = {4616-4629},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT206839},
}

@article{bb211759,
  author    = {Zhu, Y.Q. and Li, X.Y. and Zheng, M. and Yang, J.H. and Wang, Z. and Guo, X.Q. and Chai, Z.F. and Yuan, Y.C. and Jiang, S.Q.},
  title     = {Focus and Align: Learning Tube Tokens for Video-Language Pre-Training},
  journal   = MultMed,
  volume    = {25},
  year      = {2023},
  pages     = {8036-8050},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT206840},
}

@article{bb211760,
  author    = {Chen, C.Q. and Han, D. and Chang, C.C.},
  title     = {MPCCT: Multimodal vision-language learning paradigm with context-based compact Transformer},
  journal   = PR,
  volume    = {147},
  year      = {2024},
  pages     = {110084},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT206841},
}

@article{bb211761,
  author    = {Wu, W.H. and Sun, Z. and Song, Y.X. and Wang, J.D. and Ouyang, W.L.},
  title     = {Transferring Vision-Language Models for Visual Recognition: A Classifier Perspective},
  journal   = IJCV,
  volume    = {132},
  year      = {2024},
  number    = {2},
  month     = {February},
  pages     = {392-409},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT206842},
}

@article{bb211762,
  author    = {Ming, Y.F. and Li, Y.X.},
  title     = {How Does Fine-Tuning Impact Out-of-Distribution Detection for Vision-Language Models?},
  journal   = IJCV,
  volume    = {132},
  year      = {2024},
  number    = {2},
  month     = {February},
  pages     = {596-609},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT206843},
}

@article{bb211763,
  author    = {Zhao, C.R. and Wang, Y. and Jiang, X.Y. and Shen, Y.F. and Song, K. and Li, D.S. and Miao, D.Q.},
  title     = {Learning Domain Invariant Prompt for Vision-Language Models},
  journal   = IP,
  volume    = {33},
  year      = {2024},
  pages     = {1348-1360},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT206844},
}

@article{bb211764,
  author    = {Yang, X.F. and Liu, F. and Lin, G.S.},
  title     = {Neural Logic Vision Language Explainer},
  journal   = MultMed,
  volume    = {26},
  year      = {2024},
  pages     = {3331-3340},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT206845},
}

@article{bb211765,
  author    = {Wang, Y.D. and Yu, Z.O. and Wang, J.D. and Heng, Q. and Chen, H. and Ye, W. and Xie, R. and Xie, X. and Zhang, S.K.},
  title     = {Exploring Vision-Language Models for Imbalanced Learning},
  journal   = IJCV,
  volume    = {132},
  year      = {2024},
  number    = {1},
  month     = {January},
  pages     = {224-237},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT206846},
}

@inproceedings{bb211766,
  author    = {Ganz, R. and Nuriel, O. and Aberdam, A. and Kittenplon, Y. and Mazor, S. and Litman, R.},
  title     = {Towards Models that Can See and Read},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {21661-21671},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT206847},
}

@inproceedings{bb211767,
  author    = {Zhang, H. and Liu, D. and Lv, Z. and Su, B. and Tao, D.C.},
  title     = {Exploring Temporal Concurrency for Video-Language Representation Learning},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {15522-15532},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT206848},
}

@inproceedings{bb211768,
  author    = {Shukor, M. and Dancette, C. and Cord, M.},
  title     = {eP-ALM: Efficient Perceptual Augmentation of Language Models},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {21999-22012},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT206849},
}

@inproceedings{bb211769,
  author    = {Schulter, S. and Kumar, B.G.V. and Suh, Y.M. and Dafnis, K.M. and Zhang, Z.X. and Zhao, S.Y. and Metaxas, D.N.},
  title     = {OmniLabel: A Challenging Benchmark for Language-Based Object Detection},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {11919-11928},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT206850},
}

@inproceedings{bb211770,
  author    = {Chen, Z.L. and Huang, X. and Guan, Q.L. and Lin, L. and Luo, W.Q.},
  title     = {A Retrospect to Multi-prompt Learning across Vision and Language},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {22133-22144},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT206851},
}

@inproceedings{bb211771,
  author    = {Derakhshani, M.M. and Sanchez, E. and Bulat, A. and da Costa, V.G.T. and Snoek, C.G.M. and Tzimiropoulos, G. and Martinez, B.},
  title     = {Bayesian Prompt Learning for Image-Language Model Generalization},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {15191-15200},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT206852},
}

% The ampersand in the title is escaped: a bare ampersand is a LaTeX alignment character and breaks typesetting of the bibliography.
@inproceedings{bb211772,
        AUTHOR = "Cascante Bonilla, P. and Shehada, K. and Smith, J.S. and Doveh, S. and Kim, D.H. and Panda, R. and Varol, G. and Oliva, A. and Ordonez, V. and Feris, R.S. and Karlinsky, L.",
        TITLE = "Going Beyond Nouns With Vision \& Language Models Using Synthetic
Data",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "20098-20108",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT206853"}

@inproceedings{bb211773,
  author    = {Zara, G. and Conti, A. and Roy, S. and Lathuiliere, S. and Rota, P. and Ricci, E.},
  title     = {The Unreasonable Effectiveness of Large Language-Vision Models for Source-free Video Domain Adaptation},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {10273-10283},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT206854},
}

@inproceedings{bb211774,
  author    = {Upadhyay, U. and Karthik, S. and Mancini, M. and Akata, Z.},
  title     = {ProbVLM: Probabilistic Adapter for Frozen Vison-Language Models},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {1899-1910},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT206855},
}

@inproceedings{bb211775,
  author    = {Chen, Z.H. and Diao, S.Z. and Wang, B. and Li, G.B. and Wan, X.},
  title     = {Towards Unifying Medical Vision-and-Language Pre-training via Soft Prompts},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {23346-23356},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT206856},
}

@inproceedings{bb211776,
  author    = {Bitton Guetta, N. and Bitton, Y. and Hessel, J. and Schmidt, L. and Elovici, Y. and Stanovsky, G. and Schwartz, R.},
  title     = {Breaking Common Sense: WHOOPS! A Vision-and-Language Benchmark of Synthetic and Compositional Images},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {2616-2627},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT206857},
}

@inproceedings{bb211777,
  author    = {Hu, Z.Y. and Li, Y. and Lyu, M.R. and Wang, L.W.},
  title     = {VL-PET: Vision-and-Language Parameter-Efficient Tuning via Granularity Control},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {2998-3008},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT206858},
}

@inproceedings{bb211778,
  author    = {Slyman, E. and Kahng, M. and Lee, S.},
  title     = {VLSlice: Interactive Vision-and-Language Slice Discovery},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {15245-15255},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT206859},
}

@inproceedings{bb211779,
  author    = {Najibi, M. and Ji, J.W. and Zhou, Y. and Qi, C.R. and Yan, X.C. and Ettinger, S. and Anguelov, D.},
  title     = {Unsupervised 3D Perception with 2D Vision-Language Distillation for Autonomous Driving},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {8568-8578},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT206860},
}

@inproceedings{bb211780,
  author    = {Zheng, K. and Wu, W. and Feng, R. and Zhu, K. and Liu, J.W. and Zhao, D.L. and Zha, Z.J. and Chen, W. and Shen, Y.J.},
  title     = {Regularized Mask Tuning: Uncovering Hidden Knowledge in Pre-trained Vision-Language Models},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {11629-11639},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT206861},
}

@inproceedings{bb211781,
  author    = {Wang, T. and Lin, K. and Li, L.J. and Lin, C.C. and Yang, Z.Y. and Zhang, H.W. and Liu, Z.C. and Wang, L.J.},
  title     = {Equivariant Similarity for Vision-Language Foundation Models},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {11964-11974},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT206862},
}

@inproceedings{bb211782,
  author    = {Xu, H. and Xie, S. and Huang, P.Y. and Yu, L.C. and Howes, R. and Ghosh, G. and Zettlemoyer, L. and Feichtenhofer, C.},
  title     = {CiT: Curation in Training for Effective Vision-Language Data},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {15134-15143},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT206863},
}

@inproceedings{bb211783,
  author    = {Trager, M. and Perera, P. and Zancato, L. and Achille, A. and Bhatia, P. and Soatto, S.},
  title     = {Linear Spaces of Meanings: Compositional Structures in Vision-Language Models},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {15349-15358},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT206864},
}

@inproceedings{bb211784,
  author    = {Chen, Y.S. and Song, Y.Z. and Yeo, C.Y. and Liu, B. and Fu, J.L. and Shuai, H.H.},
  title     = {SINC: Self-Supervised In-Context Learning for Vision-Language Tasks},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {15384-15396},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT206865},
}

@inproceedings{bb211785,
        AUTHOR = "Wu, C.E. and Tian, Y. and Yu, H.C. and Wang, H. and Morgado, P. and Hu, Y.H. and Yang, L.J.",
        TITLE = "Why Is Prompt Tuning for Vision-Language Models Robust to Noisy
Labels?",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "15442--15451",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT206866"}

@inproceedings{bb211786,
        AUTHOR = "Ouali, Y. and Bulat, A. and Martinez, B. and Tzimiropoulos, G.",
        TITLE = "Black Box Few-Shot Adaptation for Vision-Language models",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "15488--15500",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT206867"}

@inproceedings{bb211787,
        AUTHOR = "Kan, B. and Wang, T. and Lu, W.P. and Zhen, X.T. and Guan, W. and Zheng, F.",
        TITLE = "Knowledge-Aware Prompt Tuning for Generalizable Vision-Language
Models",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "15624--15634",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT206868"}

@inproceedings{bb211788,
        AUTHOR = "Zhai, J.T. and Zhang, Q. and Wu, T. and Chen, X.Y. and Liu, J.J. and Cheng, M.M.",
        TITLE = "{SLAN}: Self-Locator Aided Network for Vision-Language Understanding",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "21892--21901",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT206869"}

@inproceedings{bb211789,
        AUTHOR = "Long, S. and Zhao, Z. and Yuan, J. and Tan, Z.C. and Liu, J.J. and Zhou, L.P. and Wang, S.S. and Wang, J.D.",
        TITLE = "Task-Oriented Multi-Modal Mutual Learning for Vision-Language Models",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "21902--21912",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT206870"}

@inproceedings{bb211790,
        AUTHOR = "Cho, E. and Kim, J. and Kim, H.W.J.",
        TITLE = "Distribution-Aware Prompt Tuning for Vision-Language Models",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "21947--21956",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT206871"}

@inproceedings{bb211791,
        AUTHOR = "Varma, M. and Delbrouck, J.B. and Hooper, S. and Chaudhari, A. and Langlotz, C.",
        TITLE = "{ViLLA}: Fine-Grained Vision-Language Representation Learning from
Real-World Data",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "22168--22178",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT206872"}

@inproceedings{bb211792,
        AUTHOR = "Zhu, H.G. and Wei, Y.C. and Liang, X.D. and Zhang, C.J. and Zhao, Y.",
        TITLE = "{CTP}: Towards Vision-Language Continual Pretraining via Compatible
Momentum Contrast and Topology Preservation",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "22200--22210",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT206873"}

@inproceedings{bb211793,
        AUTHOR = "Salin, E. and Ayache, S. and Favre, B.",
        TITLE = "Towards an Exhaustive Evaluation of Vision-Language Foundation Models",
        BOOKTITLE = MMFM23,
        YEAR = "2023",
        PAGES = "339--352",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT206874"}

@inproceedings{bb211794,
        AUTHOR = "Hu, Z. and Zhu, X.L. and Tran, S. and Vidal, R. and Dhua, A.",
        TITLE = "{ProVLA}: Compositional Image Search with Progressive Vision-Language
Alignment and Multimodal Fusion",
        BOOKTITLE = CLVL23,
        YEAR = "2023",
        PAGES = "2764--2769",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT206875"}

@inproceedings{bb211795,
        AUTHOR = "Hall, M. and Gustafson, L. and Adcock, A. and Misra, I. and Ross, C.",
        TITLE = "Vision-Language Models Performing Zero-Shot Tasks Exhibit Disparities
Between Gender Groups",
        BOOKTITLE = CLVL23,
        YEAR = "2023",
        PAGES = "2770--2777",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT206876"}

@inproceedings{bb211796,
        AUTHOR = "Agnolucci, L. and Baldrati, A. and Todino, F. and Becattini, F. and Bertini, M. and del Bimbo, A.",
        TITLE = "{ECO}: Ensembling Context Optimization for Vision-Language Models",
        BOOKTITLE = CLVL23,
        YEAR = "2023",
        PAGES = "2803--2807",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT206877"}

@inproceedings{bb211797,
        AUTHOR = "Palit, V. and Pandey, R. and Arora, A. and Liang, P.P.",
        TITLE = "Towards Vision-Language Mechanistic Interpretability: A Causal
Tracing Tool for {BLIP}",
        BOOKTITLE = CLVL23,
        YEAR = "2023",
        PAGES = "2848--2853",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT206878"}

@inproceedings{bb211798,
        AUTHOR = "Sammani, F. and Deligiannis, N.",
        TITLE = "{Uni-NLX}: Unifying Textual Explanations for Vision and Vision-Language
Tasks",
        BOOKTITLE = VLAR23,
        YEAR = "2023",
        PAGES = "4636--4641",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT206879"}

@inproceedings{bb211799,
        AUTHOR = "Lu, D. and Wang, Z.Q. and Wang, T. and Guan, W. and Gao, H. and Zheng, F.",
        TITLE = "Set-level Guidance Attack: Boosting Adversarial Transferability of
Vision-Language Pre-training Models",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "102--111",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT206880"}

Last update: Feb 29, 2024 at 09:13:14