@inproceedings{bb215100,
        AUTHOR = "Yang, Z. C. and He, X. D. and Gao, J. F. and Deng, L. and Smola, A.",
        TITLE = "Stacked Attention Networks for Image Question Answering",
        BOOKTITLE = CVPR16,
        YEAR = "2016",
        PAGES = "21--29",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT210163"}

@inproceedings{bb215101,
        AUTHOR = "Sadeghi, F. and Divvala, S. K. and Farhadi, A.",
        TITLE = "{VisKE}: Visual knowledge extraction and question answering by visual
verification of relation phrases",
        BOOKTITLE = CVPR15,
        YEAR = "2015",
        PAGES = "1456--1464",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT210164"}

@inproceedings{bb215102,
        AUTHOR = "Liu, Y. and Liu, J. and Wang, D. and Cheng, J.",
        TITLE = "A robust multivariate reranking algorithm for Question Answering
enrichment",
        BOOKTITLE = ICIP12,
        YEAR = "2012",
        PAGES = "1917--1920",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT210165"}

% NOTE(review): BOOKTITLE below is a quoted literal ("3DTV09"), whereas most
% sibling entries use a bare string macro; confirm whether a macro was intended.
@inproceedings{bb215103,
        AUTHOR = "Varekamp, C. and van de Walle, P. and de Putter, M.",
        TITLE = "Question interface for {3D} picture creation on an autostereoscopic
digital picture frame",
        BOOKTITLE = "3DTV09",
        YEAR = "2009",
        PAGES = "1--4",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT210166"}

@article{bb215104,
        AUTHOR = "Das, A. and Agrawal, H. and Zitnick, L. and Parikh, D. and Batra, D.",
        TITLE = "Human Attention in Visual Question Answering:
Do Humans and Deep Networks Look at the Same Regions?",
        JOURNAL = CVIU,
        VOLUME = "163",
        YEAR = "2017",
        NUMBER = "1",
        PAGES = "90--100",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT210167"}

% NOTE(review): YEAR changed from 2018 to 2017. Other IJCV entries in this file
% pair volume 130 with 2022 and volume 132 with 2024, which places volume 125
% (December issue) in 2017; verify against the publisher record.
@article{bb215105,
        AUTHOR = "Malinowski, M. and Rohrbach, M. and Fritz, M.",
        TITLE = "Ask Your Neurons: A Deep Learning Approach to Visual Question Answering",
        JOURNAL = IJCV,
        VOLUME = "125",
        YEAR = "2017",
        NUMBER = "1--3",
        MONTH = dec,
        PAGES = "110--135",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT210168"}

@inproceedings{bb215106,
        AUTHOR = "Malinowski, M. and Rohrbach, M. and Fritz, M.",
        TITLE = "Ask Your Neurons:
A Neural-Based Approach to Answering Questions about Images",
        BOOKTITLE = ICCV15,
        YEAR = "2015",
        PAGES = "1--9",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT210169"}

@inproceedings{bb215107,
        AUTHOR = "Dancette, C. and Whitehead, S. and Maheshwary, R. and Vedantam, R. and Scherer, S. and Chen, X. L. and Cord, M. and Rohrbach, M.",
        TITLE = "Improving Selective Visual Question Answering by Learning from Your
Peers",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "24049--24059",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT210170"}

% NOTE(review): pages 1009--1017 look unusually high for volume 14, issue 1
% (January); confirm journal/volume/issue against the publisher record.
@article{bb215108,
        AUTHOR = "Huang, Y. Z. and Zhong, T.",
        TITLE = "Multitask learning for neural generative question answering",
        JOURNAL = RealTimeIP,
        VOLUME = "14",
        YEAR = "2018",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "1009--1017",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT210171"}

@article{bb215109,
        AUTHOR = "Ruwa, N. and Mao, Q. and Song, H. P. and Jia, H. J. and Dong, M.",
        TITLE = "Triple attention network for sentimental visual question answering",
        JOURNAL = CVIU,
        VOLUME = "189",
        YEAR = "2019",
        PAGES = "102829",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT210172"}

@article{bb215110,
        AUTHOR = "Bai, Z. W. and Li, Y. and Wozniak, M. and Zhou, M. L. and Li, D.",
        TITLE = "{DecomVQANet}: Decomposing visual question answering deep network via
tensor decomposition and regression",
        JOURNAL = PR,
        VOLUME = "110",
        YEAR = "2021",
        PAGES = "107538",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT210173"}

@article{bb215111,
        AUTHOR = "Zhang, Q. S. and Wu, Y. N. and Zhang, H. and Zhu, S. C.",
        TITLE = "Mining deep {And-Or} object structures via cost-sensitive
question-answer-based active annotations",
        JOURNAL = CVIU,
        VOLUME = "176--177",
        YEAR = "2018",
        PAGES = "33--44",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT210174"}

@article{bb215112,
        AUTHOR = "Zhang, Q. S. and Ren, J. and Huang, G. and Cao, R. M. and Wu, Y. N. and Zhu, S. C.",
        TITLE = "Mining Interpretable {AOG} Representations From Convolutional Networks
via Active Question Answering",
        JOURNAL = PAMI,
        VOLUME = "43",
        YEAR = "2021",
        NUMBER = "11",
        MONTH = nov,
        PAGES = "3949--3963",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT210175"}

@inproceedings{bb215113,
        AUTHOR = "Zhang, Q. S. and Cao, R. M. and Wu, Y. N. and Zhu, S. C.",
        TITLE = "Mining Object Parts from {CNNs} via Active Question-Answering",
        BOOKTITLE = CVPR17,
        YEAR = "2017",
        PAGES = "3890--3899",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT210176"}

@inproceedings{bb215114,
        AUTHOR = "Zhang, Q. S. and Wu, Y. N. and Zhu, S. C.",
        TITLE = "Mining {And-Or} Graphs for Graph Matching and Object Discovery",
        BOOKTITLE = ICCV15,
        YEAR = "2015",
        PAGES = "55--63",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT210177"}

@article{bb215115,
        AUTHOR = "Cao, Q. X. and Liang, X. D. and Li, B. L. and Lin, L.",
        TITLE = "Interpretable Visual Question Answering by Reasoning on Dependency
Trees",
        JOURNAL = PAMI,
        VOLUME = "43",
        YEAR = "2021",
        NUMBER = "3",
        MONTH = mar,
        PAGES = "887--901",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT210178"}

@inproceedings{bb215116,
        AUTHOR = "Cao, Q. X. and Liang, X. D. and Li, B. L. and Li, G. and Lin, L.",
        TITLE = "Visual Question Reasoning on General Dependency Tree",
        BOOKTITLE = CVPR18,
        YEAR = "2018",
        PAGES = "7249--7257",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT210179"}

@article{bb215117,
        AUTHOR = "Zhong, H. S. and Chen, J. Y. and Shen, C. and Zhang, H. W. and Huang, J. Q. and Hua, X. S.",
        TITLE = "Self-Adaptive Neural Module Transformer for Visual Question Answering",
        JOURNAL = MultMed,
        VOLUME = "23",
        YEAR = "2021",
        PAGES = "1264--1273",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT210180"}

@article{bb215118,
        AUTHOR = "Zheng, W. F. and Yin, L. R. and Chen, X. B. and Ma, Z. and Liu, S. and Yang, B.",
        TITLE = "Knowledge base graph embedding module design for Visual question
answering model",
        JOURNAL = PR,
        VOLUME = "120",
        YEAR = "2021",
        PAGES = "108153",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT210181"}

@article{bb215119,
        AUTHOR = "Sharma, H. and Jalal, A. S.",
        TITLE = "Visual question answering model based on graph neural network and
contextual attention",
        JOURNAL = IVC,
        VOLUME = "110",
        YEAR = "2021",
        PAGES = "104165",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT210182"}

@article{bb215120,
        AUTHOR = "Song, L. Y. and Li, J. and Liu, J. and Yang, Y. and Shang, X. and Sun, M. X.",
        TITLE = "Answering knowledge-based visual questions via the exploration of
Question Purpose",
        JOURNAL = PR,
        VOLUME = "133",
        YEAR = "2023",
        PAGES = "109015",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT210183"}

@article{bb215121,
  author    = {MeshuWelde, T. and Liao, L.},
  title     = {Counting-based visual question answering with serial cascaded attention deep learning},
  journal   = PR,
  volume    = {144},
  year      = {2023},
  pages     = {109850},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqann4.html#TT210184},
}

@article{bb215122,
        AUTHOR = "Liu, Y. and Li, G. B. and Lin, L.",
        TITLE = "Cross-Modal Causal Relational Reasoning for Event-Level Visual
Question Answering",
        JOURNAL = PAMI,
        VOLUME = "45",
        YEAR = "2023",
        NUMBER = "10",
        MONTH = oct,
        PAGES = "11624--11641",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT210185"}

@inproceedings{bb215123,
        AUTHOR = "Cao, Q. X. and Wan, W. T. and Wang, K. and Liang, X. D. and Lin, L.",
        TITLE = "Linguistically Routing Capsule Network for Out-of-distribution Visual
Question Answering",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "1594--1603",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT210186"}

@article{bb215124,
        AUTHOR = "Yang, S. W. and Xiao, L. and Wu, X. J. and Xu, J. J. and Wang, L. L. and He, L.",
        TITLE = "Simple contrastive learning in a self-supervised manner for robust
visual question answering",
        JOURNAL = CVIU,
        VOLUME = "241",
        YEAR = "2024",
        PAGES = "103976",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT210187"}

@article{bb215125,
        AUTHOR = "Wu, Y. L. and Pan, X. and Li, J. H. and Dou, S. and Wang, X. X.",
        TITLE = "Interpretable answer retrieval based on heterogeneous network
embedding",
        JOURNAL = PRL,
        VOLUME = "182",
        YEAR = "2024",
        PAGES = "9--16",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT210188"}

@inproceedings{bb215126,
        AUTHOR = "Feng, C. and Danier, D. and Zhang, F. and Bull, D.",
        TITLE = "{RankDVQA}: Deep {VQA} based on Ranking-inspired Hybrid Training",
        BOOKTITLE = WACV24,
        YEAR = "2024",
        PAGES = "1637--1647",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT210189"}

@inproceedings{bb215127,
        AUTHOR = "Ishay, A. and Yang, Z. and Lee, J. and Kang, I. and Lim, D. J.",
        TITLE = "Think before You Simulate: Symbolic Reasoning to Orchestrate Neural
Computation for Counterfactual Question Answering",
        BOOKTITLE = WACV24,
        YEAR = "2024",
        PAGES = "6684--6693",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT210190"}

@inproceedings{bb215128,
        AUTHOR = "Wang, Y. and Yasunaga, M. and Ren, H. Y. and Wada, S. and Leskovec, J.",
        TITLE = "{VQA-GNN}: Reasoning with Multimodal Knowledge via Graph Neural
Networks for Visual Question Answering",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "21525--21535",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT210191"}

@inproceedings{bb215129,
        AUTHOR = "Souza, B. and Aasan, M. and Pedrini, H. and Rivera, A. R.",
        TITLE = "{SelfGraphVQA}: A Self-Supervised Graph Neural Network for Scene-based
Question Answering",
        BOOKTITLE = VLAR23,
        YEAR = "2023",
        PAGES = "4642--4647",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT210192"}

% NOTE(review): BOOKTITLE below is a quoted literal ("ICPR22"), whereas sibling
% entries such as the ICPR21 one use a bare string macro; confirm which is intended.
@inproceedings{bb215130,
        AUTHOR = "Haisa, G. and Altenbek, G.",
        TITLE = "Question Classification Based on Weak Supervision and Interrogative
Pronouns Attention Mechanism",
        BOOKTITLE = "ICPR22",
        YEAR = "2022",
        PAGES = "2273--2278",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT210193"}

@inproceedings{bb215131,
        AUTHOR = "Nguyen, B. X. and Do, T. and Tran, H. and Tjiputra, E. and Tran, Q. D. and Nguyen, A.",
        TITLE = "Coarse-to-Fine Reasoning for Visual Question Answering",
        BOOKTITLE = MULA22,
        YEAR = "2022",
        PAGES = "4557--4565",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT210194"}

@inproceedings{bb215132,
        AUTHOR = "Liang, Y. Y. and Wang, X. and Duan, X. G. and Zhu, W. W.",
        TITLE = "Multi-modal Contextual Graph Neural Network for Text Visual Question
Answering",
        BOOKTITLE = ICPR21,
        YEAR = "2021",
        PAGES = "3491--3498",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT210195"}

@inproceedings{bb215133,
        AUTHOR = "Patro, B. N. and Kurmi, V. K. and Kumar, S. and Namboodiri, V. P.",
        TITLE = "Deep {Bayesian} Network for Visual Question Generation",
        BOOKTITLE = WACV20,
        YEAR = "2020",
        PAGES = "1555--1565",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT210196"}

@inproceedings{bb215134,
        AUTHOR = "Singh, A. K. and Mishra, A. and Shekhar, S. and Chakraborty, A.",
        TITLE = "From Strings to Things: Knowledge-Enabled {VQA} Model That Can Read and
Reason",
        BOOKTITLE = ICCV19,
        YEAR = "2019",
        PAGES = "4601--4611",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT210197"}

@inproceedings{bb215135,
        AUTHOR = "Wilf, A. and Ma, M. Q. and Liang, P. P. and Zadeh, A. and Morency, L. P.",
        TITLE = "Face-to-Face Contrastive Learning for Social Intelligence
Question-Answering",
        BOOKTITLE = FG23,
        YEAR = "2023",
        PAGES = "1--7",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT210198"}

@inproceedings{bb215136,
        AUTHOR = "Zadeh, A. and Chan, M. and Liang, P. P. and Tong, E. and Morency, L. P.",
        TITLE = "{Social-IQ}: A Question Answering Benchmark for Artificial Social
Intelligence",
        BOOKTITLE = CVPR19,
        YEAR = "2019",
        PAGES = "8799--8809",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT210199"}

@inproceedings{bb215137,
        AUTHOR = "Ma, C. and Shen, C. and Dick, A. and Wu, Q. and Wang, P. and van den Hengel, A. J. and Reid, I. D.",
        TITLE = "Visual Question Answering with Memory-Augmented Networks",
        BOOKTITLE = CVPR18,
        YEAR = "2018",
        PAGES = "6975--6984",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT210200"}

@inproceedings{bb215138,
        AUTHOR = "Shin, A. and Ushiku, Y. and Harada, T.",
        TITLE = "Customized Image Narrative Generation via Interactive Visual Question
Generation and Answering",
        BOOKTITLE = CVPR18,
        YEAR = "2018",
        PAGES = "8925--8933",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT210201"}

@inproceedings{bb215139,
        AUTHOR = "Teney, D. and Anderson, P. and He, X. and van den Hengel, A. J.",
        TITLE = "Tips and Tricks for Visual Question Answering:
Learnings from the 2017 Challenge",
        BOOKTITLE = CVPR18,
        YEAR = "2018",
        PAGES = "4223--4232",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT210202"}

@inproceedings{bb215140,
        AUTHOR = "Bai, Y. L. and Fu, J. L. and Zhao, T. J. and Mei, T.",
        TITLE = "Deep Attention Neural Tensor Network for Visual Question Answering",
        BOOKTITLE = ECCV18,
        YEAR = "2018",
        PAGES = "XII: 21--37",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT210203"}

@inproceedings{bb215141,
        AUTHOR = "Sinha, A. and Ayush, K.",
        TITLE = "Towards Mathematical Reasoning: A Multimodal Deep Learning Approach",
        BOOKTITLE = ICIP18,
        YEAR = "2018",
        PAGES = "4028--4032",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT210204"}

@inproceedings{bb215142,
        AUTHOR = "Rosso Mateus, A. and Gonzalez, F. A. and Montes y Gomez, M.",
        TITLE = "A Two-Step Neural Network Approach to Passage Retrieval for Open Domain
Question Answering",
        BOOKTITLE = CIARP17,
        YEAR = "2017",
        PAGES = "566--574",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT210205"}

@inproceedings{bb215143,
        AUTHOR = "Zhu, C. and Zhao, Y. and Huang, S. and Tu, K. and Ma, Y.",
        TITLE = "Structured Attentions for Visual Question Answering",
        BOOKTITLE = ICCV17,
        YEAR = "2017",
        PAGES = "1300--1309",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT210206"}

@inproceedings{bb215144,
        AUTHOR = "Hu, R. and Andreas, J. and Rohrbach, M. and Darrell, T. J. and Saenko, K.",
        TITLE = "Learning to Reason:
End-to-End Module Networks for Visual Question Answering",
        BOOKTITLE = ICCV17,
        YEAR = "2017",
        PAGES = "804--813",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT210207"}

@inproceedings{bb215145,
        AUTHOR = "Peris, A. and Casacuberta, F.",
        TITLE = "Interactive-Predictive Neural Multimodal Systems",
        BOOKTITLE = IbPRIA19,
        YEAR = "2019",
        PAGES = "I:16--28",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT210208"}

@inproceedings{bb215146,
        AUTHOR = "Bolanos, M. and Peris, A. and Casacuberta, F. and Radeva, P.",
        TITLE = "{VIBIKNet}: Visual Bidirectional Kernelized Network for Visual Question
Answering",
        BOOKTITLE = IbPRIA17,
        YEAR = "2017",
        PAGES = "372--380",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT210209"}

@inproceedings{bb215147,
        AUTHOR = "Kafle, K. and Kanan, C.",
        TITLE = "An Analysis of Visual Question Answering Algorithms",
        BOOKTITLE = ICCV17,
        YEAR = "2017",
        PAGES = "1983--1991",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT210210"}

@inproceedings{bb215148,
        AUTHOR = "Kafle, K. and Kanan, C.",
        TITLE = "Answer-Type Prediction for Visual Question Answering",
        BOOKTITLE = CVPR16,
        YEAR = "2016",
        PAGES = "4976--4984",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT210211"}

@inproceedings{bb215149,
        AUTHOR = "Wang, P. and Wu, Q. and Shen, C. and van den Hengel, A. J.",
        TITLE = "The {VQA-Machine}: Learning How to Use Existing Vision Algorithms to
Answer New Questions",
        BOOKTITLE = CVPR17,
        YEAR = "2017",
        PAGES = "3909--3918",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT210212"}

@inproceedings{bb215150,
        AUTHOR = "Yu, D. and Fu, J. and Mei, T. and Rui, Y.",
        TITLE = "Multi-level Attention Networks for Visual Question Answering",
        BOOKTITLE = CVPR17,
        YEAR = "2017",
        PAGES = "4187--4195",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT210213"}

@inproceedings{bb215151,
        AUTHOR = "Ramakrishnan, S. K. and Pal, A. and Sharma, G. and Mittal, A.",
        TITLE = "An Empirical Evaluation of Visual Question Answering for Novel
Objects",
        BOOKTITLE = CVPR17,
        YEAR = "2017",
        PAGES = "7312--7321",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT210214"}

@article{bb215152,
        AUTHOR = "Tamaazousti, Y. and Le Borgne, H. and Popescu, A. and Gadeski, E. and Ginsca, A. and Hudelot, C.",
        TITLE = "Vision-language integration using constrained local semantic features",
        JOURNAL = CVIU,
        VOLUME = "163",
        YEAR = "2017",
        NUMBER = "1",
        PAGES = "41--57",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT210215"}

@article{bb215153,
        AUTHOR = "Gouthaman, K. V. and Nambiar, A. and Srinivas, K. S. and Mittal, A.",
        TITLE = "Linguistically-aware attention for reducing the semantic gap in
vision-language tasks",
        JOURNAL = PR,
        VOLUME = "112",
        YEAR = "2021",
        PAGES = "107812",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT210216"}

@article{bb215154,
        AUTHOR = "Zhou, K. Y. and Yang, J. K. and Loy, C. C. and Liu, Z. W.",
        TITLE = "Learning to Prompt for Vision-Language Models",
        JOURNAL = IJCV,
        VOLUME = "130",
        YEAR = "2022",
        NUMBER = "9",
        MONTH = sep,
        PAGES = "2337--2348",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT210217"}

@inproceedings{bb215155,
        AUTHOR = "Zhou, K. Y. and Yang, J. K. and Loy, C. C. and Liu, Z. W.",
        TITLE = "Conditional Prompt Learning for Vision-Language Models",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "16795--16804",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT210218"}

@article{bb215156,
        AUTHOR = "Ma, C. C. and Liu, Y. and Deng, J. K. and Xie, L. X. and Dong, W. M. and Xu, C. S.",
        TITLE = "Understanding and Mitigating Overfitting in Prompt Tuning for
Vision-Language Models",
        JOURNAL = CirSysVideo,
        VOLUME = "33",
        YEAR = "2023",
        NUMBER = "9",
        MONTH = sep,
        PAGES = "4616--4629",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT210219"}

@article{bb215157,
        AUTHOR = "Zhu, Y. Q. and Li, X. Y. and Zheng, M. and Yang, J. H. and Wang, Z. H. and Guo, X. Q. and Chai, Z. F. and Yuan, Y. C. and Jiang, S. Q.",
        TITLE = "Focus and Align: Learning Tube Tokens for Video-Language Pre-Training",
        JOURNAL = MultMed,
        VOLUME = "25",
        YEAR = "2023",
        PAGES = "8036--8050",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT210220"}

@article{bb215158,
        AUTHOR = "Chen, C. Q. and Han, D. and Chang, C. C.",
        TITLE = "{MPCCT}: Multimodal vision-language learning paradigm with
context-based compact Transformer",
        JOURNAL = PR,
        VOLUME = "147",
        YEAR = "2024",
        PAGES = "110084",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT210221"}

@article{bb215159,
        AUTHOR = "Wu, W. H. and Sun, Z. and Song, Y. X. and Wang, J. D. and Ouyang, W. L.",
        TITLE = "Transferring Vision-Language Models for Visual Recognition:
A Classifier Perspective",
        JOURNAL = IJCV,
        VOLUME = "132",
        YEAR = "2024",
        NUMBER = "2",
        MONTH = feb,
        PAGES = "392--409",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT210222"}

@article{bb215160,
        AUTHOR = "Ming, Y. F. and Li, Y. X.",
        TITLE = "How Does Fine-Tuning Impact Out-of-Distribution Detection for
Vision-Language Models?",
        JOURNAL = IJCV,
        VOLUME = "132",
        YEAR = "2024",
        NUMBER = "2",
        MONTH = feb,
        PAGES = "596--609",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT210223"}

@article{bb215161,
        AUTHOR = "Zhao, C. R. and Wang, Y. and Jiang, X. Y. and Shen, Y. F. and Song, K. and Li, D. S. and Miao, D. Q.",
        TITLE = "Learning Domain Invariant Prompt for Vision-Language Models",
        JOURNAL = IP,
        VOLUME = "33",
        YEAR = "2024",
        PAGES = "1348--1360",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT210224"}

@article{bb215162,
        AUTHOR = "Yang, X. F. and Liu, F. and Lin, G. S.",
        TITLE = "Neural Logic Vision Language Explainer",
        JOURNAL = MultMed,
        VOLUME = "26",
        YEAR = "2024",
        PAGES = "3331--3340",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT210225"}

@article{bb215163,
        AUTHOR = "Wang, Y. D. and Yu, Z. O. and Wang, J. D. and Heng, Q. and Chen, H. and Ye, W. and Xie, R. and Xie, X. and Zhang, S. K.",
        TITLE = "Exploring Vision-Language Models for Imbalanced Learning",
        JOURNAL = IJCV,
        VOLUME = "132",
        YEAR = "2024",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "224--237",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT210226"}

@article{bb215164,
        AUTHOR = "Yu, Z. T. and Zhao, J. and Guo, C. L. and Yang, Y.",
        TITLE = "{StableNet}: Distinguishing the hard samples to overcome language
priors in visual question answering",
        JOURNAL = IET-CV,
        VOLUME = "18",
        YEAR = "2024",
        NUMBER = "2",
        PAGES = "315--327",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT210227"}

@article{bb215165,
        AUTHOR = "Zeng, Y. and Zhang, X. and Li, H. and Wang, J. W. and Zhang, J. P. and Zhou, W.",
        TITLE = "{X2-VLM}: All-in-One Pre-Trained Model for Vision-Language Tasks",
        JOURNAL = PAMI,
        VOLUME = "46",
        YEAR = "2024",
        NUMBER = "5",
        MONTH = may,
        PAGES = "3156--3168",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT210228"}

@article{bb215166,
        AUTHOR = "Zheng, Y. Z. and Zhong, B. and Liang, Q. H. and Li, G. R. and Ji, R. R. and Li, X. X.",
        TITLE = "Toward Unified Token Learning for Vision-Language Tracking",
        JOURNAL = CirSysVideo,
        VOLUME = "34",
        YEAR = "2024",
        NUMBER = "4",
        MONTH = apr,
        PAGES = "2125--2135",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT210229"}

@article{bb215167,
  author    = {Ye, P. and Xiao, G. and Liu, J.},
  title     = {Multimodal Features Alignment for Vision-Language Object Tracking},
  journal   = RS,
  volume    = {16},
  year      = {2024},
  number    = {7},
  pages     = {1168},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT210230},
}

@article{bb215168,
        AUTHOR = "Bazi, Y. and Bashmal, L. and Rahhal, M. M. A. and Ricci, R. and Melgani, F.",
        TITLE = "{RS-LLaVA}: A Large Vision-Language Model for Joint Captioning and
Question Answering in Remote Sensing Imagery",
        JOURNAL = RS,
        VOLUME = "16",
        YEAR = "2024",
        NUMBER = "9",
        PAGES = "1477",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT210231"}

@article{bb215169,
        AUTHOR = "Kong, D. and Kong, K. and Kang, S. J.",
        TITLE = "Image clustering using generated text centroids",
        JOURNAL = SP:IC,
        VOLUME = "125",
        YEAR = "2024",
        PAGES = "117128",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT210232"}

@article{bb215170,
        AUTHOR = "Chen, X. Y. and Yang, J. H. and Chen, S. and Wang, L. and Jiang, M. and Zhao, Q.",
        TITLE = "Every Problem, Every Step, All in Focus: Learning to Solve
Vision-Language Problems With Integrated Attention",
        JOURNAL = PAMI,
        VOLUME = "46",
        YEAR = "2024",
        NUMBER = "7",
        MONTH = jul,
        PAGES = "4720--4735",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT210233"}

@article{bb215171,
        AUTHOR = "Menon, S. and Chandratreya, I. P. and Vondrick, C.",
        TITLE = "Task Bias in Contrastive Vision-Language Models",
        JOURNAL = IJCV,
        VOLUME = "132",
        YEAR = "2024",
        NUMBER = "6",
        MONTH = jun,
        PAGES = "2026--2040",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT210234"}

@article{bb215172,
        AUTHOR = "Wang, W. X. and He, X. J. and Zhang, Y. and Guo, L. T. and Shen, J. C. and Li, J. Y. and Liu, J.",
        TITLE = "{CM-MaskSD}: Cross-Modality Masked Self-Distillation for Referring
Image Segmentation",
        JOURNAL = MultMed,
        VOLUME = "26",
        YEAR = "2024",
        PAGES = "6906--6916",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT210235"}

@inproceedings{bb215173,
        AUTHOR = "Sahin, U. and Li, H. and Khan, Q. and Cremers, D. and Tresp, V.",
        TITLE = "Enhancing Multimodal Compositional Reasoning of Visual Language
Models with Generative Negative Mining",
        BOOKTITLE = WACV24,
        YEAR = "2024",
        PAGES = "5551--5561",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT210236"}

@inproceedings{bb215174,
        AUTHOR = "Yang, C. and Xu, R. and Guo, Y. and Huang, P. X. and Chen, Y. and Ding, W. and Wang, Z. Y. and Zhou, H.",
        TITLE = "Improving Vision-and-Language Reasoning via Spatial Relations
Modeling",
        BOOKTITLE = WACV24,
        YEAR = "2024",
        PAGES = "758--767",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT210237"}

@inproceedings{bb215175,
        AUTHOR = "Shen, S. and Yang, S. and Zhang, T. J. and Zhai, B. and Gonzalez, J. E. and Keutzer, K. and Darrell, T. J.",
        TITLE = "Multitask Vision-Language Prompt Tuning",
        BOOKTITLE = WACV24,
        YEAR = "2024",
        PAGES = "5644--5655",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT210238"}

@inproceedings{bb215176,
        AUTHOR = "Zhang, G. and Zhang, Y. R. and Zhang, K. and Tresp, V.",
        TITLE = "Can Vision-Language Models be a Good Guesser? Exploring {VLMs} for
Times and Location Reasoning",
        BOOKTITLE = WACV24,
        YEAR = "2024",
        PAGES = "625--634",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT210239"}

@inproceedings{bb215177,
        AUTHOR = "Feinglass, J. and Yang, Y. Z.",
        TITLE = "Towards Addressing the Misalignment of Object Proposal Evaluation for
Vision-Language Tasks via Semantic Grounding",
        BOOKTITLE = WACV24,
        YEAR = "2024",
        PAGES = "4385--4395",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT210240"}

@inproceedings{bb215178,
        AUTHOR = "Nadeem, A. and Hilton, A. and Dawes, R. and Thomas, G. and Mustafa, A.",
        TITLE = "{CAD}: Contextual Multi-modal Alignment for Dynamic {AVQA}",
        BOOKTITLE = WACV24,
        YEAR = "2024",
        PAGES = "7236--7248",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT210241"}

@inproceedings{bb215179,
        AUTHOR = "Wu, W. and Li, Q. and Zhong, W. L. and Huang, J. Z.",
        TITLE = "{MIVC}: Multiple Instance Visual Component for Visual-Language Models",
        BOOKTITLE = WACV24,
        YEAR = "2024",
        PAGES = "8102--8111",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT210242"}

@inproceedings{bb215180,
        AUTHOR = "Ganz, R. and Nuriel, O. and Aberdam, A. and Kittenplon, Y. and Mazor, S. and Litman, R.",
        TITLE = "Towards Models that Can See and Read",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "21661--21671",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT210243"}

@inproceedings{bb215181,
        AUTHOR = "Zhang, H. and Liu, D. and Lv, Z. and Su, B. and Tao, D. C.",
        TITLE = "Exploring Temporal Concurrency for Video-Language Representation
Learning",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "15522--15532",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT210244"}

% Braces keep the mixed-case name eP-ALM intact under styles that sentence-case titles.
@inproceedings{bb215182,
  author    = {Shukor, M. and Dancette, C. and Cord, M.},
  title     = {{eP-ALM}: Efficient Perceptual Augmentation of Language Models},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {21999-22012},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT210245},
}

% Braces keep the camel-case name OmniLabel intact under styles that sentence-case titles.
@inproceedings{bb215183,
  author    = {Schulter, S. and Kumar, B.G.V. and Suh, Y.M. and Dafnis, K.M. and Zhang, Z.X. and Zhao, S.Y. and Metaxas, D.N.},
  title     = {{OmniLabel}: A Challenging Benchmark for Language-Based Object Detection},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {11919-11928},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT210246},
}

@inproceedings{bb215184,
  author    = {Chen, Z.L. and Huang, X. and Guan, Q.L. and Lin, L. and Luo, W.Q.},
  title     = {A Retrospect to Multi-prompt Learning across Vision and Language},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {22133-22144},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT210247},
}

@inproceedings{bb215185,
  author    = {Derakhshani, M.M. and Sanchez, E. and Bulat, A. and da Costa, V.G.T. and Snoek, C.G.M. and Tzimiropoulos, G. and Martinez, B.},
  title     = {Bayesian Prompt Learning for Image-Language Model Generalization},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {15191-15200},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT210248},
}

% The ampersand in the title is escaped; a bare one is a LaTeX alignment character and breaks typesetting.
@inproceedings{bb215186,
  author    = {Cascante Bonilla, P. and Shehada, K. and Smith, J.S. and Doveh, S. and Kim, D.H. and Panda, R. and Varol, G. and Oliva, A. and Ordonez, V. and Feris, R.S. and Karlinsky, L.},
  title     = {Going Beyond Nouns With Vision \& Language Models Using Synthetic Data},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {20098-20108},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT210249},
}

@inproceedings{bb215187,
  author    = {Zara, G. and Conti, A. and Roy, S. and Lathuiliere, S. and Rota, P. and Ricci, E.},
  title     = {The Unreasonable Effectiveness of Large Language-Vision Models for Source-free Video Domain Adaptation},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {10273-10283},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT210250},
}

% Braces keep the mixed-case name ProbVLM intact under styles that sentence-case titles.
% NOTE(review): the spelling "Vison-Language" is kept as given in this file; confirm against the published title before changing it.
@inproceedings{bb215188,
  author    = {Upadhyay, U. and Karthik, S. and Mancini, M. and Akata, Z.},
  title     = {{ProbVLM}: Probabilistic Adapter for Frozen Vison-Language Models},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {1899-1910},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT210251},
}

@inproceedings{bb215189,
  author    = {Chen, Z.H. and Diao, S.Z. and Wang, B. and Li, G.B. and Wan, X.},
  title     = {Towards Unifying Medical Vision-and-Language Pre-training via Soft Prompts},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {23346-23356},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT210252},
}

% Braces keep the benchmark name WHOOPS upper-case under styles that sentence-case titles.
@inproceedings{bb215190,
  author    = {Bitton Guetta, N. and Bitton, Y. and Hessel, J. and Schmidt, L. and Elovici, Y. and Stanovsky, G. and Schwartz, R.},
  title     = {Breaking Common Sense: {WHOOPS}! A Vision-and-Language Benchmark of Synthetic and Compositional Images},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {2616-2627},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT210253},
}

% Braces keep the acronym VL-PET upper-case under styles that sentence-case titles.
@inproceedings{bb215191,
  author    = {Hu, Z.Y. and Li, Y. and Lyu, M.R. and Wang, L.W.},
  title     = {{VL-PET}: Vision-and-Language Parameter-Efficient Tuning via Granularity Control},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {2998-3008},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT210254},
}

% Braces keep the mixed-case name VLSlice intact under styles that sentence-case titles.
@inproceedings{bb215192,
  author    = {Slyman, E. and Kahng, M. and Lee, S.},
  title     = {{VLSlice}: Interactive Vision-and-Language Slice Discovery},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {15245-15255},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT210255},
}

% Braces keep "3D" and "2D" from being lower-cased to "3d"/"2d" under styles that sentence-case titles.
@inproceedings{bb215193,
  author    = {Najibi, M. and Ji, J.W. and Zhou, Y. and Qi, C.R. and Yan, X.C. and Ettinger, S. and Anguelov, D.},
  title     = {Unsupervised {3D} Perception with {2D} Vision-Language Distillation for Autonomous Driving},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {8568-8578},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT210256},
}

@inproceedings{bb215194,
  author    = {Zheng, K. and Wu, W. and Feng, R. and Zhu, K. and Liu, J.W. and Zhao, D.L. and Zha, Z.J. and Chen, W. and Shen, Y.J.},
  title     = {Regularized Mask Tuning: Uncovering Hidden Knowledge in Pre-trained Vision-Language Models},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {11629-11639},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT210257},
}

@inproceedings{bb215195,
  author    = {Wang, T. and Lin, K. and Li, L.J. and Lin, C.C. and Yang, Z.Y. and Zhang, H.W. and Liu, Z.C. and Wang, L.J.},
  title     = {Equivariant Similarity for Vision-Language Foundation Models},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {11964-11974},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT210258},
}

% Braces keep the mixed-case name CiT intact under styles that sentence-case titles.
@inproceedings{bb215196,
  author    = {Xu, H. and Xie, S. and Huang, P.Y. and Yu, L.C. and Howes, R. and Ghosh, G. and Zettlemoyer, L. and Feichtenhofer, C.},
  title     = {{CiT}: Curation in Training for Effective Vision-Language Data},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {15134-15143},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT210259},
}

@inproceedings{bb215197,
  author    = {Trager, M. and Perera, P. and Zancato, L. and Achille, A. and Bhatia, P. and Soatto, S.},
  title     = {Linear Spaces of Meanings: Compositional Structures in Vision-Language Models},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {15349-15358},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT210260},
}

% Braces keep the acronym SINC upper-case under styles that sentence-case titles.
@inproceedings{bb215198,
  author    = {Chen, Y.S. and Song, Y.Z. and Yeo, C.Y. and Liu, B. and Fu, J.L. and Shuai, H.H.},
  title     = {{SINC}: Self-Supervised In-Context Learning for Vision-Language Tasks},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {15384-15396},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT210261},
}

@inproceedings{bb215199,
  author    = {Wu, C.E. and Tian, Y. and Yu, H.C. and Wang, H. and Morgado, P. and Hu, Y.H. and Yang, L.J.},
  title     = {Why Is Prompt Tuning for Vision-Language Models Robust to Noisy Labels?},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {15442-15451},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT210262},
}

Last update: Jun 17, 2024 at 21:38:11