@article{bb227400,
        AUTHOR = "Shen, K. and Wu, L. F. and Tang, S. L. and Xu, F. and Long, B. and Zhuang, Y. T. and Pei, J.",
        TITLE = "Ask Questions With Double Hints: Visual Question Generation With
Answer-Awareness and Region-Reference",
        JOURNAL = PAMI,
        VOLUME = "46",
        YEAR = "2024",
        NUMBER = "12",
        MONTH = dec,
        PAGES = "9648--9660",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222397"}

@article{bb227401,
        AUTHOR = "Yamada, M. and d'Amario, V. and Takemoto, K. and Boix, X. and Sasaki, T.",
        TITLE = "Transformer Module Networks for Systematic Generalization in Visual
Question Answering",
        JOURNAL = PAMI,
        VOLUME = "46",
        YEAR = "2024",
        NUMBER = "12",
        MONTH = dec,
        PAGES = "10096--10105",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222398"}

@article{bb227402,
        AUTHOR = "Guo, Y. Y. and Jiao, F. and Shen, Z. Q. and Nie, L. Q. and Kankanhalli, M.",
        TITLE = "{UNK-VQA}: A Dataset and a Probe Into the Abstention Ability of
Multi-Modal Large Models",
        JOURNAL = PAMI,
        VOLUME = "46",
        YEAR = "2024",
        NUMBER = "12",
        MONTH = dec,
        PAGES = "10284--10296",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222399"}

@article{bb227403,
        AUTHOR = "Chen, F. Y. and Tang, X. S. and Hao, K. R.",
        TITLE = "{GEXMERT}: Geometrically enhanced cross-modality encoder representations
from transformers inspired by higher-order visual percepts",
        JOURNAL = PR,
        VOLUME = "158",
        YEAR = "2025",
        PAGES = "111047",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222400"}

@article{bb227404,
        AUTHOR = "Zhang, B. and Li, J. X. and Shi, Y. C. and Han, Y. and Hu, Q. H.",
        TITLE = "{VADS}: Visuo-Adaptive {DualStrike} attack on visual question answer",
        JOURNAL = CVIU,
        VOLUME = "249",
        YEAR = "2024",
        PAGES = "104137",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222401"}

@article{bb227405,
        AUTHOR = "Liu, J. and Xie, J. and Zhou, F. Y. and He, S. F.",
        TITLE = "Question Type-Aware Debiasing for Test-Time Visual Question Answering
Model Adaptation",
        JOURNAL = CirSysVideo,
        VOLUME = "34",
        YEAR = "2024",
        NUMBER = "11",
        MONTH = nov,
        PAGES = "10805--10816",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222402"}

@article{bb227406,
        AUTHOR = "Cao, R. L. and Li, Z. X. and Tang, Z. J. and Zhang, C. L. and Ma, H. F.",
        TITLE = "Enhancing robust {VQA} via contrastive and self-supervised learning",
        JOURNAL = PR,
        VOLUME = "159",
        YEAR = "2025",
        PAGES = "111129",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222403"}

@article{bb227407,
        AUTHOR = "Liu, F. and Dai, W. W. and Zhang, C. Y. and Zhu, J. and Yao, L. and Li, X.",
        TITLE = "{Co-LLaVA}: Efficient Remote Sensing Visual Question Answering via
Model Collaboration",
        JOURNAL = RS,
        VOLUME = "17",
        YEAR = "2025",
        NUMBER = "3",
        PAGES = "466",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222404"}

@article{bb227408,
        AUTHOR = "Peng, D. and Li, Z. X.",
        TITLE = "Unbiased {VQA} via modal information interaction and question
transformation",
        JOURNAL = PR,
        VOLUME = "162",
        YEAR = "2025",
        PAGES = "111394",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222405"}

@inproceedings{bb227409,
        AUTHOR = "Zhao, X. Y. and Bai, Z. W. and Zhou, M. L. and Ren, X. C. and Wang, Y. Q. and Wang, L. C.",
        TITLE = "Integrating Dynamic Routing with Reinforcement Learning and
Multimodal Techniques for Visual Question Answering",
        BOOKTITLE = ICIVC24,
        YEAR = "2024",
        PAGES = "295--301",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222406"}

@inproceedings{bb227410,
        AUTHOR = "Ge, J. X. and Subramanian, S. and Shi, B. and Herzig, R. and Darrell, T. J.",
        TITLE = "Recursive Visual Programming",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "XLIII: 1--18",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222407"}

@inproceedings{bb227411,
        AUTHOR = "Lu, C. and Lu, Q. and Luo, J.",
        TITLE = "An Explainable Vision Question Answer Model via Diffusion
Chain-of-thought",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "LXVII: 146--162",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222408"}

@inproceedings{bb227412,
        AUTHOR = "Wang, H. B. and Ge, W. F.",
        TITLE = "{Q\&A} Prompts: Discovering Rich Visual Clues through Mining
Question-answer Prompts for {VQA} requiring Diverse World Knowledge",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "XLII: 274--292",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222409"}

@inproceedings{bb227413,
        AUTHOR = "Liu, H. and Ma, X. and Zhong, C. and Zhang, Y. and Lin, W. Y.",
        TITLE = "Timecraft: Navigate Weakly-supervised Temporal Grounded Video Question
Answering via Bi-directional Reasoning",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "V: 92--107",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222410"}

@inproceedings{bb227414,
        AUTHOR = "Park, K. R. and Lee, H. J. and Kim, J. U.",
        TITLE = "Learning Trimodal Relation for Audio-visual Question Answering with
Missing Modality",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "XV: 42--59",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222411"}

@inproceedings{bb227415,
        AUTHOR = "Zhang, Y. F. and Jiang, M. and Zhao, Q.",
        TITLE = "Grace: Graph-based Contextual Debiasing for Fair Visual Question
Answering",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "XVII: 176--194",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222412"}

@inproceedings{bb227416,
        AUTHOR = "Mishra, A. and Agarwala, A. and Tiwari, U. and Rajendiran, V. N. and Miriyala, S. S.",
        TITLE = "Efficient Visual Question Answering on Embedded Devices:
Cross-Modality Attention with Evolutionary Quantization",
        BOOKTITLE = ICIP24,
        YEAR = "2024",
        PAGES = "2142--2148",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222413"}

@inproceedings{bb227417,
        AUTHOR = "Jiang, X. and Wang, G. M. and Guo, J. H. and Li, J. C. and Zhang, W. Q. and Lu, R. X. and Tang, S. L.",
        TITLE = "{DIEM}: Decomposition-Integration Enhancing Multimodal Insights",
        BOOKTITLE = CVPR24,
        YEAR = "2024",
        PAGES = "27294--27303",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222414"}

@inproceedings{bb227418,
        AUTHOR = "Agrawal, A. and Lezcano, C. M. S. and Heredia Marin, I. B. and Sethi, P. S.",
        TITLE = "Listen Then See: Video Alignment with Speaker Attention",
        BOOKTITLE = MULA24,
        YEAR = "2024",
        PAGES = "2018--2027",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222415"}

@inproceedings{bb227419,
        AUTHOR = "Liu, X. L. and Dong, Z. K. and Zhang, P.",
        TITLE = "Tackling Data Bias in {MUSIC-AVQA}: Crafting a Balanced Dataset for
Unbiased Question-Answering",
        BOOKTITLE = WACV24,
        YEAR = "2024",
        PAGES = "4466--4475",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222416"}

@inproceedings{bb227420,
        AUTHOR = "Shi, X. X. and Lee, S.",
        TITLE = "Benchmarking Out-of-Distribution Detection in Visual Question
Answering",
        BOOKTITLE = WACV24,
        YEAR = "2024",
        PAGES = "5473--5483",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222417"}

@inproceedings{bb227421,
        AUTHOR = "Venkataraman, S. R. and Rao, R. S. and Balasubramanian, S. and Sarma, R. R. and Vorugunti, C. S.",
        TITLE = "Can you even tell left from right? Presenting a new challenge for {VQA}",
        BOOKTITLE = WACV24,
        YEAR = "2024",
        PAGES = "4486--4495",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222418"}

@inproceedings{bb227422,
        AUTHOR = "Sahu, P. P. and Raut, A. and Samant, J. S. and Gorijala, M. and Lakshminarayanan, V. and Bhaskar, P.",
        TITLE = "{POP-VQA}: Privacy preserving, On-device, Personalized Visual Question
Answering",
        BOOKTITLE = WACV24,
        YEAR = "2024",
        PAGES = "8455--8464",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222419"}

@inproceedings{bb227423,
        AUTHOR = "Li, J. P. and Wei, P. and Han, W. J. and Fan, L. F.",
        TITLE = "{IntentQA}: Context-aware Video Intent Reasoning",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "11929--11940",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222420"}

@inproceedings{bb227424,
        AUTHOR = "Hu, Y. S. and Hua, H. and Yang, Z. Y. and Shi, W. J. and Smith, N. A. and Luo, J. B.",
        TITLE = "{PromptCap}: Prompt-Guided Image Captioning for {VQA} with {GPT-3}",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "2951--2963",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222421"}

@inproceedings{bb227425,
        AUTHOR = "Reichman, B. and Heck, L.",
        TITLE = "Cross-Modal Dense Passage Retrieval for Outside Knowledge Visual
Question Answering",
        BOOKTITLE = CLVL23,
        YEAR = "2023",
        PAGES = "2829--2834",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222422"}

@inproceedings{bb227426,
        AUTHOR = "Naik, N. and Potts, C. and Kreiss, E.",
        TITLE = "{Context-VQA}: Towards Context-Aware and Purposeful Visual Question
Answering",
        BOOKTITLE = CLVL23,
        YEAR = "2023",
        PAGES = "2813--2817",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222423"}

@inproceedings{bb227427,
        AUTHOR = "Hu, Y. S. and Liu, B. and Kasai, J. and Wang, Y. Z. and Ostendorf, M. and Krishna, R. and Smith, N. A.",
        TITLE = "{TIFA}: Accurate and Interpretable Text-to-Image Faithfulness
Evaluation with Question Answering",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "20349--20360",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222424"}

@inproceedings{bb227428,
        AUTHOR = "Zhang, Y. W. and Ho, C. H. and Vasconcelos, N. M.",
        TITLE = "Toward Unsupervised Realistic Visual Question Answering",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "15567--15578",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222425"}

@inproceedings{bb227429,
        AUTHOR = "Liang, K. and Albanie, S.",
        TITLE = "Simple Baselines for Interactive Video Retrieval with Questions and
Answers",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "11057--11067",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222426"}

@inproceedings{bb227430,
        AUTHOR = "Mensink, T. and Uijlings, J. and Castrejon, L. and Goel, A. and Cadar, F. and Zhou, H. and Sha, F. and Araujo, A. and Ferrari, V.",
        TITLE = "Encyclopedic {VQA}: Visual questions about detailed properties of
fine-grained categories",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "3090--3101",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222427"}

@inproceedings{bb227431,
        AUTHOR = "Qian, Z. and Wang, X. and Duan, X. G. and Qin, P. and Li, Y. H. and Zhu, W. W.",
        TITLE = "Decouple Before Interact: Multi-Modal Prompt Learning for Continual
Visual Question Answering",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "2941--2950",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222428"}

@inproceedings{bb227432,
        AUTHOR = "Bruni, P. and Falcon, A. and Radeva, P.",
        TITLE = "Time-aware Circulant Matrices for Question-based Temporal Localization",
        BOOKTITLE = CIAP23,
        YEAR = "2023",
        PAGES = "II:182--195",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222429"}

@inproceedings{bb227433,
        AUTHOR = "Ferreira, B. C. L. and Oliveira, H. G. and Silva, C.",
        TITLE = "Leveraging Question Answering for Domain-Agnostic Information
Extraction",
        BOOKTITLE = CIARP23,
        YEAR = "2023",
        PAGES = "I:244--256",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222430"}

@inproceedings{bb227434,
        AUTHOR = "Wu, Z. H. and Shu, X. Y. and Yan, S. Y. and Lu, Z. Y.",
        TITLE = "{FGCVQA}: Fine-Grained Cross-Attention for Medical {VQA}",
        BOOKTITLE = ICIP23,
        YEAR = "2023",
        PAGES = "975--979",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222431"}

@inproceedings{bb227435,
        AUTHOR = "Zhu, H. and Togo, R. and Ogawa, T. and Haseyama, M.",
        TITLE = "Interpretable Visual Question Answering Referring to Outside
Knowledge",
        BOOKTITLE = ICIP23,
        YEAR = "2023",
        PAGES = "2140--2144",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222432"}

@inproceedings{bb227436,
        AUTHOR = "Parelli, M. and Mallis, D. and Diomataris, M. and Pitsikalis, V.",
        TITLE = "Interpretable Visual Question Answering Via Reasoning Supervision",
        BOOKTITLE = ICIP23,
        YEAR = "2023",
        PAGES = "2525--2529",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222433"}

@inproceedings{bb227437,
        AUTHOR = "Hegde, S. and Jahagirdar, S. and Gangisetty, S.",
        TITLE = "Making the {V} in {Text-VQA} Matter",
        BOOKTITLE = ODRUM23,
        YEAR = "2023",
        PAGES = "5580--5588",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222434"}

@inproceedings{bb227438,
        AUTHOR = "Suo, W. and Sun, M. Y. and Liu, W. S. and Gao, Y. Q. and Wang, P. and Zhang, Y. N. and Wu, Q.",
        TITLE = "{S3C}: Semi-Supervised {VQA} Natural Language Explanation via
Self-Critical Learning",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "2646--2656",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222435"}

@inproceedings{bb227439,
        AUTHOR = "Alampalle, C. and Hegde, S. and Jahagirdar, S. and Gangisetty, S.",
        TITLE = "Weakly Supervised Visual Question Answer Generation",
        BOOKTITLE = ODRUM23,
        YEAR = "2023",
        PAGES = "5589--5597",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222436"}

@inproceedings{bb227440,
        AUTHOR = "Jiang, J. J. and Zheng, N. N.",
        TITLE = "{MixPHM}: Redundancy-Aware Parameter-Efficient Tuning for Low-Resource
Visual Question Answering",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "24203--24213",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222437"}

@inproceedings{bb227441,
        AUTHOR = "Wang, Y. and Pfeiffer, J. and Carion, N. and Le Cun, Y. L. and Kamath, A.",
        TITLE = "Adapting Grounded Visual Question Answering Models to Low Resource
Languages",
        BOOKTITLE = MULA23,
        YEAR = "2023",
        PAGES = "2596--2605",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222438"}

@inproceedings{bb227442,
        AUTHOR = "Wang, M. and Mahjoubfar, A. and Joshi, A.",
        TITLE = "{FashionVQA}: A Domain-Specific Visual Question Answering System",
        BOOKTITLE = CVFAD23,
        YEAR = "2023",
        PAGES = "3514--3519",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222439"}

@inproceedings{bb227443,
        AUTHOR = "Tascon Morales, S. and Marquez Neila, P. and Sznitman, R.",
        TITLE = "Logical Implications for Visual Question Answering Consistency",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "6725--6735",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222440"}

@inproceedings{bb227444,
        AUTHOR = "Chen, S. and Zhao, Q.",
        TITLE = "Divide and Conquer: Answering Questions with Object Factorization and
Compositional Reasoning",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "6736--6745",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222441"}

@inproceedings{bb227445,
        AUTHOR = "Basu, A. and Addepalli, S. and Babu, R. V.",
        TITLE = "{RMLVQA}: A Margin Loss Approach For Visual Question Answering with
Language Biases",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "11671--11680",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222442"}

@inproceedings{bb227446,
        AUTHOR = "Li, B. J. and Wang, J. and Zhao, M. and Zhou, S.",
        TITLE = "Two-stage Multimodality Fusion for High-performance Text-based Visual
Question Answering",
        BOOKTITLE = ACCV22,
        YEAR = "2022",
        PAGES = "IV:658--674",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222443"}

@inproceedings{bb227447,
        AUTHOR = "Vivoli, E. and Biten, A. F. and Mafla, A. and Karatzas, D. and Gomez, L.",
        TITLE = "{MUST-VQA}: Multilingual Scene-Text {VQA}",
        BOOKTITLE = TextEvery22,
        YEAR = "2022",
        PAGES = "345--358",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222444"}

@inproceedings{bb227448,
        AUTHOR = "Chai, Z. and Wan, X. J. and Han, S. C. and Poon, J.",
        TITLE = "Visual Question Generation Under Multi-granularity Cross-Modal
Interaction",
        BOOKTITLE = MMMod23,
        YEAR = "2023",
        PAGES = "I: 255--266",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222445"}

@inproceedings{bb227449,
        AUTHOR = "Wang, J. H. and Hu, M. H. and Song, Y. G. and Yang, X. S.",
        TITLE = "Health-Oriented Multimodal Food Question Answering",
        BOOKTITLE = MMMod23,
        YEAR = "2023",
        PAGES = "I: 191--203",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222446"}

@inproceedings{bb227450,
        AUTHOR = "Bongini, P. and Becattini, F. and del Bimbo, A.",
        TITLE = "Is {GPT-3} All You Need for Visual Question Answering in Cultural
Heritage?",
        BOOKTITLE = VisArt22,
        YEAR = "2022",
        PAGES = "268--281",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222447"}

@inproceedings{bb227451,
        AUTHOR = "Jha, A. and Patro, B. and Van Gool, L. J. and Tuytelaars, T.",
        TITLE = "Barlow constrained optimization for Visual Question Answering",
        BOOKTITLE = WACV23,
        YEAR = "2023",
        PAGES = "1084--1093",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222448"}

@inproceedings{bb227452,
        AUTHOR = "Ravi, S. and Chinchure, A. and Sigal, L. and Liao, R. J. and Shwartz, V.",
        TITLE = "{VLC-BERT}: Visual Question Answering with Contextualized Commonsense
Knowledge",
        BOOKTITLE = WACV23,
        YEAR = "2023",
        PAGES = "1155--1165",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222449"}

@inproceedings{bb227453,
        AUTHOR = "Etesam, Y. and Kochiev, L. and Chang, A. X.",
        TITLE = "{3DVQA}: Visual Question Answering for {3D} Environments",
        BOOKTITLE = CRV22,
        YEAR = "2022",
        PAGES = "233--240",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222450"}

@inproceedings{bb227454,
        AUTHOR = "Ramamurthy, P. and Aakur, S. N.",
        TITLE = "{ISD-QA}: Iterative Distillation of Commonsense Knowledge from General
Language Models for Unsupervised Question Answering",
        BOOKTITLE = "ICPR22",
        YEAR = "2022",
        PAGES = "1229--1235",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222451"}

@inproceedings{bb227455,
        AUTHOR = "Zhang, H. T. and Wu, W.",
        TITLE = "{CAT}: Re-Conv Attention in Transformer for Visual Question Answering",
        BOOKTITLE = "ICPR22",
        YEAR = "2022",
        PAGES = "1471--1477",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222452"}

@inproceedings{bb227456,
        AUTHOR = "Liu, L. and Su, X. D. and Guo, H. and Zhu, D.",
        TITLE = "A Transformer-based Medical Visual Question Answering Model",
        BOOKTITLE = "ICPR22",
        YEAR = "2022",
        PAGES = "1712--1718",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222453"}

@inproceedings{bb227457,
        AUTHOR = "Wu, X. Y. and Lu, J. F. and Li, Z. F. and Xiong, F. C.",
        TITLE = "Ques-to-Visual Guided Visual Question Answering",
        BOOKTITLE = ICIP22,
        YEAR = "2022",
        PAGES = "4193--4197",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222454"}

@inproceedings{bb227458,
        AUTHOR = "Sarkar, A. and Rahnemoonfar, M.",
        TITLE = "{Grad-Cam} Aware Supervised Attention for Visual Question Answering for
Post-Disaster Damage Assessment",
        BOOKTITLE = ICIP22,
        YEAR = "2022",
        PAGES = "3783--3787",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222455"}

@inproceedings{bb227459,
        AUTHOR = "Whitehead, S. and Petryk, S. and Shakib, V. and Gonzalez, J. and Darrell, T. J. and Rohrbach, A. and Rohrbach, M.",
        TITLE = "Reliable Visual Question Answering: Abstain Rather Than Answer
Incorrectly",
        BOOKTITLE = ECCV22,
        YEAR = "2022",
        PAGES = "XXXVI:148--166",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222456"}

@inproceedings{bb227460,
        AUTHOR = "Chen, L. and Zheng, Y. H. and Xiao, J.",
        TITLE = "Rethinking Data Augmentation for Robust Visual Question Answering",
        BOOKTITLE = ECCV22,
        YEAR = "2022",
        PAGES = "XXXVI:95--112",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222457"}

@inproceedings{bb227461,
        AUTHOR = "Zhang, H. T. and Wu, W.",
        TITLE = "Context Relation Fusion Model for Visual Question Answering",
        BOOKTITLE = ICIP22,
        YEAR = "2022",
        PAGES = "2112--2116",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222458"}

@inproceedings{bb227462,
        AUTHOR = "Biten, A. F. and Litman, R. and Xie, Y. S. and Appalaraju, S. and Manmatha, R.",
        TITLE = "{LaTr}: Layout-Aware Transformer for Scene-Text {VQA}",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "16527--16537",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222459"}

@inproceedings{bb227463,
        AUTHOR = "Lu, J. Y. and Ye, X. and Ren, Y. and Yang, Y. Z.",
        TITLE = "Good, Better, Best: Textual Distractors Generation for
Multiple-Choice Visual Question Answering via Reinforcement Learning",
        BOOKTITLE = ODRUM22,
        YEAR = "2022",
        PAGES = "4917--4926",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222460"}

@inproceedings{bb227464,
        AUTHOR = "Ding, Y. H. and Huang, Z. and Wang, R. and Zhang, Y. H. and Chen, X. and Ma, Y. Z. and Chung, H. and Han, S. C.",
        TITLE = "{V-Doc}: Visual questions answers with Documents",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "21460--21466",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222461"}

@inproceedings{bb227465,
        AUTHOR = "Azuma, D. and Miyanishi, T. and Kurita, S. H. and Kawanabe, M.",
        TITLE = "{ScanQA}: {3D} Question Answering for Spatial Scene Understanding",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "19107--19117",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222462"}

@inproceedings{bb227466,
        AUTHOR = "Li, G. Y. and Wei, Y. and Tian, Y. and Xu, C. L. and Wen, J. R. and Hu, D.",
        TITLE = "Learning to Answer Questions in Dynamic Audio-Visual Scenarios",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "19086--19096",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222463"}

@inproceedings{bb227467,
        AUTHOR = "Chen, C. and Anjum, S. and Gurari, D.",
        TITLE = "Grounding Answers for Visual Questions Asked by Visually Impaired
People",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "19076--19085",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222464"}

@inproceedings{bb227468,
        AUTHOR = "Li, C. H. and Li, Z. and Jing, C. C. and Wu, Y. W. and Zhai, M. L. and Jia, Y. D.",
        TITLE = "Compositional Substitutivity of Visual Reasoning for Visual Question
Answering",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "XLVIII: 143--160",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222465"}

@inproceedings{bb227469,
        AUTHOR = "Jing, C. C. and Jia, Y. D. and Wu, Y. W. and Liu, X. Y. and Wu, Q.",
        TITLE = "Maintaining Reasoning Consistency in Compositional Visual Question
Answering",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "5089--5098",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222466"}

@inproceedings{bb227470,
        AUTHOR = "Cascante Bonilla, P. and Wu, H. and Wang, L. and Feris, R. S. and Ordonez, V.",
        TITLE = "Sim {VQA}: Exploring Simulated Environments for Visual Question
Answering",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "5046--5056",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222467"}

@inproceedings{bb227471,
        AUTHOR = "Gupta, V. and Li, Z. W. and Kortylewski, A. and Zhang, C. Y. and Li, Y. W. and Yuille, A. L.",
        TITLE = "{SwapMix}: Diagnosing and Regularizing the Over-Reliance on Visual
Context in Visual Question Answering",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "5068--5078",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222468"}

@inproceedings{bb227472,
        AUTHOR = "Burghouts, G. J. and Huizinga, W.",
        TITLE = "Coarse-to-Fine Visual Question Answering by Iterative, Conditional
Refinement",
        BOOKTITLE = CIAP22,
        YEAR = "2022",
        PAGES = "II:418--428",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222469"}

@inproceedings{bb227473,
        AUTHOR = "Kant, Y. and Moudgil, A. and Batra, D. and Parikh, D. and Agrawal, H.",
        TITLE = "Contrast and Classify: Training Robust {VQA} Models",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "1584--1593",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222470"}

@inproceedings{bb227474,
        AUTHOR = "Han, X. Z. and Wang, S. H. and Su, C. and Huang, Q. M. and Tian, Q.",
        TITLE = "Greedy Gradient Ensemble for Robust Visual Question Answering",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "1564--1573",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222471"}

@inproceedings{bb227475,
        AUTHOR = "Dancette, C. and Cadene, R. and Teney, D. and Cord, M.",
        TITLE = "Beyond Question-Based Biases:
Assessing Multimodal Shortcut Learning in Visual Question Answering",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "1554--1563",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222472"}

@inproceedings{bb227476,
        AUTHOR = "Zhou, Y. and Ren, T. and Zhu, C. Y. and Sun, X. S. and Liu, J. Z. and Ding, X. H. and Xu, M. L. and Ji, R. R.",
        TITLE = "{TRAR}: Routing the Attention Spans in Transformer for Visual Question
Answering",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "2054--2064",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222473"}

@inproceedings{bb227477,
        AUTHOR = "Yang, X. and Gao, C. Y. and Zhang, H. W. and Cai, J. F.",
        TITLE = "Auto-Parsing Network for Image Captioning and Visual Question
Answering",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "2177--2187",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222474"}

@inproceedings{bb227478,
        AUTHOR = "Banerjee, P. and Gokhale, T. and Yang, Y. Z. and Baral, C.",
        TITLE = "Weakly Supervised Relative Spatial Reasoning for Visual Question
Answering",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "1888--1898",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222475"}

@inproceedings{bb227479,
        AUTHOR = "Li, L. J. and Lei, J. and Gan, Z. and Liu, J. J.",
        TITLE = "Adversarial {VQA}:
A New Benchmark for Evaluating the Robustness of {VQA} Models",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "2022--2031",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222476"}

@inproceedings{bb227480,
        AUTHOR = "Askarian, N. and Abbasnejad, E. and Zukerman, I. and Buntine, W. and Haffari, G.",
        TITLE = "Inductive Biases for Low Data {VQA}: A Data Augmentation Approach",
        BOOKTITLE = Novelty22,
        YEAR = "2022",
        PAGES = "231--240",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222477"}

@inproceedings{bb227481,
        AUTHOR = "Mathew, M. and Bagal, V. and Tito, R. and Karatzas, D. and Valveny, E. and Jawahar, C. V.",
        TITLE = "{InfographicVQA}",
        BOOKTITLE = WACV22,
        YEAR = "2022",
        PAGES = "2582--2591",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222478"}

@inproceedings{bb227482,
        AUTHOR = "Kumar, S. and Patro, B. N. and Namboodiri, V. P.",
        TITLE = "Auto {QA}: The Question Is Not Only What, but Also Where",
        BOOKTITLE = Novelty22,
        YEAR = "2022",
        PAGES = "272--281",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222479"}

@inproceedings{bb227483,
        AUTHOR = "Kolling, C. and More, M. and Gavenski, N. and Pooch, E. and Parraga, O. and Barros, R. C.",
        TITLE = "Efficient Counterfactual Debiasing for Visual Question Answering",
        BOOKTITLE = WACV22,
        YEAR = "2022",
        PAGES = "2572--2581",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222480"}

@inproceedings{bb227484,
        AUTHOR = "Jung, S. J. and Byun, J. Y. and Shim, K. and Hwang, S. Y. and Kim, C.",
        TITLE = "Understanding {VQA} for Negative Answers Through Visual and Linguistic
Inference",
        BOOKTITLE = ICIP21,
        YEAR = "2021",
        PAGES = "2873--2877",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222481"}

% Title trimmed to the paper's own name. The source record continued after a
% colon with "the International Conference on Digital Image Computing:
% Techniques and Applications (DICTA 2021)", which appears to repeat the venue
% already given by the DICTA21 booktitle macro.
% NOTE(review): confirm against the published record before relying on this.
@inproceedings{bb227485,
  author    = {Felix, R. and Repasky, B. and Hodge, S. and Zolfaghari, R. and Abbasnejad, E. and Sherrah, J.},
  title     = {Cross-Modal Visual Question Answering for Remote Sensing Data},
  booktitle = DICTA21,
  year      = {2021},
  pages     = {1-9},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222482},
}

% booktitle is the bare string macro ICIP21; its definition is expected to be
% supplied elsewhere.
@inproceedings{bb227486,
  author    = {Le, T. and Nguyen, H.T. and Nguyen, M.L.},
  title     = {Vision and Text Transformer for Predicting Answerability on Visual Question Answering},
  booktitle = ICIP21,
  year      = {2021},
  pages     = {934-938},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222483},
}

% booktitle is the bare string macro ICIP21; its definition is expected to be
% supplied elsewhere.
@inproceedings{bb227487,
  author    = {Huang, Z.Q. and Zhu, H.Y. and Sun, Y. and Choi, D. and Tan, C. and Lim, J.H.},
  title     = {A Diagnostic Study of Visual Question Answering With Analogical Reasoning},
  booktitle = ICIP21,
  year      = {2021},
  pages     = {2463-2467},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222484},
}

% booktitle is the bare string macro MAIR2-21; its definition is expected to be
% supplied elsewhere.
@inproceedings{bb227488,
  author    = {Chen, H.Y. and Liu, R.F. and Peng, B.},
  title     = {Cross-modal Relational Reasoning Network for Visual Question Answering},
  booktitle = MAIR2-21,
  year      = {2021},
  pages     = {3939-3948},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222485},
}

% booktitle is the bare string macro CLVL21; its definition is expected to be
% supplied elsewhere.
@inproceedings{bb227489,
  author    = {Wang, Z.X. and Miao, Y. and Specia, L.},
  title     = {Latent Variable Models for Visual Question Answering},
  booktitle = CLVL21,
  year      = {2021},
  pages     = {3137-3141},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222486},
}

% booktitle is the bare string macro CLVL21; its definition is expected to be
% supplied elsewhere.
@inproceedings{bb227490,
  author    = {Hirota, Y. and Garcia, N. and Otani, M. and Chu, C. and Nakashima, Y. and Taniguchi, I. and Onoye, T.},
  title     = {Visual Question Answering with Textual Representations for Images},
  booktitle = CLVL21,
  year      = {2021},
  pages     = {3147-3150},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222487},
}

% booktitle is the bare string macro CVPR21; its definition is expected to be
% supplied elsewhere.
@inproceedings{bb227491,
  author    = {Ye, K. and Kovashka, A.},
  title     = {Linguistic Structures as Weak Supervision for Visual Scene Graph Generation},
  booktitle = CVPR21,
  year      = {2021},
  pages     = {8285-8295},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222488},
}

% Braces around NExT-QA preserve its mixed capitalisation under styles that
% convert titles to sentence case.
@inproceedings{bb227492,
  author    = {Xiao, J.B. and Shang, X. and Yao, A. and Chua, T.S.},
  title     = {{NExT-QA}: Next Phase of Question-Answering to Explaining Temporal Actions},
  booktitle = CVPR21,
  year      = {2021},
  pages     = {9772-9781},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222489},
}

% booktitle is the bare string macro CVPR21; its definition is expected to be
% supplied elsewhere.
@inproceedings{bb227493,
  author    = {Chen, X.Y. and Jiang, M. and Zhao, Q.},
  title     = {Predicting Human Scanpaths in Visual Question Answering},
  booktitle = CVPR21,
  year      = {2021},
  pages     = {10871-10880},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222490},
}

% Braces around PQA keep the acronym upper-case under sentence-casing styles.
@inproceedings{bb227494,
  author    = {Qi, Y.G. and Zhang, K. and Sain, A. and Song, Y.Z.},
  title     = {{PQA}: Perceptual Question Answering},
  booktitle = CVPR21,
  year      = {2021},
  pages     = {12051-12059},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222491},
}

% Braces around VQA keep the acronym upper-case under sentence-casing styles.
@inproceedings{bb227495,
  author    = {Yuan, Y.Y. and Wang, S. and Jiang, M.Y. and Chen, T.Y.},
  title     = {Perception Matters: Detecting Perception Failures of {VQA} Models Using Metamorphic Testing},
  booktitle = CVPR21,
  year      = {2021},
  pages     = {16903-16912},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222492},
}

% Braces around KRISP and VQA keep both acronyms upper-case under
% sentence-casing styles.
@inproceedings{bb227496,
  author    = {Marino, K. and Chen, X.L. and Parikh, D. and Gupta, A. and Rohrbach, M.},
  title     = {{KRISP}: Integrating Implicit and Symbolic Knowledge for Open-Domain Knowledge-Based {VQA}},
  booktitle = CVPR21,
  year      = {2021},
  pages     = {14106-14116},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222493},
}

% Braces around VQA keep the acronym upper-case under sentence-casing styles.
@inproceedings{bb227497,
  author    = {Niu, Y. and Tang, K. and Zhang, H.W. and Lu, Z.W. and Hua, X.S. and Wen, J.R.},
  title     = {Counterfactual {VQA}: A Cause-Effect Look at Language Bias},
  booktitle = CVPR21,
  year      = {2021},
  pages     = {12695-12705},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222494},
}

% Braces around TAP and VQA keep both acronyms upper-case under sentence-casing
% styles; the remaining words are left for the style to case.
@inproceedings{bb227498,
  author    = {Yang, Z.Y. and Lu, Y.J. and Wang, J.F. and Yin, X. and Florencio, D. and Wang, L.J. and Zhang, C. and Zhang, L. and Luo, J.B.},
  title     = {{TAP}: Text-Aware Pre-training for Text-{VQA} and Text-Caption},
  booktitle = CVPR21,
  year      = {2021},
  pages     = {8747-8757},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222495},
}

% Braces around VQA keep the acronym upper-case under sentence-casing styles.
@inproceedings{bb227499,
  author    = {Kervadec, C. and Jaunet, T. and Antipov, G. and Baccouche, M. and Vuillemot, R. and Wolf, C.},
  title     = {How Transferable are Reasoning Patterns in {VQA}?},
  booktitle = CVPR21,
  year      = {2021},
  pages     = {4205-4214},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT222496},
}

Last update: Mar 29, 2025 at 10:46:14