@article{bb242500,
AUTHOR = "Xue, D. and Qian, S. S. and Xu, C. S.",
TITLE = "Integrating Neural-Symbolic Reasoning With Variational Causal
Inference Network for Explanatory Visual Question Answering",
JOURNAL = PAMI,
VOLUME = "46",
YEAR = "2024",
NUMBER = "12",
MONTH = dec,
PAGES = "7893--7908",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237411"}
@inproceedings{bb242501,
AUTHOR = "Xue, D. and Qian, S. S. and Xu, C. S.",
TITLE = "Variational Causal Inference Network for Explanatory Visual Question
Answering",
BOOKTITLE = ICCV23,
YEAR = "2023",
PAGES = "2515--2525",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237412"}
@article{bb242502,
AUTHOR = "Xue, D. and Qian, S. S. and Fang, Q. and Xu, C. S.",
TITLE = "{LININ}: Logic Integrated Neural Inference Network for Explanatory
Visual Question Answering",
JOURNAL = MultMed,
VOLUME = "27",
YEAR = "2025",
PAGES = "16--27",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237413"}
@article{bb242503,
AUTHOR = "Shen, K. and Wu, L. F. and Tang, S. L. and Xu, F. L. and Long, B. and Zhuang, Y. T. and Pei, J.",
TITLE = "Ask Questions With Double Hints: Visual Question Generation With
Answer-Awareness and Region-Reference",
JOURNAL = PAMI,
VOLUME = "46",
YEAR = "2024",
NUMBER = "12",
MONTH = dec,
PAGES = "9648--9660",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237414"}
@article{bb242504,
AUTHOR = "Yamada, M. and d'Amario, V. and Takemoto, K. and Boix, X. and Sasaki, T.",
TITLE = "Transformer Module Networks for Systematic Generalization in Visual
Question Answering",
JOURNAL = PAMI,
VOLUME = "46",
YEAR = "2024",
NUMBER = "12",
MONTH = dec,
PAGES = "10096--10105",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237415"}
@article{bb242505,
AUTHOR = "Liu, J. and Xie, J. and Zhou, F. Y. and He, S. F.",
TITLE = "Question Type-Aware Debiasing for Test-Time Visual Question Answering
Model Adaptation",
JOURNAL = CirSysVideo,
VOLUME = "34",
YEAR = "2024",
NUMBER = "11",
MONTH = nov,
PAGES = "10805--10816",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237416"}
@article{bb242506,
AUTHOR = "Cao, R. L. and Li, Z. X. and Tang, Z. J. and Zhang, C. L. and Ma, H. F.",
TITLE = "Enhancing robust {VQA} via contrastive and self-supervised learning",
JOURNAL = PR,
VOLUME = "159",
YEAR = "2025",
PAGES = "111129",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237417"}
@article{bb242507,
AUTHOR = "Liu, F. and Dai, W. W. and Zhang, C. Y. and Zhu, J. and Yao, L. and Li, X.",
TITLE = "{Co-LLaVA}: Efficient Remote Sensing Visual Question Answering via
Model Collaboration",
JOURNAL = RS,
VOLUME = "17",
YEAR = "2025",
NUMBER = "3",
PAGES = "466",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237418"}
@article{bb242508,
AUTHOR = "Gao, Y. Q. and Bai, Z. W. and Zhou, M. L. and Jia, B. L. and Gao, P. Q. and Zhu, R.",
TITLE = "Adaptive Conditional Reasoning for Remote Sensing Visual Question
Answering",
JOURNAL = RS,
VOLUME = "17",
YEAR = "2025",
NUMBER = "8",
PAGES = "1338",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237419"}
@article{bb242509,
AUTHOR = "Lan, N. and Ou, B. S. and Xie, X. M. and Shi, G. M.",
TITLE = "Visual Environment-Interactive Planning for Embodied Complex-Question
Answering",
JOURNAL = CirSysVideo,
VOLUME = "35",
YEAR = "2025",
NUMBER = "7",
MONTH = jul,
PAGES = "6481--6493",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237420"}
@article{bb242510,
AUTHOR = "Welde, T. M. and Liao, L.",
TITLE = "Counting in Visual Question Answering:
Methods, Datasets, and Future Work",
JOURNAL = IJIG,
VOLUME = "25",
YEAR = "2025",
NUMBER = "5",
MONTH = sep,
PAGES = "2550044",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237421"}
@article{bb242511,
AUTHOR = "Bi, C. and Wang, S. H. and Li, N. and Huang, Q. M.",
TITLE = "Inferential and Commonsense Visual Question Generation",
JOURNAL = MultMed,
VOLUME = "27",
YEAR = "2025",
PAGES = "7796--7809",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237422"}
@article{bb242512,
AUTHOR = "Zhao, Z. C. and Zhou, C. F. and Zhang, Y. and Li, C. L. and Ma, X. L. and Tang, J.",
TITLE = "Text-Guided Coarse-to-Fine Fusion Network for robust remote sensing
visual question answering",
JOURNAL = PandRS,
VOLUME = "230",
YEAR = "2025",
PAGES = "1--17",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237423"}
@article{bb242513,
AUTHOR = "Lu, Z. and Zeng, Q. J. and Lu, M. K. and Chen, G. and Xia, Y.",
TITLE = "Bridging the Semantic Gap in Medical Visual Question Answering with
Prompt Learning",
JOURNAL = MedImg,
VOLUME = "44",
YEAR = "2025",
NUMBER = "11",
MONTH = nov,
PAGES = "4605--4616",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237424"}
@article{bb242514,
AUTHOR = "Du, Z. and Yuan, Z. Q. and Wu, X. and Xu, C. S.",
TITLE = "Disentanglement-Based Equivariant Learning for Compositional {VQA}",
JOURNAL = MultMed,
VOLUME = "27",
YEAR = "2025",
PAGES = "8160--8173",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237425"}
@article{bb242515,
AUTHOR = "Jin, Z. X. and Qin, P. and Lin, S. and Qin, J. and Zhai, S. J. and Zeng, J. C. and Yin, X. C.",
TITLE = "Robust scene text understanding with {OCR} token and word alignment for
{Text-VQA} and text-caption",
JOURNAL = PR,
VOLUME = "172",
YEAR = "2026",
PAGES = "112362",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237426"}
@article{bb242516,
AUTHOR = "Mao, A. and Wen, S. Y. and Chen, F. and Yi, R. and Liu, Y. J.",
TITLE = "Robust {3D} Visual Question Answering via Bias Learning",
JOURNAL = CirSysVideo,
VOLUME = "35",
YEAR = "2025",
NUMBER = "12",
MONTH = dec,
PAGES = "12492--12507",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237427"}
@article{bb242517,
AUTHOR = "Liu, J. and Zhang, L. Z. and Mu, C. and Lu, G. X. and Zhang, B. and Li, J. S.",
TITLE = "Question-Guided Multigranular Visual Augmentation for Knowledge-Based
Visual Question Answering",
JOURNAL = CVIU,
VOLUME = "263",
YEAR = "2026",
PAGES = "104569",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237428"}
@article{bb242518,
AUTHOR = "Zhang, X. Y. and Zhang, L. L. and Wu, Y. R. and Wang, S. W. and Wu, W. J. and Huang, M. and Wang, Q. Y. and Liu, J.",
TITLE = "Memory-enriched thought-by-thought framework for complex Diagram
Question Answering",
JOURNAL = CVIU,
VOLUME = "264",
YEAR = "2026",
PAGES = "104608",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237429"}
@article{bb242519,
AUTHOR = "Chen, T. Y. and Liu, H. B. and Wang, Y. and Chen, Y. H. and He, T. Y. and Gan, C. F. and He, H. Y. and Lin, W. Y.",
TITLE = "{MECD+}: Unlocking Event-Level Causal Graph Discovery for Video
Reasoning",
JOURNAL = PAMI,
VOLUME = "48",
YEAR = "2026",
NUMBER = "3",
MONTH = mar,
PAGES = "2628--2645",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237430"}
@article{bb242520,
AUTHOR = "Liang, J. F. and Jiang, S. X. and Tang, W. and Wang, N. and Wang, Z. K. and Mao, X. and Lv, K. and Liu, M. and Qin, B.",
TITLE = "{APSam}: An Aggregating-Then-Pruning Sampler for Question-Conditional
Denoising",
JOURNAL = CirSysVideo,
VOLUME = "36",
YEAR = "2026",
NUMBER = "2",
MONTH = feb,
PAGES = "1754--1765",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237431"}
@article{bb242521,
AUTHOR = "Wang, F. J. and Liu, J. Y. and Zhang, R. N. and Li, Z. X. and Zhang, F. and An, G. Y.",
TITLE = "{CKCR}: Context-aware knowledge construction and retrieval for
knowledge-based visual question answering",
JOURNAL = JVCIR,
VOLUME = "116",
YEAR = "2026",
PAGES = "104711",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237432"}
@article{bb242522,
AUTHOR = "Lu, J. H. and You, S. and Bao, B. K.",
TITLE = "Question Understanding and Temporality Guiding for Video Question
Answering",
JOURNAL = MultMed,
VOLUME = "28",
YEAR = "2026",
PAGES = "2772--2783",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237433"}
@article{bb242523,
AUTHOR = "Li, Z. F. and Qiu, F. and Wang, Y. R. and Xia, Y. J. and Xiao, K. and Zhang, M. and Zhang, Y.",
TITLE = "Integrating Object Interaction Self-Attention and {GAN}-Based Debiasing
for Visual Question Answering",
JOURNAL = MultMed,
VOLUME = "28",
YEAR = "2026",
PAGES = "3228--3241",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237434"}
@article{bb242524,
AUTHOR = "Xu, Q. X. and Zhou, L. and Zhong, X. and Zhang, F. F. and Tian, J. and Yu, X. H. and Huang, R.",
TITLE = "Refined generation-based framework for consistent and reliable visual
question answering",
JOURNAL = PR,
VOLUME = "178",
YEAR = "2026",
PAGES = "113421",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237435"}
@article{bb242525,
AUTHOR = "Xu, P. F. and Yan, F.",
TITLE = "{RSSR}: Efficient knowledge transfer and deep spatial modeling for
remote sensing visual question answering",
JOURNAL = CVIU,
VOLUME = "268",
YEAR = "2026",
PAGES = "104753",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237436"}
@article{bb242526,
AUTHOR = "Zhu, L. and Ying, M. X. and Zhang, C. Y. and Liu, D. and Wu, L. Y. B. and Zhang, S. C. and Li, X. L.",
TITLE = "Multi-Modal Refined Prompting for Advancing Knowledge-Based Visual
Question Answering",
JOURNAL = MultMed,
VOLUME = "28",
YEAR = "2026",
PAGES = "3444--3457",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237437"}
@article{bb242527,
AUTHOR = "Suo, W. and Ma, J. and Sun, M. Y. and Zhang, H. W. and Wang, P. and Zhang, Y. N. and Wu, Q.",
TITLE = "Semi-Supervised {VQA} Multi-Modal Explanation via Self-Critical
Learning",
JOURNAL = PAMI,
VOLUME = "48",
YEAR = "2026",
NUMBER = "7",
MONTH = jul,
PAGES = "8361--8377",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237438"}
@inproceedings{bb242528,
AUTHOR = "Suo, W. and Sun, M. Y. and Liu, W. S. and Gao, Y. Q. and Wang, P. and Zhang, Y. N. and Wu, Q.",
TITLE = "{S3C}: Semi-Supervised {VQA} Natural Language Explanation via
Self-Critical Learning",
BOOKTITLE = CVPR23,
YEAR = "2023",
PAGES = "2646--2656",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237439"}
@inproceedings{bb242529,
AUTHOR = "Ren, H. Y. and Chen, W. and Liu, C. P. and Ji, Y. and Li, Y.",
TITLE = "{MedKI}: Knowledge Dual Injections for Medical Visual Question
Answering",
BOOKTITLE = ICIP25,
YEAR = "2025",
PAGES = "79--84",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237440"}
@inproceedings{bb242530,
AUTHOR = "Shah, K. and J, S. and Bhutani, G. and Singh, K. and Singh, S.",
TITLE = "Visual Prompting Through Image Mines",
BOOKTITLE = ICIP25,
YEAR = "2025",
PAGES = "713--718",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237441"}
@inproceedings{bb242531,
AUTHOR = "He, Z. L. and Pan, Y. J. and Li, H. and Ma, F. P. and Peng, Y. S. and Wu, S. Y. and Sun, X. Y.",
TITLE = "Enhancing Visual Question Answering Via Clustered In-Context Sequence
Configuration",
BOOKTITLE = ICIP25,
YEAR = "2025",
PAGES = "935--940",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237442"}
@inproceedings{bb242532,
AUTHOR = "Mijiyawa, A. and Sadat, F.",
TITLE = "Visual Question Answering Using Multimodal Data Augmentation for
{Hausa}",
BOOKTITLE = IVCNZ25,
YEAR = "2025",
PAGES = "1--6",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237443"}
@inproceedings{bb242533,
AUTHOR = "Cheng, Y. and Goel, A. and Bilen, H.",
TITLE = "Visually Interpretable Subtask Reasoning for Visual Question
Answering",
BOOKTITLE = XAI4CV25,
YEAR = "2025",
PAGES = "2751--2771",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237444"}
@inproceedings{bb242534,
AUTHOR = "Jang, Y. and Kong, H. and Kim, G. and Lee, Y. and Choi, J. and Bae, K.",
TITLE = "{ICT-QA}: Question Answering Over Multi-Modal Contexts Including Image,
Chart, and Text Modalities",
BOOKTITLE = "MULA25",
YEAR = "2025",
PAGES = "138--148",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237445"}
@inproceedings{bb242535,
AUTHOR = "Li, K. and Vosselman, G. and Yang, M. Y.",
TITLE = "Multimodal Rationales for Explainable Visual Question Answering",
BOOKTITLE = "MULA25",
YEAR = "2025",
PAGES = "191--201",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237446"}
@inproceedings{bb242536,
AUTHOR = "Zou, Y. H. and Yin, Z. Z.",
TITLE = "{MVCM}: Enhancing Multi-View and Cross-Modality Alignment for Medical
Visual Question Answering and Medical Image-Text Retrieval",
BOOKTITLE = "MULA25",
YEAR = "2025",
PAGES = "180--190",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237447"}
@inproceedings{bb242537,
AUTHOR = "Luo, J. Z. and Liu, Y. and Chen, W. X. and Li, Z. and Wang, Y. W. and Li, G. B. and Lin, L.",
TITLE = "{DSPNet}: Dual-vision Scene Perception for Robust {3D} Question Answering",
BOOKTITLE = CVPR25,
YEAR = "2025",
PAGES = "14169--14178",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237448"}
@inproceedings{bb242538,
AUTHOR = "Zou, Y. H. and Yin, Z. Z.",
TITLE = "Alignment, Mining and Fusion: Representation Alignment with Hard
Negative Mining and Selective Knowledge Fusion for Medical Visual
Question Answering",
BOOKTITLE = CVPR25,
YEAR = "2025",
PAGES = "29623--29633",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237449"}
@inproceedings{bb242539,
AUTHOR = "Liu, H. and Ilievski, F. and Snoek, C. G. M.",
TITLE = "Commonsense Video Question Answering through Video-Grounded
Entailment Tree Reasoning",
BOOKTITLE = CVPR25,
YEAR = "2025",
PAGES = "3262--3271",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237450"}
@inproceedings{bb242540,
AUTHOR = "Choudhary, N. and Goyal, P. and Siwatch, D. and Chandak, A. and Mahajan, H. and Khurana, V. and Kumar, Y.",
TITLE = "{AdQuestA}: Knowledge-Guided Visual Question Answer Framework for
Advertisements",
BOOKTITLE = WACV25,
YEAR = "2025",
PAGES = "5812--5821",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237451"}
@inproceedings{bb242541,
AUTHOR = "Ge, J. X. and Subramanian, S. and Shi, B. F. and Herzig, R. and Darrell, T. J.",
TITLE = "Recursive Visual Programming",
BOOKTITLE = ECCV24,
YEAR = "2024",
PAGES = "XLIII: 1--18",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237452"}
@inproceedings{bb242542,
AUTHOR = "Lu, C. and Lu, Q. and Luo, J.",
TITLE = "An Explainable Vision Question Answer Model via Diffusion
Chain-of-thought",
BOOKTITLE = ECCV24,
YEAR = "2024",
PAGES = "LXVII: 146--162",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237453"}
@inproceedings{bb242543,
AUTHOR = "Wang, H. B. and Ge, W. F.",
TITLE = "{Q\&A} Prompts: Discovering Rich Visual Clues through Mining
Question-answer Prompts for {VQA} requiring Diverse World Knowledge",
BOOKTITLE = ECCV24,
YEAR = "2024",
PAGES = "XLII: 274--292",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237454"}
@inproceedings{bb242544,
AUTHOR = "Liu, H. and Ma, X. and Zhong, C. and Zhang, Y. and Lin, W. Y.",
TITLE = "Timecraft: Navigate Weakly-supervised Temporal Grounded Video Question
Answering via Bi-directional Reasoning",
BOOKTITLE = ECCV24,
YEAR = "2024",
PAGES = "V: 92--107",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237455"}
@inproceedings{bb242545,
AUTHOR = "Zhang, Y. F. and Jiang, M. and Zhao, Q.",
TITLE = "Grace: Graph-based Contextual Debiasing for Fair Visual Question
Answering",
BOOKTITLE = ECCV24,
YEAR = "2024",
PAGES = "XVII: 176--194",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237456"}
@inproceedings{bb242546,
AUTHOR = "Liu, X. L. and Dong, Z. K. and Zhang, P.",
TITLE = "Tackling Data Bias in {MUSIC-AVQA}: Crafting a Balanced Dataset for
Unbiased Question-Answering",
BOOKTITLE = WACV24,
YEAR = "2024",
PAGES = "4466--4475",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237457"}
@inproceedings{bb242547,
AUTHOR = "Shi, X. X. and Lee, S.",
TITLE = "Benchmarking Out-of-Distribution Detection in Visual Question
Answering",
BOOKTITLE = WACV24,
YEAR = "2024",
PAGES = "5473--5483",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237458"}
@inproceedings{bb242548,
AUTHOR = "Venkataraman, S. R. and Rao, R. S. and Balasubramanian, S. and Sarma, R. R. and Vorugunti, C. S.",
TITLE = "Can you even tell left from right? Presenting a new challenge for {VQA}",
BOOKTITLE = WACV24,
YEAR = "2024",
PAGES = "4486--4495",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237459"}
@inproceedings{bb242549,
AUTHOR = "Sahu, P. P. and Raut, A. and Samant, J. S. and Gorijala, M. and Lakshminarayanan, V. and Bhaskar, P.",
TITLE = "{POP-VQA}: Privacy preserving, On-device, Personalized Visual Question
Answering",
BOOKTITLE = WACV24,
YEAR = "2024",
PAGES = "8455--8464",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237460"}
@inproceedings{bb242550,
AUTHOR = "Li, J. P. and Wei, P. and Han, W. J. and Fan, L. F.",
TITLE = "{IntentQA}: Context-aware Video Intent Reasoning",
BOOKTITLE = ICCV23,
YEAR = "2023",
PAGES = "11929--11940",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237461"}
@inproceedings{bb242551,
AUTHOR = "Hu, Y. S. and Hua, H. and Yang, Z. Y. and Shi, W. J. and Smith, N. A. and Luo, J. B.",
TITLE = "{PromptCap}: Prompt-Guided Image Captioning for {VQA} with {GPT-3}",
BOOKTITLE = ICCV23,
YEAR = "2023",
PAGES = "2951--2963",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237462"}
@inproceedings{bb242552,
AUTHOR = "Naik, N. and Potts, C. and Kreiss, E.",
TITLE = "{Context-VQA}: Towards Context-Aware and Purposeful Visual Question
Answering",
BOOKTITLE = CLVL23,
YEAR = "2023",
PAGES = "2813--2817",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237463"}
@inproceedings{bb242553,
AUTHOR = "Hu, Y. S. and Liu, B. and Kasai, J. and Wang, Y. Z. and Ostendorf, M. and Krishna, R. and Smith, N. A.",
TITLE = "{TIFA}: Accurate and Interpretable Text-to-Image Faithfulness
Evaluation with Question Answering",
BOOKTITLE = ICCV23,
YEAR = "2023",
PAGES = "20349--20360",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237464"}
@inproceedings{bb242554,
AUTHOR = "Zhang, Y. W. and Ho, C. H. and Vasconcelos, N. M.",
TITLE = "Toward Unsupervised Realistic Visual Question Answering",
BOOKTITLE = ICCV23,
YEAR = "2023",
PAGES = "15567--15578",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237465"}
@inproceedings{bb242555,
AUTHOR = "Liang, K. and Albanie, S.",
TITLE = "Simple Baselines for Interactive Video Retrieval with Questions and
Answers",
BOOKTITLE = ICCV23,
YEAR = "2023",
PAGES = "11057--11067",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237466"}
@inproceedings{bb242556,
AUTHOR = "Mensink, T. and Uijlings, J. and Castrejon, L. and Goel, A. and Cadar, F. and Zhou, H. and Sha, F. and Araujo, A. and Ferrari, V.",
TITLE = "Encyclopedic {VQA}: Visual questions about detailed properties of
fine-grained categories",
BOOKTITLE = ICCV23,
YEAR = "2023",
PAGES = "3090--3101",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237467"}
@inproceedings{bb242557,
AUTHOR = "Bruni, P. and Falcon, A. and Radeva, P.",
TITLE = "Time-aware Circulant Matrices for Question-based Temporal Localization",
BOOKTITLE = CIAP23,
YEAR = "2023",
PAGES = "II:182--195",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237468"}
@inproceedings{bb242558,
AUTHOR = "Ferreira, B. C. L. and Oliveira, H. G. and Silva, C.",
TITLE = "Leveraging Question Answering for Domain-Agnostic Information
Extraction",
BOOKTITLE = CIARP23,
YEAR = "2023",
PAGES = "I:244--256",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237469"}
@inproceedings{bb242559,
AUTHOR = "Wu, Z. H. and Shu, X. Y. and Yan, S. Y. and Lu, Z. Y.",
TITLE = "{FGCVQA}: Fine-Grained Cross-Attention for Medical {VQA}",
BOOKTITLE = ICIP23,
YEAR = "2023",
PAGES = "975--979",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237470"}
@inproceedings{bb242560,
AUTHOR = "Zhu, H. and Togo, R. and Ogawa, T. and Haseyama, M.",
TITLE = "Interpretable Visual Question Answering Referring to Outside
Knowledge",
BOOKTITLE = ICIP23,
YEAR = "2023",
PAGES = "2140--2144",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237471"}
@inproceedings{bb242561,
AUTHOR = "Parelli, M. and Mallis, D. and Diomataris, M. and Pitsikalis, V.",
TITLE = "Interpretable Visual Question Answering Via Reasoning Supervision",
BOOKTITLE = ICIP23,
YEAR = "2023",
PAGES = "2525--2529",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237472"}
@inproceedings{bb242562,
AUTHOR = "Hegde, S. and Jahagirdar, S. and Gangisetty, S.",
TITLE = "Making the {V} in {Text-VQA} Matter",
BOOKTITLE = ODRUM23,
YEAR = "2023",
PAGES = "5580--5588",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237473"}
@inproceedings{bb242563,
AUTHOR = "Alampalle, C. and Hegde, S. and Jahagirdar, S. and Gangisetty, S.",
TITLE = "Weakly Supervised Visual Question Answer Generation",
BOOKTITLE = ODRUM23,
YEAR = "2023",
PAGES = "5589--5597",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237474"}
@inproceedings{bb242564,
AUTHOR = "Jiang, J. J. and Zheng, N. N.",
TITLE = "{MixPHM}: Redundancy-Aware Parameter-Efficient Tuning for Low-Resource
Visual Question Answering",
BOOKTITLE = CVPR23,
YEAR = "2023",
PAGES = "24203--24213",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237475"}
@inproceedings{bb242565,
AUTHOR = "Wang, Y. and Pfeiffer, J. and Carion, N. and Le Cun, Y. L. and Kamath, A.",
TITLE = "Adapting Grounded Visual Question Answering Models to Low Resource
Languages",
BOOKTITLE = MULA23,
YEAR = "2023",
PAGES = "2596--2605",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237476"}
@inproceedings{bb242566,
AUTHOR = "Wang, M. and Mahjoubfar, A. and Joshi, A.",
TITLE = "{FashionVQA}: A Domain-Specific Visual Question Answering System",
BOOKTITLE = CVFAD23,
YEAR = "2023",
PAGES = "3514--3519",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237477"}
@inproceedings{bb242567,
AUTHOR = "Tascon Morales, S. and Marquez Neila, P. and Sznitman, R.",
TITLE = "Logical Implications for Visual Question Answering Consistency",
BOOKTITLE = CVPR23,
YEAR = "2023",
PAGES = "6725--6735",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237478"}
@inproceedings{bb242568,
AUTHOR = "Chen, S. and Zhao, Q.",
TITLE = "Divide and Conquer: Answering Questions with Object Factorization and
Compositional Reasoning",
BOOKTITLE = CVPR23,
YEAR = "2023",
PAGES = "6736--6745",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237479"}
@inproceedings{bb242569,
AUTHOR = "Basu, A. and Addepalli, S. and Babu, R. V.",
TITLE = "{RMLVQA}: A Margin Loss Approach For Visual Question Answering with
Language Biases",
BOOKTITLE = CVPR23,
YEAR = "2023",
PAGES = "11671--11680",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237480"}
@inproceedings{bb242570,
AUTHOR = "Vivoli, E. and Biten, A. F. and Mafla, A. and Karatzas, D. and Gomez, L.",
TITLE = "{MUST-VQA}: Multilingual Scene-Text {VQA}",
BOOKTITLE = TextEvery22,
YEAR = "2022",
PAGES = "345--358",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237481"}
@inproceedings{bb242571,
AUTHOR = "Bongini, P. and Becattini, F. and del Bimbo, A.",
TITLE = "Is {GPT-3} All You Need for Visual Question Answering in Cultural
Heritage?",
BOOKTITLE = VisArt22,
YEAR = "2022",
PAGES = "268--281",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237482"}
@inproceedings{bb242572,
AUTHOR = "Jha, A. and Patro, B. and Van Gool, L. J. and Tuytelaars, T.",
TITLE = "Barlow constrained optimization for Visual Question Answering",
BOOKTITLE = WACV23,
YEAR = "2023",
PAGES = "1084--1093",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237483"}
@inproceedings{bb242573,
AUTHOR = "Ravi, S. and Chinchure, A. and Sigal, L. and Liao, R. J. and Shwartz, V.",
TITLE = "{VLC-BERT}: Visual Question Answering with Contextualized Commonsense
Knowledge",
BOOKTITLE = WACV23,
YEAR = "2023",
PAGES = "1155--1165",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237484"}
@inproceedings{bb242574,
AUTHOR = "Etesam, Y. and Kochiev, L. and Chang, A. X.",
TITLE = "{3DVQA}: Visual Question Answering for {3D} Environments",
BOOKTITLE = CRV22,
YEAR = "2022",
PAGES = "233--240",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237485"}
@inproceedings{bb242575,
AUTHOR = "Ramamurthy, P. and Aakur, S. N.",
TITLE = "{ISD-QA}: Iterative Distillation of Commonsense Knowledge from General
Language Models for Unsupervised Question Answering",
BOOKTITLE = "ICPR22",
YEAR = "2022",
PAGES = "1229--1235",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237486"}
@inproceedings{bb242576,
AUTHOR = "Liu, L. and Su, X. D. and Guo, H. and Zhu, D.",
TITLE = "A Transformer-based Medical Visual Question Answering Model",
BOOKTITLE = "ICPR22",
YEAR = "2022",
PAGES = "1712--1718",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237487"}
@inproceedings{bb242577,
AUTHOR = "Wu, X. Y. and Lu, J. F. and Li, Z. F. and Xiong, F. C.",
TITLE = "Ques-to-Visual Guided Visual Question Answering",
BOOKTITLE = ICIP22,
YEAR = "2022",
PAGES = "4193--4197",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237488"}
@inproceedings{bb242578,
AUTHOR = "Sarkar, A. and Rahnemoonfar, M.",
TITLE = "{Grad-Cam} Aware Supervised Attention for Visual Question Answering for
Post-Disaster Damage Assessment",
BOOKTITLE = ICIP22,
YEAR = "2022",
PAGES = "3783--3787",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237489"}
@inproceedings{bb242579,
AUTHOR = "Whitehead, S. and Petryk, S. and Shakib, V. and Gonzalez, J. and Darrell, T. J. and Rohrbach, A. and Rohrbach, M.",
TITLE = "Reliable Visual Question Answering: Abstain Rather Than Answer
Incorrectly",
BOOKTITLE = ECCV22,
YEAR = "2022",
PAGES = "XXXVI:148--166",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237490"}
@inproceedings{bb242580,
AUTHOR = "Chen, L. and Zheng, Y. H. and Xiao, J.",
TITLE = "Rethinking Data Augmentation for Robust Visual Question Answering",
BOOKTITLE = ECCV22,
YEAR = "2022",
PAGES = "XXXVI:95--112",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237491"}
@inproceedings{bb242581,
AUTHOR = "Zhang, H. T. and Wu, W.",
TITLE = "Context Relation Fusion Model for Visual Question Answering",
BOOKTITLE = ICIP22,
YEAR = "2022",
PAGES = "2112--2116",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237492"}
@inproceedings{bb242582,
AUTHOR = "Biten, A. F. and Litman, R. and Xie, Y. S. and Appalaraju, S. and Manmatha, R.",
TITLE = "{LaTr}: Layout-Aware Transformer for Scene-Text {VQA}",
BOOKTITLE = CVPR22,
YEAR = "2022",
PAGES = "16527--16537",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237493"}
@inproceedings{bb242583,
AUTHOR = "Lu, J. Y. and Ye, X. and Ren, Y. and Yang, Y. Z.",
TITLE = "Good, Better, Best: Textual Distractors Generation for
Multiple-Choice Visual Question Answering via Reinforcement Learning",
BOOKTITLE = ODRUM22,
YEAR = "2022",
PAGES = "4917--4926",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237494"}
@inproceedings{bb242584,
AUTHOR = "Ding, Y. H. and Huang, Z. and Wang, R. and Zhang, Y. H. and Chen, X. and Ma, Y. Z. and Chung, H. and Han, S. C.",
TITLE = "{V-Doc}: Visual questions answers with Documents",
BOOKTITLE = CVPR22,
YEAR = "2022",
PAGES = "21460--21466",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237495"}
@inproceedings{bb242585,
AUTHOR = "Azuma, D. and Miyanishi, T. and Kurita, S. H. and Kawanabe, M.",
TITLE = "{ScanQA}: {3D} Question Answering for Spatial Scene Understanding",
BOOKTITLE = CVPR22,
YEAR = "2022",
PAGES = "19107--19117",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237496"}
@inproceedings{bb242586,
AUTHOR = "Li, G. Y. and Wei, Y. and Tian, Y. P. and Xu, C. L. and Wen, J. R. and Hu, D.",
TITLE = "Learning to Answer Questions in Dynamic Audio-Visual Scenarios",
BOOKTITLE = CVPR22,
YEAR = "2022",
PAGES = "19086--19096",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237497"}
@inproceedings{bb242587,
AUTHOR = "Chen, C. Y. and Anjum, S. and Gurari, D.",
TITLE = "Grounding Answers for Visual Questions Asked by Visually Impaired
People",
BOOKTITLE = CVPR22,
YEAR = "2022",
PAGES = "19076--19085",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237498"}
@inproceedings{bb242588,
AUTHOR = "Li, C. H. and Li, Z. and Jing, C. C. and Wu, Y. W. and Zhai, M. L. and Jia, Y. D.",
TITLE = "Compositional Substitutivity of Visual Reasoning for Visual Question
Answering",
BOOKTITLE = ECCV24,
YEAR = "2024",
PAGES = "XLVIII: 143--160",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237499"}
@inproceedings{bb242589,
AUTHOR = "Jing, C. C. and Jia, Y. D. and Wu, Y. W. and Liu, X. Y. and Wu, Q.",
TITLE = "Maintaining Reasoning Consistency in Compositional Visual Question
Answering",
BOOKTITLE = CVPR22,
YEAR = "2022",
PAGES = "5089--5098",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237500"}
@inproceedings{bb242590,
AUTHOR = "Cascante Bonilla, P. and Wu, H. and Wang, L. and Feris, R. S. and Ordonez, V.",
TITLE = "Sim {VQA}: Exploring Simulated Environments for Visual Question
Answering",
BOOKTITLE = CVPR22,
YEAR = "2022",
PAGES = "5046--5056",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237501"}
@inproceedings{bb242591,
AUTHOR = "Gupta, V. and Li, Z. W. and Kortylewski, A. and Zhang, C. Y. and Li, Y. W. and Yuille, A. L.",
TITLE = "{SwapMix}: Diagnosing and Regularizing the Over-Reliance on Visual
Context in Visual Question Answering",
BOOKTITLE = CVPR22,
YEAR = "2022",
PAGES = "5068--5078",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237502"}
@inproceedings{bb242592,
AUTHOR = "Burghouts, G. J. and Huizinga, W.",
TITLE = "Coarse-to-Fine Visual Question Answering by Iterative, Conditional
Refinement",
BOOKTITLE = CIAP22,
YEAR = "2022",
PAGES = "II:418--428",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237503"}
@inproceedings{bb242593,
AUTHOR = "Kant, Y. and Moudgil, A. and Batra, D. and Parikh, D. and Agrawal, H.",
TITLE = "Contrast and Classify: Training Robust {VQA} Models",
BOOKTITLE = ICCV21,
YEAR = "2021",
PAGES = "1584--1593",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237504"}
@inproceedings{bb242594,
AUTHOR = "Han, X. Z. and Wang, S. H. and Su, C. and Huang, Q. M. and Tian, Q.",
TITLE = "Greedy Gradient Ensemble for Robust Visual Question Answering",
BOOKTITLE = ICCV21,
YEAR = "2021",
PAGES = "1564--1573",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237505"}
@inproceedings{bb242595,
AUTHOR = "Zhou, Y. Y. and Ren, T. H. and Zhu, C. Y. and Sun, X. S. and Liu, J. Z. and Ding, X. H. and Xu, M. L. and Ji, R. R.",
TITLE = "{TRAR}: Routing the Attention Spans in Transformer for Visual Question
Answering",
BOOKTITLE = ICCV21,
YEAR = "2021",
PAGES = "2054--2064",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237506"}
@inproceedings{bb242596,
AUTHOR = "Yang, X. and Gao, C. Y. and Zhang, H. W. and Cai, J. F.",
TITLE = "Auto-Parsing Network for Image Captioning and Visual Question
Answering",
BOOKTITLE = ICCV21,
YEAR = "2021",
PAGES = "2177--2187",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237507"}
@inproceedings{bb242597,
AUTHOR = "Banerjee, P. and Gokhale, T. and Yang, Y. Z. and Baral, C.",
TITLE = "Weakly Supervised Relative Spatial Reasoning for Visual Question
Answering",
BOOKTITLE = ICCV21,
YEAR = "2021",
PAGES = "1888--1898",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237508"}
@inproceedings{bb242598,
AUTHOR = "Li, L. J. and Lei, J. and Gan, Z. and Liu, J. J.",
TITLE = "Adversarial {VQA}:
A New Benchmark for Evaluating the Robustness of {VQA} Models",
BOOKTITLE = ICCV21,
YEAR = "2021",
PAGES = "2022--2031",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237509"}
@inproceedings{bb242599,
AUTHOR = "Askarian, N. and Abbasnejad, E. and Zukerman, I. and Buntine, W. and Haffari, G.",
TITLE = "Inductive Biases for Low Data {VQA}: A Data Augmentation Approach",
BOOKTITLE = Novelty22,
YEAR = "2022",
PAGES = "231--240",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT237510"}
Last update: Jun 13, 2026 at 20:41:05