@comment{
  Visual Question Answering references, entries bb212200 through bb212299.
  Each BIBSOURCE field links back to the visionbib.com page the entry came from.

  Unquoted JOURNAL and BOOKTITLE values (MultMed, CVPR23, ICCV21, ...) are
  string macros; their definitions are not part of this section and must be
  loaded before it.

  PAGES values such as II:182-195 appear to carry a proceedings-volume prefix
  before the page range; they are written without a space after the colon.

  NOTE(review): bb212251, bb212252 and bb212253 use the quoted literal "ICPR22"
  for BOOKTITLE while every other venue is a bare macro. Confirm whether an
  ICPR22 macro is defined before unquoting it.
}

@inproceedings{bb212200,
  AUTHOR = "Cho, J.W. and Kim, D.J. and Ryu, H. and Kweon, I.S.",
  TITLE = "Generative Bias for Robust Visual Question Answering",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "11681-11690",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207278"
}

@article{bb212201,
  AUTHOR = "Liu, Y.H. and Wei, W. and Peng, D. and Mao, X.L. and He, Z.Y. and Zhou, P.",
  TITLE = "Depth-Aware and Semantic Guided Relational Attention Network for Visual Question Answering",
  JOURNAL = MultMed,
  VOLUME = "25",
  YEAR = "2023",
  PAGES = "5344-5357",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207279"
}

@article{bb212202,
  AUTHOR = "Mao, A. and Yang, Z. and Lin, K. and Xuan, J. and Liu, Y.J.",
  TITLE = "Positional Attention Guided Transformer-Like Architecture for Visual Question Answering",
  JOURNAL = MultMed,
  VOLUME = "25",
  YEAR = "2023",
  PAGES = "6997-7009",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207280"
}

@article{bb212203,
  AUTHOR = "Sun, H. and Wang, S. and Zhu, Y.Q. and Yuan, W. and Zou, Z.Q.",
  TITLE = "Question Classification for Intelligent Question Answering: A Comprehensive Survey",
  JOURNAL = IJGI,
  VOLUME = "12",
  YEAR = "2023",
  NUMBER = "10",
  PAGES = "415",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207281"
}

@article{bb212204,
  AUTHOR = "Cao, B.W. and Cao, J.X. and Gui, J. and Shen, J. and Liu, B. and He, L. and Tang, Y.Y. and Kwok, J.T.Y.",
  TITLE = "AlignVE: Visual Entailment Recognition Based on Alignment Relations",
  JOURNAL = MultMed,
  VOLUME = "25",
  YEAR = "2023",
  PAGES = "7378-7387",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207282"
}

@article{bb212205,
  AUTHOR = "Mashrur, A. and Luo, W. and Zaidi, N.A. and Robles Kelly, A.",
  TITLE = "Robust visual question answering via semantic cross modal augmentation",
  JOURNAL = CVIU,
  VOLUME = "238",
  YEAR = "2024",
  PAGES = "103862",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207283"
}

@article{bb212206,
  AUTHOR = "Yu, Z. and Jin, Z. and Yu, J. and Xu, M.L. and Wang, H.B. and Fan, J.P.",
  TITLE = "Bilaterally Slimmable Transformer for Elastic and Efficient Visual Question Answering",
  JOURNAL = MultMed,
  VOLUME = "25",
  YEAR = "2023",
  PAGES = "9543-9556",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207284"
}

@article{bb212207,
  AUTHOR = "Yao, H.B. and Wang, L.P. and Cai, C.T. and Sun, Y.X. and Zhang, Z. and Luo, Y.K.",
  TITLE = "Multi-modal spatial relational attention networks for visual question answering",
  JOURNAL = IVC,
  VOLUME = "140",
  YEAR = "2023",
  PAGES = "104840",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207285"
}

@article{bb212208,
  AUTHOR = "Huang, X.F. and Gong, H.F.",
  TITLE = "A Dual-Attention Learning Network With Word and Sentence Embedding for Medical Visual Question Answering",
  JOURNAL = MedImg,
  VOLUME = "43",
  YEAR = "2024",
  NUMBER = "2",
  MONTH = "February",
  PAGES = "832-845",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207286"
}

@article{bb212209,
  AUTHOR = "Zheng, W.B. and Yan, L. and Wang, F.Y.",
  TITLE = "So Many Heads, So Many Wits: Multimodal Graph Reasoning for Text-Based Visual Question Answering",
  JOURNAL = SMCS,
  VOLUME = "54",
  YEAR = "2024",
  NUMBER = "2",
  MONTH = "February",
  PAGES = "854-865",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207287"
}

@article{bb212210,
  AUTHOR = "Bi, Y.D. and Jiang, H. and Hu, Y.L. and Sun, Y.F. and Yin, B.C.",
  TITLE = "See and Learn More: Dense Caption-Aware Representation for Visual Question Answering",
  JOURNAL = CirSysVideo,
  VOLUME = "34",
  YEAR = "2024",
  NUMBER = "2",
  MONTH = "February",
  PAGES = "1135-1146",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207288"
}

@article{bb212211,
  AUTHOR = "Song, Y. and Yang, X.S. and Wang, Y. and Xu, C.S.",
  TITLE = "Recovering Generalization via Pre-Training-Like Knowledge Distillation for Out-of-Distribution Visual Question Answering",
  JOURNAL = MultMed,
  VOLUME = "26",
  YEAR = "2024",
  PAGES = "837-851",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207289"
}

@article{bb212212,
  AUTHOR = "Wu, S. and Zhao, G. and Qian, X.M.",
  TITLE = "Resolving Zero-Shot and Fact-Based Visual Question Answering via Enhanced Fact Retrieval",
  JOURNAL = MultMed,
  VOLUME = "26",
  YEAR = "2024",
  PAGES = "1790-1800",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207290"
}

@article{bb212213,
  AUTHOR = "Wen, Z.Q. and Niu, S.C. and Li, G. and Wu, Q.Y. and Tan, M.K. and Wu, Q.",
  TITLE = "Test-Time Model Adaptation for Visual Question Answering With Debiased Self-Supervisions",
  JOURNAL = MultMed,
  VOLUME = "26",
  YEAR = "2024",
  PAGES = "2137-2147",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207291"
}

@article{bb212214,
  AUTHOR = "Huai, T.Y. and Yang, S.W. and Zhang, J.H. and Zhao, J.B. and He, L.",
  TITLE = "Debiased Visual Question Answering via the perspective of question types",
  JOURNAL = PRL,
  VOLUME = "178",
  YEAR = "2024",
  PAGES = "181-187",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207292"
}

@article{bb212215,
  AUTHOR = "Jiang, J.J. and Liu, Z.Y. and Zheng, N.N.",
  TITLE = "Correlation Information Bottleneck: Towards Adapting Pretrained Multimodal Models for Robust Visual Question Answering",
  JOURNAL = IJCV,
  VOLUME = "132",
  YEAR = "2024",
  NUMBER = "1",
  MONTH = "January",
  PAGES = "185-207",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207293"
}

@inproceedings{bb212216,
  AUTHOR = "Li, J.P. and Wei, P. and Han, W.J. and Fan, L.F.",
  TITLE = "IntentQA: Context-aware Video Intent Reasoning",
  BOOKTITLE = ICCV23,
  YEAR = "2023",
  PAGES = "11929-11940",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207294"
}

@inproceedings{bb212217,
  AUTHOR = "Hu, Y.S. and Hua, H. and Yang, Z.Y. and Shi, W.J. and Smith, N.A. and Luo, J.B.",
  TITLE = "PromptCap: Prompt-Guided Image Captioning for VQA with GPT-3",
  BOOKTITLE = ICCV23,
  YEAR = "2023",
  PAGES = "2951-2963",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207295"
}

@inproceedings{bb212218,
  AUTHOR = "Reichman, B. and Heck, L.",
  TITLE = "Cross-Modal Dense Passage Retrieval for Outside Knowledge Visual Question Answering",
  BOOKTITLE = CLVL23,
  YEAR = "2023",
  PAGES = "2829-2834",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207296"
}

@inproceedings{bb212219,
  AUTHOR = "Naik, N. and Potts, C. and Kreiss, E.",
  TITLE = "Context-VQA: Towards Context-Aware and Purposeful Visual Question Answering",
  BOOKTITLE = CLVL23,
  YEAR = "2023",
  PAGES = "2813-2817",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207297"
}

@inproceedings{bb212220,
  AUTHOR = "Hu, Y.S. and Liu, B. and Kasai, J. and Wang, Y.Z. and Ostendorf, M. and Krishna, R. and Smith, N.A.",
  TITLE = "TIFA: Accurate and Interpretable Text-to-Image Faithfulness Evaluation with Question Answering",
  BOOKTITLE = ICCV23,
  YEAR = "2023",
  PAGES = "20349-20360",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207298"
}

@inproceedings{bb212221,
  AUTHOR = "Zhang, Y.W. and Ho, C.H. and Vasconcelos, N.M.",
  TITLE = "Toward Unsupervised Realistic Visual Question Answering",
  BOOKTITLE = ICCV23,
  YEAR = "2023",
  PAGES = "15567-15578",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207299"
}

@inproceedings{bb212222,
  AUTHOR = "Liang, K. and Albanie, S.",
  TITLE = "Simple Baselines for Interactive Video Retrieval with Questions and Answers",
  BOOKTITLE = ICCV23,
  YEAR = "2023",
  PAGES = "11057-11067",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207300"
}

@inproceedings{bb212223,
  AUTHOR = "Mensink, T. and Uijlings, J. and Castrejon, L. and Goel, A. and Cadar, F. and Zhou, H. and Sha, F. and Araujo, A. and Ferrari, V.",
  TITLE = "Encyclopedic VQA: Visual questions about detailed properties of fine-grained categories",
  BOOKTITLE = ICCV23,
  YEAR = "2023",
  PAGES = "3090-3101",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207301"
}

@inproceedings{bb212224,
  AUTHOR = "Qian, Z. and Wang, X. and Duan, X.G. and Qin, P. and Li, Y.H. and Zhu, W.W.",
  TITLE = "Decouple Before Interact: Multi-Modal Prompt Learning for Continual Visual Question Answering",
  BOOKTITLE = ICCV23,
  YEAR = "2023",
  PAGES = "2941-2950",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207302"
}

@inproceedings{bb212225,
  AUTHOR = "Xue, D. and Qian, S.S. and Xu, C.S.",
  TITLE = "Variational Causal Inference Network for Explanatory Visual Question Answering",
  BOOKTITLE = ICCV23,
  YEAR = "2023",
  PAGES = "2515-2525",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207303"
}

@inproceedings{bb212226,
  AUTHOR = "Bruni, P. and Falcon, A. and Radeva, P.",
  TITLE = "Time-aware Circulant Matrices for Question-based Temporal Localization",
  BOOKTITLE = CIAP23,
  YEAR = "2023",
  PAGES = "II:182-195",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207304"
}

@inproceedings{bb212227,
  AUTHOR = "Ferreira, B.C.L. and Oliveira, H.G. and Silva, C.",
  TITLE = "Leveraging Question Answering for Domain-Agnostic Information Extraction",
  BOOKTITLE = CIARP23,
  YEAR = "2023",
  PAGES = "I:244-256",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207305"
}

@inproceedings{bb212228,
  AUTHOR = "Wu, Z.H. and Shu, X. and Yan, S. and Lu, Z.Y.",
  TITLE = "FGCVQA: Fine-Grained Cross-Attention for Medical VQA",
  BOOKTITLE = ICIP23,
  YEAR = "2023",
  PAGES = "975-979",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207306"
}

@inproceedings{bb212229,
  AUTHOR = "Zhu, H. and Togo, R. and Ogawa, T. and Haseyama, M.",
  TITLE = "Interpretable Visual Question Answering Referring to Outside Knowledge",
  BOOKTITLE = ICIP23,
  YEAR = "2023",
  PAGES = "2140-2144",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207307"
}

@inproceedings{bb212230,
  AUTHOR = "Parelli, M. and Mallis, D. and Diomataris, M. and Pitsikalis, V.",
  TITLE = "Interpretable Visual Question Answering Via Reasoning Supervision",
  BOOKTITLE = ICIP23,
  YEAR = "2023",
  PAGES = "2525-2529",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207308"
}

@inproceedings{bb212231,
  AUTHOR = "Hegde, S. and Jahagirdar, S. and Gangisetty, S.",
  TITLE = "Making the V in Text-VQA Matter",
  BOOKTITLE = ODRUM23,
  YEAR = "2023",
  PAGES = "5580-5588",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207309"
}

@inproceedings{bb212232,
  AUTHOR = "Suo, W. and Sun, M.Y. and Liu, W.S. and Gao, Y.Q. and Wang, P. and Zhang, Y.N. and Wu, Q.",
  TITLE = "S3C: Semi-Supervised VQA Natural Language Explanation via Self-Critical Learning",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "2646-2656",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207310"
}

@inproceedings{bb212233,
  AUTHOR = "Alampalle, C. and Hegde, S. and Jahagirdar, S. and Gangisetty, S.",
  TITLE = "Weakly Supervised Visual Question Answer Generation",
  BOOKTITLE = ODRUM23,
  YEAR = "2023",
  PAGES = "5589-5597",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207311"
}

@inproceedings{bb212234,
  AUTHOR = "Jiang, J.J. and Zheng, N.N.",
  TITLE = "MixPHM: Redundancy-Aware Parameter-Efficient Tuning for Low-Resource Visual Question Answering",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "24203-24213",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207312"
}

@inproceedings{bb212235,
  AUTHOR = "Wang, Y. and Pfeiffer, J. and Carion, N. and Le Cun, Y.L. and Kamath, A.",
  TITLE = "Adapting Grounded Visual Question Answering Models to Low Resource Languages",
  BOOKTITLE = MULA23,
  YEAR = "2023",
  PAGES = "2596-2605",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207313"
}

@inproceedings{bb212236,
  AUTHOR = "Wang, M. and Mahjoubfar, A. and Joshi, A.",
  TITLE = "FashionVQA: A Domain-Specific Visual Question Answering System",
  BOOKTITLE = CVFAD23,
  YEAR = "2023",
  PAGES = "3514-3519",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207314"
}

@inproceedings{bb212237,
  AUTHOR = "Shao, Z.W. and Yu, Z. and Wang, M. and Yu, J.",
  TITLE = "Prompting Large Language Models with Answer Heuristics for Knowledge-Based Visual Question Answering",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "14974-14983",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207315"
}

@inproceedings{bb212238,
  AUTHOR = "Tascon Morales, S. and Marquez Neila, P. and Sznitman, R.",
  TITLE = "Logical Implications for Visual Question Answering Consistency",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "6725-6735",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207316"
}

@inproceedings{bb212239,
  AUTHOR = "Chen, S. and Zhao, Q.",
  TITLE = "Divide and Conquer: Answering Questions with Object Factorization and Compositional Reasoning",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "6736-6745",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207317"
}

@inproceedings{bb212240,
  AUTHOR = "Guo, J.X. and Li, J. and Li, D.X. and Tiong, A.M.H. and Li, B.Y. and Tao, D.C. and Hoi, S.",
  TITLE = "From Images to Textual Prompts: Zero-shot Visual Question Answering with Frozen Large Language Models",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "10867-10877",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207318"
}

@inproceedings{bb212241,
  AUTHOR = "Basu, A. and Addepalli, S. and Babu, R.V.",
  TITLE = "RMLVQA: A Margin Loss Approach For Visual Question Answering with Language Biases",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "11671-11680",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207319"
}

@inproceedings{bb212242,
  AUTHOR = "Li, B.J. and Wang, J. and Zhao, M. and Zhou, S.",
  TITLE = "Two-stage Multimodality Fusion for High-performance Text-based Visual Question Answering",
  BOOKTITLE = ACCV22,
  YEAR = "2022",
  PAGES = "IV:658-674",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207320"
}

@inproceedings{bb212243,
  AUTHOR = "Vivoli, E. and Biten, A.F. and Mafla, A. and Karatzas, D. and Gomez, L.",
  TITLE = "MUST-VQA: Multilingual Scene-Text VQA",
  BOOKTITLE = TextEvery22,
  YEAR = "2022",
  PAGES = "345-358",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207321"
}

@inproceedings{bb212244,
  AUTHOR = "Chai, Z. and Wan, X.J. and Han, S.C. and Poon, J.",
  TITLE = "Visual Question Generation Under Multi-granularity Cross-Modal Interaction",
  BOOKTITLE = MMMod23,
  YEAR = "2023",
  PAGES = "I:255-266",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207322"
}

@inproceedings{bb212245,
  AUTHOR = "Wang, J.H. and Hu, M.H. and Song, Y.G. and Yang, X.S.",
  TITLE = "Health-Oriented Multimodal Food Question Answering",
  BOOKTITLE = MMMod23,
  YEAR = "2023",
  PAGES = "I:191-203",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207323"
}

@inproceedings{bb212246,
  AUTHOR = "Bongini, P. and Becattini, F. and del Bimbo, A.",
  TITLE = "Is GPT-3 All You Need for Visual Question Answering in Cultural Heritage?",
  BOOKTITLE = VisArt22,
  YEAR = "2022",
  PAGES = "268-281",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207324"
}

@inproceedings{bb212247,
  AUTHOR = "Jha, A. and Patro, B. and Van Gool, L.J. and Tuytelaars, T.",
  TITLE = "Barlow constrained optimization for Visual Question Answering",
  BOOKTITLE = WACV23,
  YEAR = "2023",
  PAGES = "1084-1093",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207325"
}

@inproceedings{bb212248,
  AUTHOR = "Ravi, S. and Chinchure, A. and Sigal, L. and Liao, R.J. and Shwartz, V.",
  TITLE = "VLC-BERT: Visual Question Answering with Contextualized Commonsense Knowledge",
  BOOKTITLE = WACV23,
  YEAR = "2023",
  PAGES = "1155-1165",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207326"
}

@inproceedings{bb212249,
  AUTHOR = "Uehara, K. and Harada, T.",
  TITLE = "K-VQG: Knowledge-aware Visual Question Generation for Common-sense Acquisition",
  BOOKTITLE = WACV23,
  YEAR = "2023",
  PAGES = "4390-4398",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207327"
}

@inproceedings{bb212250,
  AUTHOR = "Etesam, Y. and Kochiev, L. and Chang, A.X.",
  TITLE = "3DVQA: Visual Question Answering for 3D Environments",
  BOOKTITLE = CRV22,
  YEAR = "2022",
  PAGES = "233-240",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207328"
}

@inproceedings{bb212251,
  AUTHOR = "Ramamurthy, P. and Aakur, S.N.",
  TITLE = "ISD-QA: Iterative Distillation of Commonsense Knowledge from General Language Models for Unsupervised Question Answering",
  BOOKTITLE = "ICPR22",
  YEAR = "2022",
  PAGES = "1229-1235",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207329"
}

@inproceedings{bb212252,
  AUTHOR = "Zhang, H.T. and Wu, W.",
  TITLE = "CAT: Re-Conv Attention in Transformer for Visual Question Answering",
  BOOKTITLE = "ICPR22",
  YEAR = "2022",
  PAGES = "1471-1477",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207330"
}

@inproceedings{bb212253,
  AUTHOR = "Liu, L. and Su, X.D. and Guo, H. and Zhu, D.",
  TITLE = "A Transformer-based Medical Visual Question Answering Model",
  BOOKTITLE = "ICPR22",
  YEAR = "2022",
  PAGES = "1712-1718",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207331"
}

@inproceedings{bb212254,
  AUTHOR = "Wu, X.Y. and Lu, J.F. and Li, Z.F. and Xiong, F.C.",
  TITLE = "Ques-to-Visual Guided Visual Question Answering",
  BOOKTITLE = ICIP22,
  YEAR = "2022",
  PAGES = "4193-4197",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207332"
}

@inproceedings{bb212255,
  AUTHOR = "Sarkar, A. and Rahnemoonfar, M.",
  TITLE = "Grad-Cam Aware Supervised Attention for Visual Question Answering for Post-Disaster Damage Assessment",
  BOOKTITLE = ICIP22,
  YEAR = "2022",
  PAGES = "3783-3787",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207333"
}

@inproceedings{bb212256,
  AUTHOR = "Whitehead, S. and Petryk, S. and Shakib, V. and Gonzalez, J. and Darrell, T.J. and Rohrbach, A. and Rohrbach, M.",
  TITLE = "Reliable Visual Question Answering: Abstain Rather Than Answer Incorrectly",
  BOOKTITLE = ECCV22,
  YEAR = "2022",
  PAGES = "XXXVI:148-166",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207334"
}

@inproceedings{bb212257,
  AUTHOR = "Chen, L. and Zheng, Y.H. and Xiao, J.",
  TITLE = "Rethinking Data Augmentation for Robust Visual Question Answering",
  BOOKTITLE = ECCV22,
  YEAR = "2022",
  PAGES = "XXXVI:95-112",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207335"
}

@inproceedings{bb212258,
  AUTHOR = "Zhang, H.T. and Wu, W.",
  TITLE = "Context Relation Fusion Model for Visual Question Answering",
  BOOKTITLE = ICIP22,
  YEAR = "2022",
  PAGES = "2112-2116",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207336"
}

@inproceedings{bb212259,
  AUTHOR = "Biten, A.F. and Litman, R. and Xie, Y.S. and Appalaraju, S. and Manmatha, R.",
  TITLE = "LaTr: Layout-Aware Transformer for Scene-Text VQA",
  BOOKTITLE = CVPR22,
  YEAR = "2022",
  PAGES = "16527-16537",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207337"
}

@inproceedings{bb212260,
  AUTHOR = "Lu, J.Y. and Ye, X. and Ren, Y. and Yang, Y.Z.",
  TITLE = "Good, Better, Best: Textual Distractors Generation for Multiple-Choice Visual Question Answering via Reinforcement Learning",
  BOOKTITLE = ODRUM22,
  YEAR = "2022",
  PAGES = "4917-4926",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207338"
}

@inproceedings{bb212261,
  AUTHOR = "Ding, Y.H. and Huang, Z. and Wang, R. and Zhang, Y.H. and Chen, X. and Ma, Y.Z. and Chung, H. and Han, S.C.",
  TITLE = "V-Doc: Visual questions answers with Documents",
  BOOKTITLE = CVPR22,
  YEAR = "2022",
  PAGES = "21460-21466",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207339"
}

@inproceedings{bb212262,
  AUTHOR = "Azuma, D. and Miyanishi, T. and Kurita, S.H. and Kawanabe, M.",
  TITLE = "ScanQA: 3D Question Answering for Spatial Scene Understanding",
  BOOKTITLE = CVPR22,
  YEAR = "2022",
  PAGES = "19107-19117",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207340"
}

@inproceedings{bb212263,
  AUTHOR = "Li, G.Y. and Wei, Y. and Tian, Y. and Xu, C.L. and Wen, J.R. and Hu, D.",
  TITLE = "Learning to Answer Questions in Dynamic Audio-Visual Scenarios",
  BOOKTITLE = CVPR22,
  YEAR = "2022",
  PAGES = "19086-19096",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207341"
}

@inproceedings{bb212264,
  AUTHOR = "Chen, C. and Anjum, S. and Gurari, D.",
  TITLE = "Grounding Answers for Visual Questions Asked by Visually Impaired People",
  BOOKTITLE = CVPR22,
  YEAR = "2022",
  PAGES = "19076-19085",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207342"
}

@inproceedings{bb212265,
  AUTHOR = "Jing, C.C. and Jia, Y.D. and Wu, Y.W. and Liu, X.Y. and Wu, Q.",
  TITLE = "Maintaining Reasoning Consistency in Compositional Visual Question Answering",
  BOOKTITLE = CVPR22,
  YEAR = "2022",
  PAGES = "5089-5098",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207343"
}

@inproceedings{bb212266,
  AUTHOR = "Cascante Bonilla, P. and Wu, H. and Wang, L. and Feris, R.S. and Ordonez, V.",
  TITLE = "Sim VQA: Exploring Simulated Environments for Visual Question Answering",
  BOOKTITLE = CVPR22,
  YEAR = "2022",
  PAGES = "5046-5056",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207344"
}

@inproceedings{bb212267,
  AUTHOR = "Gupta, V. and Li, Z.W. and Kortylewski, A. and Zhang, C.Y. and Li, Y.W. and Yuille, A.L.",
  TITLE = "SwapMix: Diagnosing and Regularizing the Over-Reliance on Visual Context in Visual Question Answering",
  BOOKTITLE = CVPR22,
  YEAR = "2022",
  PAGES = "5068-5078",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207345"
}

@inproceedings{bb212268,
  AUTHOR = "Burghouts, G.J. and Huizinga, W.",
  TITLE = "Coarse-to-Fine Visual Question Answering by Iterative, Conditional Refinement",
  BOOKTITLE = CIAP22,
  YEAR = "2022",
  PAGES = "II:418-428",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207346"
}

@inproceedings{bb212269,
  AUTHOR = "Kant, Y. and Moudgil, A. and Batra, D. and Parikh, D. and Agrawal, H.",
  TITLE = "Contrast and Classify: Training Robust VQA Models",
  BOOKTITLE = ICCV21,
  YEAR = "2021",
  PAGES = "1584-1593",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207347"
}

@inproceedings{bb212270,
  AUTHOR = "Han, X.Z. and Wang, S.H. and Su, C. and Huang, Q.M. and Tian, Q.",
  TITLE = "Greedy Gradient Ensemble for Robust Visual Question Answering",
  BOOKTITLE = ICCV21,
  YEAR = "2021",
  PAGES = "1564-1573",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207348"
}

@inproceedings{bb212271,
  AUTHOR = "Dancette, C. and Cadene, R. and Teney, D. and Cord, M.",
  TITLE = "Beyond Question-Based Biases: Assessing Multimodal Shortcut Learning in Visual Question Answering",
  BOOKTITLE = ICCV21,
  YEAR = "2021",
  PAGES = "1554-1563",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207349"
}

@inproceedings{bb212272,
  AUTHOR = "Zhou, Y. and Ren, T. and Zhu, C.Y. and Sun, X.S. and Liu, J.Z. and Ding, X.H. and Xu, M.L. and Ji, R.R.",
  TITLE = "TRAR: Routing the Attention Spans in Transformer for Visual Question Answering",
  BOOKTITLE = ICCV21,
  YEAR = "2021",
  PAGES = "2054-2064",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207350"
}

@inproceedings{bb212273,
  AUTHOR = "Yang, X. and Gao, C.Y. and Zhang, H.W. and Cai, J.F.",
  TITLE = "Auto-Parsing Network for Image Captioning and Visual Question Answering",
  BOOKTITLE = ICCV21,
  YEAR = "2021",
  PAGES = "2177-2187",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207351"
}

@inproceedings{bb212274,
  AUTHOR = "Banerjee, P. and Gokhale, T. and Yang, Y.Z. and Baral, C.",
  TITLE = "Weakly Supervised Relative Spatial Reasoning for Visual Question Answering",
  BOOKTITLE = ICCV21,
  YEAR = "2021",
  PAGES = "1888-1898",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207352"
}

@inproceedings{bb212275,
  AUTHOR = "Li, L.J. and Lei, J. and Gan, Z. and Liu, J.J.",
  TITLE = "Adversarial VQA: A New Benchmark for Evaluating the Robustness of VQA Models",
  BOOKTITLE = ICCV21,
  YEAR = "2021",
  PAGES = "2022-2031",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207353"
}

@inproceedings{bb212276,
  AUTHOR = "Askarian, N. and Abbasnejad, E. and Zukerman, I. and Buntine, W. and Haffari, G.",
  TITLE = "Inductive Biases for Low Data VQA: A Data Augmentation Approach",
  BOOKTITLE = Novelty22,
  YEAR = "2022",
  PAGES = "231-240",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207354"
}

@inproceedings{bb212277,
  AUTHOR = "Mathew, M. and Bagal, V. and Tito, R. and Karatzas, D. and Valveny, E. and Jawahar, C.V.",
  TITLE = "InfographicVQA",
  BOOKTITLE = WACV22,
  YEAR = "2022",
  PAGES = "2582-2591",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207355"
}

@inproceedings{bb212278,
  AUTHOR = "Kumar, S. and Patro, B.N. and Namboodiri, V.P.",
  TITLE = "Auto QA: The Question Is Not Only What, but Also Where",
  BOOKTITLE = Novelty22,
  YEAR = "2022",
  PAGES = "272-281",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207356"
}

@inproceedings{bb212279,
  AUTHOR = "Kolling, C. and More, M. and Gavenski, N. and Pooch, E. and Parraga, O. and Barros, R.C.",
  TITLE = "Efficient Counterfactual Debiasing for Visual Question Answering",
  BOOKTITLE = WACV22,
  YEAR = "2022",
  PAGES = "2572-2581",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207357"
}

@inproceedings{bb212280,
  AUTHOR = "Jung, S.J. and Byun, J. and Shim, K. and Hwang, S.Y. and Kim, C.",
  TITLE = "Understanding VQA for Negative Answers Through Visual and Linguistic Inference",
  BOOKTITLE = ICIP21,
  YEAR = "2021",
  PAGES = "2873-2877",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207358"
}

@inproceedings{bb212281,
  AUTHOR = "Felix, R. and Repasky, B. and Hodge, S. and Zolfaghari, R. and Abbasnejad, E. and Sherrah, J.",
  TITLE = "Cross-Modal Visual Question Answering for Remote Sensing Data",
  BOOKTITLE = DICTA21,
  YEAR = "2021",
  PAGES = "1-9",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207359"
}

@inproceedings{bb212282,
  AUTHOR = "Le, T. and Nguyen, H.T. and Nguyen, M.L.",
  TITLE = "Vision and Text Transformer for Predicting Answerability on Visual Question Answering",
  BOOKTITLE = ICIP21,
  YEAR = "2021",
  PAGES = "934-938",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207360"
}

@inproceedings{bb212283,
  AUTHOR = "Huang, Z.Q. and Zhu, H.Y. and Sun, Y. and Choi, D. and Tan, C. and Lim, J.H.",
  TITLE = "A Diagnostic Study of Visual Question Answering With Analogical Reasoning",
  BOOKTITLE = ICIP21,
  YEAR = "2021",
  PAGES = "2463-2467",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207361"
}

@inproceedings{bb212284,
  AUTHOR = "Chen, H.Y. and Liu, R.F. and Peng, B.",
  TITLE = "Cross-modal Relational Reasoning Network for Visual Question Answering",
  BOOKTITLE = MAIR2-21,
  YEAR = "2021",
  PAGES = "3939-3948",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207362"
}

@inproceedings{bb212285,
  AUTHOR = "Wang, Z.X. and Miao, Y. and Specia, L.",
  TITLE = "Latent Variable Models for Visual Question Answering",
  BOOKTITLE = CLVL21,
  YEAR = "2021",
  PAGES = "3137-3141",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207363"
}

@inproceedings{bb212286,
  AUTHOR = "Hirota, Y. and Garcia, N. and Otani, M. and Chu, C. and Nakashima, Y. and Taniguchi, I. and Onoye, T.",
  TITLE = "Visual Question Answering with Textual Representations for Images",
  BOOKTITLE = CLVL21,
  YEAR = "2021",
  PAGES = "3147-3150",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207364"
}

@inproceedings{bb212287,
  AUTHOR = "Ye, K. and Kovashka, A.",
  TITLE = "Linguistic Structures as Weak Supervision for Visual Scene Graph Generation",
  BOOKTITLE = CVPR21,
  YEAR = "2021",
  PAGES = "8285-8295",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207365"
}

@inproceedings{bb212288,
  AUTHOR = "Xiao, J.B. and Shang, X. and Yao, A. and Chua, T.S.",
  TITLE = "NExT-QA: Next Phase of Question-Answering to Explaining Temporal Actions",
  BOOKTITLE = CVPR21,
  YEAR = "2021",
  PAGES = "9772-9781",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207366"
}

@inproceedings{bb212289,
  AUTHOR = "Chen, X.Y. and Jiang, M. and Zhao, Q.",
  TITLE = "Predicting Human Scanpaths in Visual Question Answering",
  BOOKTITLE = CVPR21,
  YEAR = "2021",
  PAGES = "10871-10880",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207367"
}

@inproceedings{bb212290,
  AUTHOR = "Qi, Y.G. and Zhang, K. and Sain, A. and Song, Y.Z.",
  TITLE = "PQA: Perceptual Question Answering",
  BOOKTITLE = CVPR21,
  YEAR = "2021",
  PAGES = "12051-12059",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207368"
}

@inproceedings{bb212291,
  AUTHOR = "Yuan, Y.Y. and Wang, S. and Jiang, M.Y. and Chen, T.Y.",
  TITLE = "Perception Matters: Detecting Perception Failures of VQA Models Using Metamorphic Testing",
  BOOKTITLE = CVPR21,
  YEAR = "2021",
  PAGES = "16903-16912",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207369"
}

@inproceedings{bb212292,
  AUTHOR = "Marino, K. and Chen, X.L. and Parikh, D. and Gupta, A. and Rohrbach, M.",
  TITLE = "KRISP: Integrating Implicit and Symbolic Knowledge for Open-Domain Knowledge-Based VQA",
  BOOKTITLE = CVPR21,
  YEAR = "2021",
  PAGES = "14106-14116",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207370"
}

@inproceedings{bb212293,
  AUTHOR = "Niu, Y. and Tang, K. and Zhang, H.W. and Lu, Z.W. and Hua, X.S. and Wen, J.R.",
  TITLE = "Counterfactual VQA: A Cause-Effect Look at Language Bias",
  BOOKTITLE = CVPR21,
  YEAR = "2021",
  PAGES = "12695-12705",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207371"
}

@inproceedings{bb212294,
  AUTHOR = "Yang, Z.Y. and Lu, Y.J. and Wang, J.F. and Yin, X. and Florencio, D. and Wang, L.J. and Zhang, C. and Zhang, L. and Luo, J.B.",
  TITLE = "TAP: Text-Aware Pre-training for Text-VQA and Text-Caption",
  BOOKTITLE = CVPR21,
  YEAR = "2021",
  PAGES = "8747-8757",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207372"
}

@inproceedings{bb212295,
  AUTHOR = "Kervadec, C. and Jaunet, T. and Antipov, G. and Baccouche, M. and Vuillemot, R. and Wolf, C.",
  TITLE = "How Transferable are Reasoning Patterns in VQA?",
  BOOKTITLE = CVPR21,
  YEAR = "2021",
  PAGES = "4205-4214",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207373"
}

@inproceedings{bb212296,
  AUTHOR = "Kervadec, C. and Antipov, G. and Baccouche, M. and Wolf, C.",
  TITLE = "Roses are Red, Violets are Blue{\ldots} But Should VQA expect Them To?",
  BOOKTITLE = CVPR21,
  YEAR = "2021",
  PAGES = "2775-2784",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207374"
}

@inproceedings{bb212297,
  AUTHOR = "Dua, R. and Kancheti, S.S. and Balasubramanian, V.N.",
  TITLE = "Beyond VQA: Generating Multi-word Answers and Rationales to Visual Questions",
  BOOKTITLE = MULA21,
  YEAR = "2021",
  PAGES = "1623-1632",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207375"
}

@inproceedings{bb212298,
  AUTHOR = "Rahman, T. and Chou, S.H. and Sigal, L. and Carenini, G.",
  TITLE = "An Improved Attention for Visual Question Answering",
  BOOKTITLE = MULA21,
  YEAR = "2021",
  PAGES = "1653-1662",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207376"
}

@inproceedings{bb212299,
  AUTHOR = "Jolly, S. and Palacio, S. and Folz, J. and Raue, F. and Hees, J. and Dengel, A.",
  TITLE = "P {$\approx$} NP, at least in Visual Question Answering",
  BOOKTITLE = ICPR21,
  YEAR = "2021",
  PAGES = "2748-2754",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqa1.html#TT207377"
}