% ---------------------------------------------------------------------------
% Visual question answering references, keys bb234300 through bb234399.
% Each record keeps its BIBSOURCE link back to the bibliography page it came
% from. JOURNAL and BOOKTITLE values written without quotes (PandRS, MultMed,
% IJCV, CVPR18, ...) are string macros; they are not defined in this part of
% the file and are assumed to be declared elsewhere - TODO confirm.
% Layout: one entry per paragraph, one field per line, original field order.
% ---------------------------------------------------------------------------

% --- Records sourced from applicat803mmovqa5.html ---------------------------

@article{bb234300,
  AUTHOR    = "Wang, J.J. and Ma, A.L. and Chen, Z.H. and Zheng, Z. and Wan, Y.T. and Zhang, L.P. and Zhong, Y.F.",
  TITLE     = "EarthVQANet: Multi-task visual question answering for remote sensing image understanding",
  JOURNAL   = PandRS,
  VOLUME    = "212",
  YEAR      = "2024",
  PAGES     = "422-439",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT229274"
}

@article{bb234301,
  AUTHOR    = "Qian, S. and Liu, B.Q. and Sun, C.J. and Xu, Z. and Ma, L. and Wang, B.",
  TITLE     = "CroMIC-QA: The Cross-Modal Information Complementation Based Question Answering",
  JOURNAL   = MultMed,
  VOLUME    = "26",
  YEAR      = "2024",
  PAGES     = "8348-8359",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT229275"
}

@article{bb234302,
  AUTHOR    = "Uehara, K. and Harada, T.",
  TITLE     = "Learning by Asking Questions for Knowledge-Based Novel Object Recognition",
  JOURNAL   = IJCV,
  VOLUME    = "132",
  YEAR      = "2024",
  NUMBER    = "6",
  MONTH     = "June",
  PAGES     = "2290-2309",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT229276"
}

@inproceedings{bb234303,
  AUTHOR    = "Uehara, K. and Harada, T.",
  TITLE     = "K-VQG: Knowledge-aware Visual Question Generation for Common-sense Acquisition",
  BOOKTITLE = WACV23,
  YEAR      = "2023",
  PAGES     = "4390-4398",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT229277"
}

@inproceedings{bb234304,
  AUTHOR    = "Uehara, K. and Duan, N. and Harada, T.",
  TITLE     = "Learning to Ask Informative Sub-Questions for Visual Question Answering",
  BOOKTITLE = MULA22,
  YEAR      = "2022",
  PAGES     = "4680-4689",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT229278"
}

@inproceedings{bb234305,
  AUTHOR    = "Li, Y.K. and Duan, N. and Zhou, B.L. and Chu, X. and Ouyang, W.L. and Wang, X.G. and Zhou, M.",
  TITLE     = "Visual Question Generation as Dual Task of Visual Question Answering",
  BOOKTITLE = CVPR18,
  YEAR      = "2018",
  PAGES     = "6116-6124",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT229279"
}

@inproceedings{bb234306,
  AUTHOR    = "Gao, P. and Li, H.S. and Li, S. and Lu, P. and Li, Y.K. and Hoi, S.C.H. and Wang, X.G.",
  TITLE     = "Question-Guided Hybrid Convolution for Visual Question Answering",
  BOOKTITLE = ECCV18,
  YEAR      = "2018",
  PAGES     = "I: 485-501",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT229280"
}

@inproceedings{bb234307,
  AUTHOR    = "Gao, P. and Jiang, Z.K. and You, H.X. and Lu, P. and Hoi, S.C.H. and Wang, X.G. and Li, H.S.",
  TITLE     = "Dynamic Fusion With Intra- and Inter-Modality Attention Flow for Visual Question Answering",
  BOOKTITLE = CVPR19,
  YEAR      = "2019",
  PAGES     = "6632-6641",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT229281"
}

@article{bb234308,
  AUTHOR    = "Vosoughi, A. and Deng, S.J. and Zhang, S.Y. and Tian, Y.P. and Xu, C.L. and Luo, J.B.",
  TITLE     = "Cross Modality Bias in Visual Question Answering: A Causal View With Possible Worlds VQA",
  JOURNAL   = MultMed,
  VOLUME    = "26",
  YEAR      = "2024",
  PAGES     = "8609-8624",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT229282"
}

@article{bb234309,
  AUTHOR    = "Guo, Y.Y. and Jiao, F. and Shen, Z.Q. and Nie, L.Q. and Kankanhalli, M.",
  TITLE     = "UNK-VQA: A Dataset and a Probe Into the Abstention Ability of Multi-Modal Large Models",
  JOURNAL   = PAMI,
  VOLUME    = "46",
  YEAR      = "2024",
  NUMBER    = "12",
  MONTH     = "December",
  PAGES     = "10284-10296",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT229283"
}

@article{bb234310,
  AUTHOR    = "Chen, F.Y. and Tang, X.S. and Hao, K.R.",
  TITLE     = "GEXMERT: Geometrically enhanced cross-modality encoder representations from transformers inspired by higher-order visual percepts",
  JOURNAL   = PR,
  VOLUME    = "158",
  YEAR      = "2025",
  PAGES     = "111047",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT229284"
}

@article{bb234311,
  AUTHOR    = "Zhang, B. and Li, J.X. and Shi, Y.C. and Han, Y. and Hu, Q.H.",
  TITLE     = "VADS: Visuo-Adaptive DualStrike attack on visual question answer",
  JOURNAL   = CVIU,
  VOLUME    = "249",
  YEAR      = "2024",
  PAGES     = "104137",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT229285"
}

@article{bb234312,
  AUTHOR    = "Peng, D. and Li, Z.X.",
  TITLE     = "Unbiased VQA via modal information interaction and question transformation",
  JOURNAL   = PR,
  VOLUME    = "162",
  YEAR      = "2025",
  PAGES     = "111394",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT229286"
}

@article{bb234313,
  AUTHOR    = "Fan, L. and Gong, X. and Zheng, C.Y. and Tan, X.L. and Li, J. and Ou, Y.F.",
  TITLE     = "Cycle-VQA: A Cycle-Consistent Framework for Robust Medical Visual Question Answering",
  JOURNAL   = PR,
  VOLUME    = "165",
  YEAR      = "2025",
  PAGES     = "111609",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT229287"
}

@article{bb234314,
  AUTHOR    = "Lin, Q. and He, K. and Zhu, Y.F. and Xu, F.Z. and Cambria, E. and Feng, M.L.",
  TITLE     = "Cross-Modal Knowledge Diffusion-Based Generation for Difference-Aware Medical VQA",
  JOURNAL   = IP,
  VOLUME    = "34",
  YEAR      = "2025",
  PAGES     = "2421-2434",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT229288"
}

% NOTE(review): PAGES below is the placeholder "xx-yy", not a real page range
% or article number; replace it once the published value is known.
@article{bb234315,
  AUTHOR    = "Kim, B.S. and Kim, J. and Lee, D. and Jang, B.",
  TITLE     = "Visual Question Answering: A Survey of Methods, Datasets, Evaluation, and Challenges",
  JOURNAL   = Surveys,
  VOLUME    = "57",
  YEAR      = "2025",
  NUMBER    = "10",
  MONTH     = "May",
  PAGES     = "xx-yy",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT229289"
}

@inproceedings{bb234316,
  AUTHOR    = "Kim, H.Y. and Jung, I. and Suh, D. and Zhang, Y. and Lee, S. and Hong, S.",
  TITLE     = "Question-Aware Gaussian Experts for Audio-Visual Question Answering",
  BOOKTITLE = CVPR25,
  YEAR      = "2025",
  PAGES     = "13681-13690",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT229290"
}

@inproceedings{bb234317,
  AUTHOR    = "Huang, C.Y. and Maneechotesuwan, B. and Chopra, S. and Kira, Z.",
  TITLE     = "FRAMES-VQA: Benchmarking Fine-Tuning Robustness across Multi-Modal Shifts in Visual Question Answering",
  BOOKTITLE = CVPR25,
  YEAR      = "2025",
  PAGES     = "3909-3918",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT229291"
}

@inproceedings{bb234318,
  AUTHOR    = "Wu, K.X. and Li, X. and Li, X. and Hu, C. and Wu, G.L.",
  TITLE     = "AVQACL: A Novel Benchmark for Audio-Visual Question Answering Continual Learning",
  BOOKTITLE = CVPR25,
  YEAR      = "2025",
  PAGES     = "3252-3261",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT229292"
}

@inproceedings{bb234319,
  AUTHOR    = "Zhao, X.Y. and Bai, Z.W. and Zhou, M.L. and Ren, X.C. and Wang, Y.Q. and Wang, L.C.",
  TITLE     = "Integrating Dynamic Routing with Reinforcement Learning and Multimodal Techniques for Visual Question Answering",
  BOOKTITLE = ICIVC24,
  YEAR      = "2024",
  PAGES     = "295-301",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT229293"
}

@inproceedings{bb234320,
  AUTHOR    = "Park, K.R. and Lee, H.J. and Kim, J.U.",
  TITLE     = "Learning Trimodal Relation for Audio-visual Question Answering with Missing Modality",
  BOOKTITLE = ECCV24,
  YEAR      = "2024",
  PAGES     = "XV: 42-59",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT229294"
}

@inproceedings{bb234321,
  AUTHOR    = "Mishra, A. and Agarwala, A. and Tiwari, U. and Rajendiran, V.N. and Miriyala, S.S.",
  TITLE     = "Efficient Visual Question Answering on Embedded Devices: Cross-Modality Attention with Evolutionary Quantization",
  BOOKTITLE = ICIP24,
  YEAR      = "2024",
  PAGES     = "2142-2148",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT229295"
}

@inproceedings{bb234322,
  AUTHOR    = "Jiang, X. and Wang, G.M. and Guo, J.H. and Li, J.C. and Zhang, W.Q. and Lu, R.X. and Tang, S.L.",
  TITLE     = "DIEM: Decomposition-Integration Enhancing Multimodal Insights",
  BOOKTITLE = CVPR24,
  YEAR      = "2024",
  PAGES     = "27294-27303",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT229296"
}

@inproceedings{bb234323,
  AUTHOR    = "Reichman, B. and Heck, L.",
  TITLE     = "Cross-Modal Dense Passage Retrieval for Outside Knowledge Visual Question Answering",
  BOOKTITLE = CLVL23,
  YEAR      = "2023",
  PAGES     = "2829-2834",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT229297"
}

@inproceedings{bb234324,
  AUTHOR    = "Qian, Z. and Wang, X. and Duan, X.G. and Qin, P. and Li, Y.H. and Zhu, W.W.",
  TITLE     = "Decouple Before Interact: Multi-Modal Prompt Learning for Continual Visual Question Answering",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "2941-2950",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT229298"
}

@inproceedings{bb234325,
  AUTHOR    = "Li, B.J. and Wang, J. and Zhao, M. and Zhou, S.",
  TITLE     = "Two-stage Multimodality Fusion for High-performance Text-based Visual Question Answering",
  BOOKTITLE = ACCV22,
  YEAR      = "2022",
  PAGES     = "IV:658-674",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT229299"
}

@inproceedings{bb234326,
  AUTHOR    = "Chai, Z. and Wan, X.J. and Han, S.C. and Poon, J.",
  TITLE     = "Visual Question Generation Under Multi-granularity Cross-Modal Interaction",
  BOOKTITLE = MMMod23,
  YEAR      = "2023",
  PAGES     = "I: 255-266",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT229300"
}

@inproceedings{bb234327,
  AUTHOR    = "Wang, J.H. and Hu, M.H. and Song, Y.G. and Yang, X.S.",
  TITLE     = "Health-Oriented Multimodal Food Question Answering",
  BOOKTITLE = MMMod23,
  YEAR      = "2023",
  PAGES     = "I: 191-203",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT229301"
}

% NOTE(review): BOOKTITLE is the quoted literal "ICPR22" here, whereas sibling
% entries use bare macros such as ICPR21. Left as-is because no ICPR22 macro
% is visible from this part of the file; switch to the macro if one exists.
@inproceedings{bb234328,
  AUTHOR    = "Zhang, H.T. and Wu, W.",
  TITLE     = "CAT: Re-Conv Attention in Transformer for Visual Question Answering",
  BOOKTITLE = "ICPR22",
  YEAR      = "2022",
  PAGES     = "1471-1477",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT229302"
}

@inproceedings{bb234329,
  AUTHOR    = "Dancette, C. and Cadene, R. and Teney, D. and Cord, M.",
  TITLE     = "Beyond Question-Based Biases: Assessing Multimodal Shortcut Learning in Visual Question Answering",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "1554-1563",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT229303"
}

% The venue name that had been appended to this TITLE was removed; the venue
% is carried by BOOKTITLE.
@inproceedings{bb234330,
  AUTHOR    = "Felix, R. and Repasky, B. and Hodge, S. and Zolfaghari, R. and Abbasnejad, E. and Sherrah, J.",
  TITLE     = "Cross-Modal Visual Question Answering for Remote Sensing Data",
  BOOKTITLE = DICTA21,
  YEAR      = "2021",
  PAGES     = "1-9",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT229304"
}

@inproceedings{bb234331,
  AUTHOR    = "Chen, H.Y. and Liu, R.F. and Peng, B.",
  TITLE     = "Cross-modal Relational Reasoning Network for Visual Question Answering",
  BOOKTITLE = MAIR2-21,
  YEAR      = "2021",
  PAGES     = "3939-3948",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT229305"
}

@inproceedings{bb234332,
  AUTHOR    = "Farazi, M. and Khan, S. and Barnes, N.M.",
  TITLE     = "Question-Agnostic Attention for Visual Question Answering",
  BOOKTITLE = ICPR21,
  YEAR      = "2021",
  PAGES     = "3542-3549",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT229306"
}

@inproceedings{bb234333,
  AUTHOR    = "Li, Y. and Lin, Y. and Zhao, H.H. and Wang, D.H.",
  TITLE     = "Dual Path Multi-Modal High-Order Features for Textual Content based Visual Question Answering",
  BOOKTITLE = ICPR21,
  YEAR      = "2021",
  PAGES     = "4324-4331",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT229307"
}

@inproceedings{bb234334,
  AUTHOR    = "Huang, H.T. and Han, T. and Han, W. and Yap, D. and Chiang, C.M.",
  TITLE     = "Answer-checking in Context: A Multi-modal Fully Attention Network for Visual Question Answering",
  BOOKTITLE = ICPR21,
  YEAR      = "2021",
  PAGES     = "1173-1180",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT229308"
}

@inproceedings{bb234335,
  AUTHOR    = "Kant, Y. and Batra, D. and Anderson, P. and Schwing, A. and Parikh, D. and Lu, J. and Agrawal, H.",
  TITLE     = "Spatially Aware Multimodal Transformers for TextVQA",
  BOOKTITLE = ECCV20,
  YEAR      = "2020",
  PAGES     = "IX:715-732",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT229309"
}

@inproceedings{bb234336,
  AUTHOR    = "Hu, R. and Singh, A. and Darrell, T.J. and Rohrbach, M.",
  TITLE     = "Iterative Answer Prediction With Pointer-Augmented Multimodal Transformers for TextVQA",
  BOOKTITLE = CVPR20,
  YEAR      = "2020",
  PAGES     = "9989-9999",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT229310"
}

% First author corrected from "Peng, G." (family and given name swapped) to
% "Gao, P.", the form used for the same author in bb234306 and bb234307.
@inproceedings{bb234337,
  AUTHOR    = "Gao, P. and You, H.X. and Zhang, Z.P. and Wang, X.G. and Li, H.S.",
  TITLE     = "Multi-Modality Latent Interaction Network for Visual Question Answering",
  BOOKTITLE = ICCV19,
  YEAR      = "2019",
  PAGES     = "5824-5834",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT229311"
}

% "Ben Younes" capitalised to match the same author in bb234344.
@inproceedings{bb234338,
  AUTHOR    = "Cadene, R. and Ben Younes, H. and Cord, M. and Thome, N.",
  TITLE     = "MUREL: Multimodal Relational Reasoning for Visual Question Answering",
  BOOKTITLE = CVPR19,
  YEAR      = "2019",
  PAGES     = "1989-1998",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT229312"
}

@inproceedings{bb234339,
  AUTHOR    = "Haurilet, M. and Al Halah, Z. and Stiefelhagen, R.",
  TITLE     = "DynGraph: Visual Question Answering via Dynamic Scene Graphs",
  BOOKTITLE = GCPR19,
  YEAR      = "2019",
  PAGES     = "428-441",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT229313"
}

@inproceedings{bb234340,
  AUTHOR    = "Haurilet, M. and Al Halah, Z. and Stiefelhagen, R.",
  TITLE     = "MoQA: A Multi-modal Question Answering Architecture",
  BOOKTITLE = VL18,
  YEAR      = "2018",
  PAGES     = "IV:106-113",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT229314"
}

@inproceedings{bb234341,
  AUTHOR    = "Gu, J.X. and Cai, J.F. and Joty, S. and Niu, L. and Wang, G.",
  TITLE     = "Look, Imagine and Match: Improving Textual-Visual Cross-Modal Retrieval with Generative Models",
  BOOKTITLE = CVPR18,
  YEAR      = "2018",
  PAGES     = "7181-7189",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT229315"
}

@inproceedings{bb234342,
  AUTHOR    = "Sheng, S.R. and Venkitasubramanian, A.N. and Moens, M.F.",
  TITLE     = "A Markov Network Based Passage Retrieval Method for Multimodal Question Answering in the Cultural Heritage Domain",
  BOOKTITLE = MMMod18,
  YEAR      = "2018",
  PAGES     = "I:3-15",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT229316"
}

@inproceedings{bb234343,
  AUTHOR    = "Yu, Z. and Yu, J. and Fan, J. and Tao, D.",
  TITLE     = "Multi-modal Factorized Bilinear Pooling with Co-attention Learning for Visual Question Answering",
  BOOKTITLE = ICCV17,
  YEAR      = "2017",
  PAGES     = "1839-1848",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT229317"
}

@inproceedings{bb234344,
  AUTHOR    = "Ben Younes, H. and Cadene, R. and Cord, M. and Thome, N.",
  TITLE     = "MUTAN: Multimodal Tucker Fusion for Visual Question Answering",
  BOOKTITLE = ICCV17,
  YEAR      = "2017",
  PAGES     = "2631-2639",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT229318"
}

@inproceedings{bb234345,
  AUTHOR    = "Kembhavi, A. and Seo, M. and Schwenk, D. and Choi, J. and Farhadi, A. and Hajishirzi, H.",
  TITLE     = "Are You Smarter Than a Sixth Grader? Textbook Question Answering for Multimodal Machine Comprehension",
  BOOKTITLE = CVPR17,
  YEAR      = "2017",
  PAGES     = "5376-5384",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803mmovqa5.html#TT229319"
}

% --- Records sourced from applicat803vqann4.html ----------------------------

@article{bb234346,
  AUTHOR    = "Das, A. and Agrawal, H. and Zitnick, L. and Parikh, D. and Batra, D.",
  TITLE     = "Human Attention in Visual Question Answering: Do Humans and Deep Networks Look at the Same Regions?",
  JOURNAL   = CVIU,
  VOLUME    = "163",
  YEAR      = "2017",
  NUMBER    = "1",
  PAGES     = "90-100",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT229320"
}

% YEAR corrected from 2018 to 2017: other IJCV records here pair volume 130
% with 2022 and volume 132 with 2024, which places volume 125 in 2017.
@article{bb234347,
  AUTHOR    = "Malinowski, M. and Rohrbach, M. and Fritz, M.",
  TITLE     = "Ask Your Neurons: A Deep Learning Approach to Visual Question Answering",
  JOURNAL   = IJCV,
  VOLUME    = "125",
  YEAR      = "2017",
  NUMBER    = "1-3",
  MONTH     = "December",
  PAGES     = "110-135",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT229321"
}

@inproceedings{bb234348,
  AUTHOR    = "Malinowski, M. and Rohrbach, M. and Fritz, M.",
  TITLE     = "Ask Your Neurons: A Neural-Based Approach to Answering Questions about Images",
  BOOKTITLE = ICCV15,
  YEAR      = "2015",
  PAGES     = "1-9",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT229322"
}

@inproceedings{bb234349,
  AUTHOR    = "Dancette, C. and Whitehead, S. and Maheshwary, R. and Vedantam, R. and Scherer, S. and Chen, X.L. and Cord, M. and Rohrbach, M.",
  TITLE     = "Improving Selective Visual Question Answering by Learning from Your Peers",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "24049-24059",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT229324-placeholder"
}

% NOTE(review): pages 1009-1017 look implausibly high for issue 1 of a volume;
% JOURNAL, VOLUME and NUMBER may belong to a different journal. Verify against
% the publisher record before relying on this entry. Values left unchanged.
@article{bb234350,
  AUTHOR    = "Huang, Y.Z. and Zhong, T.",
  TITLE     = "Multitask learning for neural generative question answering",
  JOURNAL   = RealTimeIP,
  VOLUME    = "14",
  YEAR      = "2018",
  NUMBER    = "1",
  MONTH     = "January",
  PAGES     = "1009-1017",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT229324"
}

@article{bb234351,
  AUTHOR    = "Ruwa, N. and Mao, Q. and Song, H.P. and Jia, H.J. and Dong, M.",
  TITLE     = "Triple attention network for sentimental visual question answering",
  JOURNAL   = CVIU,
  VOLUME    = "189",
  YEAR      = "2019",
  PAGES     = "102829",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT229325"
}

@article{bb234352,
  AUTHOR    = "Bai, Z.W. and Li, Y. and Wozniak, M. and Zhou, M.L. and Li, D.",
  TITLE     = "DecomVQANet: Decomposing visual question answering deep network via tensor decomposition and regression",
  JOURNAL   = PR,
  VOLUME    = "110",
  YEAR      = "2021",
  PAGES     = "107538",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT229326"
}

@article{bb234353,
  AUTHOR    = "Zhang, Q.S. and Wu, Y.N. and Zhang, H. and Zhu, S.C.",
  TITLE     = "Mining deep And-Or object structures via cost-sensitive question-answer-based active annotations",
  JOURNAL   = CVIU,
  VOLUME    = "176-177",
  YEAR      = "2018",
  PAGES     = "33-44",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT229327"
}

@article{bb234354,
  AUTHOR    = "Zhang, Q.S. and Ren, J. and Huang, G. and Cao, R.M. and Wu, Y.N. and Zhu, S.C.",
  TITLE     = "Mining Interpretable AOG Representations From Convolutional Networks via Active Question Answering",
  JOURNAL   = PAMI,
  VOLUME    = "43",
  YEAR      = "2021",
  NUMBER    = "11",
  MONTH     = "November",
  PAGES     = "3949-3963",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT229328"
}

@inproceedings{bb234355,
  AUTHOR    = "Zhang, Q.S. and Cao, R.M. and Wu, Y.N. and Zhu, S.C.",
  TITLE     = "Mining Object Parts from CNNs via Active Question-Answering",
  BOOKTITLE = CVPR17,
  YEAR      = "2017",
  PAGES     = "3890-3899",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT229329"
}

@inproceedings{bb234356,
  AUTHOR    = "Zhang, Q.S. and Wu, Y.N. and Zhu, S.C.",
  TITLE     = "Mining And-Or Graphs for Graph Matching and Object Discovery",
  BOOKTITLE = ICCV15,
  YEAR      = "2015",
  PAGES     = "55-63",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT229330"
}

@article{bb234357,
  AUTHOR    = "Cao, Q.X. and Liang, X.D. and Li, B.L. and Lin, L.",
  TITLE     = "Interpretable Visual Question Answering by Reasoning on Dependency Trees",
  JOURNAL   = PAMI,
  VOLUME    = "43",
  YEAR      = "2021",
  NUMBER    = "3",
  MONTH     = "March",
  PAGES     = "887-901",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT229331"
}

@inproceedings{bb234358,
  AUTHOR    = "Cao, Q.X. and Liang, X.D. and Li, B.L. and Li, G. and Lin, L.",
  TITLE     = "Visual Question Reasoning on General Dependency Tree",
  BOOKTITLE = CVPR18,
  YEAR      = "2018",
  PAGES     = "7249-7257",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT229332"
}

@article{bb234359,
  AUTHOR    = "Zhong, H.S. and Chen, J.Y. and Shen, C. and Zhang, H.W. and Huang, J.Q. and Hua, X.S.",
  TITLE     = "Self-Adaptive Neural Module Transformer for Visual Question Answering",
  JOURNAL   = MultMed,
  VOLUME    = "23",
  YEAR      = "2021",
  PAGES     = "1264-1273",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT229333"
}

@article{bb234360,
  AUTHOR    = "Zheng, W.F. and Yin, L.R. and Chen, X.B. and Ma, Z.Y. and Liu, S. and Yang, B.",
  TITLE     = "Knowledge base graph embedding module design for Visual question answering model",
  JOURNAL   = PR,
  VOLUME    = "120",
  YEAR      = "2021",
  PAGES     = "108153",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT229334"
}

@article{bb234361,
  AUTHOR    = "Sharma, H. and Jalal, A.S.",
  TITLE     = "Visual question answering model based on graph neural network and contextual attention",
  JOURNAL   = IVC,
  VOLUME    = "110",
  YEAR      = "2021",
  PAGES     = "104165",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT229335"
}

@article{bb234362,
  AUTHOR    = "Song, L.Y. and Li, J. and Liu, J. and Yang, Y. and Shang, X. and Sun, M.X.",
  TITLE     = "Answering knowledge-based visual questions via the exploration of Question Purpose",
  JOURNAL   = PR,
  VOLUME    = "133",
  YEAR      = "2023",
  PAGES     = "109015",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT229336"
}

@article{bb234363,
  AUTHOR    = "MeshuWelde, T. and Liao, L.",
  TITLE     = "Counting-based visual question answering with serial cascaded attention deep learning",
  JOURNAL   = PR,
  VOLUME    = "144",
  YEAR      = "2023",
  PAGES     = "109850",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT229337"
}

@article{bb234364,
  AUTHOR    = "Liu, Y. and Li, G.B. and Lin, L.",
  TITLE     = "Cross-Modal Causal Relational Reasoning for Event-Level Visual Question Answering",
  JOURNAL   = PAMI,
  VOLUME    = "45",
  YEAR      = "2023",
  NUMBER    = "10",
  MONTH     = "October",
  PAGES     = "11624-11641",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT229338"
}

@inproceedings{bb234365,
  AUTHOR    = "Cao, Q.X. and Wan, W.T. and Wang, K. and Liang, X.D. and Lin, L.",
  TITLE     = "Linguistically Routing Capsule Network for Out-of-distribution Visual Question Answering",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "1594-1603",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT229339"
}

@article{bb234366,
  AUTHOR    = "Yang, S.W. and Xiao, L. and Wu, X.J. and Xu, J.J. and Wang, L.L. and He, L.",
  TITLE     = "Simple contrastive learning in a self-supervised manner for robust visual question answering",
  JOURNAL   = CVIU,
  VOLUME    = "241",
  YEAR      = "2024",
  PAGES     = "103976",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT229340"
}

@article{bb234367,
  AUTHOR    = "Wu, Y.L. and Pan, X. and Li, J.H. and Dou, S. and Wang, X.X.",
  TITLE     = "Interpretable answer retrieval based on heterogeneous network embedding",
  JOURNAL   = PRL,
  VOLUME    = "182",
  YEAR      = "2024",
  PAGES     = "9-16",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT229341"
}

@article{bb234368,
  AUTHOR    = "Luo, L. and Lai, H.J. and Pan, Y. and Yin, J.",
  TITLE     = "Efficient Multimodal Selection for Retrieval in Knowledge-Based Visual Question Answering",
  JOURNAL   = CirSysVideo,
  VOLUME    = "35",
  YEAR      = "2025",
  NUMBER    = "6",
  MONTH     = "June",
  PAGES     = "5195-5207",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT229342"
}

@inproceedings{bb234369,
  AUTHOR    = "Zhang, Y. and Chen, H. and Frikha, A. and Krompass, D. and Zhang, G. and Gu, J.D. and Tresp, V.",
  TITLE     = "CL-Cross VQA: A Continual Learning Benchmark for Cross-Domain Visual Question Answering",
  BOOKTITLE = WACV25,
  YEAR      = "2025",
  PAGES     = "6269-6278",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT229343"
}

@inproceedings{bb234370,
  AUTHOR    = "Marcu, A.M. and Chen, L. and Hunermann, J. and Karnsund, A. and Hanotte, B. and Chidananda, P. and Nair, S. and Badrinarayanan, V. and Kendall, A. and Shotton, J. and Arani, E. and Sinavski, O.",
  TITLE     = "Lingoqa: Visual Question Answering for Autonomous Driving",
  BOOKTITLE = ECCV24,
  YEAR      = "2024",
  PAGES     = "LXXVII: 252-269",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT229344"
}

% The sharp s in the seventh author's surname is written as {\ss} so that
% 8-bit BibTeX processes it as a single letter.
@inproceedings{bb234371,
  AUTHOR    = "Sima, C.H. and Renz, K. and Chitta, K. and Chen, L. and Zhang, H. and Xie, C.G. and Bei{\ss}wenger, J. and Luo, P. and Geiger, A. and Li, H.Y.",
  TITLE     = "Drivelm: Driving with Graph Visual Question Answering",
  BOOKTITLE = ECCV24,
  YEAR      = "2024",
  PAGES     = "LII: 256-274",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT229345"
}

@inproceedings{bb234372,
  AUTHOR    = "Feng, C. and Danier, D. and Zhang, F. and Bull, D.",
  TITLE     = "RankDVQA: Deep VQA based on Ranking-inspired Hybrid Training",
  BOOKTITLE = WACV24,
  YEAR      = "2024",
  PAGES     = "1637-1647",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT229346"
}

@inproceedings{bb234373,
  AUTHOR    = "Ishay, A. and Yang, Z. and Lee, J. and Kang, I. and Lim, D.J.",
  TITLE     = "Think before You Simulate: Symbolic Reasoning to Orchestrate Neural Computation for Counterfactual Question Answering",
  BOOKTITLE = WACV24,
  YEAR      = "2024",
  PAGES     = "6684-6693",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT229347"
}

@inproceedings{bb234374,
  AUTHOR    = "Wang, Y. and Yasunaga, M. and Ren, H.Y. and Wada, S. and Leskovec, J.",
  TITLE     = "VQA-GNN: Reasoning with Multimodal Knowledge via Graph Neural Networks for Visual Question Answering",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "21525-21535",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT229348"
}

@inproceedings{bb234375,
  AUTHOR    = "Souza, B. and Aasan, M. and Pedrini, H. and Rivera, A.R.",
  TITLE     = "SelfGraphVQA: A Self-Supervised Graph Neural Network for Scene-based Question Answering",
  BOOKTITLE = VLAR23,
  YEAR      = "2023",
  PAGES     = "4642-4647",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT229349"
}

% NOTE(review): BOOKTITLE is the quoted literal "ICPR22" here, whereas sibling
% entries use bare macros such as ICPR21. Left as-is because no ICPR22 macro
% is visible from this part of the file; switch to the macro if one exists.
@inproceedings{bb234376,
  AUTHOR    = "Haisa, G. and Altenbek, G.",
  TITLE     = "Question Classification Based on Weak Supervision and Interrogative Pronouns Attention Mechanism",
  BOOKTITLE = "ICPR22",
  YEAR      = "2022",
  PAGES     = "2273-2278",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT229350"
}

@inproceedings{bb234377,
  AUTHOR    = "Nguyen, B.X. and Do, T. and Tran, H. and Tjiputra, E. and Tran, Q.D. and Nguyen, A.",
  TITLE     = "Coarse-to-Fine Reasoning for Visual Question Answering",
  BOOKTITLE = MULA22,
  YEAR      = "2022",
  PAGES     = "4557-4565",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT229351"
}

@inproceedings{bb234378,
  AUTHOR    = "Liang, Y.Y. and Wang, X. and Duan, X.G. and Zhu, W.W.",
  TITLE     = "Multi-modal Contextual Graph Neural Network for Text Visual Question Answering",
  BOOKTITLE = ICPR21,
  YEAR      = "2021",
  PAGES     = "3491-3498",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT229352"
}

@inproceedings{bb234379,
  AUTHOR    = "Patro, B.N. and Kurmi, V.K. and Kumar, S. and Namboodiri, V.P.",
  TITLE     = "Deep Bayesian Network for Visual Question Generation",
  BOOKTITLE = WACV20,
  YEAR      = "2020",
  PAGES     = "1555-1565",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT229353"
}

@inproceedings{bb234380,
  AUTHOR    = "Singh, A.K. and Mishra, A. and Shekhar, S. and Chakraborty, A.",
  TITLE     = "From Strings to Things: Knowledge-Enabled VQA Model That Can Read and Reason",
  BOOKTITLE = ICCV19,
  YEAR      = "2019",
  PAGES     = "4601-4611",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT229354"
}

@inproceedings{bb234381,
  AUTHOR    = "Wilf, A. and Ma, M.Q. and Liang, P.P. and Zadeh, A. and Morency, L.P.",
  TITLE     = "Face-to-Face Contrastive Learning for Social Intelligence Question-Answering",
  BOOKTITLE = FG23,
  YEAR      = "2023",
  PAGES     = "1-7",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT229355"
}

@inproceedings{bb234382,
  AUTHOR    = "Zadeh, A. and Chan, M. and Liang, P.P. and Tong, E. and Morency, L.P.",
  TITLE     = "Social-IQ: A Question Answering Benchmark for Artificial Social Intelligence",
  BOOKTITLE = CVPR19,
  YEAR      = "2019",
  PAGES     = "8799-8809",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT229356"
}

@inproceedings{bb234383,
  AUTHOR    = "Ma, C. and Shen, C. and Dick, A. and Wu, Q. and Wang, P. and van den Hengel, A.J. and Reid, I.D.",
  TITLE     = "Visual Question Answering with Memory-Augmented Networks",
  BOOKTITLE = CVPR18,
  YEAR      = "2018",
  PAGES     = "6975-6984",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT229357"
}

@inproceedings{bb234384,
  AUTHOR    = "Shin, A. and Ushiku, Y. and Harada, T.",
  TITLE     = "Customized Image Narrative Generation via Interactive Visual Question Generation and Answering",
  BOOKTITLE = CVPR18,
  YEAR      = "2018",
  PAGES     = "8925-8933",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT229358"
}

@inproceedings{bb234385,
  AUTHOR    = "Teney, D. and Anderson, P. and He, X. and van den Hengel, A.J.",
  TITLE     = "Tips and Tricks for Visual Question Answering: Learnings from the 2017 Challenge",
  BOOKTITLE = CVPR18,
  YEAR      = "2018",
  PAGES     = "4223-4232",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT229359"
}

@inproceedings{bb234386,
  AUTHOR    = "Bai, Y.L. and Fu, J.L. and Zhao, T.J. and Mei, T.",
  TITLE     = "Deep Attention Neural Tensor Network for Visual Question Answering",
  BOOKTITLE = ECCV18,
  YEAR      = "2018",
  PAGES     = "XII: 21-37",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT229360"
}

@inproceedings{bb234387,
  AUTHOR    = "Sinha, A. and Ayush, K.",
  TITLE     = "Towards Mathematical Reasoning: A Multimodal Deep Learning Approach",
  BOOKTITLE = ICIP18,
  YEAR      = "2018",
  PAGES     = "4028-4032",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT229361"
}

% The compound surname of the third author is braced: unbraced, the lowercase
% "y" makes BibTeX read "Montes y" as a von part and "Gomez" as the surname.
@inproceedings{bb234388,
  AUTHOR    = "Rosso Mateus, A. and Gonzalez, F.A. and {Montes y Gomez}, M.",
  TITLE     = "A Two-Step Neural Network Approach to Passage Retrieval for Open Domain Question Answering",
  BOOKTITLE = CIARP17,
  YEAR      = "2017",
  PAGES     = "566-574",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT229362"
}

@inproceedings{bb234389,
  AUTHOR    = "Zhu, C. and Zhao, Y. and Huang, S. and Tu, K. and Ma, Y.",
  TITLE     = "Structured Attentions for Visual Question Answering",
  BOOKTITLE = ICCV17,
  YEAR      = "2017",
  PAGES     = "1300-1309",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT229363"
}

@inproceedings{bb234390,
  AUTHOR    = "Hu, R. and Andreas, J. and Rohrbach, M. and Darrell, T.J. and Saenko, K.",
  TITLE     = "Learning to Reason: End-to-End Module Networks for Visual Question Answering",
  BOOKTITLE = ICCV17,
  YEAR      = "2017",
  PAGES     = "804-813",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT229364"
}

@inproceedings{bb234391,
  AUTHOR    = "Peris, A. and Casacuberta, F.",
  TITLE     = "Interactive-Predictive Neural Multimodal Systems",
  BOOKTITLE = IbPRIA19,
  YEAR      = "2019",
  PAGES     = "I:16-28",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT229365"
}

@inproceedings{bb234392,
  AUTHOR    = "Bolanos, M. and Peris, A. and Casacuberta, F. and Radeva, P.",
  TITLE     = "VIBIKNet: Visual Bidirectional Kernelized Network for Visual Question Answering",
  BOOKTITLE = IbPRIA17,
  YEAR      = "2017",
  PAGES     = "372-380",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT229366"
}

@inproceedings{bb234393,
  AUTHOR    = "Kafle, K. and Kanan, C.",
  TITLE     = "An Analysis of Visual Question Answering Algorithms",
  BOOKTITLE = ICCV17,
  YEAR      = "2017",
  PAGES     = "1983-1991",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT229367"
}

@inproceedings{bb234394,
  AUTHOR    = "Kafle, K. and Kanan, C.",
  TITLE     = "Answer-Type Prediction for Visual Question Answering",
  BOOKTITLE = CVPR16,
  YEAR      = "2016",
  PAGES     = "4976-4984",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT229368"
}

@inproceedings{bb234395,
  AUTHOR    = "Wang, P. and Wu, Q. and Shen, C. and van den Hengel, A.J.",
  TITLE     = "The VQA-Machine: Learning How to Use Existing Vision Algorithms to Answer New Questions",
  BOOKTITLE = CVPR17,
  YEAR      = "2017",
  PAGES     = "3909-3918",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT229369"
}

@inproceedings{bb234396,
  AUTHOR    = "Yu, D. and Fu, J. and Mei, T. and Rui, Y.",
  TITLE     = "Multi-level Attention Networks for Visual Question Answering",
  BOOKTITLE = CVPR17,
  YEAR      = "2017",
  PAGES     = "4187-4195",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT229370"
}

@inproceedings{bb234397,
  AUTHOR    = "Ramakrishnan, S.K. and Pal, A. and Sharma, G. and Mittal, A.",
  TITLE     = "An Empirical Evaluation of Visual Question Answering for Novel Objects",
  BOOKTITLE = CVPR17,
  YEAR      = "2017",
  PAGES     = "7312-7321",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqann4.html#TT229371"
}

% --- Records sourced from applicat803vlmvqa4.html ---------------------------

@article{bb234398,
  AUTHOR    = "Gouthaman, K.V. and Nambiar, A. and Srinivas, K.S. and Mittal, A.",
  TITLE     = "Linguistically-aware attention for reducing the semantic gap in vision-language tasks",
  JOURNAL   = PR,
  VOLUME    = "112",
  YEAR      = "2021",
  PAGES     = "107812",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlmvqa4.html#TT229372"
}

@article{bb234399,
  AUTHOR    = "Zhou, K.Y. and Yang, J.K. and Loy, C.C. and Liu, Z.W.",
  TITLE     = "Learning to Prompt for Vision-Language Models",
  JOURNAL   = IJCV,
  VOLUME    = "130",
  YEAR      = "2022",
  NUMBER    = "9",
  MONTH     = "September",
  PAGES     = "2337-2348",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlmvqa4.html#TT229373"
}