% Visual and video question-answering references, records bb198700 through bb198799.
% Venue and journal names (CVPR18, ECCV18, IP, PR, CirSysVideo, ...) are bare string
% macros that are not defined in this span; presumably a companion strings file
% supplies them -- confirm before compiling this span on its own.
% Conventions used below: one field per line, brace-delimited values, "--" in page
% ranges, unquoted month macros, and brace-protected acronyms / proper nouns in titles.

@inproceedings{bb198700,
  author    = {Schwenk, D. and Khandelwal, A. and Clark, C. and Marino, K. and Mottaghi, R.},
  title     = {{A-OKVQA}: A Benchmark for Visual Question Answering Using World Knowledge},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {VIII:146--162},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193901},
}

@inproceedings{bb198701,
  author    = {Marino, K. and Rastegari, M. and Farhadi, A. and Mottaghi, R.},
  title     = {{OK-VQA}: A Visual Question Answering Benchmark Requiring External Knowledge},
  booktitle = CVPR19,
  year      = {2019},
  pages     = {3190--3199},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193902},
}

@inproceedings{bb198702,
  author    = {Krishna, R. and Bernstein, M. and Fei Fei, L.},
  title     = {Information Maximizing Visual Question Generation},
  booktitle = CVPR19,
  year      = {2019},
  pages     = {2008--2018},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193903},
}

@inproceedings{bb198703,
  author    = {Cadene, R. and Ben younes, H. and Cord, M. and Thome, N.},
  title     = {{MUREL}: Multimodal Relational Reasoning for Visual Question Answering},
  booktitle = CVPR19,
  year      = {2019},
  pages     = {1989--1998},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193904},
}

@inproceedings{bb198704,
  author    = {Haurilet, M. and Roitberg, A. and Stiefelhagen, R.},
  title     = {It's Not About the Journey; It's About the Destination: Following Soft Paths Under Question-Guidance for Visual Reasoning},
  booktitle = CVPR19,
  year      = {2019},
  pages     = {1930--1939},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193905},
}

@inproceedings{bb198705,
  author    = {Qiu, Y. and Satoh, Y. and Suzuki, R. and Kataoka, H.},
  title     = {Incorporating {3D} Information Into Visual Question Answering},
  booktitle = {3DV19},
  year      = {2019},
  pages     = {756--765},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193906},
}

@inproceedings{bb198706,
  author    = {Haurilet, M. and Al Halah, Z. and Stiefelhagen, R.},
  title     = {{DynGraph}: Visual Question Answering via Dynamic Scene Graphs},
  booktitle = GCPR19,
  year      = {2019},
  pages     = {428--441},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193907},
}

@inproceedings{bb198707,
  author    = {Haurilet, M. and Al Halah, Z. and Stiefelhagen, R.},
  title     = {{MoQA}: A Multi-modal Question Answering Architecture},
  booktitle = VL18,
  year      = {2018},
  pages     = {IV:106--113},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193908},
}

@inproceedings{bb198708,
  author    = {Liu, F. and Liu, J. and Fang, Z. and Lu, H.},
  title     = {Language and Visual Relations Encoding for Visual Question Answering},
  booktitle = ICIP19,
  year      = {2019},
  pages     = {3307--3311},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193909},
}

@inproceedings{bb198709,
  author    = {Fang, Z.W. and Liu, J. and Tang, Q. and Li, Y. and Lu, H.Q.},
  title     = {Answer Distillation for Visual Question Answering},
  booktitle = ACCV18,
  year      = {2018},
  pages     = {I:72--87},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193910},
}

@inproceedings{bb198710,
  author    = {Kuhnle, A. and Xie, H.Y. and Copestake, A.},
  title     = {How Clever Is the {FiLM} Model, and How Clever Can it Be?},
  booktitle = VL18,
  year      = {2018},
  pages     = {IV:162--172},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193911},
}

@inproceedings{bb198711,
  author    = {Li, W. and Yuan, Z.H. and Fang, X.Z. and Wang, C.},
  title     = {Knowing Where to Look? Analysis on Attention of Visual Question Answering System},
  booktitle = VL18,
  year      = {2018},
  pages     = {IV:145--152},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193912},
}

@inproceedings{bb198712,
  author    = {Wagner, M. and Basevi, H. and Shetty, R. and Li, W.B. and Malinowski, M. and Fritz, M. and Leonardis, A.},
  title     = {Answering Visual What-If Questions: From Actions to Predicted Scene Descriptions},
  booktitle = VLEASE18,
  year      = {2018},
  pages     = {I:521--537},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193913},
}

@inproceedings{bb198713,
  author    = {Duke, B. and Taylor, G.W.},
  title     = {Generalized {Hadamard}-Product Fusion Operators for Visual Question Answering},
  booktitle = CRV18,
  year      = {2018},
  pages     = {39--46},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193914},
}

% NOTE(review): bb198714 and bb198729 share title and authors but cite different
% venues and pages; both records are kept as distinct publications.
@inproceedings{bb198714,
  author    = {Das, A. and Datta, S. and Gkioxari, G. and Lee, S. and Parikh, D. and Batra, D.},
  title     = {Embodied Question Answering},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {1--10},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193915},
}

@inproceedings{bb198715,
  author    = {Misra, I. and Girshick, R. and Fergus, R. and Hebert, M. and Gupta, A. and van der Maaten, L.},
  title     = {Learning by Asking Questions},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {11--20},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193916},
}

@inproceedings{bb198716,
  author    = {Gurari, D. and Li, Q. and Stangl, A.J. and Guo, A. and Lin, C. and Grauman, K. and Luo, J. and Bigham, J.P.},
  title     = {{VizWiz} Grand Challenge: Answering Visual Questions from Blind People},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {3608--3617},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193917},
}

@inproceedings{bb198717,
  author    = {Li, J. and Su, H. and Zhu, J. and Wang, S. and Zhang, B.},
  title     = {Textbook Question Answering Under Instructor Guidance with Memory Networks},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {3655--3663},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193918},
}

@inproceedings{bb198718,
  author    = {Gordon, D. and Kembhavi, A. and Rastegari, M. and Redmon, J. and Fox, D. and Farhadi, A.},
  title     = {{IQA}: Visual Question Answering in Interactive Environments},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {4089--4098},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193919},
}

@inproceedings{bb198719,
  author    = {Agrawal, A. and Batra, D. and Parikh, D. and Kembhavi, A.},
  title     = {Don't Just Assume; Look and Answer: Overcoming Priors for Visual Question Answering},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {4971--4980},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193920},
}

@inproceedings{bb198720,
  author    = {Sha, F. and Chao, W. and Hu, H.},
  title     = {Learning Answer Embeddings for Visual Question Answering},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {5428--5436},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193921},
}

@inproceedings{bb198721,
  author    = {Kafle, K. and Price, B. and Cohen, S. and Kanan, C.},
  title     = {{DVQA}: Understanding Data Visualizations via Question Answering},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {5648--5656},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193922},
}

@inproceedings{bb198722,
  author    = {Sha, F. and Hu, H. and Chao, W.},
  title     = {Cross-Dataset Adaptation for Visual Question Answering},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {5716--5725},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193923},
}

@inproceedings{bb198723,
  author    = {Anderson, P. and He, X. and Buehler, C. and Teney, D. and Johnson, M. and Gould, S. and Zhang, L.},
  title     = {Bottom-Up and Top-Down Attention for Image Captioning and Visual Question Answering},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {6077--6086},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193924},
}

@inproceedings{bb198724,
  author    = {Nguyen, D. and Okatani, T.},
  title     = {Improved Fusion of Visual and Language Representations by Dense Symmetric Co-attention for Visual Question Answering},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {6087--6096},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193925},
}

@inproceedings{bb198725,
  author    = {Ma, C. and Shen, C. and Dick, A. and Wu, Q. and Wang, P. and van den Hengel, A.J. and Reid, I.D.},
  title     = {Visual Question Answering with Memory-Augmented Networks},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {6975--6984},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193926},
}

@inproceedings{bb198726,
  author    = {Patro, B. and Namboodiri, V.P.},
  title     = {Differential Attention for Visual Question Answering},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {7680--7688},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193927},
}

@inproceedings{bb198727,
  author    = {Su, Z. and Zhu, C. and Dong, Y.P. and Cai, D.Q. and Chen, Y.R. and Li, J.G.},
  title     = {Learning Visual Knowledge Memory Networks for Visual Question Answering},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {7736--7745},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193928},
}

@inproceedings{bb198728,
  author    = {Shin, A. and Ushiku, Y. and Harada, T.},
  title     = {Customized Image Narrative Generation via Interactive Visual Question Generation and Answering},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {8925--8933},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193929},
}

% NOTE(review): the end page 213509 is reproduced from the source record and
% looks malformed -- confirm against the publisher's listing.
@inproceedings{bb198729,
  author    = {Das, A. and Datta, S. and Gkioxari, G. and Lee, S. and Parikh, D. and Batra, D.},
  title     = {Embodied Question Answering},
  booktitle = DeepLearnRV18,
  year      = {2018},
  pages     = {2135--213509},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193930},
}

@inproceedings{bb198730,
  author    = {Cheng, W. and Huang, Y. and Wang, L.},
  title     = {Towards Unconstrained Pointing Problem of Visual Question Answering: A Retrieval-based Method},
  booktitle = ICPR18,
  year      = {2018},
  pages     = {3303--3308},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193931},
}

@inproceedings{bb198731,
  author    = {Teney, D. and Anderson, P. and He, X. and van den Hengel, A.J.},
  title     = {Tips and Tricks for Visual Question Answering: Learnings from the 2017 Challenge},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {4223--4232},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193932},
}

@inproceedings{bb198732,
  author    = {Zhou, B. and Sun, Y. and Bau, D. and Torralba, A.B.},
  title     = {Interpretable Basis Decomposition for Visual Explanation},
  booktitle = ECCV18,
  year      = {2018},
  pages     = {VIII: 122--138},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193933},
}

@inproceedings{bb198733,
  author    = {Shi, Y. and Furlanello, T. and Zha, S. and Anandkumar, A.},
  title     = {Question Type Guided Attention in Visual Question Answering},
  booktitle = ECCV18,
  year      = {2018},
  pages     = {II: 158--175},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193934},
}

@inproceedings{bb198734,
  author    = {Narasimhan, M. and Schwing, A.G.},
  title     = {Straight to the Facts: Learning Knowledge Base Retrieval for Factual Visual Question Answering},
  booktitle = ECCV18,
  year      = {2018},
  pages     = {VIII: 460--477},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193935},
}

@inproceedings{bb198735,
  author    = {Malinowski, M. and Doersch, C. and Santoro, A. and Battaglia, P.},
  title     = {Learning Visual Question Answering by Bootstrapping Hard Attention},
  booktitle = ECCV18,
  year      = {2018},
  pages     = {VI: 3--20},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193936},
}

@inproceedings{bb198736,
  author    = {Gu, J.X. and Cai, J.F. and Joty, S. and Niu, L. and Wang, G.},
  title     = {Look, Imagine and Match: Improving Textual-Visual Cross-Modal Retrieval with Generative Models},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {7181--7189},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193937},
}

@inproceedings{bb198737,
  author    = {Li, Q. and Tao, Q.Y. and Joty, S. and Cai, J.F. and Luo, J.B.},
  title     = {{VQA-E}: Explaining, Elaborating, and Enhancing Your Answers for Visual Questions},
  booktitle = ECCV18,
  year      = {2018},
  pages     = {VII: 570--586},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193938},
}

@inproceedings{bb198738,
  author    = {Bai, Y.L. and Fu, J.L. and Zhao, T.J. and Mei, T.},
  title     = {Deep Attention Neural Tensor Network for Visual Question Answering},
  booktitle = ECCV18,
  year      = {2018},
  pages     = {XII: 21--37},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193939},
}

@inproceedings{bb198739,
  author    = {Sinha, A. and Ayush, K.},
  title     = {Towards Mathematical Reasoning: A Multimodal Deep Learning Approach},
  booktitle = ICIP18,
  year      = {2018},
  pages     = {4028--4032},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193940},
}

@inproceedings{bb198740,
  author    = {Yu, D. and Gao, X. and Xiong, H.},
  title     = {Structured Semantic Representation for Visual Question Answering},
  booktitle = ICIP18,
  year      = {2018},
  pages     = {2286--2290},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193941},
}

@inproceedings{bb198741,
  author    = {Huang, L. and Kulkarni, K. and Jha, A. and Lohit, S. and Jayasuriya, S. and Turaga, P.K.},
  title     = {{CS-VQA}: Visual Question Answering with Compressively Sensed Images},
  booktitle = ICIP18,
  year      = {2018},
  pages     = {1283--1287},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193942},
}

@inproceedings{bb198742,
  author    = {Desta, M.T. and Chen, L. and Kornuta, T.},
  title     = {Object-Based Reasoning in {VQA}},
  booktitle = WACV18,
  year      = {2018},
  pages     = {1814--1823},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193943},
}

@inproceedings{bb198743,
  author    = {Zhao, H. and Fan, Q. and Gutfreund, D. and Fu, Y.},
  title     = {Semantically Guided Visual Question Answering},
  booktitle = WACV18,
  year      = {2018},
  pages     = {1852--1860},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193944},
}

@inproceedings{bb198744,
  author    = {Wang, Z. and Liu, X. and Wang, L. and Qiao, Y. and Xie, X. and Fowlkes, C.C.},
  title     = {Structured Triplet Learning with {POS}-Tag Guided Attention for Visual Question Answering},
  booktitle = WACV18,
  year      = {2018},
  pages     = {1888--1896},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193945},
}

@inproceedings{bb198745,
  author    = {Chowdhury, I. and Nguyen, K. and Fookes, C. and Sridharan, S.},
  title     = {A cascaded long short-term memory ({LSTM}) driven generic visual question answering ({VQA})},
  booktitle = ICIP17,
  year      = {2017},
  pages     = {1842--1846},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193946},
}

@inproceedings{bb198746,
  author    = {Sheng, S. and Venkitasubramanian, A.N. and Moens, M.F.},
  title     = {A {Markov} Network Based Passage Retrieval Method for Multimodal Question Answering in the Cultural Heritage Domain},
  booktitle = MMMod18,
  year      = {2018},
  pages     = {I:3--15},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193947},
}

@inproceedings{bb198747,
  author    = {Rosso Mateus, A. and Gonzalez, F.A. and Montes y Gomez, M.},
  title     = {A Two-Step Neural Network Approach to Passage Retrieval for Open Domain Question Answering},
  booktitle = CIARP17,
  year      = {2017},
  pages     = {566--574},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193948},
}

@inproceedings{bb198748,
  author    = {Gupta, T. and Shih, K.J. and Singh, S. and Hoiem, D.},
  title     = {Aligned Image-Word Representations Improve Inductive Transfer Across Vision-Language Tasks},
  booktitle = ICCV17,
  year      = {2017},
  pages     = {4223--4232},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193949},
}

@inproceedings{bb198749,
  author    = {Yu, Z. and Yu, J. and Fan, J. and Tao, D.},
  title     = {Multi-modal Factorized Bilinear Pooling with Co-attention Learning for Visual Question Answering},
  booktitle = ICCV17,
  year      = {2017},
  pages     = {1839--1848},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193950},
}

@inproceedings{bb198750,
  author    = {Ben younes, H. and Cadene, R. and Cord, M. and Thome, N.},
  title     = {{MUTAN}: Multimodal {Tucker} Fusion for Visual Question Answering},
  booktitle = ICCV17,
  year      = {2017},
  pages     = {2631--2639},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193951},
}

@inproceedings{bb198751,
  author    = {Zhu, C. and Zhao, Y. and Huang, S. and Tu, K. and Ma, Y.},
  title     = {Structured Attentions for Visual Question Answering},
  booktitle = ICCV17,
  year      = {2017},
  pages     = {1300--1309},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193952},
}

@inproceedings{bb198752,
  author    = {Hu, R. and Andreas, J. and Rohrbach, M. and Darrell, T.J. and Saenko, K.},
  title     = {Learning to Reason: End-to-End Module Networks for Visual Question Answering},
  booktitle = ICCV17,
  year      = {2017},
  pages     = {804--813},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193953},
}

@inproceedings{bb198753,
  author    = {Jain, U. and Zhang, Z.Y. and Schwing, A.},
  title     = {Creativity: Generating Diverse Questions Using Variational Autoencoders},
  booktitle = CVPR17,
  year      = {2017},
  pages     = {5415--5424},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193954},
}

@inproceedings{bb198754,
  author    = {Zhu, Y. and Lim, J.J. and Fei Fei, L.},
  title     = {Knowledge Acquisition for Visual Question Answering via Iterative Querying},
  booktitle = CVPR17,
  year      = {2017},
  pages     = {6146--6155},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193955},
}

% NOTE(review): the source record carried no year and an undated venue string.
% 2019 is supplied on the understanding that this paper appeared in IbPRIA 2019
% (Part I, pp. 16-28) -- verify against the proceedings before relying on it.
@inproceedings{bb198755,
  author    = {Peris, A. and Casacuberta, F.},
  title     = {Interactive-Predictive Neural Multimodal Systems},
  booktitle = {IbPRIA},
  year      = {2019},
  pages     = {I:16--28},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193956},
}

@inproceedings{bb198756,
  author    = {Bolanos, M. and Peris, A. and Casacuberta, F. and Radeva, P.},
  title     = {{VIBIKNet}: Visual Bidirectional Kernelized Network for Visual Question Answering},
  booktitle = IbPRIA17,
  year      = {2017},
  pages     = {372--380},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193957},
}

@inproceedings{bb198757,
  author    = {Gao, P. and Li, H.S. and Li, S. and Lu, P. and Li, Y.K. and Hoi, S.C.H. and Wang, X.G.},
  title     = {Question-Guided Hybrid Convolution for Visual Question Answering},
  booktitle = ECCV18,
  year      = {2018},
  pages     = {I: 485--501},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193958},
}

@inproceedings{bb198758,
  author    = {Uehara, K. and Duan, N. and Harada, T.},
  title     = {Learning to Ask Informative Sub-Questions for Visual Question Answering},
  booktitle = MULA22,
  year      = {2022},
  pages     = {4680--4689},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193959},
}

@inproceedings{bb198759,
  author    = {Li, Y.K. and Duan, N. and Zhou, B.L. and Chu, X. and Ouyang, W.L. and Wang, X.G. and Zhou, M.},
  title     = {Visual Question Generation as Dual Task of Visual Question Answering},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {6116--6124},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193960},
}

@inproceedings{bb198760,
  author    = {Gao, P. and Jiang, Z.K. and You, H.X. and Lu, P. and Hoi, S.C.H. and Wang, X.G. and Li, H.S.},
  title     = {Dynamic Fusion With Intra- and Inter-Modality Attention Flow for Visual Question Answering},
  booktitle = CVPR19,
  year      = {2019},
  pages     = {6632--6641},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193961},
}

@inproceedings{bb198761,
  author    = {Lin, Y.T. and Pang, Z.Y. and Li, Y. and Wang, D.H.},
  title     = {Simple and effective visual question answering in a single modality},
  booktitle = ICIP16,
  year      = {2016},
  pages     = {2276--2280},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193962},
}

@inproceedings{bb198762,
  author    = {Kafle, K. and Kanan, C.},
  title     = {An Analysis of Visual Question Answering Algorithms},
  booktitle = ICCV17,
  year      = {2017},
  pages     = {1983--1991},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193963},
}

@inproceedings{bb198763,
  author    = {Kafle, K. and Kanan, C.},
  title     = {Answer-Type Prediction for Visual Question Answering},
  booktitle = CVPR16,
  year      = {2016},
  pages     = {4976--4984},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193964},
}

@inproceedings{bb198764,
  author    = {Wang, P. and Wu, Q. and Shen, C. and van den Hengel, A.J.},
  title     = {The {VQA}-Machine: Learning How to Use Existing Vision Algorithms to Answer New Questions},
  booktitle = CVPR17,
  year      = {2017},
  pages     = {3909--3918},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193965},
}

@inproceedings{bb198765,
  author    = {Yu, D. and Fu, J. and Mei, T. and Rui, Y.},
  title     = {Multi-level Attention Networks for Visual Question Answering},
  booktitle = CVPR17,
  year      = {2017},
  pages     = {4187--4195},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193966},
}

@inproceedings{bb198766,
  author    = {Kembhavi, A. and Seo, M. and Schwenk, D. and Choi, J. and Farhadi, A. and Hajishirzi, H.},
  title     = {Are You Smarter Than a Sixth Grader? Textbook Question Answering for Multimodal Machine Comprehension},
  booktitle = CVPR17,
  year      = {2017},
  pages     = {5376--5384},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193967},
}

@inproceedings{bb198767,
  author    = {Ganju, S. and Russakovsky, O. and Gupta, A.},
  title     = {What's in a Question: Using Visual Questions as a Form of Supervision},
  booktitle = CVPR17,
  year      = {2017},
  pages     = {6422--6431},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193968},
}

@inproceedings{bb198768,
  author    = {Ramakrishnan, S.K. and Pal, A. and Sharma, G. and Mittal, A.},
  title     = {An Empirical Evaluation of Visual Question Answering for Novel Objects},
  booktitle = CVPR17,
  year      = {2017},
  pages     = {7312--7321},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193969},
}

@inproceedings{bb198769,
  author    = {Xu, H.J. and Saenko, K.},
  title     = {Ask, Attend and Answer: Exploring Question-Guided Spatial Attention for Visual Question Answering},
  booktitle = ECCV16,
  year      = {2016},
  pages     = {VII: 451--466},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193970},
}

@inproceedings{bb198770,
  author    = {Jabri, A. and Joulin, A. and van der Maaten, L.},
  title     = {Revisiting Visual Question Answering Baselines},
  booktitle = ECCV16,
  year      = {2016},
  pages     = {VIII: 727--739},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193971},
}

@inproceedings{bb198771,
  author    = {Yang, Z.C. and He, X.D. and Gao, J.F. and Deng, L. and Smola, A.},
  title     = {Stacked Attention Networks for Image Question Answering},
  booktitle = CVPR16,
  year      = {2016},
  pages     = {21--29},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193972},
}

@inproceedings{bb198772,
  author    = {Sadeghi, F. and Divvala, S.K. and Farhadi, A.},
  title     = {{VisKE}: Visual knowledge extraction and question answering by visual verification of relation phrases},
  booktitle = CVPR15,
  year      = {2015},
  pages     = {1456--1464},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193973},
}

@inproceedings{bb198773,
  author    = {Liu, Y. and Liu, J. and Wang, D. and Cheng, J.},
  title     = {A robust multivariate reranking algorithm for Question Answering enrichment},
  booktitle = ICIP12,
  year      = {2012},
  pages     = {1917--1920},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193974},
}

@inproceedings{bb198774,
  author    = {Varekamp, C. and van de Walle, P. and de Putter, M.},
  title     = {Question interface for {3D} picture creation on an autostereoscopic digital picture frame},
  booktitle = {3DTV09},
  year      = {2009},
  pages     = {1--4},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vqa1.html#TT193975},
}

@article{bb198775,
  author    = {Wu, Y.C. and Yang, J.C.},
  title     = {A Robust Passage Retrieval Algorithm for Video Question Answering},
  journal   = CirSysVideo,
  volume    = {18},
  year      = {2008},
  number    = {10},
  month     = oct,
  pages     = {1411--1421},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidq2.html#TT193976},
}

@inproceedings{bb198776,
  author    = {Wu, Y.C. and Lee, Y.S. and Yang, J.C. and Yen, S.J.},
  title     = {A New Passage Ranking Algorithm for Video Question Answering},
  booktitle = PSIVT06,
  year      = {2006},
  pages     = {563--572},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidq2.html#TT193977},
}

@article{bb198777,
  author    = {Li, G.D. and Li, H.J. and Ming, Z.Y. and Hong, R.C. and Tang, S. and Chua, T.S.},
  title     = {Question Answering over Community-Contributed Web Videos},
  journal   = MultMedMag,
  volume    = {17},
  year      = {2010},
  number    = {4},
  month     = oct,
  pages     = {46--57},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidq2.html#TT193978},
}

@inproceedings{bb198778,
  author    = {Song, Y.C. and Li, H.J.},
  title     = {Mash-Up Approach for Web Video Category Recommendation},
  booktitle = PSIVT10,
  year      = {2010},
  pages     = {197--202},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidq2.html#TT193979},
}

@article{bb198779,
  author    = {Guo, Z.Y. and Zhao, Z. and Jin, W. and Wei, Z.C. and Yang, M. and Wang, N.N. and Yuan, N.J.},
  title     = {Multi-Turn Video Question Generation via Reinforced Multi-Choice Attention Network},
  journal   = CirSysVideo,
  volume    = {31},
  year      = {2021},
  number    = {5},
  pages     = {1697--1710},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidq2.html#TT193980},
}

@article{bb198780,
  author    = {Xue, H.Y. and Chu, W. and Zhao, Z. and Cai, D.},
  title     = {A Better Way to Attend: Attention With Trees for Video Question Answering},
  journal   = IP,
  volume    = {27},
  year      = {2018},
  number    = {11},
  month     = nov,
  pages     = {5563--5574},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidq2.html#TT193981},
}

@article{bb198781,
  author    = {Xue, H.Y. and Zhao, Z. and Cai, D.},
  title     = {Unifying the Video and Question Attentions for Open-Ended Video Question Answering},
  journal   = IP,
  volume    = {26},
  year      = {2017},
  number    = {12},
  month     = dec,
  pages     = {5656--5666},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidq2.html#TT193982},
}

@article{bb198782,
  author    = {Zhao, Z. and Xiao, S.W. and Song, Z. and Lu, C.J. and Xiao, J. and Zhuang, Y.T.},
  title     = {Open-Ended Video Question Answering via Multi-Modal Conditional Adversarial Networks},
  journal   = IP,
  volume    = {29},
  year      = {2020},
  pages     = {3859--3870},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidq2.html#TT193983},
}

@article{bb198783,
  author    = {Zhao, Z. and Zhang, Z. and Xiao, S.W. and Xiao, Z.X. and Yan, X.H. and Yu, J. and Cai, D. and Wu, F.},
  title     = {Long-Form Video Question Answering via Dynamic Hierarchical Reinforced Networks},
  journal   = IP,
  volume    = {28},
  year      = {2019},
  number    = {12},
  month     = dec,
  pages     = {5939--5952},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidq2.html#TT193984},
}

@article{bb198784,
  author    = {Yu, T. and Yu, J. and Yu, Z. and Huang, Q.M. and Tian, Q.},
  title     = {Long-Term Video Question Answering via Multimodal Hierarchical Memory Attentive Networks},
  journal   = CirSysVideo,
  volume    = {31},
  year      = {2021},
  number    = {3},
  month     = mar,
  pages     = {931--944},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidq2.html#TT193985},
}

@article{bb198785,
  author    = {Jang, Y. and Song, Y. and Kim, C.D. and Yu, Y. and Kim, Y. and Kim, G.},
  title     = {Video Question Answering with Spatio-Temporal Reasoning},
  journal   = IJCV,
  volume    = {127},
  year      = {2019},
  number    = {10},
  month     = oct,
  pages     = {1385--1412},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidq2.html#TT193986},
}

@inproceedings{bb198786,
  author    = {Jang, Y. and Song, Y. and Yu, Y. and Kim, Y. and Kim, G.},
  title     = {{TGIF-QA}: Toward Spatio-Temporal Reasoning in Visual Question Answering},
  booktitle = CVPR17,
  year      = {2017},
  pages     = {1359--1367},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidq2.html#TT193987},
}

@article{bb198787,
  author    = {Yu, T. and Yu, J. and Yu, Z. and Tao, D.},
  title     = {Compositional Attention Networks With Two-Stream Fusion for Video Question Answering},
  journal   = IP,
  volume    = {29},
  year      = {2020},
  pages     = {1204--1218},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidq2.html#TT193988},
}

@article{bb198788,
  author    = {Wang, W.N. and Huang, Y. and Wang, L.},
  title     = {Long video question answering: A Matching-guided Attention Model},
  journal   = PR,
  volume    = {102},
  year      = {2020},
  pages     = {107248},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidq2.html#TT193989},
}

@article{bb198789,
  author    = {Zhang, W. and Tang, S. and Cao, Y. and Pu, S. and Wu, F. and Zhuang, Y.},
  title     = {Frame Augmented Alternating Attention Network for Video Question Answering},
  journal   = MultMed,
  volume    = {22},
  year      = {2020},
  number    = {4},
  month     = apr,
  pages     = {1032--1041},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidq2.html#TT193990},
}

@article{bb198790,
  author    = {Chen, J. and Shao, J. and He, C.},
  title     = {Movie fill in the blank by joint learning from video and text with adaptive temporal attention},
  journal   = PRL,
  volume    = {132},
  year      = {2020},
  pages     = {62--68},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidq2.html#TT193991},
}

@article{bb198791,
  author    = {Wang, A. and Luu, A.T. and Foo, C. and Zhu, H. and Tay, Y. and Chandrasekhar, V.},
  title     = {Holistic Multi-Modal Memory Network for Movie Question Answering},
  journal   = IP,
  volume    = {29},
  year      = {2020},
  number    = {1},
  pages     = {489--499},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidq2.html#TT193992},
}

@article{bb198792,
  author    = {Yuan, Z.Q. and Sun, S.Y. and Duan, L.X. and Li, C.S. and Wu, X. and Xu, C.S.},
  title     = {Adversarial Multimodal Network for Movie Story Question Answering},
  journal   = MultMed,
  volume    = {23},
  year      = {2021},
  pages     = {1744--1756},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidq2.html#TT193993},
}

@article{bb198793,
  author    = {Gu, M. and Zhao, Z. and Jin, W. and Hong, R. and Wu, F.},
  title     = {Graph-Based Multi-Interaction Network for Video Question Answering},
  journal   = IP,
  volume    = {30},
  year      = {2021},
  pages     = {2758--2770},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidq2.html#TT193994},
}

@article{bb198794,
  author    = {Xie, Z. and Wu, K.W. and Zhang, X.Y. and Yang, X.M. and Hou, J.K.},
  title     = {Learning continuous temporal embedding of videos using pattern theory},
  journal   = PRL,
  volume    = {146},
  year      = {2021},
  pages     = {222--229},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidq2.html#TT193995},
}

@article{bb198795,
  author    = {Liu, Y. and Zhang, X.M. and Zhang, Q.Y. and Li, C.Z. and Huang, F. and Tang, X.H. and Li, Z.J.},
  title     = {Dual self-attention with co-attention networks for visual question answering},
  journal   = PR,
  volume    = {117},
  year      = {2021},
  pages     = {107956},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidq2.html#TT193996},
}

@article{bb198796,
  author    = {Liu, Y. and Zhang, X.M. and Huang, F. and Shen, S.X. and Tian, P. and Li, L. and Li, Z.J.},
  title     = {Dynamic Self-Attention with Vision Synchronization Networks for Video Question Answering},
  journal   = PR,
  volume    = {132},
  year      = {2022},
  pages     = {108959},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidq2.html#TT193997},
}

@article{bb198797,
  author    = {Liu, Y. and Zhang, X.M. and Huang, F. and Zhang, B. and Li, Z.J.},
  title     = {Cross-Attentional Spatio-Temporal Semantic Graph Networks for Video Question Answering},
  journal   = IP,
  volume    = {31},
  year      = {2022},
  pages     = {1684--1696},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidq2.html#TT193998},
}

@article{bb198798,
  author    = {Jin, W. and Zhao, Z. and Cao, X.C. and Zhu, J.M. and He, X.Q. and Zhuang, Y.T.},
  title     = {Adaptive Spatio-Temporal Graph Enhanced Vision-Language Representation for Video {QA}},
  journal   = IP,
  volume    = {30},
  year      = {2021},
  pages     = {5477--5489},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidq2.html#TT193999},
}

@article{bb198799,
  author    = {Gao, L. and Chen, T.M. and Li, X.P. and Zeng, P.P. and Zhao, L. and Li, Y.F.},
  title     = {Generalized pyramid co-attention with learnable aggregation net for video question answering},
  journal   = PR,
  volume    = {120},
  year      = {2021},
  pages     = {108145},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidq2.html#TT194000},
}