% ---------------------------------------------------------------------------
% Bibliography entries bb217800 - bb217899.
% Each entry records its originating page in the BIBSOURCE field.
% Bare values such as ECCV18, CVPR23, PAMI or CVIU are macro references, not
% strings; their definitions are not part of this span.
% NOTE(review): confirm those macros are declared before these entries load.
% Comments here live outside entries and must never contain an at-sign.
% ---------------------------------------------------------------------------

% ---- Entries sourced from applicat803vidq2.html ---------------------------

@inproceedings{bb217800,
  AUTHOR = "Kim, K.M. and Choi, S.H. and Kim, J.H. and Zhang, B.T.",
  TITLE = "Multimodal Dual Attention Memory for Video Story Question Answering",
  BOOKTITLE = ECCV18,
  YEAR = "2018",
  PAGES = "XV: 698-713",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vidq2.html#TT212849"
}

@inproceedings{bb217801,
  AUTHOR = "Yu, Y.J. and Kim, J.S. and Kim, G.",
  TITLE = "A Joint Sequence Fusion Model for Video Question Answering and Retrieval",
  BOOKTITLE = ECCV18,
  YEAR = "2018",
  PAGES = "VII: 487-503",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vidq2.html#TT212850"
}

@inproceedings{bb217802,
  AUTHOR = "Hasan Chowdhury, M.I. and Nguyen, K. and Sridharan, S. and Fookes, C.",
  TITLE = "Hierarchical Relational Attention for Video Question Answering",
  BOOKTITLE = ICIP18,
  YEAR = "2018",
  PAGES = "599-603",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vidq2.html#TT212851"
}

@inproceedings{bb217803,
  AUTHOR = "Mun, J. and Seo, P.H. and Jung, I. and Han, B.H.",
  TITLE = "MarioQA: Answering Questions by Watching Gameplay Videos",
  BOOKTITLE = ICCV17,
  YEAR = "2017",
  PAGES = "2886-2894",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vidq2.html#TT212852"
}

@inproceedings{bb217804,
  AUTHOR = "Yu, Y. and Ko, H. and Choi, J. and Kim, G.",
  TITLE = "End-to-End Concept Word Detection for Video Captioning, Retrieval, and Question Answering",
  BOOKTITLE = CVPR17,
  YEAR = "2017",
  PAGES = "3261-3269",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vidq2.html#TT212853"
}

% ---- Entries sourced from applicat803vqds43.html --------------------------

@article{bb217805,
  AUTHOR = "Kafle, K. and Kanan, C.",
  TITLE = "Visual question answering: Datasets, algorithms, and future challenges",
  JOURNAL = CVIU,
  VOLUME = "163",
  YEAR = "2017",
  NUMBER = "1",
  PAGES = "3-20",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT212856"
}

@article{bb217806,
  AUTHOR = "Wu, Q. and Teney, D. and Wang, P. and Shen, C.H. and Dick, A. and van den Hengel, A.J.",
  TITLE = "Visual question answering: A survey of methods and datasets",
  JOURNAL = CVIU,
  VOLUME = "163",
  YEAR = "2017",
  NUMBER = "1",
  PAGES = "21-40",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT212857"
}

@article{bb217807,
  AUTHOR = "Teney, D. and Wu, Q. and van den Hengel, A.J.",
  TITLE = "Visual Question Answering: A Tutorial",
  JOURNAL = SPMag,
  VOLUME = "34",
  YEAR = "2017",
  NUMBER = "6",
  MONTH = "November",
  PAGES = "63-75",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT212858"
}

@inproceedings{bb217808,
  AUTHOR = "Teney, D. and Liu, L. and van den Hengel, A.J.",
  TITLE = "Graph-Structured Representations for Visual Question Answering",
  BOOKTITLE = CVPR17,
  YEAR = "2017",
  PAGES = "3233-3241",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT212859"
}

@inproceedings{bb217809,
  AUTHOR = "Teney, D. and van den Hengel, A.J.",
  TITLE = "Visual Question Answering as a Meta Learning Task",
  BOOKTITLE = ECCV18,
  YEAR = "2018",
  PAGES = "XV: 229-245",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT212860"
}

@inproceedings{bb217810,
  AUTHOR = "Teney, D. and Abbasnejad, E. and van den Hengel, A.J.",
  TITLE = "Unshuffling Data for Improved Generalization in Visual Question Answering",
  BOOKTITLE = ICCV21,
  YEAR = "2021",
  PAGES = "1397-1407",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT212861"
}

@article{bb217811,
  AUTHOR = "Wu, Q. and Shen, C.H. and Wang, P. and Dick, A. and van den Hengel, A.J.",
  TITLE = "Image Captioning and Visual Question Answering Based on Attributes and External Knowledge",
  JOURNAL = PAMI,
  VOLUME = "40",
  YEAR = "2018",
  NUMBER = "6",
  MONTH = "June",
  PAGES = "1367-1381",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT212862"
}

@inproceedings{bb217812,
  AUTHOR = "Wu, Q. and Wang, P. and Shen, C.H. and Dick, A. and van den Hengel, A.J.",
  TITLE = "Ask Me Anything: Free-Form Visual Question Answering Based on Knowledge from External Sources",
  BOOKTITLE = CVPR16,
  YEAR = "2016",
  PAGES = "4622-4630",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT212863"
}

@article{bb217813,
  AUTHOR = "Tommasi, T. and Mallya, A. and Plummer, B.A. and Lazebnik, S. and Berg, A.C. and Berg, T.L.",
  TITLE = "Combining Multiple Cues for Visual Madlibs Question Answering",
  JOURNAL = IJCV,
  VOLUME = "127",
  YEAR = "2019",
  NUMBER = "1",
  MONTH = "January",
  PAGES = "38-60",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT212864"
}

% NOTE(review): PAGES of bb217814 is the placeholder "xx-yy" and will print
% literally; replace with the real page range once confirmed.
@inproceedings{bb217814,
  AUTHOR = "Tommasi, T. and Mallya, A. and Plummer, B.A. and Lazebnik, S. and Berg, A.C. and Berg, T.L.",
  TITLE = "Solving Visual Madlibs with Multiple Cues",
  BOOKTITLE = BMVC16,
  YEAR = "2016",
  PAGES = "xx-yy",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT212865"
}

@inproceedings{bb217815,
  AUTHOR = "Yu, L.C. and Park, E. and Berg, A.C. and Berg, T.L.",
  TITLE = "Visual Madlibs: Fill in the Blank Description Generation and Question Answering",
  BOOKTITLE = ICCV15,
  YEAR = "2015",
  PAGES = "2461-2469",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT212866"
}

@article{bb217816,
  AUTHOR = "Liu, F. and Xiang, T. and Hospedales, T.M. and Yang, W.K. and Sun, C.Y.",
  TITLE = "Inverse Visual Question Answering: A New Benchmark and VQA Diagnosis Tool",
  JOURNAL = PAMI,
  VOLUME = "42",
  YEAR = "2020",
  NUMBER = "2",
  MONTH = "February",
  PAGES = "460-474",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT212867"
}

@inproceedings{bb217817,
  AUTHOR = "Liu, F. and Xiang, T. and Hospedales, T.M. and Yang, W.K. and Sun, C.Y.",
  TITLE = "iVQA: Inverse Visual Question Answering",
  BOOKTITLE = CVPR18,
  YEAR = "2018",
  PAGES = "8611-8619",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT212868"
}

% NOTE(review): PAGES of bb217818 is the placeholder "xx-yy" and will print
% literally; replace with the real page range or article number once confirmed.
@article{bb217818,
  AUTHOR = "Patil, C. and Patwardhan, M.",
  TITLE = "Visual Question Generation: The State of the Art",
  JOURNAL = Surveys,
  VOLUME = "53",
  YEAR = "2020",
  NUMBER = "3",
  MONTH = "May",
  PAGES = "xx-yy",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT212869"
}

@article{bb217819,
  AUTHOR = "He, F.J. and Wang, Y.X. and Miao, X.L. and Sun, X.",
  TITLE = "Interpretable visual reasoning: A survey",
  JOURNAL = IVC,
  VOLUME = "112",
  YEAR = "2021",
  PAGES = "104194",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT212870"
}

@article{bb217820,
  AUTHOR = "Sharma, H. and Jalal, A.S.",
  TITLE = "A survey of methods, datasets and evaluation metrics for visual question answering",
  JOURNAL = IVC,
  VOLUME = "116",
  YEAR = "2021",
  PAGES = "104327",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT212871"
}

@article{bb217821,
  AUTHOR = "Yang, L. and Jiang, H. and Song, Q. and Guo, J.",
  TITLE = "A Survey on Long-Tailed Visual Recognition",
  JOURNAL = IJCV,
  VOLUME = "130",
  YEAR = "2022",
  NUMBER = "7",
  MONTH = "July",
  PAGES = "1837-1872",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT212872"
}

@article{bb217822,
  AUTHOR = "Zhao, W.L. and Rao, Y.M. and Tang, Y.S. and Zhou, J. and Lu, J.W.",
  TITLE = "VideoABC: A Real-World Video Dataset for Abductive Visual Reasoning",
  JOURNAL = IP,
  VOLUME = "31",
  YEAR = "2022",
  PAGES = "6048-6061",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT212873"
}

@article{bb217823,
  AUTHOR = "Lahouti, F. and Kostina, V. and Hassibi, B.",
  TITLE = "How to Query an Oracle? Efficient Strategies to Label Data",
  JOURNAL = PAMI,
  VOLUME = "44",
  YEAR = "2022",
  NUMBER = "11",
  MONTH = "November",
  PAGES = "7597-7609",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT212874"
}

@article{bb217824,
  AUTHOR = "Ma, J. and Wang, P.H. and Kong, D.C. and Wang, Z.W. and Liu, J. and Pei, H.B. and Zhao, J.Z.",
  TITLE = "Robust Visual Question Answering: Datasets, Methods, and Future Challenges",
  JOURNAL = PAMI,
  VOLUME = "46",
  YEAR = "2024",
  NUMBER = "8",
  MONTH = "August",
  PAGES = "5575-5594",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT212875"
}

@article{bb217825,
  AUTHOR = "Li, K. and Vosselman, G. and Yang, M.Y.",
  TITLE = "HRVQA: A Visual Question Answering benchmark for high-resolution aerial images",
  JOURNAL = PandRS,
  VOLUME = "214",
  YEAR = "2024",
  PAGES = "65-81",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT212876"
}

@inproceedings{bb217826,
  AUTHOR = "Singh, M. and Patvardhan, C. and Lakshmi, C.V.",
  TITLE = "Does ChatGPT Spell the End of Automatic Question Generation Research?",
  BOOKTITLE = ICCVMI23,
  YEAR = "2023",
  PAGES = "1-6",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT212877"
}

@inproceedings{bb217827,
  AUTHOR = "Zhu, L. and Ning, R. and Li, J. and Xin, C.S. and Wu, H.Y.",
  TITLE = "Most and Least Retrievable Images in Visual-Language Query Systems",
  BOOKTITLE = ECCV22,
  YEAR = "2022",
  PAGES = "XXXVII:1-18",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT212878"
}

@inproceedings{bb217828,
  AUTHOR = "Salewski, L. and Emde, C. and Do, V. and Akata, Z. and Lukasiewicz, T.",
  TITLE = "e-ViL: A Dataset and Benchmark for Natural Language Explanations in Vision-Language Tasks",
  BOOKTITLE = ICCV21,
  YEAR = "2021",
  PAGES = "1224-1234",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT212879"
}

@inproceedings{bb217829,
  AUTHOR = "Gupta, V. and Patro, B.N. and Parihar, H. and Namboodiri, V.P.",
  TITLE = "VQuAD: Video Question Answering Diagnostic Dataset",
  BOOKTITLE = Novelty22,
  YEAR = "2022",
  PAGES = "282-291",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT212880"
}

@inproceedings{bb217830,
  AUTHOR = "Nishimura, T. and Sakoda, K. and Hashimoto, A. and Ushiku, Y. and Tanaka, N. and Ono, F. and Kameko, H. and Mori, S.",
  TITLE = "Egocentric Biochemical Video-and-Language Dataset",
  BOOKTITLE = CLVL21,
  YEAR = "2021",
  PAGES = "3122-3126",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT212881"
}

@inproceedings{bb217831,
  AUTHOR = "Zhang, M. and Maidment, T. and Diab, A. and Kovashka, A. and Hwa, R.",
  TITLE = "Domain-robust VQA with diverse datasets and methods but no target labels",
  BOOKTITLE = CVPR21,
  YEAR = "2021",
  PAGES = "7042-7052",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT212882"
}

@inproceedings{bb217832,
  AUTHOR = "Mathew, M. and Karatzas, D. and Jawahar, C.V.",
  TITLE = "DocVQA: A Dataset for VQA on Document Images",
  BOOKTITLE = WACV21,
  YEAR = "2021",
  PAGES = "2199-2208",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT212883"
}

@inproceedings{bb217833,
  AUTHOR = "Patel, D. and Parikh, R. and Shastri, Y.",
  TITLE = "Recent Advances in Video Question Answering: A Review of Datasets and Methods",
  BOOKTITLE = VTIUR20,
  YEAR = "2020",
  PAGES = "339-356",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT212884"
}

@inproceedings{bb217834,
  AUTHOR = "Fan, C.",
  TITLE = "EgoVQA: An Egocentric Video Question Answering Benchmark Dataset",
  BOOKTITLE = EPIC19,
  YEAR = "2019",
  PAGES = "4359-4366",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT212885"
}

@inproceedings{bb217835,
  AUTHOR = "Hudson, D.A. and Manning, C.D.",
  TITLE = "GQA: A New Dataset for Real-World Visual Reasoning and Compositional Question Answering",
  BOOKTITLE = CVPR19,
  YEAR = "2019",
  PAGES = "6693-6702",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT212886"
}

@inproceedings{bb217836,
  AUTHOR = "Yang, G.Y.R. and Ganichev, I. and Wang, X.J. and Shlens, J. and Sussillo, D.",
  TITLE = "A Dataset and Architecture for Visual Reasoning with a Working Memory",
  BOOKTITLE = ECCV18,
  YEAR = "2018",
  PAGES = "X: 729-745",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT212887"
}

@inproceedings{bb217837,
  AUTHOR = "Gan, C. and Li, Y. and Li, H. and Sun, C. and Gong, B.",
  TITLE = "VQS: Linking Segmentations to Questions and Answers for Supervised Attention in VQA and Question-Focused Semantic Segmentation",
  BOOKTITLE = ICCV17,
  YEAR = "2017",
  PAGES = "1829-1838",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT212888"
}

@inproceedings{bb217838,
  AUTHOR = "Maharaj, T. and Ballas, N. and Rohrbach, A. and Courville, A. and Pal, C.",
  TITLE = "A Dataset and Exploration of Models for Understanding Video Data through Fill-in-the-Blank Question-Answering",
  BOOKTITLE = CVPR17,
  YEAR = "2017",
  PAGES = "7359-7368",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vqds43.html#TT212889"
}

% ---- Entries sourced from applicat803vdi3.html ----------------------------

@article{bb217839,
  AUTHOR = "Das, A. and Kottur, S. and Gupta, K. and Singh, A. and Yadav, D. and Lee, S. and Moura, J.M.F. and Parikh, D. and Batra, D.",
  TITLE = "Visual Dialog",
  JOURNAL = PAMI,
  VOLUME = "41",
  YEAR = "2019",
  NUMBER = "5",
  MONTH = "May",
  PAGES = "1242-1256",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212890"
}

@article{bb217840,
  AUTHOR = "Zhao, Z. and Zhang, Z. and Jiang, X.H. and Cai, D.",
  TITLE = "Multi-Turn Video Question Answering via Hierarchical Attention Context Reinforced Networks",
  JOURNAL = IP,
  VOLUME = "28",
  YEAR = "2019",
  NUMBER = "8",
  MONTH = "August",
  PAGES = "3860-3872",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212891"
}

@article{bb217841,
  AUTHOR = "Gu, M. and Zhao, Z. and Jin, W. and Cai, D. and Wu, F.",
  TITLE = "Video Dialog via Multi-Grained Convolutional Self-Attention Context Multi-Modal Networks",
  JOURNAL = CirSysVideo,
  VOLUME = "30",
  YEAR = "2020",
  NUMBER = "12",
  MONTH = "December",
  PAGES = "4453-4466",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212892"
}

@article{bb217842,
  AUTHOR = "Guo, D. and Wang, H. and Wang, S. and Wang, M.",
  TITLE = "Textual-Visual Reference-Aware Attention Network for Visual Dialog",
  JOURNAL = IP,
  VOLUME = "29",
  YEAR = "2020",
  PAGES = "6655-6666",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212893"
}

@article{bb217843,
  AUTHOR = "Patro, B.N. and Anupriy and Namboodiri, V.P.",
  TITLE = "Probabilistic framework for solving visual dialog",
  JOURNAL = PR,
  VOLUME = "110",
  YEAR = "2021",
  PAGES = "107586",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212894"
}

@article{bb217844,
  AUTHOR = "Zhao, L. and Lyu, X.Y. and Song, J.K. and Gao, L.L.",
  TITLE = "GuessWhich? Visual dialog with attentive memory network",
  JOURNAL = PR,
  VOLUME = "114",
  YEAR = "2021",
  PAGES = "107823",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212895"
}

@article{bb217845,
  AUTHOR = "Jiang, T.L. and Shao, H.L. and Tian, X. and Ji, Y. and Liu, C.P.",
  TITLE = "Aligning vision-language for graph inference in visual dialog",
  JOURNAL = IVC,
  VOLUME = "116",
  YEAR = "2021",
  PAGES = "104316",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212896"
}

@article{bb217846,
  AUTHOR = "Guo, D. and Wang, H. and Wang, M.",
  TITLE = "Context-Aware Graph Inference With Knowledge Distillation for Visual Dialog",
  JOURNAL = PAMI,
  VOLUME = "44",
  YEAR = "2022",
  NUMBER = "10",
  MONTH = "October",
  PAGES = "6056-6073",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212897"
}

@inproceedings{bb217847,
  AUTHOR = "Guo, D. and Wang, H. and Zhang, H.W. and Zha, Z.J. and Wang, M.",
  TITLE = "Iterative Context-Aware Graph Inference for Visual Dialog",
  BOOKTITLE = CVPR20,
  YEAR = "2020",
  PAGES = "10052-10061",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212898"
}

@article{bb217848,
  AUTHOR = "Patro, B.N. and Anupriy and Namboodiri, V.P.",
  TITLE = "Explanation vs. attention: A two-player game to obtain attention for VQA and visual dialog",
  JOURNAL = PR,
  VOLUME = "132",
  YEAR = "2022",
  PAGES = "108898",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212899"
}

@article{bb217849,
  AUTHOR = "Zhu, Y. and Wu, Y. and Yang, Y. and Yan, Y.",
  TITLE = "Saying the Unseen: Video Descriptions via Dialog Agents",
  JOURNAL = PAMI,
  VOLUME = "44",
  YEAR = "2022",
  NUMBER = "10",
  MONTH = "October",
  PAGES = "7190-7204",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212900"
}

@article{bb217850,
  AUTHOR = "Huang, Y. and Wang, Y.M. and Wang, L.",
  TITLE = "Efficient Image and Sentence Matching",
  JOURNAL = PAMI,
  VOLUME = "45",
  YEAR = "2023",
  NUMBER = "3",
  MONTH = "March",
  PAGES = "2970-2983",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212901"
}

@article{bb217851,
  AUTHOR = "Zhao, L. and Li, J.L. and Gao, L.L. and Rao, Y. and Song, J.K. and Shen, H.T.",
  TITLE = "Heterogeneous Knowledge Network for Visual Dialog",
  JOURNAL = CirSysVideo,
  VOLUME = "33",
  YEAR = "2023",
  NUMBER = "2",
  MONTH = "February",
  PAGES = "861-871",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212902"
}

@article{bb217852,
  AUTHOR = "Bucinca, Z. and Yemez, Y. and Erzin, E. and Sezgin, M.",
  TITLE = "AffectON: Incorporating Affect Into Dialog Generation",
  JOURNAL = AffCom,
  VOLUME = "14",
  YEAR = "2023",
  NUMBER = "1",
  MONTH = "January",
  PAGES = "823-835",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212903"
}

@article{bb217853,
  AUTHOR = "Yu, H. and Ko, Y.J.",
  TITLE = "Enriching the dialogue state tracking model with a syntactic discourse graph",
  JOURNAL = PRL,
  VOLUME = "169",
  YEAR = "2023",
  PAGES = "81-86",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212904"
}

@article{bb217854,
  AUTHOR = "Wu, Y.X. and Liao, L. and Zhang, G.Y. and Lei, W.Q. and Zhao, G.S. and Qian, X.M. and Chua, T.S.",
  TITLE = "State Graph Reasoning for Multimodal Conversational Recommendation",
  JOURNAL = MultMed,
  VOLUME = "25",
  YEAR = "2023",
  PAGES = "3113-3124",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212905"
}

@article{bb217855,
  AUTHOR = "Firdaus, M. and Thangavelu, N. and Ekbal, A. and Bhattacharyya, P.",
  TITLE = "I Enjoy Writing and Playing, Do You?: A Personalized and Emotion Grounded Dialogue Agent Using Generative Adversarial Network",
  JOURNAL = AffCom,
  VOLUME = "14",
  YEAR = "2023",
  NUMBER = "3",
  MONTH = "July",
  PAGES = "2127-2138",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212906"
}

@article{bb217856,
  AUTHOR = "Zhang, Z. and Li, S. and Ji, Y. and Liu, C.P.",
  TITLE = "Infer unseen from seen: Relation regularized zero-shot visual dialog",
  JOURNAL = JVCIR,
  VOLUME = "97",
  YEAR = "2023",
  PAGES = "103961",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212907"
}

@article{bb217857,
  AUTHOR = "Qi, Q.S. and Zhang, A. and Liao, Y. and Sun, W.Y. and Wang, Y.L. and Li, X.B. and Liu, S.",
  TITLE = "Simultaneously Training and Compressing Vision-and-Language Pre-Training Model",
  JOURNAL = MultMed,
  VOLUME = "25",
  YEAR = "2023",
  PAGES = "8194-8203",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212908"
}

@article{bb217858,
  AUTHOR = "Liu, A.A. and Huang, C.X. and Xu, N. and Tian, H. and Liu, J. and Zhang, Y.D.",
  TITLE = "Counterfactual Visual Dialog: Robust Commonsense Knowledge Learning From Unbiased Training",
  JOURNAL = MultMed,
  VOLUME = "26",
  YEAR = "2024",
  PAGES = "1639-1651",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212909"
}

@article{bb217859,
  AUTHOR = "Ricci, R. and Bazi, Y. and Melgani, F.",
  TITLE = "Machine-to-Machine Visual Dialoguing with ChatGPT for Enriched Textual Image Description",
  JOURNAL = RS,
  VOLUME = "16",
  YEAR = "2024",
  NUMBER = "3",
  PAGES = "441",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212910"
}

% The ampersand in the title of bb217860 is escaped: a bare one is copied
% verbatim into the generated bibliography and aborts the LaTeX run.
@article{bb217860,
  AUTHOR = "Bulat, A. and Tzimiropoulos, G.",
  TITLE = "Language-Aware Soft Prompting: Text-to-Text Optimization for Few- and Zero-Shot Adaptation of V\&L Models",
  JOURNAL = IJCV,
  VOLUME = "132",
  YEAR = "2024",
  NUMBER = "4",
  MONTH = "April",
  PAGES = "1108-1125",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212911"
}

@inproceedings{bb217861,
  AUTHOR = "Bulat, A. and Tzimiropoulos, G.",
  TITLE = "LASP: Text-to-Text Optimization for Language-Aware Soft Prompting of Vision and Language Models",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "23232-23241",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212912"
}

@article{bb217862,
  AUTHOR = "Wang, A.J.P. and Zhou, P. and Shou, M.Z. and Yan, S.C.",
  TITLE = "Enhancing Visual Grounding in Vision-Language Pre-Training With Position-Guided Text Prompts",
  JOURNAL = PAMI,
  VOLUME = "46",
  YEAR = "2024",
  NUMBER = "5",
  MONTH = "May",
  PAGES = "3406-3421",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212913"
}

@inproceedings{bb217863,
  AUTHOR = "Wang, A.J.P. and Zhou, P. and Shou, M.Z. and Yan, S.C.",
  TITLE = "Position-Guided Text Prompt for Vision-Language Pre-Training",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "23242-23251",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212914"
}

@article{bb217864,
  AUTHOR = "Du, S.S. and Wang, H. and Li, T. and Chen, C.W.",
  TITLE = "Hybrid Graph Reasoning With Dynamic Interaction for Visual Dialog",
  JOURNAL = MultMed,
  VOLUME = "26",
  YEAR = "2024",
  PAGES = "9095-9108",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212915"
}

@article{bb217865,
  AUTHOR = "Sun, J.T. and Kou, J.Y. and Hou, W. and Bai, Y.",
  TITLE = "A multi-agent curiosity reward model for task-oriented dialogue systems",
  JOURNAL = PR,
  VOLUME = "157",
  YEAR = "2025",
  PAGES = "110884",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212916"
}

@article{bb217866,
  AUTHOR = "Kane, B. and Giugno, C. and Schubert, L. and Haut, K. and Wohn, C. and Hoque, E.",
  TITLE = "Managing Emotional Dialogue for a Virtual Cancer Patient: A Schema-Guided Approach",
  JOURNAL = AffCom,
  VOLUME = "15",
  YEAR = "2024",
  NUMBER = "3",
  MONTH = "July",
  PAGES = "1041-1052",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212917"
}

@inproceedings{bb217867,
  AUTHOR = "Abdessaied, A. and Shi, L. and Bulling, A.",
  TITLE = "VD-GR: Boosting Visual Dialog with Cascaded Spatial-Temporal Multi-Modal GRaphs",
  BOOKTITLE = WACV24,
  YEAR = "2024",
  PAGES = "5793-5802",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212918"
}

@inproceedings{bb217868,
  AUTHOR = "Han, S.J. and Hessel, J. and Dziri, N. and Choi, Y. and Yu, Y.J.",
  TITLE = "Champagne: Learning Real-world Conversation from Large-Scale Web Videos",
  BOOKTITLE = ICCV23,
  YEAR = "2023",
  PAGES = "15452-15463",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212919"
}

@inproceedings{bb217869,
  AUTHOR = "Oshima, R. and Shinagawa, S. and Tsunashima, H. and Feng, Q. and Morishima, S.",
  TITLE = "Pointing out Human Answer Mistakes in a Goal-Oriented Visual Dialogue",
  BOOKTITLE = VLAR23,
  YEAR = "2023",
  PAGES = "4665-4670",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212920"
}

@inproceedings{bb217870,
  AUTHOR = "Ishii, T. and Miura, J. and Hayashi, K.",
  TITLE = "Enhancing Human-Robot Collaborative Object Search through Human Behavior Observation and Dialog",
  BOOKTITLE = ACVR23,
  YEAR = "2023",
  PAGES = "1841-1848",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212921"
}

@inproceedings{bb217871,
  AUTHOR = "Madasu, A. and Lal, V.",
  TITLE = "Is Multimodal Vision Supervision Beneficial to Language?",
  BOOKTITLE = NFVLR23,
  YEAR = "2023",
  PAGES = "2637-2642",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212922"
}

@inproceedings{bb217872,
  AUTHOR = "Ashutosh, K. and Girdhar, R. and Torresani, L. and Grauman, K.",
  TITLE = "HierVL: Learning Hierarchical Video-Language Embeddings",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "23066-23078",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212923"
}

@inproceedings{bb217873,
  AUTHOR = "Smith, J.S. and Cascante Bonilla, P. and Arbelle, A. and Kim, D.H. and Panda, R. and Cox, D. and Yang, D. and Kira, Z. and Feris, R.S. and Karlinsky, L.",
  TITLE = "ConStruct-VL: Data-Free Continual Structured VL Concepts Learning*",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "14994-15004",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212924"
}

@inproceedings{bb217874,
  AUTHOR = "Chen, Y.X. and Ma, Z.Y. and Zhang, Z.Q. and Qi, Z.A. and Yuan, C.F. and Shan, Y. and Li, B. and Hu, W.M. and Qie, X. and Wu, J.P.",
  TITLE = "ViLEM: Visual-Language Error Modeling for Image-Text Retrieval",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "11018-11027",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212925"
}

@inproceedings{bb217875,
  AUTHOR = "Huang, J.J. and Li, Y. and Feng, J.S. and Wu, X.L. and Sun, X.S. and Ji, R.R.",
  TITLE = "Clover: Towards A Unified Video-Language Alignment and Fusion Model",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "14856-14866",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212926"
}

@inproceedings{bb217876,
  AUTHOR = "Li, C.H. and Li, Z. and Jing, C.C. and Jia, Y.D. and Wu, Y.W.",
  TITLE = "Exploring the Effect of Primitives for Compositional Generalization in Vision-and-Language",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "19092-19101",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212927"
}

@inproceedings{bb217877,
  AUTHOR = "Yao, H.T. and Zhang, R. and Xu, C.S.",
  TITLE = "Visual-Language Prompt Tuning with Knowledge-Guided Context Optimization",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "6757-6767",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212928"
}

@inproceedings{bb217878,
  AUTHOR = "Kwon, H. and Song, T. and Jeong, S. and Kim, J. and Jang, J. and Sohn, K.H.",
  TITLE = "Probabilistic Prompt Learning for Dense Prediction",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "6768-6777",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212929"
}

@inproceedings{bb217879,
  AUTHOR = "Luo, H.C. and Zhai, W. and Zhang, J. and Cao, Y. and Tao, D.C.",
  TITLE = "Leverage Interactive Affinity for Affordance Learning",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "6809-6819",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212930"
}

@inproceedings{bb217880,
  AUTHOR = "Bagad, P. and Tapaswi, M. and Snoek, C.G.M.",
  TITLE = "Test of Time: Instilling Video-Language Models with a Sense of Time",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "2503-2516",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212931"
}

@inproceedings{bb217881,
  AUTHOR = "Kang, G.C. and Kim, S. and Kim, J.H. and Kwak, D.H. and Zhang, B.T.",
  TITLE = "The Dialog Must Go On: Improving Visual Dialog via Generative Self-Training",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "6746-6756",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212932"
}

@inproceedings{bb217882,
  AUTHOR = "Bannur, S. and Hyland, S. and Liu, Q. and Perez Garcia, F. and Ilse, M. and Castro, D.C. and Boecking, B. and Sharma, H. and Bouzid, K. and Thieme, A. and Schwaighofer, A. and Wetscherek, M. and Lungren, M.P. and Nori, A. and Alvarez Valle, J. and Oktay, O.",
  TITLE = "Learning to Exploit Temporal Structure for Biomedical Vision-Language Processing",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "15016-15027",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212933"
}

@inproceedings{bb217883,
  AUTHOR = "Srinivasan, T. and Ren, X. and Thomason, J.",
  TITLE = "Curriculum Learning for Data-Efficient Vision-Language Alignment",
  BOOKTITLE = ODRUM23,
  YEAR = "2023",
  PAGES = "5619-5624",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212934"
}

@inproceedings{bb217884,
  AUTHOR = "Ibing, M. and Lim, I. and Kobbelt, L.",
  TITLE = "Localized Latent Updates for Fine-Tuning Vision-Language Models",
  BOOKTITLE = ECV23,
  YEAR = "2023",
  PAGES = "4509-4518",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212935"
}

@inproceedings{bb217885,
  AUTHOR = "Zhou, Y.T. and Shimada, N.",
  TITLE = "Vision + Language Applications: A Survey",
  BOOKTITLE = GCV23,
  YEAR = "2023",
  PAGES = "826-842",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212936"
}

@inproceedings{bb217886,
  AUTHOR = "Parisot, S. and Yang, Y.X. and McDonagh, S.",
  TITLE = "Learning to Name Classes for Vision and Language Models",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "23477-23486",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212937"
}

@inproceedings{bb217887,
  AUTHOR = "Kim, S. and Jo, D. and Lee, D. and Kim, J.",
  TITLE = "MAGVLT: Masked Generative Vision-and-Language Transformer",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "23338-23348",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212938"
}

@inproceedings{bb217888,
  AUTHOR = "Ji, Y. and Wang, J.J. and Gong, Y. and Zhang, L. and Zhu, Y. and Wang, H.F. and Zhang, J.X. and Sakai, T. and Yang, Y.",
  TITLE = "MAP: Multimodal Uncertainty-Aware Vision-Language Pre-training Model",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "23262-23271",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212939"
}

@inproceedings{bb217889,
  AUTHOR = "Zhang, X. and Wang, W. and Chen, Z. and Xu, Y.F. and Zhang, J. and Tao, D.C.",
  TITLE = "CLAMP: Prompt-based Contrastive Learning for Connecting Language and Animal Pose",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "23272-23281",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212940"
}

@inproceedings{bb217890,
  AUTHOR = "Wang, T. and Ge, Y.X. and Zheng, F. and Cheng, R. and Shan, Y. and Qie, X. and Luo, P.",
  TITLE = "Accelerating Vision-Language Pretraining with Free Language Modeling",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "23161-23170",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212941"
}

@inproceedings{bb217891,
  AUTHOR = "Doveh, S. and Arbelle, A. and Harary, S. and Schwartz, E. and Herzig, R. and Giryes, R. and Feris, R.S. and Panda, R. and Ullman, S. and Karlinsky, L.",
  TITLE = "Teaching Structured Vision and Language Concepts to Vision and Language Models",
  BOOKTITLE = CVPR23,
  YEAR = "2023",
  PAGES = "2657-2668",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212942"
}

@inproceedings{bb217892,
  AUTHOR = "Chino, A. and Teraoka, T.",
  TITLE = "Relevance-aware Question Generation in Non-task-oriented Dialogue Systems",
  BOOKTITLE = VAMR23,
  YEAR = "2023",
  PAGES = "344-358",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212943"
}

@inproceedings{bb217893,
  AUTHOR = "Tang, Z. and Cho, J. and Lei, J. and Bansal, M.",
  TITLE = "PERCEIVER-VL: Efficient Vision-and-Language Modeling with Iterative Latent Attention",
  BOOKTITLE = WACV23,
  YEAR = "2023",
  PAGES = "4399-4409",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212944"
}

@inproceedings{bb217894,
  AUTHOR = "Tripathi, A. and Mishra, A. and Chakraborty, A.",
  TITLE = "Grounding Scene Graphs on Natural Images via Visio-Lingual Message Passing",
  BOOKTITLE = WACV23,
  YEAR = "2023",
  PAGES = "4380-4389",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212945"
}

@inproceedings{bb217895,
  AUTHOR = "Byun, J. and Hwang, T. and Fu, J.L. and Moon, T.",
  TITLE = "GRIT-VLP: Grouped Mini-batch Sampling for Efficient Vision and Language Pre-training",
  BOOKTITLE = ECCV22,
  YEAR = "2022",
  PAGES = "XIX:395-412",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212946"
}

@inproceedings{bb217896,
  AUTHOR = "Yan, S.P. and Hong, L. and Xu, H. and Han, J.H. and Tuytelaars, T. and Li, Z.G. and He, X.M.",
  TITLE = "Generative Negative Text Replay for Continual Vision-Language Pretraining",
  BOOKTITLE = ECCV22,
  YEAR = "2022",
  PAGES = "XXXVI:22-38",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212947"
}

@inproceedings{bb217897,
  AUTHOR = "Zhang, Y.F. and Jiang, M. and Zhao, Q.",
  TITLE = "New Datasets and Models for Contextual Reasoning in Visual Dialog",
  BOOKTITLE = ECCV22,
  YEAR = "2022",
  PAGES = "XXXVI:434-451",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212948"
}

@inproceedings{bb217898,
  AUTHOR = "Pham, H.A. and Le, T.M. and Le, V. and Phuong, T.M. and Tran, T.",
  TITLE = "Video Dialog as Conversation About Objects Living in Space-Time",
  BOOKTITLE = ECCV22,
  YEAR = "2022",
  PAGES = "XXIX:710-726",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212949"
}

@inproceedings{bb217899,
  AUTHOR = "Zhang, Z.F. and Jiang, T.L. and Liu, C.P. and Ji, Y.",
  TITLE = "Coupling Attention and Convolution for Heuristic Network in Visual Dialog",
  BOOKTITLE = ICIP22,
  YEAR = "2022",
  PAGES = "2896-2900",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212950"
}