@inproceedings{bb212000,
        AUTHOR = "Kim, S. and Jo, D. and Lee, D. and Kim, J.",
        TITLE = "{MAGVLT}: Masked Generative Vision-and-Language Transformer",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "23338--23348",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT207083"}

@inproceedings{bb212001,
        AUTHOR = "Ji, Y. and Wang, J. J. and Gong, Y. and Zhang, L. and Zhu, Y. and Wang, H. F. and Zhang, J. X. and Sakai, T. and Yang, Y.",
        TITLE = "{MAP}: Multimodal Uncertainty-Aware Vision-Language Pre-training Model",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "23262--23271",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT207084"}

@inproceedings{bb212002,
        AUTHOR = "Zhang, X. and Wang, W. and Chen, Z. and Xu, Y. F. and Zhang, J. and Tao, D. C.",
        TITLE = "{CLAMP}: Prompt-based Contrastive Learning for Connecting Language and Animal Pose",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "23272--23281",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT207085"}

@inproceedings{bb212003,
        AUTHOR = "Bulat, A. and Tzimiropoulos, G.",
        TITLE = "{LASP}: Text-to-Text Optimization for Language-Aware Soft Prompting of Vision and Language Models",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "23232--23241",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT207086"}

@inproceedings{bb212004,
        AUTHOR = "Wang, J. P. and Zhou, P. and Shou, M. Z. and Yan, S. C.",
        TITLE = "Position-Guided Text Prompt for Vision-Language Pre-Training",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "23242--23251",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT207087"}

@inproceedings{bb212005,
        AUTHOR = "Wang, T. and Ge, Y. X. and Zheng, F. and Cheng, R. and Shan, Y. and Qie, X. and Luo, P.",
        TITLE = "Accelerating Vision-Language Pretraining with Free Language Modeling",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "23161--23170",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT207088"}

@inproceedings{bb212006,
        AUTHOR = "Doveh, S. and Arbelle, A. and Harary, S. and Schwartz, E. and Herzig, R. and Giryes, R. and Feris, R. S. and Panda, R. and Ullman, S. and Karlinsky, L.",
        TITLE = "Teaching Structured Vision and Language Concepts to Vision and Language Models",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "2657--2668",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT207089"}

@inproceedings{bb212007,
        AUTHOR = "Chino, A. and Teraoka, T.",
        TITLE = "Relevance-aware Question Generation in Non-task-oriented Dialogue Systems",
        BOOKTITLE = VAMR23,
        YEAR = "2023",
        PAGES = "344--358",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT207090"}

@inproceedings{bb212008,
        AUTHOR = "Tang, Z. and Cho, J. and Lei, J. and Bansal, M.",
        TITLE = "{PERCEIVER-VL}: Efficient Vision-and-Language Modeling with Iterative Latent Attention",
        BOOKTITLE = WACV23,
        YEAR = "2023",
        PAGES = "4399--4409",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT207091"}

@inproceedings{bb212009,
        AUTHOR = "Tripathi, A. and Mishra, A. and Chakraborty, A.",
        TITLE = "Grounding Scene Graphs on Natural Images via Visio-Lingual Message Passing",
        BOOKTITLE = WACV23,
        YEAR = "2023",
        PAGES = "4380--4389",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT207092"}

% NOTE(review): the Roman-numeral prefix in PAGES presumably denotes the
% proceedings volume; confirm before moving it into a VOLUME field.
@inproceedings{bb212010,
        AUTHOR = "Byun, J. and Hwang, T. and Fu, J. L. and Moon, T.",
        TITLE = "{GRIT-VLP}: Grouped Mini-batch Sampling for Efficient Vision and Language Pre-training",
        BOOKTITLE = ECCV22,
        YEAR = "2022",
        PAGES = "XIX:395--412",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT207093"}

% NOTE(review): the Roman-numeral prefix in PAGES presumably denotes the
% proceedings volume; confirm before moving it into a VOLUME field.
@inproceedings{bb212011,
        AUTHOR = "Yan, S. P. and Hong, L. and Xu, H. and Han, J. H. and Tuytelaars, T. and Li, Z. G. and He, X. M.",
        TITLE = "Generative Negative Text Replay for Continual Vision-Language Pretraining",
        BOOKTITLE = ECCV22,
        YEAR = "2022",
        PAGES = "XXXVI:22--38",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT207094"}

% NOTE(review): the Roman-numeral prefix in PAGES presumably denotes the
% proceedings volume; confirm before moving it into a VOLUME field.
@inproceedings{bb212012,
        AUTHOR = "Zhang, Y. F. and Jiang, M. and Zhao, Q.",
        TITLE = "New Datasets and Models for Contextual Reasoning in Visual Dialog",
        BOOKTITLE = ECCV22,
        YEAR = "2022",
        PAGES = "XXXVI:434--451",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT207095"}

% NOTE(review): the Roman-numeral prefix in PAGES presumably denotes the
% proceedings volume; confirm before moving it into a VOLUME field.
@inproceedings{bb212013,
        AUTHOR = "Pham, H. A. and Le, T. M. and Le, V. and Phuong, T. M. and Tran, T.",
        TITLE = "Video Dialog as Conversation About Objects Living in Space-Time",
        BOOKTITLE = ECCV22,
        YEAR = "2022",
        PAGES = "XXIX:710--726",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT207096"}

@inproceedings{bb212014,
        AUTHOR = "Zhang, Z. F. and Jiang, T. L. and Liu, C. P. and Ji, Y.",
        TITLE = "Coupling Attention and Convolution for Heuristic Network in Visual Dialog",
        BOOKTITLE = ICIP22,
        YEAR = "2022",
        PAGES = "2896--2900",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT207097"}

@inproceedings{bb212015,
        AUTHOR = "Zhang, H. Y. and Li, Y. M. and Zhang, Z. F.",
        TITLE = "Video-Grounded Dialogues with Joint Video and Image Training",
        BOOKTITLE = ICIP22,
        YEAR = "2022",
        PAGES = "3903--3907",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT207098"}

@inproceedings{bb212016,
        AUTHOR = "Zhang, S. and Jiang, X. Z. and Yang, Z. and Wan, T. and Qin, Z. C.",
        TITLE = "Reasoning with Multi-Structure Commonsense Knowledge in Visual Dialog",
        BOOKTITLE = MULA22,
        YEAR = "2022",
        PAGES = "4599--4608",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT207099"}

@inproceedings{bb212017,
        AUTHOR = "Zhu, Y. and Weng, Y. and Zhu, F. D. and Liang, X. D. and Ye, Q. X. and Lu, Y. T. and Jiao, J. B.",
        TITLE = "Self-Motivated Communication Agent for Real-World Vision-Dialog Navigation",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "1574--1583",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT207100"}

@inproceedings{bb212018,
        AUTHOR = "Engin, D. and Schnitzler, F. and Duong, N. Q. K. and Avrithis, Y.",
        TITLE = "On the hidden treasure of dialog in video question answering",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "2044--2053",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT207101"}

@inproceedings{bb212019,
        AUTHOR = "Matsumori, S. and Shingyouchi, K. and Abe, Y. and Fukuchi, Y. and Sugiura, K. and Imai, M.",
        TITLE = "Unified Questioner Transformer for Descriptive Question Generation in Goal-Oriented Visual Dialogue",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "1878--1887",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT207102"}

@inproceedings{bb212020,
        AUTHOR = "Tu, T. and Ping, Q. and Thattai, G. and Tur, G. and Natarajan, P.",
        TITLE = "Learning Better Visual Dialog Agents with Pretrained Visual-Linguistic Representation",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "5618--5627",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT207103"}

@inproceedings{bb212021,
        AUTHOR = "Jiang, T. L. and Ji, Y. and Liu, C. P.",
        TITLE = "Integrating Historical States and Co-attention Mechanism for Visual Dialog",
        BOOKTITLE = ICPR21,
        YEAR = "2021",
        PAGES = "2041--2048",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT207104"}

% NOTE(review): the Roman-numeral prefix in PAGES presumably denotes the
% proceedings volume; confirm before moving it into a VOLUME field.
@inproceedings{bb212022,
        AUTHOR = "Nguyen, V. Q. and Suganuma, M. and Okatani, T.",
        TITLE = "Efficient Attention Mechanism for Visual Dialog that Can Handle All the Interactions Between Multiple Inputs",
        BOOKTITLE = ECCV20,
        YEAR = "2020",
        PAGES = "XXIV:223--240",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT207105"}

% NOTE(review): the Roman-numeral prefix in PAGES presumably denotes the
% proceedings volume; confirm before moving it into a VOLUME field.
@inproceedings{bb212023,
        AUTHOR = "Murahari, V. and Batra, D. and Parikh, D. and Das, A.",
        TITLE = "Large-scale Pretraining for Visual Dialog: A Simple State-of-the-art Baseline",
        BOOKTITLE = ECCV20,
        YEAR = "2020",
        PAGES = "XVIII:336--352",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT207106"}

% NOTE(review): the Roman-numeral prefix in PAGES presumably denotes the
% proceedings volume; confirm before moving it into a VOLUME field.
@inproceedings{bb212024,
        AUTHOR = "Zhu, Y. and Wu, Y. and Yang, Y. and Yan, Y.",
        TITLE = "Describing Unseen Videos via Multi-Modal Cooperative Dialog Agents",
        BOOKTITLE = ECCV20,
        YEAR = "2020",
        PAGES = "XXIII:153--169",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT207107"}

@inproceedings{bb212025,
        AUTHOR = "Qi, J. and Niu, Y. and Huang, J. and Zhang, H.",
        TITLE = "Two Causal Principles for Improving Visual Dialog",
        BOOKTITLE = CVPR20,
        YEAR = "2020",
        PAGES = "10857--10866",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT207108"}

@inproceedings{bb212026,
        AUTHOR = "Abbasnejad, E. and Teney, D. and Parvaneh, A. and Shi, J. and van den Hengel, A. J.",
        TITLE = "Counterfactual Vision and Language Learning",
        BOOKTITLE = CVPR20,
        YEAR = "2020",
        PAGES = "10041--10051",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT207109"}

@inproceedings{bb212027,
        AUTHOR = "Zhu, Y. and Zhu, F. and Zhan, Z. and Lin, B. and Jiao, J. and Chang, X. and Liang, X.",
        TITLE = "Vision-Dialog Navigation by Exploring Cross-Modal Memory",
        BOOKTITLE = CVPR20,
        YEAR = "2020",
        PAGES = "10727--10736",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT207110"}

@inproceedings{bb212028,
        AUTHOR = "Yang, T. and Zha, Z. and Zhang, H.",
        TITLE = "Making History Matter: History-Advantage Sequence Training for Visual Dialog",
        BOOKTITLE = ICCV19,
        YEAR = "2019",
        PAGES = "2561--2569",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT207111"}

@inproceedings{bb212029,
        AUTHOR = "Guo, D. and Xu, C. and Tao, D. C.",
        TITLE = "Image-Question-Answer Synergistic Network for Visual Dialog",
        BOOKTITLE = CVPR19,
        YEAR = "2019",
        PAGES = "10426--10435",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT207112"}

@inproceedings{bb212030,
        AUTHOR = "Zheng, Z. L. and Wang, W. G. and Qi, S. Y. and Zhu, S. C.",
        TITLE = "Reasoning Visual Dialogs With Structural and Partial Observations",
        BOOKTITLE = CVPR19,
        YEAR = "2019",
        PAGES = "6662--6671",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT207113"}

% NOTE(review): the Roman-numeral prefix in PAGES presumably denotes the
% proceedings volume; confirm before moving it into a VOLUME field.
@inproceedings{bb212031,
        AUTHOR = "Bani, G. and Belli, D. and Dagan, G. and Geenen, A. and Skliar, A. and Venkatesh, A. and Baumgartner, T. and Bruni, E. and Fernandez, R.",
        TITLE = "Adding Object Detection Skills to Visual Dialogue Agents",
        BOOKTITLE = VL18,
        YEAR = "2018",
        PAGES = "IV:180--187",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT207114"}

@inproceedings{bb212032,
        AUTHOR = "Yang, M. and Yang, N. S. R. and Zhang, K. and Tao, J.",
        TITLE = "Self-Talk: Responses to Users' Opinions and Challenges in Human Computer Dialog",
        BOOKTITLE = ICPR18,
        YEAR = "2018",
        PAGES = "2839--2844",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT207115"}

@inproceedings{bb212033,
        AUTHOR = "Jain, U. and Schwing, A. and Lazebnik, S.",
        TITLE = "Two Can Play This Game: Visual Dialog with Discriminative Question Generation and Answering",
        BOOKTITLE = CVPR18,
        YEAR = "2018",
        PAGES = "5754--5763",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT207116"}

@inproceedings{bb212034,
        AUTHOR = "Dokania, P. K. and Torr, P. H. S. and Siddharth, N. and Massiceti, D.",
        TITLE = "{FLIPDIAL}: A Generative Model for Two-Way Visual Dialogue",
        BOOKTITLE = CVPR18,
        YEAR = "2018",
        PAGES = "6097--6105",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT207117"}

@inproceedings{bb212035,
        AUTHOR = "Wu, Q. and Wang, P. and Shen, C. and Reid, I. D. and van den Hengel, A. J.",
        TITLE = "Are You Talking to Me? Reasoned Visual Dialog Generation Through Adversarial Learning",
        BOOKTITLE = CVPR18,
        YEAR = "2018",
        PAGES = "6106--6115",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT207118"}

% NOTE(review): the Roman-numeral prefix in PAGES presumably denotes the
% proceedings volume; confirm before moving it into a VOLUME field.
@inproceedings{bb212036,
        AUTHOR = "Kottur, S. and Moura, J. M. F. and Parikh, D. and Batra, D. and Rohrbach, M.",
        TITLE = "Visual Coreference Resolution in Visual Dialog Using Neural Module Networks",
        BOOKTITLE = ECCV18,
        YEAR = "2018",
        PAGES = "XV:160--178",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT207119"}

% NOTE(review): the Roman-numeral prefix in PAGES presumably denotes the
% proceedings volume; confirm before moving it into a VOLUME field.
@inproceedings{bb212037,
        AUTHOR = "Strub, F. and Seurin, M. and Perez, E. and de Vries, H. and Mary, J. and Preux, P. and Courville, A. and Pietquin, O.",
        TITLE = "Visual Reasoning with Multi-hop Feature Modulation",
        BOOKTITLE = ECCV18,
        YEAR = "2018",
        PAGES = "VI:808--831",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT207120"}

@inproceedings{bb212038,
        AUTHOR = "Das, A. and Kottur, S. and Moura, J. M. F. and Lee, S. and Batra, D.",
        TITLE = "Learning Cooperative Visual Dialog Agents with Deep Reinforcement Learning",
        BOOKTITLE = ICCV17,
        YEAR = "2017",
        PAGES = "2970--2979",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT207121"}

@inproceedings{bb212039,
        AUTHOR = "de Vries, H. and Strub, F. and Chandar, S. and Pietquin, O. and Larochelle, H. and Courville, A.",
        TITLE = "{GuessWhat?!} Visual Object Discovery through Multi-modal Dialogue",
        BOOKTITLE = CVPR17,
        YEAR = "2017",
        PAGES = "4466--4475",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT207122"}

@inproceedings{bb212040,
        AUTHOR = "Nam, H. and Ha, J. W. and Kim, J.",
        TITLE = "Dual Attention Networks for Multimodal Reasoning and Matching",
        BOOKTITLE = CVPR17,
        YEAR = "2017",
        PAGES = "2156--2164",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT207123"}

@inproceedings{bb212041,
        AUTHOR = "Johnson, J. and Hariharan, B. and van der Maaten, L. and Hoffman, J. and Fei Fei, L. and Zitnick, C. L. and Girshick, R.",
        TITLE = "Inferring and Executing Programs for Visual Reasoning",
        BOOKTITLE = ICCV17,
        YEAR = "2017",
        PAGES = "3008--3017",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT207124"}

@inproceedings{bb212042,
        AUTHOR = "Johnson, J. and Hariharan, B. and van der Maaten, L. and Fei Fei, L. and Zitnick, C. L. and Girshick, R.",
        TITLE = "{CLEVR}: A Diagnostic Dataset for Compositional Language and Elementary Visual Reasoning",
        BOOKTITLE = CVPR17,
        YEAR = "2017",
        PAGES = "1988--1997",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT207125"}

@inproceedings{bb212043,
        AUTHOR = "Das, A. and Kottur, S. and Gupta, K. and Singh, A. and Yadav, D. and Moura, J. M. F. and Parikh, D. and Batra, D.",
        TITLE = "Visual Dialog",
        BOOKTITLE = CVPR17,
        YEAR = "2017",
        PAGES = "1080--1089",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT207126"}

@article{bb212044,
        AUTHOR = "Zhou, N. and Fan, J. P.",
        TITLE = "Automatic image-text alignment for large-scale web image indexing and retrieval",
        JOURNAL = PR,
        VOLUME = "48",
        YEAR = "2015",
        NUMBER = "1",
        PAGES = "205--219",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT207127"}

@article{bb212045,
        AUTHOR = "Huang, F. R. and Zhang, X. M. and Zhao, Z. H. and Li, Z. J.",
        TITLE = "Bi-Directional Spatial-Semantic Attention Networks for Image-Text Matching",
        JOURNAL = IP,
        VOLUME = "28",
        YEAR = "2019",
        NUMBER = "4",
        MONTH = apr,
        PAGES = "2008--2020",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT207128"}

@article{bb212046,
        AUTHOR = "Otto, C. and Springstein, M. and Anand, A. and Ewerth, R.",
        TITLE = "Characterization and classification of semantic image-text relations",
        JOURNAL = MultInfoRetr,
        VOLUME = "9",
        YEAR = "2020",
        NUMBER = "1",
        MONTH = mar,
        PAGES = "31--45",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT207129"}

% Journal article. PAGES holds a single identifier rather than a range
% (presumably an article number -- TODO confirm), so it is kept verbatim.
@article{bb212047,
        author    = {Niu, K. and Huang, Y. and Wang, L.},
        title     = {Re-ranking image-text matching by adaptive metric fusion},
        journal   = PR,
        volume    = {104},
        year      = {2020},
        pages     = {107351},
        bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT207130}
}

@article{bb212048,
        AUTHOR = "Wen, K. Y. and Gu, X. D. and Cheng, Q. R.",
        TITLE = "Learning Dual Semantic Relations With Graph Attention for Image-Text Matching",
        JOURNAL = CirSysVideo,
        VOLUME = "31",
        YEAR = "2021",
        NUMBER = "7",
        MONTH = jul,
        PAGES = "2866--2879",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT207131"}

@article{bb212049,
        AUTHOR = "Yang, S. and Li, Q. and Li, W. H. and Li, X. and Liu, A. A.",
        TITLE = "Dual-Level Representation Enhancement on Characteristic and Context for Image-Text Retrieval",
        JOURNAL = CirSysVideo,
        VOLUME = "32",
        YEAR = "2022",
        NUMBER = "11",
        MONTH = nov,
        PAGES = "8037--8050",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT207132"}

@article{bb212050,
        AUTHOR = "Jing, Y. and Wang, W. and Wang, L. and Tan, T. N.",
        TITLE = "Learning Aligned Image-Text Representations Using Graph Attentive Relational Network",
        JOURNAL = IP,
        VOLUME = "30",
        YEAR = "2021",
        PAGES = "1840--1852",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT207133"}

@inproceedings{bb212051,
        AUTHOR = "Zhao, F. and Huang, Y. Z. and Wang, L. and Tan, T. N.",
        TITLE = "Deep Semantic Ranking Based Hashing for Multi-Label Image Retrieval",
        BOOKTITLE = CVPR15,
        YEAR = "2015",
        PAGES = "1556--1564",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT207134"}

@article{bb212052,
        AUTHOR = "Lan, H. and Zhang, P.",
        TITLE = "Learning and Integrating Multi-Level Matching Features for Image-Text Retrieval",
        JOURNAL = SPLetters,
        VOLUME = "29",
        YEAR = "2022",
        PAGES = "374--378",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT207135"}

@article{bb212053,
        AUTHOR = "Wu, J. and Wu, C. L. and Lu, J. and Wang, L. Q. and Cui, X. R.",
        TITLE = "Region Reinforcement Network With Topic Constraint for Image-Text Matching",
        JOURNAL = CirSysVideo,
        VOLUME = "32",
        YEAR = "2022",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "388--397",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT207136"}

@article{bb212054,
        AUTHOR = "Malali, N. and Keller, Y.",
        TITLE = "Learning to Embed Semantic Similarity for Joint Image-Text Retrieval",
        JOURNAL = PAMI,
        VOLUME = "44",
        YEAR = "2022",
        NUMBER = "12",
        MONTH = dec,
        PAGES = "10252--10260",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT207137"}

@article{bb212055,
        AUTHOR = "Tian, M. X. and Wu, X. X. and Jia, Y. D.",
        TITLE = "Adaptive Latent Graph Representation Learning for Image-Text Matching",
        JOURNAL = IP,
        VOLUME = "32",
        YEAR = "2023",
        PAGES = "471--482",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT207138"}

@article{bb212056,
        AUTHOR = "Li, K. P. and Zhang, Y. L. and Li, K. and Li, Y. Y. and Fu, Y.",
        TITLE = "Image-Text Embedding Learning via Visual and Textual Semantic Reasoning",
        JOURNAL = PAMI,
        VOLUME = "45",
        YEAR = "2023",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "641--656",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT207139"}

@inproceedings{bb212057,
        AUTHOR = "Li, K. P. and Zhang, Y. L. and Li, K. and Li, Y. Y. and Fu, Y.",
        TITLE = "Visual Semantic Reasoning for Image-Text Matching",
        BOOKTITLE = ICCV19,
        YEAR = "2019",
        PAGES = "4653--4661",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT207140"}

@article{bb212058,
        AUTHOR = "Diao, H. W. and Zhang, Y. and Liu, W. and Ruan, X. and Lu, H. C.",
        TITLE = "Plug-and-Play Regulators for Image-Text Matching",
        JOURNAL = IP,
        VOLUME = "32",
        YEAR = "2023",
        PAGES = "2322--2334",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT207141"}

% PAGES holds a single identifier rather than a range (presumably an
% article number -- TODO confirm), so it is kept verbatim.
@article{bb212059,
        AUTHOR = "Tian, Y. M. and Ding, A. and Wang, D. and Luo, X. M. and Wan, B. and Wang, Y. F.",
        TITLE = "Bi-Attention enhanced representation learning for image-text matching",
        JOURNAL = PR,
        VOLUME = "140",
        YEAR = "2023",
        PAGES = "109548",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT207142"}

@article{bb212060,
        AUTHOR = "Zhang, K. and Mao, Z. D. and Liu, A. A. and Zhang, Y. D.",
        TITLE = "Unified Adaptive Relevance Distinguishable Attention Network for Image-Text Matching",
        JOURNAL = MultMed,
        VOLUME = "25",
        YEAR = "2023",
        PAGES = "1320--1332",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT207143"}

@article{bb212061,
        AUTHOR = "Liu, Z. and Chen, F. L. and Xu, J. and Pei, W. J. and Lu, G. M.",
        TITLE = "Image-Text Retrieval With Cross-Modal Semantic Importance Consistency",
        JOURNAL = CirSysVideo,
        VOLUME = "33",
        YEAR = "2023",
        NUMBER = "5",
        MONTH = may,
        PAGES = "2465--2476",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT207144"}

@article{bb212062,
        AUTHOR = "Shang, H. and Zhao, G. S. and Shi, J. and Qian, X. M.",
        TITLE = "A Multiview Text Imagination Network Based on Latent Alignment for Image-Text Matching",
        JOURNAL = IEEE_Int_Sys,
        VOLUME = "38",
        YEAR = "2023",
        NUMBER = "3",
        MONTH = may,
        PAGES = "41--50",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT207145"}

@article{bb212063,
        AUTHOR = "Liu, C. and Zhang, Y. Q. and Wang, H. and Chen, W. H. and Wang, F. and Huang, Y. and Shen, Y. D. and Wang, L.",
        TITLE = "Efficient Token-Guided Image-Text Retrieval With Consistent Multimodal Contrastive Training",
        JOURNAL = IP,
        VOLUME = "32",
        YEAR = "2023",
        PAGES = "3622--3633",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT207146"}

@article{bb212064,
        AUTHOR = "Li, W. R. and Ma, Z. Y. and Deng, L. J. and Fan, X. P. and Tian, Y. H.",
        TITLE = "Neuron-Based Spiking Transmission and Reasoning Network for Robust Image-Text Retrieval",
        JOURNAL = CirSysVideo,
        VOLUME = "33",
        YEAR = "2023",
        NUMBER = "7",
        MONTH = jul,
        PAGES = "3516--3528",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT207147"}

@article{bb212065,
        AUTHOR = "Li, W. R. and Ma, Z. Y. and Shi, J. Q. and Fan, X. P.",
        TITLE = "The Style Transformer With Common Knowledge Optimization for Image-Text Retrieval",
        JOURNAL = SPLetters,
        VOLUME = "30",
        YEAR = "2023",
        PAGES = "1197--1201",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT207148"}

@article{bb212066,
        AUTHOR = "Zhu, H. G. and Zhang, C. J. and Wei, Y. C. and Huang, S. and Zhao, Y.",
        TITLE = "{ESA}: External Space Attention Aggregation for Image-Text Retrieval",
        JOURNAL = CirSysVideo,
        VOLUME = "33",
        YEAR = "2023",
        NUMBER = "10",
        MONTH = oct,
        PAGES = "6131--6143",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT207149"}

@article{bb212067,
        AUTHOR = "Li, Z. and Guo, C. and Feng, Z. and Hwang, J. N. and Du, Z. T.",
        TITLE = "Integrating Language Guidance Into Image-Text Matching for Correcting False Negatives",
        JOURNAL = MultMed,
        VOLUME = "26",
        YEAR = "2024",
        PAGES = "103--116",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT207150"}

@article{bb212068,
        AUTHOR = "Zhang, Y. and Ji, Z. and Wang, D. and Pang, Y. W. and Li, X. L.",
        TITLE = "{USER}: Unified Semantic Enhancement With Momentum Contrast for Image-Text Retrieval",
        JOURNAL = IP,
        VOLUME = "33",
        YEAR = "2024",
        PAGES = "595--609",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT207151"}

@article{bb212069,
        AUTHOR = "Zhuang, J. and Yu, J. and Ding, Y. and Qu, X. Y. and Hu, Y.",
        TITLE = "Towards Fast and Accurate Image-Text Retrieval With Self-Supervised Fine-Grained Alignment",
        JOURNAL = MultMed,
        VOLUME = "26",
        YEAR = "2024",
        PAGES = "1361--1372",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT207152"}

@article{bb212070,
        AUTHOR = "Liu, X. and He, Y. and Cheung, Y. M. and Xu, X. and Wang, N. N.",
        TITLE = "Learning Relationship-Enhanced Semantic Graph for Fine-Grained Image-Text Matching",
        JOURNAL = Cyber,
        VOLUME = "54",
        YEAR = "2024",
        NUMBER = "2",
        MONTH = feb,
        PAGES = "948--961",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT207153"}

@article{bb212071,
        AUTHOR = "Li, W. H. and Yang, S. and Li, Q. and Li, X. and Liu, A. A.",
        TITLE = "Commonsense-Guided Semantic and Relational Consistencies for Image-Text Retrieval",
        JOURNAL = MultMed,
        VOLUME = "26",
        YEAR = "2024",
        PAGES = "1867--1880",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT207154"}

@article{bb212072,
        AUTHOR = "Wu, D. Q. and Li, H. H. and Gu, C. and Liu, H. and Xu, C. and Hou, Y. and Guo, L.",
        TITLE = "Feature First: Advancing Image-Text Retrieval Through Improved Visual Features",
        JOURNAL = MultMed,
        VOLUME = "26",
        YEAR = "2024",
        PAGES = "3827--3841",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT207155"}

@inproceedings{bb212073,
        AUTHOR = "Fu, Z. R. and Mao, Z. D. and Song, Y. and Zhang, Y. D.",
        TITLE = "Learning Semantic Relationship among Instances for Image-Text Matching",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "15159--15168",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT207156"}

@inproceedings{bb212074,
        AUTHOR = "Zhang, W. and Xu, X. W. and Tao, Y. and Wang, X. D. and Wang, C. and Wei, Z. M.",
        TITLE = "Bi-Directional Image-Text Retrieval With Position Attention and Similarity Filtering",
        BOOKTITLE = ICIVC22,
        YEAR = "2022",
        PAGES = "635--640",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT207157"}

@inproceedings{bb212075,
        AUTHOR = "Li, Z. and Nian, X. H. and Pan, C. and Yang, D. and Xiong, H. Y. and Wang, H. B.",
        TITLE = "Relation Graph Reasoning for Image-Text Matching",
        BOOKTITLE = ICIVC22,
        YEAR = "2022",
        PAGES = "319--324",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT207158"}

@inproceedings{bb212076,
        AUTHOR = "Zhang, K. and Mao, Z. D. and Wang, Q. and Zhang, Y. D.",
        TITLE = "Negative-Aware Attention Framework for Image-Text Matching",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "15640--15649",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT207159"}

@inproceedings{bb212077,
        AUTHOR = "Long, S. and Han, S. C. and Wan, X. J. and Poon, J.",
        TITLE = "{GraDual}: Graph-based Dual-modal Representation for Image-Text Matching",
        BOOKTITLE = WACV22,
        YEAR = "2022",
        PAGES = "2463--2472",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT207160"}

@inproceedings{bb212078,
        AUTHOR = "Biten, A. F. and Mafla, A. and Gomez, L. and Karatzas, D.",
        TITLE = "Is An Image Worth Five Sentences? A New Look into Semantics for Image-Text Matching",
        BOOKTITLE = WACV22,
        YEAR = "2022",
        PAGES = "2483--2492",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT207161"}

@inproceedings{bb212079,
        AUTHOR = "Mithun, N. C. and Pasricha, R. and Papalexakis, E. and Roy Chowdhury, A. K.",
        TITLE = "Webly Supervised Image-Text Embedding with Noisy Tag Refinement",
        BOOKTITLE = ICPR21,
        YEAR = "2021",
        PAGES = "7454--7461",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT207162"}

@inproceedings{bb212080,
        AUTHOR = "Chen, J. A. and Zhang, L. and Wang, Q. and Bai, C. and Kpalma, K.",
        TITLE = "Intra-Modal Constraint Loss for Image-Text Retrieval",
        BOOKTITLE = ICIP22,
        YEAR = "2022",
        PAGES = "4023--4027",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT207163"}

@inproceedings{bb212081,
        AUTHOR = "Liu, Y. and Wang, H. Q. and Meng, F. Y. and Liu, M. Y. and Liu, H.",
        TITLE = "Attend, Correct and Focus: A Bidirectional Correct Attention Network for Image-Text Matching",
        BOOKTITLE = ICIP21,
        YEAR = "2021",
        PAGES = "2673--2677",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT207164"}

@inproceedings{bb212082,
        AUTHOR = "Yang, S. T. and Huang, K. H. and Howe, B.",
        TITLE = "{JECL}: Joint Embedding and Cluster Learning for Image-Text Pairs",
        BOOKTITLE = ICPR21,
        YEAR = "2021",
        PAGES = "8344--8351",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT207165"}

@inproceedings{bb212083,
        AUTHOR = "Mikriukov, G. and Ravanbakhsh, M. and Demir, B.",
        TITLE = "An Unsupervised Cross-Modal Hashing Method Robust to Noisy Training
Image-Text Correspondences in Remote Sensing",
        BOOKTITLE = ICIP22,
        YEAR = "2022",
        PAGES = "2556--2560",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT207166"}

@inproceedings{bb212084,
        AUTHOR = "Anwaar, M. U. and Labintcev, E. and Kleinsteuber, M.",
        TITLE = "Compositional Learning of Image-Text Query for Image Retrieval",
        BOOKTITLE = WACV21,
        YEAR = "2021",
        PAGES = "1139--1148",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT207167"}

@inproceedings{bb212085,
        AUTHOR = "Messina, N. and Falchi, F. and Esuli, A. and Amato, G.",
        TITLE = "Transformer Reasoning Network for Image-Text Matching and Retrieval",
        BOOKTITLE = ICPR21,
        YEAR = "2021",
        PAGES = "5222--5229",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT207168"}

@inproceedings{bb212086,
        AUTHOR = "Zhang, Q. and Lei, Z. and Zhang, Z. X. and Li, S. Z.",
        TITLE = "Context-Aware Attention Network for Image-Text Retrieval",
        BOOKTITLE = CVPR20,
        YEAR = "2020",
        PAGES = "3533--3542",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT207169"}

@inproceedings{bb212087,
        AUTHOR = "Chen, Y. C. and Li, L. J. and Yu, L. C. and El Kholy, A. and Ahmed, F. and Gan, Z. and Cheng, Y. and Liu, J. J.",
        TITLE = "Uniter: Universal Image-Text Representation Learning",
        BOOKTITLE = ECCV20,
        YEAR = "2020",
        PAGES = "XXX: 104--120",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT207170"}

@inproceedings{bb212088,
        AUTHOR = "Wang, H. R. and Zhang, Y. and Ji, Z. and Pang, Y. W. and Ma, L.",
        TITLE = "Consensus-aware Visual-semantic Embedding for Image-Text Matching",
        BOOKTITLE = ECCV20,
        YEAR = "2020",
        PAGES = "XXIV:18--34",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT207171"}

@inproceedings{bb212089,
        AUTHOR = "Chen, T. L. and Deng, J. J. and Luo, J. B.",
        TITLE = "Adaptive Offline Quintuplet Loss for Image-text Matching",
        BOOKTITLE = ECCV20,
        YEAR = "2020",
        PAGES = "XIII:549--565",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT207172"}

@inproceedings{bb212090,
        AUTHOR = "Lee, K. H. and Chen, X. and Hua, G. and Hu, H. D. and He, X. D.",
        TITLE = "Stacked Cross Attention for Image-Text Matching",
        BOOKTITLE = ECCV18,
        YEAR = "2018",
        PAGES = "II: 212--228",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT207173"}

@inproceedings{bb212091,
        AUTHOR = "Zhang, Y. and Lu, H. C.",
        TITLE = "Deep Cross-Modal Projection Learning for Image-Text Matching",
        BOOKTITLE = ECCV18,
        YEAR = "2018",
        PAGES = "I: 707--723",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT207174"}

@inproceedings{bb212092,
        AUTHOR = "Plummer, B. A. and Kordas, P. and Kiapour, M. H. and Zheng, S. and Piramuthu, R. and Lazebnik, S.",
        TITLE = "Conditional Image-Text Embedding Networks",
        BOOKTITLE = ECCV18,
        YEAR = "2018",
        PAGES = "XII: 258--274",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT207175"}

@article{bb212093,
        AUTHOR = "Liang, J. W. and Jiang, L. and Cao, L. L. and Kalantidis, Y. and Li, L. J. and Hauptmann, A. G.",
        TITLE = "Focal Visual-Text Attention for {Memex} Question Answering",
        JOURNAL = PAMI,
        VOLUME = "41",
        YEAR = "2019",
        NUMBER = "8",
        MONTH = aug,
        PAGES = "1893--1908",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207177"}

@inproceedings{bb212094,
        AUTHOR = "Liang, J. W. and Jiang, L. and Cao, L. L. and Li, L. J. and Hauptmann, A. G.",
        TITLE = "Focal Visual-Text Attention for Visual Question Answering",
        BOOKTITLE = CVPR18,
        YEAR = "2018",
        PAGES = "6135--6143",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207178"}

@article{bb212095,
        AUTHOR = "Riquelme, F. and de Goyeneche, A. and Zhang, Y. D. and Niebles, J. C. and Soto, A.",
        TITLE = "Explaining {VQA} predictions using visual grounding and a knowledge
base",
        JOURNAL = IVC,
        VOLUME = "101",
        YEAR = "2020",
        PAGES = "103968",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207179"}

@article{bb212096,
        AUTHOR = "Yang, Z. Y. and Kumar, T. and Chen, T. L. and Su, J. S. and Luo, J. B.",
        TITLE = "Grounding-Tracking-Integration",
        JOURNAL = CirSysVideo,
        VOLUME = "31",
        YEAR = "2021",
        NUMBER = "9",
        MONTH = sep,
        PAGES = "3433--3443",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207180"}

@article{bb212097,
        AUTHOR = "Zhang, W. X. and Ma, C. and Wu, Q. and Yang, X. K.",
        TITLE = "Language-Guided Navigation via Cross-Modal Grounding and Alternate
Adversarial Learning",
        JOURNAL = CirSysVideo,
        VOLUME = "31",
        YEAR = "2021",
        NUMBER = "9",
        MONTH = sep,
        PAGES = "3469--3481",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207181"}

@article{bb212098,
        AUTHOR = "Zhai, S. L. and Guo, G. B. and Yuan, F. J. and Liu, Y. and Wang, X. W.",
        TITLE = "{VSE-fs}: Fast Full-Sample Visual Semantic Embedding",
        JOURNAL = IEEE_Int_Sys,
        VOLUME = "36",
        YEAR = "2021",
        NUMBER = "4",
        MONTH = jul,
        PAGES = "3--12",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207182"}

@article{bb212099,
        AUTHOR = "Bargal, S. A. and Zunino, A. and Petsiuk, V. and Zhang, J. M. and Saenko, K. and Murino, V. and Sclaroff, S.",
        TITLE = "Guided Zoom: Zooming into Network Evidence to Refine Fine-Grained
Model Decisions",
        JOURNAL = PAMI,
        VOLUME = "43",
        YEAR = "2021",
        NUMBER = "11",
        MONTH = nov,
        PAGES = "4196--4202",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207183"}

Last update: Feb 29, 2024 at 09:13:14