@inproceedings{bb217900,
  author    = {Zhang, H. Y. and Li, Y. M. and Zhang, Z. F.},
  title     = {Video-Grounded Dialogues with Joint Video and Image Training},
  booktitle = ICIP22,
  year      = {2022},
  pages     = {3903--3907},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212951},
}

@inproceedings{bb217901,
  author    = {Zhang, S. Y. and Jiang, X. Z. and Yang, Z. Q. and Wan, T. and Qin, Z. C.},
  title     = {Reasoning with Multi-Structure Commonsense Knowledge in Visual Dialog},
  booktitle = MULA22,
  year      = {2022},
  pages     = {4599--4608},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212952},
}

@inproceedings{bb217902,
  author    = {Zhu, Y. and Weng, Y. and Zhu, F. D. and Liang, X. D. and Ye, Q. X. and Lu, Y. T. and Jiao, J. B.},
  title     = {Self-Motivated Communication Agent for Real-World Vision-Dialog Navigation},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {1574--1583},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212953},
}

@inproceedings{bb217903,
  author    = {Engin, D. and Schnitzler, F. and Duong, N. Q. K. and Avrithis, Y.},
  title     = {On the hidden treasure of dialog in video question answering},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {2044--2053},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212954},
}

@inproceedings{bb217904,
  author    = {Matsumori, S. and Shingyouchi, K. and Abe, Y. and Fukuchi, Y. and Sugiura, K. and Imai, M.},
  title     = {Unified Questioner Transformer for Descriptive Question Generation in Goal-Oriented Visual Dialogue},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {1878--1887},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212955},
}

@inproceedings{bb217905,
  author    = {Tu, T. and Ping, Q. and Thattai, G. and Tur, G. and Natarajan, P.},
  title     = {Learning Better Visual Dialog Agents with Pretrained Visual-Linguistic Representation},
  booktitle = CVPR21,
  year      = {2021},
  pages     = {5618--5627},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212956},
}

@inproceedings{bb217906,
  author    = {Jiang, T. L. and Ji, Y. and Liu, C. P.},
  title     = {Integrating Historical States and Co-attention Mechanism for Visual Dialog},
  booktitle = ICPR21,
  year      = {2021},
  pages     = {2041--2048},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212957},
}

@inproceedings{bb217907,
  author    = {Nguyen, V. Q. and Suganuma, M. and Okatani, T.},
  title     = {Efficient Attention Mechanism for Visual Dialog that Can Handle All the Interactions Between Multiple Inputs},
  booktitle = ECCV20,
  year      = {2020},
  pages     = {XXIV:223--240},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212958},
}

@inproceedings{bb217908,
  author    = {Murahari, V. and Batra, D. and Parikh, D. and Das, A.},
  title     = {Large-scale Pretraining for Visual Dialog: A Simple State-of-the-art Baseline},
  booktitle = ECCV20,
  year      = {2020},
  pages     = {XVIII:336--352},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212959},
}

@inproceedings{bb217909,
  author    = {Zhu, Y. and Wu, Y. and Yang, Y. and Yan, Y.},
  title     = {Describing Unseen Videos via Multi-Modal Cooperative Dialog Agents},
  booktitle = ECCV20,
  year      = {2020},
  pages     = {XXIII:153--169},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212960},
}

@inproceedings{bb217910,
  author    = {Qi, J. and Niu, Y. and Huang, J. and Zhang, H.},
  title     = {Two Causal Principles for Improving Visual Dialog},
  booktitle = CVPR20,
  year      = {2020},
  pages     = {10857--10866},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212961},
}

@inproceedings{bb217911,
  author    = {Abbasnejad, E. and Teney, D. and Parvaneh, A. and Shi, J. and van den Hengel, A. J.},
  title     = {Counterfactual Vision and Language Learning},
  booktitle = CVPR20,
  year      = {2020},
  pages     = {10041--10051},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212962},
}

@inproceedings{bb217912,
  author    = {Zhu, Y. and Zhu, F. and Zhan, Z. and Lin, B. and Jiao, J. and Chang, X. and Liang, X.},
  title     = {Vision-Dialog Navigation by Exploring Cross-Modal Memory},
  booktitle = CVPR20,
  year      = {2020},
  pages     = {10727--10736},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212963},
}

@inproceedings{bb217913,
  author    = {Yang, T. and Zha, Z. and Zhang, H.},
  title     = {Making History Matter: History-Advantage Sequence Training for Visual Dialog},
  booktitle = ICCV19,
  year      = {2019},
  pages     = {2561--2569},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212964},
}

@inproceedings{bb217914,
  author    = {Guo, D. and Xu, C. and Tao, D. C.},
  title     = {Image-Question-Answer Synergistic Network for Visual Dialog},
  booktitle = CVPR19,
  year      = {2019},
  pages     = {10426--10435},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212965},
}

@inproceedings{bb217915,
  author    = {Zheng, Z. L. and Wang, W. G. and Qi, S. Y. and Zhu, S. C.},
  title     = {Reasoning Visual Dialogs With Structural and Partial Observations},
  booktitle = CVPR19,
  year      = {2019},
  pages     = {6662--6671},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212966},
}

@inproceedings{bb217916,
  author    = {Bani, G. and Belli, D. and Dagan, G. and Geenen, A. and Skliar, A. and Venkatesh, A. and Baumgartner, T. and Bruni, E. and Fernandez, R.},
  title     = {Adding Object Detection Skills to Visual Dialogue Agents},
  booktitle = VL18,
  year      = {2018},
  pages     = {IV:180--187},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212967},
}

@inproceedings{bb217917,
  author    = {Yang, M. and Yang, N. S. R. and Zhang, K. and Tao, J.},
  title     = {Self-Talk: Responses to Users' Opinions and Challenges in Human Computer Dialog},
  booktitle = ICPR18,
  year      = {2018},
  pages     = {2839--2844},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212968},
}

@inproceedings{bb217918,
  author    = {Jain, U. and Schwing, A. and Lazebnik, S.},
  title     = {Two Can Play This Game: Visual Dialog with Discriminative Question Generation and Answering},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {5754--5763},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212969},
}

@inproceedings{bb217919,
  author    = {Dokania, P. K. and Torr, P. H. S. and Siddharth, N. and Massiceti, D.},
  title     = {{FLIPDIAL}: A Generative Model for Two-Way Visual Dialogue},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {6097--6105},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212970},
}

@inproceedings{bb217920,
  author    = {Wu, Q. and Wang, P. and Shen, C. and Reid, I. D. and van den Hengel, A. J.},
  title     = {Are You Talking to Me? Reasoned Visual Dialog Generation Through Adversarial Learning},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {6106--6115},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212971},
}

@inproceedings{bb217921,
  author    = {Kottur, S. and Moura, J. M. F. and Parikh, D. and Batra, D. and Rohrbach, M.},
  title     = {Visual Coreference Resolution in Visual Dialog Using Neural Module Networks},
  booktitle = ECCV18,
  year      = {2018},
  pages     = {XV: 160--178},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212972},
}

@inproceedings{bb217922,
  author    = {Strub, F. and Seurin, M. and Perez, E. and de Vries, H. and Mary, J. and Preux, P. and Courville, A. and Pietquin, O.},
  title     = {Visual Reasoning with Multi-hop Feature Modulation},
  booktitle = ECCV18,
  year      = {2018},
  pages     = {VI: 808--831},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212973},
}

@inproceedings{bb217923,
  author    = {Das, A. and Kottur, S. and Moura, J. M. F. and Lee, S. and Batra, D.},
  title     = {Learning Cooperative Visual Dialog Agents with Deep Reinforcement Learning},
  booktitle = ICCV17,
  year      = {2017},
  pages     = {2970--2979},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212974},
}

@inproceedings{bb217924,
  author    = {de Vries, H. and Strub, F. and Chandar, S. and Pietquin, O. and Larochelle, H. and Courville, A.},
  title     = {{GuessWhat}?! Visual Object Discovery through Multi-modal Dialogue},
  booktitle = CVPR17,
  year      = {2017},
  pages     = {4466--4475},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212975},
}

@inproceedings{bb217925,
  author    = {Nam, H. and Ha, J. W. and Kim, J.},
  title     = {Dual Attention Networks for Multimodal Reasoning and Matching},
  booktitle = CVPR17,
  year      = {2017},
  pages     = {2156--2164},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212976},
}

@inproceedings{bb217926,
  author    = {Johnson, J. and Hariharan, B. and van der Maaten, L. and Hoffman, J. and Fei Fei, L. and Zitnick, C. L. and Girshick, R.},
  title     = {Inferring and Executing Programs for Visual Reasoning},
  booktitle = ICCV17,
  year      = {2017},
  pages     = {3008--3017},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212977},
}

@inproceedings{bb217927,
  author    = {Johnson, J. and Hariharan, B. and van der Maaten, L. and Fei Fei, L. and Zitnick, C. L. and Girshick, R.},
  title     = {{CLEVR}: A Diagnostic Dataset for Compositional Language and Elementary Visual Reasoning},
  booktitle = CVPR17,
  year      = {2017},
  pages     = {1988--1997},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212978},
}

@inproceedings{bb217928,
  author    = {Das, A. and Kottur, S. and Gupta, K. and Singh, A. and Yadav, D. and Moura, J. M. F. and Parikh, D. and Batra, D.},
  title     = {Visual Dialog},
  booktitle = CVPR17,
  year      = {2017},
  pages     = {1080--1089},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212979},
}

@article{bb217929,
  author    = {Zhou, N. and Fan, J. P.},
  title     = {Automatic image-text alignment for large-scale web image indexing and retrieval},
  journal   = PR,
  volume    = {48},
  year      = {2015},
  number    = {1},
  pages     = {205--219},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT212980},
}

@article{bb217930,
  author    = {Huang, F. R. and Zhang, X. M. and Zhao, Z. H. and Li, Z. J.},
  title     = {Bi-Directional Spatial-Semantic Attention Networks for Image-Text Matching},
  journal   = IP,
  volume    = {28},
  year      = {2019},
  number    = {4},
  month     = apr,
  pages     = {2008--2020},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT212981},
}

@article{bb217931,
  author    = {Otto, C. and Springstein, M. and Anand, A. and Ewerth, R.},
  title     = {Characterization and classification of semantic image-text relations},
  journal   = MultInfoRetr,
  volume    = {9},
  year      = {2020},
  number    = {1},
  month     = mar,
  pages     = {31--45},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT212982},
}

@article{bb217932,
  author    = {Niu, K. and Huang, Y. and Wang, L.},
  title     = {Re-ranking image-text matching by adaptive metric fusion},
  journal   = PR,
  volume    = {104},
  year      = {2020},
  pages     = {107351},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT212983},
}

@article{bb217933,
  author    = {Wen, K. Y. and Gu, X. D. and Cheng, Q. R.},
  title     = {Learning Dual Semantic Relations With Graph Attention for Image-Text Matching},
  journal   = CirSysVideo,
  volume    = {31},
  year      = {2021},
  number    = {7},
  month     = jul,
  pages     = {2866--2879},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT212984},
}

@article{bb217934,
  author    = {Yang, S. and Li, Q. and Li, W. H. and Li, X. and Liu, A. A.},
  title     = {Dual-Level Representation Enhancement on Characteristic and Context for Image-Text Retrieval},
  journal   = CirSysVideo,
  volume    = {32},
  year      = {2022},
  number    = {11},
  month     = nov,
  pages     = {8037--8050},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT212985},
}

@article{bb217935,
  author    = {Jing, Y. and Wang, W. and Wang, L. and Tan, T. N.},
  title     = {Learning Aligned Image-Text Representations Using Graph Attentive Relational Network},
  journal   = IP,
  volume    = {30},
  year      = {2021},
  pages     = {1840--1852},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT212986},
}

@inproceedings{bb217936,
  author    = {Zhao, F. and Huang, Y. Z. and Wang, L. and Tan, T. N.},
  title     = {Deep Semantic Ranking Based Hashing for Multi-Label Image Retrieval},
  booktitle = CVPR15,
  year      = {2015},
  pages     = {1556--1564},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT212987},
}

@article{bb217937,
  author    = {Lan, H. and Zhang, P.},
  title     = {Learning and Integrating Multi-Level Matching Features for Image-Text Retrieval},
  journal   = SPLetters,
  volume    = {29},
  year      = {2022},
  pages     = {374--378},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT212988},
}

@article{bb217938,
  author    = {Wu, J. and Wu, C. L. and Lu, J. and Wang, L. Q. and Cui, X. R.},
  title     = {Region Reinforcement Network With Topic Constraint for Image-Text Matching},
  journal   = CirSysVideo,
  volume    = {32},
  year      = {2022},
  number    = {1},
  month     = jan,
  pages     = {388--397},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT212989},
}

@article{bb217939,
  author    = {Malali, N. and Keller, Y.},
  title     = {Learning to Embed Semantic Similarity for Joint Image-Text Retrieval},
  journal   = PAMI,
  volume    = {44},
  year      = {2022},
  number    = {12},
  month     = dec,
  pages     = {10252--10260},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT212990},
}

@article{bb217940,
  author    = {Tian, M. X. and Wu, X. X. and Jia, Y. D.},
  title     = {Adaptive Latent Graph Representation Learning for Image-Text Matching},
  journal   = IP,
  volume    = {32},
  year      = {2023},
  pages     = {471--482},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT212991},
}

@article{bb217941,
  author    = {Li, K. P. and Zhang, Y. L. and Li, K. and Li, Y. Y. and Fu, Y.},
  title     = {Image-Text Embedding Learning via Visual and Textual Semantic Reasoning},
  journal   = PAMI,
  volume    = {45},
  year      = {2023},
  number    = {1},
  month     = jan,
  pages     = {641--656},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT212992},
}

@inproceedings{bb217942,
  author    = {Li, K. P. and Zhang, Y. L. and Li, K. and Li, Y. Y. and Fu, Y.},
  title     = {Visual Semantic Reasoning for Image-Text Matching},
  booktitle = ICCV19,
  year      = {2019},
  pages     = {4653--4661},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT212993},
}

@article{bb217943,
  author    = {Diao, H. W. and Zhang, Y. and Liu, W. and Ruan, X. and Lu, H. C.},
  title     = {Plug-and-Play Regulators for Image-Text Matching},
  journal   = IP,
  volume    = {32},
  year      = {2023},
  pages     = {2322--2334},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT212994},
}

@article{bb217944,
  author    = {Tian, Y. M. and Ding, A. and Wang, D. and Luo, X. M. and Wan, B. and Wang, Y. F.},
  title     = {Bi-Attention enhanced representation learning for image-text matching},
  journal   = PR,
  volume    = {140},
  year      = {2023},
  pages     = {109548},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT212995},
}

@article{bb217945,
  author    = {Zhang, K. and Mao, Z. D. and Liu, A. A. and Zhang, Y. D.},
  title     = {Unified Adaptive Relevance Distinguishable Attention Network for Image-Text Matching},
  journal   = MultMed,
  volume    = {25},
  year      = {2023},
  pages     = {1320--1332},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT212996},
}

@article{bb217946,
  author    = {Liu, Z. and Chen, F. L. and Xu, J. and Pei, W. J. and Lu, G. M.},
  title     = {Image-Text Retrieval With Cross-Modal Semantic Importance Consistency},
  journal   = CirSysVideo,
  volume    = {33},
  year      = {2023},
  number    = {5},
  month     = may,
  pages     = {2465--2476},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT212997},
}

@article{bb217947,
  author    = {Shang, H. and Zhao, G. S. and Shi, J. and Qian, X. M.},
  title     = {A Multiview Text Imagination Network Based on Latent Alignment for Image-Text Matching},
  journal   = IEEE_Int_Sys,
  volume    = {38},
  year      = {2023},
  number    = {3},
  month     = may,
  pages     = {41--50},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT212998},
}

@article{bb217948,
  author    = {Liu, C. and Zhang, Y. Q. and Wang, H. and Chen, W. H. and Wang, F. and Huang, Y. and Shen, Y. D. and Wang, L.},
  title     = {Efficient Token-Guided Image-Text Retrieval With Consistent Multimodal Contrastive Training},
  journal   = IP,
  volume    = {32},
  year      = {2023},
  pages     = {3622--3633},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT212999},
}

@article{bb217949,
  author    = {Li, W. R. and Ma, Z. Y. and Deng, L. J. and Fan, X. P. and Tian, Y. H.},
  title     = {Neuron-Based Spiking Transmission and Reasoning Network for Robust Image-Text Retrieval},
  journal   = CirSysVideo,
  volume    = {33},
  year      = {2023},
  number    = {7},
  month     = jul,
  pages     = {3516--3528},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT213000},
}

@article{bb217950,
  author    = {Li, W. R. and Ma, Z. Y. and Shi, J. Q. and Fan, X. P.},
  title     = {The Style Transformer With Common Knowledge Optimization for Image-Text Retrieval},
  journal   = SPLetters,
  volume    = {30},
  year      = {2023},
  pages     = {1197--1201},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT213001},
}

@article{bb217951,
  author    = {Zhu, H. G. and Zhang, C. J. and Wei, Y. C. and Huang, S. and Zhao, Y.},
  title     = {{ESA}: External Space Attention Aggregation for Image-Text Retrieval},
  journal   = CirSysVideo,
  volume    = {33},
  year      = {2023},
  number    = {10},
  month     = oct,
  pages     = {6131--6143},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT213002},
}

@article{bb217952,
  author    = {Li, Z. and Guo, C. and Feng, Z. and Hwang, J. N. and Du, Z. T.},
  title     = {Integrating Language Guidance Into Image-Text Matching for Correcting False Negatives},
  journal   = MultMed,
  volume    = {26},
  year      = {2024},
  pages     = {103--116},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT213003},
}

@article{bb217953,
  author    = {Zhang, Y. and Ji, Z. and Wang, D. and Pang, Y. W. and Li, X. L.},
  title     = {{USER}: Unified Semantic Enhancement With Momentum Contrast for Image-Text Retrieval},
  journal   = IP,
  volume    = {33},
  year      = {2024},
  pages     = {595--609},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT213004},
}

@article{bb217954,
  author    = {Zhuang, J. and Yu, J. and Ding, Y. and Qu, X. Y. and Hu, Y.},
  title     = {Towards Fast and Accurate Image-Text Retrieval With Self-Supervised Fine-Grained Alignment},
  journal   = MultMed,
  volume    = {26},
  year      = {2024},
  pages     = {1361--1372},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT213005},
}

@article{bb217955,
  author    = {Liu, X. and He, Y. and Cheung, Y. M. and Xu, X. and Wang, N. N.},
  title     = {Learning Relationship-Enhanced Semantic Graph for Fine-Grained Image-Text Matching},
  journal   = Cyber,
  volume    = {54},
  year      = {2024},
  number    = {2},
  month     = feb,
  pages     = {948--961},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT213006},
}

@article{bb217956,
  author    = {Li, W. H. and Yang, S. and Li, Q. and Li, X. and Liu, A. A.},
  title     = {Commonsense-Guided Semantic and Relational Consistencies for Image-Text Retrieval},
  journal   = MultMed,
  volume    = {26},
  year      = {2024},
  pages     = {1867--1880},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT213007},
}

@article{bb217957,
  author    = {Wu, D. Q. and Li, H. H. and Gu, C. and Liu, H. and Xu, C. and Hou, Y. and Guo, L.},
  title     = {Feature First: Advancing Image-Text Retrieval Through Improved Visual Features},
  journal   = MultMed,
  volume    = {26},
  year      = {2024},
  pages     = {3827--3841},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT213008},
}

@article{bb217958,
  author    = {Yang, R. and Wang, S. and Gu, Y. and Wang, J. H. and Sun, Y. Z. and Zhang, H. and Liao, Y. and Jiao, L. C.},
  title     = {Continual Learning for Cross-Modal Image-Text Retrieval Based on Domain-Selective Attention},
  journal   = PR,
  volume    = {149},
  year      = {2024},
  pages     = {110273},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT213009},
}

@article{bb217959,
  author    = {Pan, R. J. and Yang, H. and Li, C. and Yang, J. H.},
  title     = {Joint Intra \& Inter-Grained Reasoning: A New Look Into Semantic Consistency of Image-Text Retrieval},
  journal   = MultMed,
  volume    = {26},
  year      = {2024},
  pages     = {4912--4925},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT213010},
}

@article{bb217960,
  author    = {Zhang, K. and Hu, B. and Zhang, H. and Li, Z. and Mao, Z. D.},
  title     = {Enhanced Semantic Similarity Learning Framework for Image-Text Matching},
  journal   = CirSysVideo,
  volume    = {34},
  year      = {2024},
  number    = {4},
  month     = apr,
  pages     = {2973--2988},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT213011},
}

@inproceedings{bb217961,
  author    = {Fu, Z. R. and Mao, Z. D. and Song, Y. and Zhang, Y. D.},
  title     = {Learning Semantic Relationship among Instances for Image-Text Matching},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {15159--15168},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT213012},
}

@article{bb217962,
  author    = {Diao, H. and Zhang, Y. and Gao, S. and Ruan, X. and Lu, H. C.},
  title     = {Deep Boosting Learning: A Brand-New Cooperative Approach for Image-Text Matching},
  journal   = IP,
  volume    = {33},
  year      = {2024},
  pages     = {3341--3352},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT213013},
}

@inproceedings{bb217963,
  author    = {Zhang, Y. and Lu, H. C.},
  title     = {Deep Cross-Modal Projection Learning for Image-Text Matching},
  booktitle = ECCV18,
  year      = {2018},
  pages     = {I: 707--723},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT213014},
}

@article{bb217964,
  author    = {Cao, M. and Bai, Y. and Cao, Z. Q. and Nie, L. Q. and Zhang, M.},
  title     = {Efficient Image-Text Retrieval via Keyword-Guided Pre-Screening},
  journal   = CirSysVideo,
  volume    = {34},
  year      = {2024},
  number    = {6},
  month     = jun,
  pages     = {5132--5145},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT213015},
}

@article{bb217965,
  author    = {Li, Z. and Zhang, L. and Zhang, K. and Zhang, Y. D. and Mao, Z. D.},
  title     = {Improving Image-Text Matching With Bidirectional Consistency of Cross-Modal Alignment},
  journal   = CirSysVideo,
  volume    = {34},
  year      = {2024},
  number    = {7},
  month     = jul,
  pages     = {6590--6607},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT213016},
}

@article{bb217966,
  author    = {Li, Z. and Zhang, L. and Zhang, K. and Zhang, Y. D. and Mao, Z. D.},
  title     = {Fast, Accurate, and Lightweight Memory-Enhanced Embedding Learning Framework for Image-Text Retrieval},
  journal   = CirSysVideo,
  volume    = {34},
  year      = {2024},
  number    = {7},
  month     = jul,
  pages     = {6542--6558},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT213017},
}

@article{bb217967,
  author    = {Cui, Z. and Hu, Y. L. and Sun, Y. F. and Yin, B. C.},
  title     = {Context-aware relation enhancement and similarity reasoning for image-text retrieval},
  journal   = IET-CV,
  volume    = {18},
  year      = {2024},
  number    = {5},
  pages     = {652--665},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT213018},
}

@article{bb217968,
  author    = {Pan, Z. X. and Mao, Y. C. and Xiong, L. and Pang, T. F. and Ping, P.},
  title     = {{MFAE}: Multimodal Fusion and Alignment for Entity-level Disinformation Detection},
  journal   = PRL,
  volume    = {184},
  year      = {2024},
  pages     = {59--65},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT213019},
}

@article{bb217969,
  author    = {Pu, X. and Wang, Z. W. and Yuan, L. and Wu, Y. and Jing, L. P. and Gao, X. B.},
  title     = {{GADNet}: Improving image-text matching via graph-based aggregation and disentanglement},
  journal   = PR,
  volume    = {157},
  year      = {2025},
  pages     = {110900},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT213020},
}

@inproceedings{bb217970,
  author    = {Zhang, W. and Xu, X. W. and Tao, Y. and Wang, X. D. and Wang, C. and Wei, Z. M.},
  title     = {Bi-Directional Image-Text Retrieval With Position Attention and Similarity Filtering},
  booktitle = ICIVC22,
  year      = {2022},
  pages     = {635--640},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT213021},
}

@inproceedings{bb217971,
  author    = {Li, Z. and Nian, X. H. and Pan, C. and Yang, D. and Xiong, H. Y. and Wang, H. B.},
  title     = {Relation Graph Reasoning for Image-Text Matching},
  booktitle = ICIVC22,
  year      = {2022},
  pages     = {319--324},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT213022},
}

@inproceedings{bb217972,
  author    = {Zhang, K. and Mao, Z. D. and Wang, Q. and Zhang, Y. D.},
  title     = {Negative-Aware Attention Framework for Image-Text Matching},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {15640--15649},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT213023},
}

@inproceedings{bb217973,
  author    = {Long, S. and Han, S. C. and Wan, X. J. and Poon, J.},
  title     = {{GraDual}: Graph-based Dual-modal Representation for Image-Text Matching},
  booktitle = WACV22,
  year      = {2022},
  pages     = {2463--2472},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT213024},
}

@inproceedings{bb217974,
  author    = {Biten, A. F. and Mafla, A. and Gomez, L. and Karatzas, D.},
  title     = {Is An Image Worth Five Sentences? A New Look into Semantics for Image-Text Matching},
  booktitle = WACV22,
  year      = {2022},
  pages     = {2483--2492},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT213025},
}

@inproceedings{bb217975,
  author    = {Mithun, N. C. and Pasricha, R. and Papalexakis, E. and Roy Chowdhury, A. K.},
  title     = {Webly Supervised Image-Text Embedding with Noisy Tag Refinement},
  booktitle = ICPR21,
  year      = {2021},
  pages     = {7454--7461},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT213026},
}

@inproceedings{bb217976,
  author    = {Chen, J. A. and Zhang, L. and Wang, Q. and Bai, C. and Kpalma, K.},
  title     = {Intra-Modal Constraint Loss for Image-Text Retrieval},
  booktitle = ICIP22,
  year      = {2022},
  pages     = {4023--4027},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT213027},
}

@inproceedings{bb217977,
  author    = {Liu, Y. and Wang, H. Q. and Meng, F. Y. and Liu, M. Y. and Liu, H.},
  title     = {Attend, Correct and Focus: A Bidirectional Correct Attention Network for Image-Text Matching},
  booktitle = ICIP21,
  year      = {2021},
  pages     = {2673--2677},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT213028},
}

@inproceedings{bb217978,
  author    = {Yang, S. T. and Huang, K. H. and Howe, B.},
  title     = {{JECL}: Joint Embedding and Cluster Learning for Image-Text Pairs},
  booktitle = ICPR21,
  year      = {2021},
  pages     = {8344--8351},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT213029},
}

@inproceedings{bb217979,
  author    = {Mikriukov, G. and Ravanbakhsh, M. and Demir, B.},
  title     = {An Unsupervised Cross-Modal Hashing Method Robust to Noisy Training Image-Text Correspondences in Remote Sensing},
  booktitle = ICIP22,
  year      = {2022},
  pages     = {2556--2560},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT213030},
}

@inproceedings{bb217980,
  author    = {Anwaar, M. U. and Labintcev, E. and Kleinsteuber, M.},
  title     = {Compositional Learning of Image-Text Query for Image Retrieval},
  booktitle = WACV21,
  year      = {2021},
  pages     = {1139--1148},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT213031},
}

@inproceedings{bb217981,
  author    = {Messina, N. and Falchi, F. and Esuli, A. and Amato, G.},
  title     = {Transformer Reasoning Network for Image-Text Matching and Retrieval},
  booktitle = ICPR21,
  year      = {2021},
  pages     = {5222--5229},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT213032},
}

@inproceedings{bb217982,
        AUTHOR = "Zhang, Q. and Lei, Z. and Zhang, Z. X. and Li, S. Z.",
        TITLE = "Context-Aware Attention Network for Image-Text Retrieval",
        BOOKTITLE = CVPR20,
        YEAR = "2020",
        PAGES = "3533--3542",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT213033"}

@inproceedings{bb217983,
        AUTHOR = "Chen, Y. C. and Li, L. J. and Yu, L. C. and El Kholy, A. and Ahmed, F. and Gan, Z. and Cheng, Y. and Liu, J. J.",
        TITLE = "Uniter: Universal Image-Text Representation Learning",
        BOOKTITLE = ECCV20,
        VOLUME = "XXX",
        YEAR = "2020",
        PAGES = "104--120",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT213034"}

@inproceedings{bb217984,
        AUTHOR = "Wang, H. R. and Zhang, Y. and Ji, Z. and Pang, Y. W. and Ma, L.",
        TITLE = "Consensus-aware Visual-semantic Embedding for Image-Text Matching",
        BOOKTITLE = ECCV20,
        VOLUME = "XXIV",
        YEAR = "2020",
        PAGES = "18--34",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT213035"}

@inproceedings{bb217985,
        AUTHOR = "Chen, T. L. and Deng, J. J. and Luo, J. B.",
        TITLE = "Adaptive Offline Quintuplet Loss for Image-text Matching",
        BOOKTITLE = ECCV20,
        VOLUME = "XIII",
        YEAR = "2020",
        PAGES = "549--565",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT213036"}

@inproceedings{bb217986,
        AUTHOR = "Lee, K. H. and Chen, X. and Hua, G. and Hu, H. D. and He, X. D.",
        TITLE = "Stacked Cross Attention for Image-Text Matching",
        BOOKTITLE = ECCV18,
        VOLUME = "II",
        YEAR = "2018",
        PAGES = "212--228",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT213037"}

@inproceedings{bb217987,
        AUTHOR = "Plummer, B. A. and Kordas, P. and Kiapour, M. H. and Zheng, S. and Piramuthu, R. and Lazebnik, S.",
        TITLE = "Conditional Image-Text Embedding Networks",
        BOOKTITLE = ECCV18,
        VOLUME = "XII",
        YEAR = "2018",
        PAGES = "258--274",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT213038"}

@article{bb217988,
        AUTHOR = "Liang, J. W. and Jiang, L. and Cao, L. L. and Kalantidis, Y. and Li, L. J. and Hauptmann, A. G.",
        TITLE = "Focal Visual-Text Attention for {Memex} Question Answering",
        JOURNAL = PAMI,
        VOLUME = "41",
        YEAR = "2019",
        NUMBER = "8",
        MONTH = aug,
        PAGES = "1893--1908",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT213040"}

@inproceedings{bb217989,
        AUTHOR = "Liang, J. W. and Jiang, L. and Cao, L. L. and Li, L. J. and Hauptmann, A. G.",
        TITLE = "Focal Visual-Text Attention for Visual Question Answering",
        BOOKTITLE = CVPR18,
        YEAR = "2018",
        PAGES = "6135--6143",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT213041"}

@article{bb217990,
        AUTHOR = "Riquelme, F. and de Goyeneche, A. and Zhang, Y. D. and Niebles, J. C. and Soto, A.",
        TITLE = "Explaining {VQA} predictions using visual grounding and a knowledge base",
        JOURNAL = IVC,
        VOLUME = "101",
        YEAR = "2020",
        PAGES = "103968",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT213042"}

@article{bb217991,
        AUTHOR = "Yang, Z. Y. and Kumar, T. and Chen, T. L. and Su, J. S. and Luo, J. B.",
        TITLE = "Grounding-Tracking-Integration",
        JOURNAL = CirSysVideo,
        VOLUME = "31",
        YEAR = "2021",
        NUMBER = "9",
        MONTH = sep,
        PAGES = "3433--3443",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT213043"}

@article{bb217992,
        AUTHOR = "Zhang, W. X. and Ma, C. and Wu, Q. and Yang, X. K.",
        TITLE = "Language-Guided Navigation via Cross-Modal Grounding and Alternate Adversarial Learning",
        JOURNAL = CirSysVideo,
        VOLUME = "31",
        YEAR = "2021",
        NUMBER = "9",
        MONTH = sep,
        PAGES = "3469--3481",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT213044"}

@article{bb217993,
        AUTHOR = "Zhai, S. L. and Guo, G. B. and Yuan, F. J. and Liu, Y. and Wang, X. W.",
        TITLE = "{VSE-fs}: Fast Full-Sample Visual Semantic Embedding",
        JOURNAL = IEEE_Int_Sys,
        VOLUME = "36",
        YEAR = "2021",
        NUMBER = "4",
        MONTH = jul,
        PAGES = "3--12",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT213045"}

@article{bb217994,
        AUTHOR = "Bargal, S. A. and Zunino, A. and Petsiuk, V. and Zhang, J. M. and Saenko, K. and Murino, V. and Sclaroff, S.",
        TITLE = "Guided Zoom: Zooming into Network Evidence to Refine Fine-Grained Model Decisions",
        JOURNAL = PAMI,
        VOLUME = "43",
        YEAR = "2021",
        NUMBER = "11",
        MONTH = nov,
        PAGES = "4196--4202",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT213046"}

@article{bb217995,
        AUTHOR = "Yang, W. F. and Zhang, T. Z. and Zhang, Y. D. and Wu, F.",
        TITLE = "Local Correspondence Network for Weakly Supervised Temporal Sentence Grounding",
        JOURNAL = IP,
        VOLUME = "30",
        YEAR = "2021",
        PAGES = "3252--3262",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT213047"}

@inproceedings{bb217996,
        AUTHOR = "Luo, W. and Zhang, T. Z. and Yang, W. F. and Liu, J. G. and Mei, T. and Wu, F. and Zhang, Y. D.",
        TITLE = "Action Unit Memory Network for Weakly Supervised Temporal Action Localization",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "9964--9974",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT213048"}

@article{bb217997,
        AUTHOR = "Hong, R. C. and Liu, D. and Mo, X. Y. and He, X. N. and Zhang, H. W.",
        TITLE = "Learning to Compose and Reason with Language Tree Structures for Visual Grounding",
        JOURNAL = PAMI,
        VOLUME = "44",
        YEAR = "2022",
        NUMBER = "2",
        MONTH = feb,
        PAGES = "684--696",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT213049"}

@inproceedings{bb217998,
        AUTHOR = "Tang, K. H. and Zhang, H. W. and Wu, B. Y. and Luo, W. H. and Liu, W.",
        TITLE = "Learning to Compose Dynamic Tree Structures for Visual Contexts",
        BOOKTITLE = CVPR19,
        YEAR = "2019",
        PAGES = "6612--6621",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT213050"}

@article{bb217999,
        AUTHOR = "Bin, Y. and Ding, Y. J. and Peng, B. and Peng, L. and Yang, Y. and Chua, T. S.",
        TITLE = "Entity Slot Filling for Visual Captioning",
        JOURNAL = CirSysVideo,
        VOLUME = "32",
        YEAR = "2022",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "52--62",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT213051"}

Last update: Sep 28, 2024 at 17:47:54