% Bibliography entries bb217900 through bb217999, one field per line.
%
% Notes for maintainers:
%  - BOOKTITLE and JOURNAL values (ICIP22, CVPR20, PR, IP, PAMI, ...) are bare
%    macro names, not quoted strings. Their string definitions are not part of
%    this section and must be loaded before it (presumably from a shared
%    abbreviations file -- TODO confirm).
%  - BIBSOURCE is not a standard BibTeX field; it appears to hold the
%    visionbib.com page and anchor each entry came from.
%  - Page ranges use the en-dash form (two hyphens). Some PAGES values carry a
%    roman-numeral prefix such as XV: (presumably the proceedings volume/part);
%    that prefix is kept exactly as found.
%  - Acronyms and camelCase names in titles are wrapped in braces so that
%    sentence-casing styles do not lowercase them.

% ---- Entries whose BIBSOURCE page is applicat803vdi3.html ----

@inproceedings{bb217900,
  AUTHOR    = "Zhang, H.Y. and Li, Y.M. and Zhang, Z.F.",
  TITLE     = "Video-Grounded Dialogues with Joint Video and Image Training",
  BOOKTITLE = ICIP22,
  YEAR      = "2022",
  PAGES     = "3903--3907",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212951"
}

@inproceedings{bb217901,
  AUTHOR    = "Zhang, S.Y. and Jiang, X.Z. and Yang, Z.Q. and Wan, T. and Qin, Z.C.",
  TITLE     = "Reasoning with Multi-Structure Commonsense Knowledge in Visual Dialog",
  BOOKTITLE = MULA22,
  YEAR      = "2022",
  PAGES     = "4599--4608",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212952"
}

@inproceedings{bb217902,
  AUTHOR    = "Zhu, Y. and Weng, Y. and Zhu, F.D. and Liang, X.D. and Ye, Q.X. and Lu, Y.T. and Jiao, J.B.",
  TITLE     = "Self-Motivated Communication Agent for Real-World Vision-Dialog Navigation",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "1574--1583",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212953"
}

@inproceedings{bb217903,
  AUTHOR    = "Engin, D. and Schnitzler, F. and Duong, N.Q.K. and Avrithis, Y.",
  TITLE     = "On the hidden treasure of dialog in video question answering",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "2044--2053",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212954"
}

@inproceedings{bb217904,
  AUTHOR    = "Matsumori, S. and Shingyouchi, K. and Abe, Y. and Fukuchi, Y. and Sugiura, K. and Imai, M.",
  TITLE     = "Unified Questioner Transformer for Descriptive Question Generation in Goal-Oriented Visual Dialogue",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "1878--1887",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212955"
}

@inproceedings{bb217905,
  AUTHOR    = "Tu, T. and Ping, Q. and Thattai, G. and Tur, G. and Natarajan, P.",
  TITLE     = "Learning Better Visual Dialog Agents with Pretrained Visual-Linguistic Representation",
  BOOKTITLE = CVPR21,
  YEAR      = "2021",
  PAGES     = "5618--5627",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212956"
}

@inproceedings{bb217906,
  AUTHOR    = "Jiang, T.L. and Ji, Y. and Liu, C.P.",
  TITLE     = "Integrating Historical States and Co-attention Mechanism for Visual Dialog",
  BOOKTITLE = ICPR21,
  YEAR      = "2021",
  PAGES     = "2041--2048",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212957"
}

@inproceedings{bb217907,
  AUTHOR    = "Nguyen, V.Q. and Suganuma, M. and Okatani, T.",
  TITLE     = "Efficient Attention Mechanism for Visual Dialog that Can Handle All the Interactions Between Multiple Inputs",
  BOOKTITLE = ECCV20,
  YEAR      = "2020",
  PAGES     = "XXIV:223--240",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212958"
}

@inproceedings{bb217908,
  AUTHOR    = "Murahari, V. and Batra, D. and Parikh, D. and Das, A.",
  TITLE     = "Large-scale Pretraining for Visual Dialog: A Simple State-of-the-art Baseline",
  BOOKTITLE = ECCV20,
  YEAR      = "2020",
  PAGES     = "XVIII:336--352",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212959"
}

@inproceedings{bb217909,
  AUTHOR    = "Zhu, Y. and Wu, Y. and Yang, Y. and Yan, Y.",
  TITLE     = "Describing Unseen Videos via Multi-Modal Cooperative Dialog Agents",
  BOOKTITLE = ECCV20,
  YEAR      = "2020",
  PAGES     = "XXIII:153--169",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212960"
}

@inproceedings{bb217910,
  AUTHOR    = "Qi, J. and Niu, Y. and Huang, J. and Zhang, H.",
  TITLE     = "Two Causal Principles for Improving Visual Dialog",
  BOOKTITLE = CVPR20,
  YEAR      = "2020",
  PAGES     = "10857--10866",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212961"
}

@inproceedings{bb217911,
  AUTHOR    = "Abbasnejad, E. and Teney, D. and Parvaneh, A. and Shi, J. and van den Hengel, A.J.",
  TITLE     = "Counterfactual Vision and Language Learning",
  BOOKTITLE = CVPR20,
  YEAR      = "2020",
  PAGES     = "10041--10051",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212962"
}

@inproceedings{bb217912,
  AUTHOR    = "Zhu, Y. and Zhu, F. and Zhan, Z. and Lin, B. and Jiao, J. and Chang, X. and Liang, X.",
  TITLE     = "Vision-Dialog Navigation by Exploring Cross-Modal Memory",
  BOOKTITLE = CVPR20,
  YEAR      = "2020",
  PAGES     = "10727--10736",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212963"
}

@inproceedings{bb217913,
  AUTHOR    = "Yang, T. and Zha, Z. and Zhang, H.",
  TITLE     = "Making History Matter: History-Advantage Sequence Training for Visual Dialog",
  BOOKTITLE = ICCV19,
  YEAR      = "2019",
  PAGES     = "2561--2569",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212964"
}

@inproceedings{bb217914,
  AUTHOR    = "Guo, D. and Xu, C. and Tao, D.C.",
  TITLE     = "Image-Question-Answer Synergistic Network for Visual Dialog",
  BOOKTITLE = CVPR19,
  YEAR      = "2019",
  PAGES     = "10426--10435",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212965"
}

@inproceedings{bb217915,
  AUTHOR    = "Zheng, Z.L. and Wang, W.G. and Qi, S.Y. and Zhu, S.C.",
  TITLE     = "Reasoning Visual Dialogs With Structural and Partial Observations",
  BOOKTITLE = CVPR19,
  YEAR      = "2019",
  PAGES     = "6662--6671",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212966"
}

@inproceedings{bb217916,
  AUTHOR    = "Bani, G. and Belli, D. and Dagan, G. and Geenen, A. and Skliar, A. and Venkatesh, A. and Baumgartner, T. and Bruni, E. and Fernandez, R.",
  TITLE     = "Adding Object Detection Skills to Visual Dialogue Agents",
  BOOKTITLE = VL18,
  YEAR      = "2018",
  PAGES     = "IV:180--187",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212967"
}

@inproceedings{bb217917,
  AUTHOR    = "Yang, M. and Yang, N.S.R. and Zhang, K. and Tao, J.",
  TITLE     = "Self-Talk: Responses to Users' Opinions and Challenges in Human Computer Dialog",
  BOOKTITLE = ICPR18,
  YEAR      = "2018",
  PAGES     = "2839--2844",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212968"
}

@inproceedings{bb217918,
  AUTHOR    = "Jain, U. and Schwing, A. and Lazebnik, S.",
  TITLE     = "Two Can Play This Game: Visual Dialog with Discriminative Question Generation and Answering",
  BOOKTITLE = CVPR18,
  YEAR      = "2018",
  PAGES     = "5754--5763",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212969"
}

@inproceedings{bb217919,
  AUTHOR    = "Dokania, P.K. and Torr, P.H.S. and Siddharth, N. and Massiceti, D.",
  TITLE     = "{FLIPDIAL}: A Generative Model for Two-Way Visual Dialogue",
  BOOKTITLE = CVPR18,
  YEAR      = "2018",
  PAGES     = "6097--6105",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212970"
}

@inproceedings{bb217920,
  AUTHOR    = "Wu, Q. and Wang, P. and Shen, C. and Reid, I.D. and van den Hengel, A.J.",
  TITLE     = "Are You Talking to Me? Reasoned Visual Dialog Generation Through Adversarial Learning",
  BOOKTITLE = CVPR18,
  YEAR      = "2018",
  PAGES     = "6106--6115",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212971"
}

@inproceedings{bb217921,
  AUTHOR    = "Kottur, S. and Moura, J.M.F. and Parikh, D. and Batra, D. and Rohrbach, M.",
  TITLE     = "Visual Coreference Resolution in Visual Dialog Using Neural Module Networks",
  BOOKTITLE = ECCV18,
  YEAR      = "2018",
  PAGES     = "XV: 160--178",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212972"
}

@inproceedings{bb217922,
  AUTHOR    = "Strub, F. and Seurin, M. and Perez, E. and de Vries, H. and Mary, J. and Preux, P. and Courville, A. and Pietquin, O.",
  TITLE     = "Visual Reasoning with Multi-hop Feature Modulation",
  BOOKTITLE = ECCV18,
  YEAR      = "2018",
  PAGES     = "VI: 808--831",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212973"
}

@inproceedings{bb217923,
  AUTHOR    = "Das, A. and Kottur, S. and Moura, J.M.F. and Lee, S. and Batra, D.",
  TITLE     = "Learning Cooperative Visual Dialog Agents with Deep Reinforcement Learning",
  BOOKTITLE = ICCV17,
  YEAR      = "2017",
  PAGES     = "2970--2979",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212974"
}

@inproceedings{bb217924,
  AUTHOR    = "de Vries, H. and Strub, F. and Chandar, S. and Pietquin, O. and Larochelle, H. and Courville, A.",
  TITLE     = "{GuessWhat?!} Visual Object Discovery through Multi-modal Dialogue",
  BOOKTITLE = CVPR17,
  YEAR      = "2017",
  PAGES     = "4466--4475",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212975"
}

@inproceedings{bb217925,
  AUTHOR    = "Nam, H. and Ha, J.W. and Kim, J.",
  TITLE     = "Dual Attention Networks for Multimodal Reasoning and Matching",
  BOOKTITLE = CVPR17,
  YEAR      = "2017",
  PAGES     = "2156--2164",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212976"
}

@inproceedings{bb217926,
  AUTHOR    = "Johnson, J. and Hariharan, B. and van der Maaten, L. and Hoffman, J. and Fei Fei, L. and Zitnick, C.L. and Girshick, R.",
  TITLE     = "Inferring and Executing Programs for Visual Reasoning",
  BOOKTITLE = ICCV17,
  YEAR      = "2017",
  PAGES     = "3008--3017",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212977"
}

@inproceedings{bb217927,
  AUTHOR    = "Johnson, J. and Hariharan, B. and van der Maaten, L. and Fei Fei, L. and Zitnick, C.L. and Girshick, R.",
  TITLE     = "{CLEVR}: A Diagnostic Dataset for Compositional Language and Elementary Visual Reasoning",
  BOOKTITLE = CVPR17,
  YEAR      = "2017",
  PAGES     = "1988--1997",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212978"
}

@inproceedings{bb217928,
  AUTHOR    = "Das, A. and Kottur, S. and Gupta, K. and Singh, A. and Yadav, D. and Moura, J.M.F. and Parikh, D. and Batra, D.",
  TITLE     = "Visual Dialog",
  BOOKTITLE = CVPR17,
  YEAR      = "2017",
  PAGES     = "1080--1089",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vdi3.html#TT212979"
}

% ---- Entries whose BIBSOURCE page is applicat803imt4.html ----

@article{bb217929,
  AUTHOR    = "Zhou, N. and Fan, J.P.",
  TITLE     = "Automatic image-text alignment for large-scale web image indexing and retrieval",
  JOURNAL   = PR,
  VOLUME    = "48",
  YEAR      = "2015",
  NUMBER    = "1",
  PAGES     = "205--219",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT212980"
}

@article{bb217930,
  AUTHOR    = "Huang, F.R. and Zhang, X.M. and Zhao, Z.H. and Li, Z.J.",
  TITLE     = "Bi-Directional Spatial-Semantic Attention Networks for Image-Text Matching",
  JOURNAL   = IP,
  VOLUME    = "28",
  YEAR      = "2019",
  NUMBER    = "4",
  MONTH     = "April",
  PAGES     = "2008--2020",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT212981"
}

@article{bb217931,
  AUTHOR    = "Otto, C. and Springstein, M. and Anand, A. and Ewerth, R.",
  TITLE     = "Characterization and classification of semantic image-text relations",
  JOURNAL   = MultInfoRetr,
  VOLUME    = "9",
  YEAR      = "2020",
  NUMBER    = "1",
  MONTH     = "March",
  PAGES     = "31--45",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT212982"
}

@article{bb217932,
  AUTHOR    = "Niu, K. and Huang, Y. and Wang, L.",
  TITLE     = "Re-ranking image-text matching by adaptive metric fusion",
  JOURNAL   = PR,
  VOLUME    = "104",
  YEAR      = "2020",
  PAGES     = "107351",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT212983"
}

@article{bb217933,
  AUTHOR    = "Wen, K.Y. and Gu, X.D. and Cheng, Q.R.",
  TITLE     = "Learning Dual Semantic Relations With Graph Attention for Image-Text Matching",
  JOURNAL   = CirSysVideo,
  VOLUME    = "31",
  YEAR      = "2021",
  NUMBER    = "7",
  MONTH     = "July",
  PAGES     = "2866--2879",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT212984"
}

@article{bb217934,
  AUTHOR    = "Yang, S. and Li, Q. and Li, W.H. and Li, X. and Liu, A.A.",
  TITLE     = "Dual-Level Representation Enhancement on Characteristic and Context for Image-Text Retrieval",
  JOURNAL   = CirSysVideo,
  VOLUME    = "32",
  YEAR      = "2022",
  NUMBER    = "11",
  MONTH     = "November",
  PAGES     = "8037--8050",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT212985"
}

@article{bb217935,
  AUTHOR    = "Jing, Y. and Wang, W. and Wang, L. and Tan, T.N.",
  TITLE     = "Learning Aligned Image-Text Representations Using Graph Attentive Relational Network",
  JOURNAL   = IP,
  VOLUME    = "30",
  YEAR      = "2021",
  PAGES     = "1840--1852",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT212986"
}

@inproceedings{bb217936,
  AUTHOR    = "Zhao, F. and Huang, Y.Z. and Wang, L. and Tan, T.N.",
  TITLE     = "Deep Semantic Ranking Based Hashing for Multi-Label Image Retrieval",
  BOOKTITLE = CVPR15,
  YEAR      = "2015",
  PAGES     = "1556--1564",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT212987"
}

@article{bb217937,
  AUTHOR    = "Lan, H. and Zhang, P.",
  TITLE     = "Learning and Integrating Multi-Level Matching Features for Image-Text Retrieval",
  JOURNAL   = SPLetters,
  VOLUME    = "29",
  YEAR      = "2022",
  PAGES     = "374--378",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT212988"
}

@article{bb217938,
  AUTHOR    = "Wu, J. and Wu, C.L. and Lu, J. and Wang, L.Q. and Cui, X.R.",
  TITLE     = "Region Reinforcement Network With Topic Constraint for Image-Text Matching",
  JOURNAL   = CirSysVideo,
  VOLUME    = "32",
  YEAR      = "2022",
  NUMBER    = "1",
  MONTH     = "January",
  PAGES     = "388--397",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT212989"
}

@article{bb217939,
  AUTHOR    = "Malali, N. and Keller, Y.",
  TITLE     = "Learning to Embed Semantic Similarity for Joint Image-Text Retrieval",
  JOURNAL   = PAMI,
  VOLUME    = "44",
  YEAR      = "2022",
  NUMBER    = "12",
  MONTH     = "December",
  PAGES     = "10252--10260",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT212990"
}

@article{bb217940,
  AUTHOR    = "Tian, M.X. and Wu, X.X. and Jia, Y.D.",
  TITLE     = "Adaptive Latent Graph Representation Learning for Image-Text Matching",
  JOURNAL   = IP,
  VOLUME    = "32",
  YEAR      = "2023",
  PAGES     = "471--482",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT212991"
}

@article{bb217941,
  AUTHOR    = "Li, K.P. and Zhang, Y.L. and Li, K. and Li, Y.Y. and Fu, Y.",
  TITLE     = "Image-Text Embedding Learning via Visual and Textual Semantic Reasoning",
  JOURNAL   = PAMI,
  VOLUME    = "45",
  YEAR      = "2023",
  NUMBER    = "1",
  MONTH     = "January",
  PAGES     = "641--656",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT212992"
}

@inproceedings{bb217942,
  AUTHOR    = "Li, K.P. and Zhang, Y.L. and Li, K. and Li, Y.Y. and Fu, Y.",
  TITLE     = "Visual Semantic Reasoning for Image-Text Matching",
  BOOKTITLE = ICCV19,
  YEAR      = "2019",
  PAGES     = "4653--4661",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT212993"
}

@article{bb217943,
  AUTHOR    = "Diao, H.W. and Zhang, Y. and Liu, W. and Ruan, X. and Lu, H.C.",
  TITLE     = "Plug-and-Play Regulators for Image-Text Matching",
  JOURNAL   = IP,
  VOLUME    = "32",
  YEAR      = "2023",
  PAGES     = "2322--2334",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT212994"
}

@article{bb217944,
  AUTHOR    = "Tian, Y.M. and Ding, A. and Wang, D. and Luo, X.M. and Wan, B. and Wang, Y.F.",
  TITLE     = "Bi-Attention enhanced representation learning for image-text matching",
  JOURNAL   = PR,
  VOLUME    = "140",
  YEAR      = "2023",
  PAGES     = "109548",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT212995"
}

@article{bb217945,
  AUTHOR    = "Zhang, K. and Mao, Z.D. and Liu, A.A. and Zhang, Y.D.",
  TITLE     = "Unified Adaptive Relevance Distinguishable Attention Network for Image-Text Matching",
  JOURNAL   = MultMed,
  VOLUME    = "25",
  YEAR      = "2023",
  PAGES     = "1320--1332",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT212996"
}

@article{bb217946,
  AUTHOR    = "Liu, Z. and Chen, F.L. and Xu, J. and Pei, W.J. and Lu, G.M.",
  TITLE     = "Image-Text Retrieval With Cross-Modal Semantic Importance Consistency",
  JOURNAL   = CirSysVideo,
  VOLUME    = "33",
  YEAR      = "2023",
  NUMBER    = "5",
  MONTH     = "May",
  PAGES     = "2465--2476",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT212997"
}

@article{bb217947,
  AUTHOR    = "Shang, H. and Zhao, G.S. and Shi, J. and Qian, X.M.",
  TITLE     = "A Multiview Text Imagination Network Based on Latent Alignment for Image-Text Matching",
  JOURNAL   = IEEE_Int_Sys,
  VOLUME    = "38",
  YEAR      = "2023",
  NUMBER    = "3",
  MONTH     = "May",
  PAGES     = "41--50",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT212998"
}

@article{bb217948,
  AUTHOR    = "Liu, C. and Zhang, Y.Q. and Wang, H. and Chen, W.H. and Wang, F. and Huang, Y. and Shen, Y.D. and Wang, L.",
  TITLE     = "Efficient Token-Guided Image-Text Retrieval With Consistent Multimodal Contrastive Training",
  JOURNAL   = IP,
  VOLUME    = "32",
  YEAR      = "2023",
  PAGES     = "3622--3633",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT212999"
}

@article{bb217949,
  AUTHOR    = "Li, W.R. and Ma, Z.Y. and Deng, L.J. and Fan, X.P. and Tian, Y.H.",
  TITLE     = "Neuron-Based Spiking Transmission and Reasoning Network for Robust Image-Text Retrieval",
  JOURNAL   = CirSysVideo,
  VOLUME    = "33",
  YEAR      = "2023",
  NUMBER    = "7",
  MONTH     = "July",
  PAGES     = "3516--3528",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT213000"
}

@article{bb217950,
  AUTHOR    = "Li, W.R. and Ma, Z.Y. and Shi, J.Q. and Fan, X.P.",
  TITLE     = "The Style Transformer With Common Knowledge Optimization for Image-Text Retrieval",
  JOURNAL   = SPLetters,
  VOLUME    = "30",
  YEAR      = "2023",
  PAGES     = "1197--1201",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT213001"
}

@article{bb217951,
  AUTHOR    = "Zhu, H.G. and Zhang, C.J. and Wei, Y.C. and Huang, S. and Zhao, Y.",
  TITLE     = "{ESA}: External Space Attention Aggregation for Image-Text Retrieval",
  JOURNAL   = CirSysVideo,
  VOLUME    = "33",
  YEAR      = "2023",
  NUMBER    = "10",
  MONTH     = "October",
  PAGES     = "6131--6143",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT213002"
}

@article{bb217952,
  AUTHOR    = "Li, Z. and Guo, C. and Feng, Z. and Hwang, J.N. and Du, Z.T.",
  TITLE     = "Integrating Language Guidance Into Image-Text Matching for Correcting False Negatives",
  JOURNAL   = MultMed,
  VOLUME    = "26",
  YEAR      = "2024",
  PAGES     = "103--116",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT213003"
}

@article{bb217953,
  AUTHOR    = "Zhang, Y. and Ji, Z. and Wang, D. and Pang, Y.W. and Li, X.L.",
  TITLE     = "{USER}: Unified Semantic Enhancement With Momentum Contrast for Image-Text Retrieval",
  JOURNAL   = IP,
  VOLUME    = "33",
  YEAR      = "2024",
  PAGES     = "595--609",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT213004"
}

@article{bb217954,
  AUTHOR    = "Zhuang, J. and Yu, J. and Ding, Y. and Qu, X.Y. and Hu, Y.",
  TITLE     = "Towards Fast and Accurate Image-Text Retrieval With Self-Supervised Fine-Grained Alignment",
  JOURNAL   = MultMed,
  VOLUME    = "26",
  YEAR      = "2024",
  PAGES     = "1361--1372",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT213005"
}

@article{bb217955,
  AUTHOR    = "Liu, X. and He, Y. and Cheung, Y.M. and Xu, X. and Wang, N.N.",
  TITLE     = "Learning Relationship-Enhanced Semantic Graph for Fine-Grained Image-Text Matching",
  JOURNAL   = Cyber,
  VOLUME    = "54",
  YEAR      = "2024",
  NUMBER    = "2",
  MONTH     = "February",
  PAGES     = "948--961",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT213006"
}

@article{bb217956,
  AUTHOR    = "Li, W.H. and Yang, S. and Li, Q. and Li, X. and Liu, A.A.",
  TITLE     = "Commonsense-Guided Semantic and Relational Consistencies for Image-Text Retrieval",
  JOURNAL   = MultMed,
  VOLUME    = "26",
  YEAR      = "2024",
  PAGES     = "1867--1880",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT213007"
}

@article{bb217957,
  AUTHOR    = "Wu, D.Q. and Li, H.H. and Gu, C. and Liu, H. and Xu, C. and Hou, Y. and Guo, L.",
  TITLE     = "Feature First: Advancing Image-Text Retrieval Through Improved Visual Features",
  JOURNAL   = MultMed,
  VOLUME    = "26",
  YEAR      = "2024",
  PAGES     = "3827--3841",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT213008"
}

@article{bb217958,
  AUTHOR    = "Yang, R. and Wang, S. and Gu, Y. and Wang, J.H. and Sun, Y.Z. and Zhang, H. and Liao, Y. and Jiao, L.C.",
  TITLE     = "Continual Learning for Cross-Modal Image-Text Retrieval Based on Domain-Selective Attention",
  JOURNAL   = PR,
  VOLUME    = "149",
  YEAR      = "2024",
  PAGES     = "110273",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT213009"
}

@article{bb217959,
  AUTHOR    = "Pan, R.J. and Yang, H. and Li, C. and Yang, J.H.",
  TITLE     = "Joint Intra \& Inter-Grained Reasoning: A New Look Into Semantic Consistency of Image-Text Retrieval",
  JOURNAL   = MultMed,
  VOLUME    = "26",
  YEAR      = "2024",
  PAGES     = "4912--4925",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT213010"
}

@article{bb217960,
  AUTHOR    = "Zhang, K. and Hu, B. and Zhang, H. and Li, Z. and Mao, Z.D.",
  TITLE     = "Enhanced Semantic Similarity Learning Framework for Image-Text Matching",
  JOURNAL   = CirSysVideo,
  VOLUME    = "34",
  YEAR      = "2024",
  NUMBER    = "4",
  MONTH     = "April",
  PAGES     = "2973--2988",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT213011"
}

@inproceedings{bb217961,
  AUTHOR    = "Fu, Z.R. and Mao, Z.D. and Song, Y. and Zhang, Y.D.",
  TITLE     = "Learning Semantic Relationship among Instances for Image-Text Matching",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "15159--15168",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT213012"
}

@article{bb217962,
  AUTHOR    = "Diao, H. and Zhang, Y. and Gao, S. and Ruan, X. and Lu, H.C.",
  TITLE     = "Deep Boosting Learning: A Brand-New Cooperative Approach for Image-Text Matching",
  JOURNAL   = IP,
  VOLUME    = "33",
  YEAR      = "2024",
  PAGES     = "3341--3352",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT213013"
}

@inproceedings{bb217963,
  AUTHOR    = "Zhang, Y. and Lu, H.C.",
  TITLE     = "Deep Cross-Modal Projection Learning for Image-Text Matching",
  BOOKTITLE = ECCV18,
  YEAR      = "2018",
  PAGES     = "I: 707--723",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT213014"
}

@article{bb217964,
  AUTHOR    = "Cao, M. and Bai, Y. and Cao, Z.Q. and Nie, L.Q. and Zhang, M.",
  TITLE     = "Efficient Image-Text Retrieval via Keyword-Guided Pre-Screening",
  JOURNAL   = CirSysVideo,
  VOLUME    = "34",
  YEAR      = "2024",
  NUMBER    = "6",
  MONTH     = "June",
  PAGES     = "5132--5145",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT213015"
}

@article{bb217965,
  AUTHOR    = "Li, Z. and Zhang, L. and Zhang, K. and Zhang, Y.D. and Mao, Z.D.",
  TITLE     = "Improving Image-Text Matching With Bidirectional Consistency of Cross-Modal Alignment",
  JOURNAL   = CirSysVideo,
  VOLUME    = "34",
  YEAR      = "2024",
  NUMBER    = "7",
  MONTH     = "July",
  PAGES     = "6590--6607",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT213016"
}

@article{bb217966,
  AUTHOR    = "Li, Z. and Zhang, L. and Zhang, K. and Zhang, Y.D. and Mao, Z.D.",
  TITLE     = "Fast, Accurate, and Lightweight Memory-Enhanced Embedding Learning Framework for Image-Text Retrieval",
  JOURNAL   = CirSysVideo,
  VOLUME    = "34",
  YEAR      = "2024",
  NUMBER    = "7",
  MONTH     = "July",
  PAGES     = "6542--6558",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT213017"
}

@article{bb217967,
  AUTHOR    = "Cui, Z. and Hu, Y.L. and Sun, Y.F. and Yin, B.C.",
  TITLE     = "Context-aware relation enhancement and similarity reasoning for image-text retrieval",
  JOURNAL   = IET-CV,
  VOLUME    = "18",
  YEAR      = "2024",
  NUMBER    = "5",
  PAGES     = "652--665",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT213018"
}

@article{bb217968,
  AUTHOR    = "Pan, Z.X. and Mao, Y.C. and Xiong, L. and Pang, T.F. and Ping, P.",
  TITLE     = "{MFAE}: Multimodal Fusion and Alignment for Entity-level Disinformation Detection",
  JOURNAL   = PRL,
  VOLUME    = "184",
  YEAR      = "2024",
  PAGES     = "59--65",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT213019"
}

@article{bb217969,
  AUTHOR    = "Pu, X. and Wang, Z.W. and Yuan, L. and Wu, Y. and Jing, L.P. and Gao, X.B.",
  TITLE     = "{GADNet}: Improving image-text matching via graph-based aggregation and disentanglement",
  JOURNAL   = PR,
  VOLUME    = "157",
  YEAR      = "2025",
  PAGES     = "110900",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT213020"
}

@inproceedings{bb217970,
  AUTHOR    = "Zhang, W. and Xu, X.W. and Tao, Y. and Wang, X.D. and Wang, C. and Wei, Z.M.",
  TITLE     = "Bi-Directional Image-Text Retrieval With Position Attention and Similarity Filtering",
  BOOKTITLE = ICIVC22,
  YEAR      = "2022",
  PAGES     = "635--640",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT213021"
}

@inproceedings{bb217971,
  AUTHOR    = "Li, Z. and Nian, X.H. and Pan, C. and Yang, D. and Xiong, H.Y. and Wang, H.B.",
  TITLE     = "Relation Graph Reasoning for Image-Text Matching",
  BOOKTITLE = ICIVC22,
  YEAR      = "2022",
  PAGES     = "319--324",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT213022"
}

@inproceedings{bb217972,
  AUTHOR    = "Zhang, K. and Mao, Z.D. and Wang, Q. and Zhang, Y.D.",
  TITLE     = "Negative-Aware Attention Framework for Image-Text Matching",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "15640--15649",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT213023"
}

@inproceedings{bb217973,
  AUTHOR    = "Long, S. and Han, S.C. and Wan, X.J. and Poon, J.",
  TITLE     = "{GraDual}: Graph-based Dual-modal Representation for Image-Text Matching",
  BOOKTITLE = WACV22,
  YEAR      = "2022",
  PAGES     = "2463--2472",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT213024"
}

@inproceedings{bb217974,
  AUTHOR    = "Biten, A.F. and Mafla, A. and Gomez, L. and Karatzas, D.",
  TITLE     = "Is An Image Worth Five Sentences? A New Look into Semantics for Image-Text Matching",
  BOOKTITLE = WACV22,
  YEAR      = "2022",
  PAGES     = "2483--2492",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT213025"
}

@inproceedings{bb217975,
  AUTHOR    = "Mithun, N.C. and Pasricha, R. and Papalexakis, E. and Roy Chowdhury, A.K.",
  TITLE     = "Webly Supervised Image-Text Embedding with Noisy Tag Refinement",
  BOOKTITLE = ICPR21,
  YEAR      = "2021",
  PAGES     = "7454--7461",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT213026"
}

@inproceedings{bb217976,
  AUTHOR    = "Chen, J.A. and Zhang, L. and Wang, Q. and Bai, C. and Kpalma, K.",
  TITLE     = "Intra-Modal Constraint Loss for Image-Text Retrieval",
  BOOKTITLE = ICIP22,
  YEAR      = "2022",
  PAGES     = "4023--4027",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT213027"
}

@inproceedings{bb217977,
  AUTHOR    = "Liu, Y. and Wang, H.Q. and Meng, F.Y. and Liu, M.Y. and Liu, H.",
  TITLE     = "Attend, Correct and Focus: A Bidirectional Correct Attention Network for Image-Text Matching",
  BOOKTITLE = ICIP21,
  YEAR      = "2021",
  PAGES     = "2673--2677",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT213028"
}

@inproceedings{bb217978,
  AUTHOR    = "Yang, S.T. and Huang, K.H. and Howe, B.",
  TITLE     = "{JECL}: Joint Embedding and Cluster Learning for Image-Text Pairs",
  BOOKTITLE = ICPR21,
  YEAR      = "2021",
  PAGES     = "8344--8351",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT213029"
}

@inproceedings{bb217979,
  AUTHOR    = "Mikriukov, G. and Ravanbakhsh, M. and Demir, B.",
  TITLE     = "An Unsupervised Cross-Modal Hashing Method Robust to Noisy Training Image-Text Correspondences in Remote Sensing",
  BOOKTITLE = ICIP22,
  YEAR      = "2022",
  PAGES     = "2556--2560",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT213030"
}

@inproceedings{bb217980,
  AUTHOR    = "Anwaar, M.U. and Labintcev, E. and Kleinsteuber, M.",
  TITLE     = "Compositional Learning of Image-Text Query for Image Retrieval",
  BOOKTITLE = WACV21,
  YEAR      = "2021",
  PAGES     = "1139--1148",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT213031"
}

@inproceedings{bb217981,
  AUTHOR    = "Messina, N. and Falchi, F. and Esuli, A. and Amato, G.",
  TITLE     = "Transformer Reasoning Network for Image-Text Matching and Retrieval",
  BOOKTITLE = ICPR21,
  YEAR      = "2021",
  PAGES     = "5222--5229",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT213032"
}

@inproceedings{bb217982,
  AUTHOR    = "Zhang, Q. and Lei, Z. and Zhang, Z.X. and Li, S.Z.",
  TITLE     = "Context-Aware Attention Network for Image-Text Retrieval",
  BOOKTITLE = CVPR20,
  YEAR      = "2020",
  PAGES     = "3533--3542",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT213033"
}

@inproceedings{bb217983,
  AUTHOR    = "Chen, Y.C. and Li, L.J. and Yu, L.C. and El Kholy, A. and Ahmed, F. and Gan, Z. and Cheng, Y. and Liu, J.J.",
  TITLE     = "Uniter: Universal Image-Text Representation Learning",
  BOOKTITLE = ECCV20,
  YEAR      = "2020",
  PAGES     = "XXX: 104--120",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT213034"
}

@inproceedings{bb217984,
  AUTHOR    = "Wang, H.R. and Zhang, Y. and Ji, Z. and Pang, Y.W. and Ma, L.",
  TITLE     = "Consensus-aware Visual-semantic Embedding for Image-Text Matching",
  BOOKTITLE = ECCV20,
  YEAR      = "2020",
  PAGES     = "XXIV:18--34",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT213035"
}

@inproceedings{bb217985,
  AUTHOR    = "Chen, T.L. and Deng, J.J. and Luo, J.B.",
  TITLE     = "Adaptive Offline Quintuplet Loss for Image-text Matching",
  BOOKTITLE = ECCV20,
  YEAR      = "2020",
  PAGES     = "XIII:549--565",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT213036"
}

@inproceedings{bb217986,
  AUTHOR    = "Lee, K.H. and Chen, X. and Hua, G. and Hu, H.D. and He, X.D.",
  TITLE     = "Stacked Cross Attention for Image-Text Matching",
  BOOKTITLE = ECCV18,
  YEAR      = "2018",
  PAGES     = "II: 212--228",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT213037"
}

@inproceedings{bb217987,
  AUTHOR    = "Plummer, B.A. and Kordas, P. and Kiapour, M.H. and Zheng, S. and Piramuthu, R. and Lazebnik, S.",
  TITLE     = "Conditional Image-Text Embedding Networks",
  BOOKTITLE = ECCV18,
  YEAR      = "2018",
  PAGES     = "XII: 258--274",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803imt4.html#TT213038"
}

% ---- Entries whose BIBSOURCE page is applicat803vgr2.html ----

@article{bb217988,
  AUTHOR    = "Liang, J.W. and Jiang, L. and Cao, L.L. and Kalantidis, Y. and Li, L.J. and Hauptmann, A.G.",
  TITLE     = "Focal Visual-Text Attention for Memex Question Answering",
  JOURNAL   = PAMI,
  VOLUME    = "41",
  YEAR      = "2019",
  NUMBER    = "8",
  MONTH     = "August",
  PAGES     = "1893--1908",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT213040"
}

@inproceedings{bb217989,
  AUTHOR    = "Liang, J.W. and Jiang, L. and Cao, L.L. and Li, L.J. and Hauptmann, A.G.",
  TITLE     = "Focal Visual-Text Attention for Visual Question Answering",
  BOOKTITLE = CVPR18,
  YEAR      = "2018",
  PAGES     = "6135--6143",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT213041"
}

@article{bb217990,
  AUTHOR    = "Riquelme, F. and de Goyeneche, A. and Zhang, Y.D. and Niebles, J.C. and Soto, A.",
  TITLE     = "Explaining {VQA} predictions using visual grounding and a knowledge base",
  JOURNAL   = IVC,
  VOLUME    = "101",
  YEAR      = "2020",
  PAGES     = "103968",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT213042"
}

@article{bb217991,
  AUTHOR    = "Yang, Z.Y. and Kumar, T. and Chen, T.L. and Su, J.S. and Luo, J.B.",
  TITLE     = "Grounding-Tracking-Integration",
  JOURNAL   = CirSysVideo,
  VOLUME    = "31",
  YEAR      = "2021",
  NUMBER    = "9",
  MONTH     = "September",
  PAGES     = "3433--3443",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT213043"
}

@article{bb217992,
  AUTHOR    = "Zhang, W.X. and Ma, C. and Wu, Q. and Yang, X.K.",
  TITLE     = "Language-Guided Navigation via Cross-Modal Grounding and Alternate Adversarial Learning",
  JOURNAL   = CirSysVideo,
  VOLUME    = "31",
  YEAR      = "2021",
  NUMBER    = "9",
  MONTH     = "September",
  PAGES     = "3469--3481",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT213044"
}

@article{bb217993,
  AUTHOR    = "Zhai, S.L. and Guo, G.B. and Yuan, F.J. and Liu, Y. and Wang, X.W.",
  TITLE     = "{VSE-fs}: Fast Full-Sample Visual Semantic Embedding",
  JOURNAL   = IEEE_Int_Sys,
  VOLUME    = "36",
  YEAR      = "2021",
  NUMBER    = "4",
  MONTH     = "July",
  PAGES     = "3--12",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT213045"
}

@article{bb217994,
  AUTHOR    = "Bargal, S.A. and Zunino, A. and Petsiuk, V. and Zhang, J.M. and Saenko, K. and Murino, V. and Sclaroff, S.",
  TITLE     = "Guided Zoom: Zooming into Network Evidence to Refine Fine-Grained Model Decisions",
  JOURNAL   = PAMI,
  VOLUME    = "43",
  YEAR      = "2021",
  NUMBER    = "11",
  MONTH     = "November",
  PAGES     = "4196--4202",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT213046"
}

@article{bb217995,
  AUTHOR    = "Yang, W.F. and Zhang, T.Z. and Zhang, Y.D. and Wu, F.",
  TITLE     = "Local Correspondence Network for Weakly Supervised Temporal Sentence Grounding",
  JOURNAL   = IP,
  VOLUME    = "30",
  YEAR      = "2021",
  PAGES     = "3252--3262",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT213047"
}

@inproceedings{bb217996,
  AUTHOR    = "Luo, W. and Zhang, T.Z. and Yang, W.F. and Liu, J.G. and Mei, T. and Wu, F. and Zhang, Y.D.",
  TITLE     = "Action Unit Memory Network for Weakly Supervised Temporal Action Localization",
  BOOKTITLE = CVPR21,
  YEAR      = "2021",
  PAGES     = "9964--9974",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT213048"
}

@article{bb217997,
  AUTHOR    = "Hong, R.C. and Liu, D. and Mo, X.Y. and He, X.N. and Zhang, H.W.",
  TITLE     = "Learning to Compose and Reason with Language Tree Structures for Visual Grounding",
  JOURNAL   = PAMI,
  VOLUME    = "44",
  YEAR      = "2022",
  NUMBER    = "2",
  MONTH     = "February",
  PAGES     = "684--696",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT213049"
}

@inproceedings{bb217998,
  AUTHOR    = "Tang, K.H. and Zhang, H.W. and Wu, B.Y. and Luo, W.H. and Liu, W.",
  TITLE     = "Learning to Compose Dynamic Tree Structures for Visual Contexts",
  BOOKTITLE = CVPR19,
  YEAR      = "2019",
  PAGES     = "6612--6621",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT213050"
}

@article{bb217999,
  AUTHOR    = "Bin, Y. and Ding, Y.J. and Peng, B. and Peng, L. and Yang, Y. and Chua, T.S.",
  TITLE     = "Entity Slot Filling for Visual Captioning",
  JOURNAL   = CirSysVideo,
  VOLUME    = "32",
  YEAR      = "2022",
  NUMBER    = "1",
  MONTH     = "January",
  PAGES     = "52--62",
  BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT213051"
}