@article{bb241900,
  author = {Koo, H. and Shin, J. and Kim, E.},
  title = {Dual-branch scale disentanglement for text-video retrieval},
  journal = PRL,
  volume = {196},
  year = {2025},
  pages = {296-302},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT236816},
}
@article{bb241901,
  author = {Zhou, J. and Wang, M.},
  title = {Unified learning for image-text alignment via multi-scale feature fusion},
  journal = CVIU,
  volume = {260},
  year = {2025},
  pages = {104468},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT236817},
}
@article{bb241902,
  author = {Wen, J. and Chen, Y.F. and Shi, R.Q. and Ji, W. and Yang, M.L. and Gao, D.F. and Yuan, J.S. and Zimmermann, R.},
  title = {HOVER: Hyperbolic Video-Text Retrieval},
  journal = IP,
  volume = {34},
  year = {2025},
  pages = {6192-6203},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT236818},
}
@article{bb241903,
  author = {Fang, J.Y. and Zhu, B. and Yuan, J.L. and Chen, Y.Y. and Tang, M. and Wang, J.Q.},
  title = {AMITA: Attribute-Guided Masked Image-Text Alignment for Multi-Label Image Representation},
  journal = CirSysVideo,
  volume = {35},
  number = {11},
  month = {November},
  year = {2025},
  pages = {11432-11447},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT236819},
}
@article{bb241904,
  author = {Ji, L.L. and Liu, L.},
  title = {Multi-Scale Feature Fusion Based on Piecewise Polynomial Activation Function for Image-Text Matching},
  journal = CirSysVideo,
  volume = {35},
  number = {11},
  month = {November},
  year = {2025},
  pages = {11627-11640},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT236820},
}
@article{bb241905,
  author = {Chen, R. and Su, T. and Wang, H. and Ni, Z.K.},
  title = {Similarity Shuffled Criss-Cross Transformer With Angle Loss for Image-Text Matching},
  journal = MultMed,
  volume = {27},
  year = {2025},
  pages = {9723-9734},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT236821},
}
@article{bb241906,
  author = {Chen, D. and Wang, Y.T. and Xie, Y.Z. and Chen, S.Y. and Peng, W.L. and Tang, M. and Fang, M. and Chen, C.L.P. and Li, P. and Zhang, W.},
  title = {Intra-modal consistency for image-text retrieval through soft-label distillation},
  journal = PR,
  volume = {173},
  year = {2026},
  pages = {112817},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT236822},
}
@article{bb241907,
  author = {Shi, Z.X. and Ding, Y. and Dong, J.Y. and Zhang, T.Z.},
  title = {Beyond One and Two Tower: Cross-Modal Consensus Learning for Image-Text Retrieval},
  journal = CirSysVideo,
  volume = {36},
  number = {2},
  month = {February},
  year = {2026},
  pages = {2581-2593},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT236823},
}
@article{bb241908,
  author = {Wang, H.C. and Liu, L. and Zhang, H.X. and Zhu, L. and Chang, X.J. and Du, H.},
  title = {VisualRAG: Knowledge-Guided Retrieval Augmentation for Image-Text Matching},
  journal = CirSysVideo,
  volume = {36},
  number = {1},
  month = {January},
  year = {2026},
  pages = {1234-1248},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT236824},
}
@article{bb241909,
  author = {Zhang, D.L. and Wang, Z.W. and Wu, X.J. and Kittler, J.V.},
  title = {HACG: Leveraging Hierarchical Alignment and Caption Generation for Text-Video Retrieval},
  journal = IJCV,
  volume = {134},
  number = {1},
  month = {January},
  year = {2026},
  pages = {93},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT236825},
}
@inproceedings{bb241910,
  author = {Vongala, M.R. and Srivastava, S. and Kosecka, J.},
  title = {Compositional Image-Text Matching and Retrieval by Grounding Entities},
  booktitle = {MULA25},
  year = {2025},
  pages = {241-250},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT236826},
}
@inproceedings{bb241911,
  author = {Zhang, Z.C. and Li, X.Y. and Sun, W. and Zhang, Z.C. and Li, Y.H. and Liu, X.H. and Zhai, G.T.},
  title = {Leveraging Multimodal Large Language Models for Joint Discrete and Continuous Evaluation in Text-to-Image Alignment},
  booktitle = NTIRE25,
  year = {2025},
  pages = {968-977},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT236827},
}
@inproceedings{bb241912,
  author = {Zhang, Z.J. and Zheng, X.H. and Wu, X.C. and Peng, C. and Cao, X.Z.},
  title = {Tokenfocus-VQA: Enhancing Text-to-Image Alignment with Position-Aware Focus and Multi-Perspective Aggregations on LVLMs},
  booktitle = NTIRE25,
  year = {2025},
  pages = {1270-1279},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT236828},
}
@inproceedings{bb241913,
  author = {Yue, X. and Sun, J. and Lu, J. and Yao, L.C. and Xia, F. and Wang, T.Y. and Rao, F.Y. and Lyu, J. and Deng, Y.},
  title = {Instruction-Augmented Multimodal Alignment for Image-Text and Element Matching},
  booktitle = NTIRE25,
  year = {2025},
  pages = {1370-1379},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT236829},
}
@inproceedings{bb241914,
  author = {Lai, H. and Xiong, G.X. and Mai, H.Y. and Liu, X. and Zhang, T.Z.},
  title = {Rethinking Noisy Video-Text Retrieval via Relation-aware Alignment},
  booktitle = CVPR25,
  year = {2025},
  pages = {9231-9241},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT236830},
}
@inproceedings{bb241915,
  author = {Kim, D. and Piergiovanni, A. and Mallya, G. and Angelova, A.},
  title = {VideoComp: Advancing Fine-Grained Compositional and Temporal Alignment in Video-Text Models},
  booktitle = CVPR25,
  year = {2025},
  pages = {29060-29070},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT236831},
}
@inproceedings{bb241916,
  author = {Shen, L. and Gong, G.Q. and Hao, T.X. and He, T. and Zhang, Y.F. and Liu, P.Z. and Zhao, S.C. and Han, J.G. and Ding, G.},
  title = {DiscoVLA: Discrepancy Reduction in Vision, Language, and Alignment for Parameter-Efficient Video-Text Retrieval},
  booktitle = CVPR25,
  year = {2025},
  pages = {19702-19712},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT236832},
}
@inproceedings{bb241917,
  author = {Jin, Z.X. and Xu, X.W. and Wang, X.D.},
  title = {MADA: Multi-Window Attention and Dual-Alignment for Image-Text Retrieval},
  booktitle = ICIVC24,
  year = {2024},
  pages = {240-245},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT236833},
}
@inproceedings{bb241918,
  author = {Xie, C.W. and Sun, S.Y. and Zhao, L.M. and Li, P. and Ma, S. and Zheng, Y.},
  title = {Fuseteacher: Modality-fused Encoders are Strong Vision Supervisors},
  booktitle = ECCV24,
  year = {2024},
  pages = {XLVIII: 287-304},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT236834},
}
@inproceedings{bb241919,
  author = {Kim, W. and Chun, S. and Kim, T. and Han, D.Y. and Yun, S.},
  title = {HYPE: Hyperbolic Entailment Filtering for Underspecified Images and Texts},
  booktitle = ECCV24,
  year = {2024},
  pages = {XL: 247-265},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT236835},
}
@inproceedings{bb241920,
  author = {Sogi, N. and Shibata, T. and Terao, M.},
  title = {Object-aware Query Perturbation for Cross-modal Image-text Retrieval},
  booktitle = ECCV24,
  year = {2024},
  pages = {LXXIX: 447-464},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT236836},
}
@inproceedings{bb241921,
  author = {Alper, M. and Averbuch-Elor, H.},
  title = {Emergent Visual-semantic Hierarchies in Image-text Representations},
  booktitle = ECCV24,
  year = {2024},
  pages = {LII: 220-238},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT236837},
}
@inproceedings{bb241922,
  author = {Gordon, B. and Bitton, Y. and Shafir, Y. and Garg, R. and Chen, X. and Lischinski, D. and Cohen-Or, D. and Szpektor, I.},
  title = {Mismatch Quest: Visual and Textual Feedback for Image-Text Misalignment},
  booktitle = ECCV24,
  year = {2024},
  pages = {LVII: 310-328},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT236838},
}
@inproceedings{bb241923,
  author = {Hua, H. and Shi, J. and Kafle, K. and Jenni, S. and Zhang, D. and Collomosse, J. and Cohen, S. and Luo, J.B.},
  title = {Finematch: Aspect-based Fine-grained Image and Text Mismatch Detection and Correction},
  booktitle = ECCV24,
  year = {2024},
  pages = {IX: 474-491},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT236839},
}
@inproceedings{bb241924,
  author = {Li, Y.H. and Liu, H.T. and Cai, M. and Li, Y.J. and Shechtman, E. and Lin, Z. and Lee, Y.J. and Singh, K.K.},
  title = {Removing Distributional Discrepancies in Captions Improves Image-Text Alignment},
  booktitle = ECCV24,
  year = {2024},
  pages = {XXI: 405-422},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT236840},
}
@inproceedings{bb241925,
  author = {Ma, W. and Li, K. and Jiang, Z. and Meshry, M. and Liu, Q.H. and Wang, H.Y. and Hane, C. and Yuille, A.L.},
  title = {Rethinking Video-text Understanding: Retrieval from Counterfactually Augmented Data},
  booktitle = ECCV24,
  year = {2024},
  pages = {XIII: 254-269},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT236841},
}
@inproceedings{bb241926,
  author = {Zhang, W. and Xu, X.W. and Tao, Y. and Wang, X.D. and Wang, C.L. and Wei, Z.M.},
  title = {Bi-Directional Image-Text Retrieval With Position Attention and Similarity Filtering},
  booktitle = ICIVC22,
  year = {2022},
  pages = {635-640},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT236842},
}
@inproceedings{bb241927,
  author = {Li, Z. and Nian, X.H. and Pan, C. and Yang, D. and Xiong, H.Y. and Wang, H.B.},
  title = {Relation Graph Reasoning for Image-Text Matching},
  booktitle = ICIVC22,
  year = {2022},
  pages = {319-324},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT236843},
}
@inproceedings{bb241928,
  author = {Zhang, K. and Mao, Z.D. and Wang, Q. and Zhang, Y.D.},
  title = {Negative-Aware Attention Framework for Image-Text Matching},
  booktitle = CVPR22,
  year = {2022},
  pages = {15640-15649},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT236844},
}
@inproceedings{bb241929,
  author = {Long, S. and Han, S.C. and Wan, X.J. and Poon, J.},
  title = {GraDual: Graph-based Dual-modal Representation for Image-Text Matching},
  booktitle = WACV22,
  year = {2022},
  pages = {2463-2472},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT236845},
}
@inproceedings{bb241930,
  author = {Biten, A.F. and Mafla, A. and Gomez, L. and Karatzas, D.},
  title = {Is An Image Worth Five Sentences? A New Look into Semantics for Image-Text Matching},
  booktitle = WACV22,
  year = {2022},
  pages = {2483-2492},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT236846},
}
@inproceedings{bb241931,
  author = {Mithun, N.C. and Pasricha, R. and Papalexakis, E. and Roy-Chowdhury, A.K.},
  title = {Webly Supervised Image-Text Embedding with Noisy Tag Refinement},
  booktitle = ICPR21,
  year = {2021},
  pages = {7454-7461},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT236847},
}
@inproceedings{bb241932,
  author = {Chen, J.A. and Zhang, L. and Wang, Q. and Bai, C. and Kpalma, K.},
  title = {Intra-Modal Constraint Loss for Image-Text Retrieval},
  booktitle = ICIP22,
  year = {2022},
  pages = {4023-4027},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT236848},
}
@inproceedings{bb241933,
  author = {Liu, Y. and Wang, H.Q. and Meng, F.Y. and Liu, M.Y. and Liu, H.},
  title = {Attend, Correct and Focus: A Bidirectional Correct Attention Network for Image-Text Matching},
  booktitle = ICIP21,
  year = {2021},
  pages = {2673-2677},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT236849},
}
@inproceedings{bb241934,
  author = {Yang, S.T. and Huang, K.H. and Howe, B.},
  title = {JECL: Joint Embedding and Cluster Learning for Image-Text Pairs},
  booktitle = ICPR21,
  year = {2021},
  pages = {8344-8351},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT236850},
}
@inproceedings{bb241935,
  author = {Mikriukov, G. and Ravanbakhsh, M. and Demir, B.},
  title = {An Unsupervised Cross-Modal Hashing Method Robust to Noisy Training Image-Text Correspondences in Remote Sensing},
  booktitle = ICIP22,
  year = {2022},
  pages = {2556-2560},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT236851},
}
@inproceedings{bb241936,
  author = {Anwaar, M.U. and Labintcev, E. and Kleinsteuber, M.},
  title = {Compositional Learning of Image-Text Query for Image Retrieval},
  booktitle = WACV21,
  year = {2021},
  pages = {1139-1148},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT236852},
}
@inproceedings{bb241937,
  author = {Messina, N. and Falchi, F. and Esuli, A. and Amato, G.},
  title = {Transformer Reasoning Network for Image-Text Matching and Retrieval},
  booktitle = ICPR21,
  year = {2021},
  pages = {5222-5229},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT236853},
}
@inproceedings{bb241938,
  author = {Zhang, Q. and Lei, Z. and Zhang, Z.X. and Li, S.Z.},
  title = {Context-Aware Attention Network for Image-Text Retrieval},
  booktitle = CVPR20,
  year = {2020},
  pages = {3533-3542},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT236854},
}
@inproceedings{bb241939,
  author = {Chen, Y.C. and Li, L.J. and Yu, L.C. and El Kholy, A. and Ahmed, F. and Gan, Z. and Cheng, Y. and Liu, J.J.},
  title = {Uniter: Universal Image-Text Representation Learning},
  booktitle = ECCV20,
  year = {2020},
  pages = {XXX: 104-120},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT236855},
}
@inproceedings{bb241940,
  author = {Wang, H.R. and Zhang, Y. and Ji, Z. and Pang, Y.W. and Ma, L.},
  title = {Consensus-aware Visual-semantic Embedding for Image-Text Matching},
  booktitle = ECCV20,
  year = {2020},
  pages = {XXIV: 18-34},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT236856},
}
@inproceedings{bb241941,
  author = {Chen, T.L. and Deng, J.J. and Luo, J.B.},
  title = {Adaptive Offline Quintuplet Loss for Image-text Matching},
  booktitle = ECCV20,
  year = {2020},
  pages = {XIII: 549-565},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT236857},
}
@inproceedings{bb241942,
  author = {Lee, K.H. and Chen, X. and Hua, G. and Hu, H.D. and He, X.D.},
  title = {Stacked Cross Attention for Image-Text Matching},
  booktitle = ECCV18,
  year = {2018},
  pages = {II: 212-228},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT236858},
}
@inproceedings{bb241943,
  author = {Plummer, B.A. and Kordas, P. and Kiapour, M.H. and Zheng, S. and Piramuthu, R. and Lazebnik, S.},
  title = {Conditional Image-Text Embedding Networks},
  booktitle = ECCV18,
  year = {2018},
  pages = {XII: 258-274},
  bibsource = {http://www.visionbib.com/bibliography/applicat803imt4.html#TT236859},
}
@article{bb241944,
  author = {Yang, Z.Y. and Kumar, T. and Chen, T.L. and Su, J.S. and Luo, J.B.},
  title = {Grounding-Tracking-Integration},
  journal = CirSysVideo,
  volume = {31},
  number = {9},
  month = {September},
  year = {2021},
  pages = {3433-3443},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT236860},
}
@article{bb241945,
  author = {Zhang, W.X. and Ma, C. and Wu, Q. and Yang, X.K.},
  title = {Language-Guided Navigation via Cross-Modal Grounding and Alternate Adversarial Learning},
  journal = CirSysVideo,
  volume = {31},
  number = {9},
  month = {September},
  year = {2021},
  pages = {3469-3481},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT236861},
}
@article{bb241946,
  author = {Zhai, S.L. and Guo, G.B. and Yuan, F.J. and Liu, Y. and Wang, X.W.},
  title = {VSE-fs: Fast Full-Sample Visual Semantic Embedding},
  journal = IEEE_Int_Sys,
  volume = {36},
  number = {4},
  month = {July},
  year = {2021},
  pages = {3-12},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT236862},
}
@article{bb241947,
  author = {Bargal, S.A. and Zunino, A. and Petsiuk, V. and Zhang, J.M. and Saenko, K. and Murino, V. and Sclaroff, S.},
  title = {Guided Zoom: Zooming into Network Evidence to Refine Fine-Grained Model Decisions},
  journal = PAMI,
  volume = {43},
  number = {11},
  month = {November},
  year = {2021},
  pages = {4196-4202},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT236863},
}
@article{bb241948,
  author = {Hong, R.C. and Liu, D. and Mo, X.Y. and He, X.N. and Zhang, H.W.},
  title = {Learning to Compose and Reason with Language Tree Structures for Visual Grounding},
  journal = PAMI,
  volume = {44},
  number = {2},
  month = {February},
  year = {2022},
  pages = {684-696},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT236864},
}
@inproceedings{bb241949,
  author = {Tang, K.H. and Zhang, H.W. and Wu, B.Y. and Luo, W.H. and Liu, W.},
  title = {Learning to Compose Dynamic Tree Structures for Visual Contexts},
  booktitle = CVPR19,
  year = {2019},
  pages = {6612-6621},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT236865},
}
@article{bb241950,
  author = {Bin, Y. and Ding, Y.J. and Peng, B. and Peng, L. and Yang, Y. and Chua, T.S.},
  title = {Entity Slot Filling for Visual Captioning},
  journal = CirSysVideo,
  volume = {32},
  number = {1},
  month = {January},
  year = {2022},
  pages = {52-62},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT236866},
}
@article{bb241951,
  author = {Chu, C. and Oliveira, V. and Virgo, F.G. and Otani, M. and Garcia, N. and Nakashima, Y.},
  title = {The semantic typology of visually grounded paraphrases},
  journal = CVIU,
  volume = {215},
  year = {2022},
  pages = {103333},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT236867},
}
@article{bb241952,
  author = {Deng, C.R. and Wu, Q. and Wu, Q.Y. and Hu, F.Y. and Lyu, F. and Tan, M.K.},
  title = {Visual Grounding Via Accumulated Attention},
  journal = PAMI,
  volume = {44},
  number = {3},
  month = {March},
  year = {2022},
  pages = {1670-1684},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT236868},
}
@inproceedings{bb241953,
  author = {Deng, C.R. and Wu, Q. and Wu, Q.Y. and Hu, F.Y. and Lyu, F. and Tan, M.K.},
  title = {Visual Grounding Via Accumulated Attention},
  booktitle = CVPR18,
  year = {2018},
  pages = {7746-7755},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT236868},
}
@article{bb241954,
  author = {Yu, X.T. and Zhang, H.M. and Hong, R.X. and Song, Y.Q. and Zhang, C.S.},
  title = {VD-PCR: Improving visual dialog with pronoun coreference resolution},
  journal = PR,
  volume = {125},
  year = {2022},
  pages = {108540},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT236869},
}
@article{bb241955,
  author = {Yuan, Y.T. and Ma, L. and Wang, J.W. and Liu, W. and Zhu, W.W.},
  title = {Semantic Conditioned Dynamic Modulation for Temporal Sentence Grounding in Videos},
  journal = PAMI,
  volume = {44},
  number = {5},
  month = {May},
  year = {2022},
  pages = {2725-2741},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT236870},
}
@article{bb241956,
  author = {He, S. and Yang, X.F. and Lin, G.S.},
  title = {Learning language to symbol and language to vision mapping for visual grounding},
  journal = IVC,
  volume = {122},
  year = {2022},
  pages = {104451},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT236871},
}
@article{bb241957,
  author = {Jiang, W.H. and Zhu, M. and Fang, Y.M. and Shi, G.M. and Zhao, X.W. and Liu, Y.},
  title = {Visual Cluster Grounding for Image Captioning},
  journal = IP,
  volume = {31},
  year = {2022},
  pages = {3920-3934},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT236872},
}
@article{bb241958,
  author = {Liao, Y. and Zhang, A. and Chen, Z.Y. and Hui, T.R. and Liu, S.},
  title = {Progressive Language-Customized Visual Feature Learning for One-Stage Visual Grounding},
  journal = IP,
  volume = {31},
  year = {2022},
  pages = {4266-4277},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT236873},
}
@article{bb241959,
  author = {Ding, X.P. and Wang, N.N. and Zhang, S.W. and Huang, Z.Y. and Li, X.M. and Tang, M.Q. and Liu, T.L. and Gao, X.B.},
  title = {Exploring Language Hierarchy for Video Grounding},
  journal = IP,
  volume = {31},
  year = {2022},
  pages = {4693-4706},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT236874},
}
@article{bb241960,
  author = {Xu, Z. and Chen, D. and Wei, K. and Deng, C. and Xue, H.},
  title = {HiSA: Hierarchically Semantic Associating for Video Temporal Grounding},
  journal = IP,
  volume = {31},
  year = {2022},
  pages = {5178-5188},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT236875},
}
@article{bb241961,
  author = {Gao, J.L. and Sun, X. and Ghanem, B. and Zhou, X. and Ge, S.M.},
  title = {Efficient Video Grounding With Which-Where Reading Comprehension},
  journal = CirSysVideo,
  volume = {32},
  number = {10},
  month = {October},
  year = {2022},
  pages = {6900-6913},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT236876},
}
@article{bb241962,
  author = {Zhou, H. and Zhang, C.Y. and Luo, Y. and Hu, C.P. and Zhang, W.J.},
  title = {Thinking Inside Uncertainty: Interest Moment Perception for Diverse Temporal Grounding},
  journal = CirSysVideo,
  volume = {32},
  number = {10},
  month = {October},
  year = {2022},
  pages = {7190-7203},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT236877},
}
@article{bb241963,
  author = {Tang, Z.H. and Liao, Y. and Liu, S. and Li, G.B. and Jin, X.J. and Jiang, H.X. and Yu, Q. and Xu, D.},
  title = {Human-Centric Spatio-Temporal Video Grounding With Visual Transformers},
  journal = CirSysVideo,
  volume = {32},
  number = {12},
  month = {December},
  year = {2022},
  pages = {8238-8249},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT236878},
}
@article{bb241964,
  author = {Wang, W. and Gao, J.Y. and Xu, C.S.},
  title = {Weakly-Supervised Video Object Grounding via Causal Intervention},
  journal = PAMI,
  volume = {45},
  number = {3},
  month = {March},
  year = {2023},
  pages = {3933-3948},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT236879},
}
@article{bb241965,
  author = {Wang, W. and Gao, J.Y. and Xu, C.S.},
  title = {Weakly-Supervised Video Object Grounding via Learning Uni-Modal Associations},
  journal = MultMed,
  volume = {25},
  year = {2023},
  pages = {6329-6340},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT236880},
}
@article{bb241966,
  author = {Nayyeri, M. and Xu, C.J. and Alam, M.M. and Lehmann, J. and Yazdi, H.S.},
  title = {LogicENN: A Neural Based Knowledge Graphs Embedding Model With Logical Rules},
  journal = PAMI,
  volume = {45},
  number = {6},
  month = {June},
  year = {2023},
  pages = {7050-7062},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT236881},
}
@article{bb241967,
  author = {Chen, T.B. and Wang, W. and Han, K. and Xu, H.J.},
  title = {SaGCN: Semantic-Aware Graph Calibration Network for Temporal Sentence Grounding},
  journal = CirSysVideo,
  volume = {33},
  number = {6},
  month = {June},
  year = {2023},
  pages = {3003-3016},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT236882},
}
@article{bb241968,
  author = {Zhang, H. and Sun, A. and Jing, W. and Zhou, J.T.Y.},
  title = {Temporal Sentence Grounding in Videos: A Survey and Future Directions},
  journal = PAMI,
  volume = {45},
  number = {8},
  month = {August},
  year = {2023},
  pages = {10443-10465},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT236883},
}
@article{bb241969,
  author = {Deng, J.J. and Yang, Z.Y. and Liu, D. and Chen, T.L. and Zhou, W.G. and Zhang, Y. and Li, H.Q. and Ouyang, W.L.},
  title = {TransVG++: End-to-End Visual Grounding With Language Conditioned Vision Transformer},
  journal = PAMI,
  volume = {45},
  number = {11},
  month = {November},
  year = {2023},
  pages = {13636-13652},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT236884},
}
@inproceedings{bb241970,
  author = {Deng, J.J. and Yang, Z.Y. and Chen, T.L. and Zhou, W.G. and Li, H.Q.},
  title = {TransVG: End-to-End Visual Grounding with Transformers},
  booktitle = ICCV21,
  year = {2021},
  pages = {1749-1759},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT236885},
}
@article{bb241971,
  author = {Li, J.C. and Tang, S.L. and Zhu, L.C. and Zhang, W.Q. and Yang, Y. and Chua, T.S. and Wu, F. and Zhuang, Y.T.},
  title = {Variational Cross-Graph Reasoning and Adaptive Structured Semantics Learning for Compositional Temporal Grounding},
  journal = PAMI,
  volume = {45},
  number = {10},
  month = {October},
  year = {2023},
  pages = {12601-12617},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT236886},
}
@inproceedings{bb241972,
  author = {Li, J.C. and Xie, J.L. and Qian, L. and Zhu, L.C. and Tang, S.L. and Wu, F. and Yang, Y. and Zhuang, Y.T. and Wang, X.E.},
  title = {Compositional Temporal Grounding with Structured Variational Cross-Graph Correspondence Learning},
  booktitle = CVPR22,
  year = {2022},
  pages = {3022-3031},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT236887},
}
@article{bb241973,
  author = {Gonzalez, C. and Ayobi, N. and Hernandez, I. and Pont-Tuset, J. and Arbelaez, P.},
  title = {PiGLET: Pixel-Level Grounding of Language Expressions With Transformers},
  journal = PAMI,
  volume = {45},
  number = {10},
  month = {October},
  year = {2023},
  pages = {12206-12221},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT236888},
}
@article{bb241974,
  author = {Zhang, R.S. and Wang, C. and Liu, C.L.},
  title = {Cycle-Consistent Weakly Supervised Visual Grounding With Individual and Contextual Representations},
  journal = IP,
  volume = {32},
  year = {2023},
  pages = {5167-5180},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT236889},
}
@article{bb241975,
  author = {Wang, Y. and Su, Y.T. and Li, W.H. and Xiao, J. and Li, X.Y. and Liu, A.A.},
  title = {Dual-Path Rare Content Enhancement Network for Image and Text Matching},
  journal = CirSysVideo,
  volume = {33},
  number = {10},
  month = {October},
  year = {2023},
  pages = {6144-6158},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT236890},
}
@article{bb241976,
  author = {Xu, Z. and Wei, K. and Yang, X. and Deng, C.},
  title = {Point-Supervised Video Temporal Grounding},
  journal = MultMed,
  volume = {25},
  year = {2023},
  pages = {6121-6131},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT236891},
}
@article{bb241977,
  author = {Luo, F. and Chen, S.X. and Chen, J.J. and Wu, Z.X. and Jiang, Y.G.},
  title = {Self-Supervised Learning for Semi-Supervised Temporal Language Grounding},
  journal = MultMed,
  volume = {25},
  year = {2023},
  pages = {7747-7757},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT236892},
}
@article{bb241978,
  author = {Liu, D.Z. and Fang, X. and Hu, W. and Zhou, P.},
  title = {Exploring Optical-Flow-Guided Motion and Detection-Based Appearance for Temporal Sentence Grounding},
  journal = MultMed,
  volume = {25},
  year = {2023},
  pages = {8539-8553},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT236893},
}
@article{bb241979,
  author = {Yang, X.F. and Liu, F. and Lin, G.S.},
  title = {Effective End-to-End Vision Language Pretraining With Semantic Visual Loss},
  journal = MultMed,
  volume = {25},
  year = {2023},
  pages = {8408-8417},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT236894},
}
@article{bb241980,
  author = {Ma, G.Q. and Bai, Y. and Zhang, W. and Yao, T. and Shihada, B. and Mei, T.},
  title = {Boosting Generic Visual-Linguistic Representation With Dynamic Contexts},
  journal = MultMed,
  volume = {25},
  year = {2023},
  pages = {8445-8457},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT236895},
}
@article{bb241981,
  author = {Su, C. and Li, Z. and Lei, T.Y. and Peng, D.Z. and Wang, X.},
  title = {MetaVG: A Meta-Learning Framework for Visual Grounding},
  journal = SPLetters,
  volume = {31},
  year = {2024},
  pages = {236-240},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT236896},
}
@article{bb241982,
  author = {Fang, X. and Liu, D. and Zhou, P. and Xu, Z.C. and Li, R.X.},
  title = {Hierarchical Local-Global Transformer for Temporal Sentence Grounding},
  journal = MultMed,
  volume = {26},
  year = {2024},
  pages = {3263-3277},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT236897},
}
@article{bb241983,
  author = {Wang, Z.Y. and Yang, C. and Jiang, B. and Yuan, J.S.},
  title = {A Dual Reinforcement Learning Framework for Weakly Supervised Phrase Grounding},
  journal = MultMed,
  volume = {26},
  year = {2024},
  pages = {394-405},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT236898},
}
@article{bb241984,
  author = {Lu, Y. and Quan, R.J. and Zhu, L.C. and Yang, Y.},
  title = {Zero-Shot Video Grounding With Pseudo Query Lookup and Verification},
  journal = IP,
  volume = {33},
  year = {2024},
  pages = {1643-1654},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT236899},
}
@article{bb241985,
  author = {Wang, W.K. and Su, Y.T. and Liu, J. and Jing, P.G.},
  title = {Adaptive proposal network based on generative adversarial learning for weakly supervised temporal sentence grounding},
  journal = PRL,
  volume = {179},
  year = {2024},
  pages = {9-16},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT236900},
}
@article{bb241986,
  author = {Liu, M. and Zhou, D. and Guo, J. and Luo, X. and Gao, Z. and Nie, L.Q.},
  title = {Semantic-Aware Contrastive Learning With Proposal Suppression for Video Semantic Role Grounding},
  journal = CirSysVideo,
  volume = {34},
  number = {4},
  month = {April},
  year = {2024},
  pages = {3003-3016},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT236901},
}
@article{bb241987,
  author = {Tang, W. and Li, L. and Liu, X.J. and Jin, L. and Tang, J.H. and Li, Z.C.},
  title = {Context Disentangling and Prototype Inheriting for Robust Visual Grounding},
  journal = PAMI,
  volume = {46},
  number = {5},
  month = {May},
  year = {2024},
  pages = {3213-3229},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT236902},
}
@article{bb241988,
  author = {Shi, F.Y. and Huang, W.L. and Wang, L.M.},
  title = {End-to-end dense video grounding via parallel regression},
  journal = CVIU,
  volume = {242},
  year = {2024},
  pages = {103980},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT236903},
}
@article{bb241989,
  author = {Shao, R. and Wu, T.X. and Wu, J.L. and Nie, L.Q. and Liu, Z.W.},
  title = {Detecting and Grounding Multi-Modal Media Manipulation and Beyond},
  journal = PAMI,
  volume = {46},
  number = {8},
  month = {August},
  year = {2024},
  pages = {5556-5574},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT236904},
}
@inproceedings{bb241990,
  author = {Shao, R. and Wu, T.X. and Liu, Z.W.},
  title = {Detecting and Grounding Multi-Modal Media Manipulation},
  booktitle = CVPR23,
  year = {2023},
  pages = {6904-6913},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT236905},
}
@article{bb241991,
  author = {Chen, L. and Deng, Z. and Liu, L.B. and Yin, S.},
  title = {Multilevel Semantic Interaction Alignment for Video-Text Cross-Modal Retrieval},
  journal = CirSysVideo,
  volume = {34},
  number = {7},
  month = {July},
  year = {2024},
  pages = {6559-6575},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT236906},
}
@article{bb241992,
  author = {Wu, Q.Q. and Guo, L.J. and Zhang, R. and Qian, J.B. and Gao, S.},
  title = {QSMT-net: A query-sensitive proposal and multi-temporal-span matching network for video grounding},
  journal = IVC,
  volume = {149},
  year = {2024},
  pages = {105188},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT236907},
}
@article{bb241993,
  author = {Wu, W. and Cao, M. and Hu, Y. and Peng, Y. and Qin, L. and Yin, Q.},
  title = {Visual Grounding With Dual Knowledge Distillation},
  journal = CirSysVideo,
  volume = {34},
  number = {10},
  month = {October},
  year = {2024},
  pages = {10399-10410},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT236908},
}
@article{bb241994,
  author = {Li, S.T. and Li, B. and Sun, B. and Weng, Y.X.},
  title = {Towards Visual-Prompt Temporal Answer Grounding in Instructional Video},
  journal = PAMI,
  volume = {46},
  number = {12},
  month = {December},
  year = {2024},
  pages = {8836-8853},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT236909},
}
@inproceedings{bb241995,
  author = {Fang, X. and Xiong, Z. and Fang, W.L. and Qu, X.Y. and Chen, C. and Dong, J.F. and Tang, K. and Zhou, P. and Cheng, Y. and Liu, D.Z.},
  title = {Rethinking Weakly-supervised Video Temporal Grounding From a Game Perspective},
  booktitle = ECCV24,
  year = {2024},
  pages = {XLV: 290-311},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT236910},
}
@article{bb241996,
  author = {Xiong, Z. and Liu, D.Z. and Fang, X. and Qu, X.Y. and Dong, J.F. and Zhu, J.H. and Tang, K. and Zhou, P.},
  title = {Rethinking Video Sentence Grounding from a Tracking Perspective With Memory Network and Masked Attention},
  journal = MultMed,
  volume = {26},
  year = {2024},
  pages = {11204-11218},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT236911},
}
@article{bb241997,
  author = {Qi, Z.B. and Yuan, Y. and Ruan, X.W. and Wang, S.H. and Zhang, W.G. and Huang, Q.M.},
  title = {Collaborative Debias Strategy for Temporal Sentence Grounding in Video},
  journal = CirSysVideo,
  volume = {34},
  number = {11},
  month = {November},
  year = {2024},
  pages = {10972-10986},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT236912},
}
@article{bb241998,
  author = {Dong, J.X. and Yin, Z.Z.},
  title = {Graph-based Dense Event Grounding with relative positional encoding},
  journal = CVIU,
  volume = {251},
  year = {2025},
  pages = {104257},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT236913},
}
@article{bb241999,
  author = {Tang, K.F. and He, L.H. and Wang, N.N. and Gao, X.B.},
  title = {Dual Semantic Reconstruction Network for Weakly Supervised Temporal Sentence Grounding},
  journal = MultMed,
  volume = {27},
  year = {2025},
  pages = {95-107},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT236914},
}
Last update: Mar 28, 2026 at 17:09:41