@inproceedings{bb243000,
  author    = {Shrestha, A. and Pugdeethosapol, K. and Fang, H.W. and Qiu, Q.R.},
  title     = {{MAGNet}: Multi-Region Attention-Assisted Grounding of Natural Language Queries at Phrase Level},
  booktitle = ICPR21,
  year      = {2021},
  pages     = {8275-8282},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237914}
}
@inproceedings{bb243001,
  author    = {Koh, J.Y. and Baldridge, J. and Lee, H.L. and Yang, Y.F.},
  title     = {Text-to-Image Generation Grounded by Fine-Grained User Attention},
  booktitle = WACV21,
  year      = {2021},
  pages     = {237-246},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237915}
}
@inproceedings{bb243002,
  author    = {Sadhu, A. and Chen, K. and Nevatia, R.},
  title     = {Video Object Grounding Using Semantic Roles in Language Description},
  booktitle = CVPR20,
  year      = {2020},
  pages     = {10414-10424},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237916}
}
@inproceedings{bb243003,
  author    = {Ma, C.Y. and Kalantidis, Y. and AlRegib, G. and Vajda, P. and Rohrbach, M. and Kira, Z.},
  title     = {Learning to Generate Grounded Visual Captions Without Localization Supervision},
  booktitle = ECCV20,
  year      = {2020},
  pages     = {XVIII:353-370},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237917}
}
@inproceedings{bb243004,
  author    = {Gupta, T. and Vahdat, A. and Chechik, G. and Yang, X.D. and Kautz, J. and Hoiem, D.},
  title     = {Contrastive Learning for Weakly Supervised Phrase Grounding},
  booktitle = ECCV20,
  year      = {2020},
  pages     = {III:752-768},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237918}
}
@inproceedings{bb243005,
  author    = {Yang, S. and Li, G.B. and Yu, Y.Z.},
  title     = {Propagating Over Phrase Relations for One-stage Visual Grounding},
  booktitle = ECCV20,
  year      = {2020},
  pages     = {XIX:589-605},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237919}
}
@inproceedings{bb243006,
  author    = {Xiao, J.B. and Shang, X. and Yang, X. and Tang, S. and Chua, T.S.},
  title     = {Visual Relation Grounding in Videos},
  booktitle = ECCV20,
  year      = {2020},
  pages     = {VI:447-464},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237920}
}
@inproceedings{bb243007,
  author    = {Mun, J. and Cho, M. and Han, B.},
  title     = {Local-Global Video-Text Interactions for Temporal Grounding},
  booktitle = CVPR20,
  year      = {2020},
  pages     = {10807-10816},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237921}
}
@inproceedings{bb243008,
  author    = {Wu, C. and Lin, Z. and Cohen, S. and Bui, T. and Maji, S.},
  title     = {{PhraseCut}: Language-Based Image Segmentation in the Wild},
  booktitle = CVPR20,
  year      = {2020},
  pages     = {10213-10222},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237922}
}
@inproceedings{bb243009,
  author    = {Chen, L. and Zhai, M.Y. and He, J.W. and Mori, G.},
  title     = {Object Grounding via Iterative Context Reasoning},
  booktitle = MDALC19,
  year      = {2019},
  pages     = {1407-1415},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237923}
}
@inproceedings{bb243010,
  author    = {Datta, S. and Sikka, K. and Roy, A. and Ahuja, K. and Parikh, D. and Divakaran, A.},
  title     = {{Align2Ground}: Weakly Supervised Phrase Grounding Guided by Image-Caption Alignment},
  booktitle = ICCV19,
  year      = {2019},
  pages     = {2601-2610},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237924}
}
@inproceedings{bb243011,
  author    = {Fang, Z.Y. and Kong, S. and Fowlkes, C.C. and Yang, Y.Z.},
  title     = {Modularized Textual Grounding for Counterfactual Resilience},
  booktitle = CVPR19,
  year      = {2019},
  pages     = {6371-6381},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237925}
}
@inproceedings{bb243012,
  author    = {Zhuang, B. and Wu, Q. and Shen, C. and Reid, I.D. and van den Hengel, A.J.},
  title     = {Parallel Attention: A Unified Framework for Visual Object Discovery Through Dialogs and Queries},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {4252-4261},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237926}
}
@inproceedings{bb243013,
  author    = {Yang, Z.Y. and Chen, T.L. and Wang, L.W. and Luo, J.B.},
  title     = {Improving One-Stage Visual Grounding by Recursive Sub-query Construction},
  booktitle = ECCV20,
  year      = {2020},
  pages     = {XIV:387-404},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237927}
}
@inproceedings{bb243014,
  author    = {Liu, D.Q. and Zhang, H.W. and Zha, Z.J. and Wu, F.},
  title     = {Learning to Assemble Neural Module Tree Networks for Visual Grounding},
  booktitle = ICCV19,
  year      = {2019},
  pages     = {4672-4681},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237928}
}
@inproceedings{bb243015,
  author    = {Sadhu, A. and Chen, K. and Nevatia, R.},
  title     = {Zero-Shot Grounding of Objects From Natural Language Queries},
  booktitle = ICCV19,
  year      = {2019},
  pages     = {4693-4702},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237929}
}
@inproceedings{bb243016,
  author    = {Yang, Z.Y. and Gong, B.Q. and Wang, L.W. and Huang, W.B. and Yu, D. and Luo, J.B.},
  title     = {A Fast and Accurate One-Stage Approach to Visual Grounding},
  booktitle = ICCV19,
  year      = {2019},
  pages     = {4682-4692},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237930}
}
@inproceedings{bb243017,
  author    = {Rohrbach, A. and Rohrbach, M. and Tang, S. and Oh, S.J. and Schiele, B.},
  title     = {Generating Descriptions with Grounded and Co-referenced People},
  booktitle = CVPR17,
  year      = {2017},
  pages     = {4196-4206},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237931}
}
@article{bb243018,
  author    = {Ding, X.P. and Wang, N.N. and Zhang, S.W. and Huang, Z.Y. and Li, X.M. and Tang, M.Q. and Liu, T.L. and Gao, X.B.},
  title     = {Exploring Language Hierarchy for Video Grounding},
  journal   = IP,
  volume    = {31},
  year      = {2022},
  pages     = {4693-4706},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237932}
}
@article{bb243019,
  author    = {Xu, Z. and Chen, D. and Wei, K. and Deng, C. and Xue, H.},
  title     = {{HiSA}: Hierarchically Semantic Associating for Video Temporal Grounding},
  journal   = IP,
  volume    = {31},
  year      = {2022},
  pages     = {5178-5188},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237933}
}
@article{bb243020,
  author    = {Gao, J.L. and Sun, X. and Ghanem, B. and Zhou, X. and Ge, S.M.},
  title     = {Efficient Video Grounding With Which-Where Reading Comprehension},
  journal   = CirSysVideo,
  volume    = {32},
  number    = {10},
  month     = {October},
  year      = {2022},
  pages     = {6900-6913},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237934}
}
@article{bb243021,
  author    = {Zhou, H. and Zhang, C.Y. and Luo, Y. and Hu, C.P. and Zhang, W.J.},
  title     = {Thinking Inside Uncertainty: Interest Moment Perception for Diverse Temporal Grounding},
  journal   = CirSysVideo,
  volume    = {32},
  number    = {10},
  month     = {October},
  year      = {2022},
  pages     = {7190-7203},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237935}
}
@article{bb243022,
  author    = {Tang, Z.H. and Liao, Y. and Liu, S. and Li, G.B. and Jin, X.J. and Jiang, H.X. and Yu, Q. and Xu, D.},
  title     = {Human-Centric Spatio-Temporal Video Grounding With Visual Transformers},
  journal   = CirSysVideo,
  volume    = {32},
  number    = {12},
  month     = {December},
  year      = {2022},
  pages     = {8238-8249},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237936}
}
@article{bb243023,
  author    = {Wang, W. and Gao, J.Y. and Xu, C.S.},
  title     = {Weakly-Supervised Video Object Grounding via Causal Intervention},
  journal   = PAMI,
  volume    = {45},
  number    = {3},
  month     = {March},
  year      = {2023},
  pages     = {3933-3948},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237937}
}
@article{bb243024,
  author    = {Wang, W. and Gao, J.Y. and Xu, C.S.},
  title     = {Weakly-Supervised Video Object Grounding via Learning Uni-Modal Associations},
  journal   = MultMed,
  volume    = {25},
  year      = {2023},
  pages     = {6329-6340},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237938}
}
@article{bb243025,
  author    = {Xu, Z. and Wei, K. and Yang, X. and Deng, C.},
  title     = {Point-Supervised Video Temporal Grounding},
  journal   = MultMed,
  volume    = {25},
  year      = {2023},
  pages     = {6121-6131},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237939}
}
@article{bb243026,
  author    = {Lu, Y. and Quan, R.J. and Zhu, L.C. and Yang, Y.},
  title     = {Zero-Shot Video Grounding With Pseudo Query Lookup and Verification},
  journal   = IP,
  volume    = {33},
  year      = {2024},
  pages     = {1643-1654},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237940}
}
@article{bb243027,
  author    = {Shi, F.Y. and Huang, W.L. and Wang, L.M.},
  title     = {End-to-end dense video grounding via parallel regression},
  journal   = CVIU,
  volume    = {242},
  year      = {2024},
  pages     = {103980},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237941}
}
@article{bb243028,
  author    = {Xiong, Z. and Liu, D.Z. and Fang, X. and Qu, X.Y. and Dong, J.F. and Zhu, J.H. and Tang, K. and Zhou, P.},
  title     = {Rethinking Video Sentence Grounding from a Tracking Perspective With Memory Network and Masked Attention},
  journal   = MultMed,
  volume    = {26},
  year      = {2024},
  pages     = {11204-11218},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237942}
}
@inproceedings{bb243029,
  author    = {Fang, X. and Xiong, Z. and Fang, W.L. and Qu, X.Y. and Chen, C. and Dong, J.F. and Tang, K. and Zhou, P. and Cheng, Y. and Liu, D.Z.},
  title     = {Rethinking Weakly-supervised Video Temporal Grounding From a Game Perspective},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {XLV: 290-311},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237943}
}
@article{bb243030,
  author    = {Wu, Q.Q. and Guo, L.J. and Zhang, R. and Qian, J.B. and Gao, S.},
  title     = {{QSMT-net}: A query-sensitive proposal and multi-temporal-span matching network for video grounding},
  journal   = IVC,
  volume    = {149},
  year      = {2024},
  pages     = {105188},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237944}
}
@article{bb243031,
  author    = {Dong, J.X. and Yin, Z.Z.},
  title     = {Graph-based Dense Event Grounding with relative positional encoding},
  journal   = CVIU,
  volume    = {251},
  year      = {2025},
  pages     = {104257},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237945}
}
@article{bb243032,
  author    = {Tang, K.F. and He, L.H. and Wang, N.N. and Gao, X.B.},
  title     = {Dual Semantic Reconstruction Network for Weakly Supervised Temporal Sentence Grounding},
  journal   = MultMed,
  volume    = {27},
  year      = {2025},
  pages     = {95-107},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237946}
}
@article{bb243033,
  author    = {Liu, K. and Qu, M.X. and Liu, Y. and Wei, Y.C. and Zhe, W.M. and Zhao, Y. and Liu, W.},
  title     = {Single-Frame Supervision for Spatio-Temporal Video Grounding},
  journal   = PAMI,
  volume    = {47},
  number    = {7},
  month     = {July},
  year      = {2025},
  pages     = {5177-5191},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237947}
}
@article{bb243034,
  author    = {Hu, J.J. and Guo, D. and Li, K. and Si, Z. and Yang, X. and Chang, X.J. and Wang, M.},
  title     = {Unified Static and Dynamic Network: Efficient Temporal Filtering for Video Grounding},
  journal   = PAMI,
  volume    = {47},
  number    = {8},
  month     = {August},
  year      = {2025},
  pages     = {6445-6462},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237948}
}
@article{bb243035,
  author    = {Ran, R. and Wei, J. and He, S.Y. and Zhou, Y.Y. and Wang, P. and Yang, Y. and Shen, H.T.},
  title     = {Fine-Grained Alignment and Interaction for Video Grounding With Cross-Modal Semantic Hierarchical Graph},
  journal   = CirSysVideo,
  volume    = {35},
  number    = {11},
  month     = {November},
  year      = {2025},
  pages     = {11641-11654},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237949}
}
@article{bb243036,
  author    = {Wang, M.Z. and Li, H.F. and Zhang, Y.F. and Li, J.X. and Tao, D.P. and Yu, Z.T.},
  title     = {Disentangling Inter- and Intra-Video Relations for Multi-Event Video-Text Retrieval and Grounding},
  journal   = IP,
  volume    = {34},
  year      = {2025},
  pages     = {7558-7571},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237950}
}
@article{bb243037,
  author    = {Yang, J. and Wei, P.},
  title     = {Learning unified patterns of multimodalities for video temporal grounding},
  journal   = PR,
  volume    = {172},
  year      = {2026},
  pages     = {112484},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237951}
}
@article{bb243038,
  author    = {Liu, Y. and Zheng, M.H. and Chen, Q.C. and Gong, S.G. and Peng, Y.X.},
  title     = {Large-Scale Pre-Trained Models Empowering Phrase Generalization in Temporal Sentence Localization},
  journal   = IJCV,
  volume    = {134},
  number    = {2},
  month     = {February},
  year      = {2026},
  pages     = {53},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237952}
}
@inproceedings{bb243039,
  author    = {Zheng, M.H. and Cai, X.H. and Chen, Q.C. and Peng, Y.X. and Liu, Y.},
  title     = {Training-Free Video Temporal Grounding Using Large-Scale Pre-Trained Models},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {LXXXII: 20-37},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237953}
}
@article{bb243040,
  author    = {Li, A. and Liu, H.J. and Zhu, Y.Q. and Ge, Y.X.},
  title     = {Efficient Pre-Trained Semantics Refinement for Video Temporal Grounding},
  journal   = CirSysVideo,
  volume    = {36},
  number    = {2},
  month     = {February},
  year      = {2026},
  pages     = {1406-1418},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237954}
}
@article{bb243041,
  author    = {Moon, W.J. and Hyun, S. and Lee, S. and Heo, J.P.},
  title     = {Correlation-guided calibration of query dependency for video temporal grounding},
  journal   = PR,
  volume    = {174},
  year      = {2026},
  pages     = {112984},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237955}
}
@inproceedings{bb243042,
  author    = {Cao, Z. and Zhang, B.Q. and Du, H.M. and Yu, X. and Li, X. and Wang, S.},
  title     = {{FlashVTG}: Feature Layering and Adaptive Score Handling Network for Video Temporal Grounding},
  booktitle = WACV25,
  year      = {2025},
  pages     = {9226-9236},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237956}
}
@inproceedings{bb243043,
  author    = {Weerakoon, D. and Subbaraju, V. and Lim, J.H. and Misra, A.},
  title     = {{NeuroViG}: Integrating Event Cameras for Resource-Efficient Video Grounding},
  booktitle = WACV25,
  year      = {2025},
  pages     = {5781-5790},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237957}
}
@inproceedings{bb243044,
  author    = {Jin, Y. and Mu, Y.D.},
  title     = {Weakly-supervised Spatio-temporal Video Grounding with Variational Cross-modal Alignment},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {XLVIII: 412-429},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237958}
}
@inproceedings{bb243045,
  author    = {Fujiwara, K. and Tanaka, M. and Yu, Q.},
  title     = {Chronologically Accurate Retrieval for Temporal Grounding of Motion-language Models},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {LVIII: 323-339},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237959}
}
@inproceedings{bb243046,
  author    = {Bao, P.J. and Shao, Z. and Yang, W.H. and Ng, B.P. and Kot, A.C.},
  title     = {{E3M}: Zero-shot Spatio-temporal Video Grounding with Expectation-maximization Multimodal Modulation},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {LXXXIII: 227-243},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237960}
}
@inproceedings{bb243047,
  author    = {Hannan, T. and Islam, M.M. and Seidl, T. and Bertasius, G.},
  title     = {{RGNET}: A Unified Clip Retrieval and Grounding Network for Long Videos},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {XXI: 352-369},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237961}
}
@inproceedings{bb243048,
  author    = {Gu, X. and Fan, H. and Huang, Y. and Luo, T.J. and Zhang, L.B.},
  title     = {Context-Guided Spatio-Temporal Video Grounding},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {18330-18339},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237962}
}
@inproceedings{bb243049,
  author    = {Chen, B. and Shvetsova, N. and Rouditchenko, A. and Kondermann, D. and Thomas, S. and Chang, S.F. and Feris, R. and Glass, J. and Kuehne, H.},
  title     = {What, When, and Where? Self-Supervised Spatio-Temporal Grounding in Untrimmed Multi-Action Videos from Narrated Instructions},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {18419-18429},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237963}
}
@inproceedings{bb243050,
  author    = {Wasim, S.T. and Naseer, M. and Khan, S. and Yang, M.H. and Khan, F.S.},
  title     = {{VideoGrounding-DINO}: Towards Open-Vocabulary Spatio-Temporal Video Grounding},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {18909-18918},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237964}
}
@inproceedings{bb243051,
  author    = {de la Jara, I.M. and Rodriguez Opazo, C. and Marrese Taylor, E. and Bravo Marquez, F.},
  title     = {An empirical study of the effect of video encoders on Temporal Video Grounding},
  booktitle = CLVL23,
  year      = {2023},
  pages     = {2842-2847},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237965}
}
@inproceedings{bb243052,
  author    = {Li, H.X. and Cao, M. and Cheng, X. and Li, Y.W. and Zhu, Z.H. and Zou, Y.X.},
  title     = {{G2L}: Semantically Aligned and Uniform Video Grounding via Geodesic and Game Theory},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {11998-12008},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237966}
}
@inproceedings{bb243053,
  author    = {Li, H. and Shu, X.J. and He, S. and Qiao, R.Z. and Wen, W. and Guo, T. and Gan, B. and Sun, X.},
  title     = {{D3G}: Exploring {Gaussian} Prior for Temporal Sentence Grounding with Glance Annotation},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {13688-13700},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237967}
}
@inproceedings{bb243054,
  author    = {Pan, Y.L. and He, X.T. and Gong, B. and Lv, Y.L. and Shen, Y.J. and Peng, Y.X. and Zhao, D.L.},
  title     = {Scanning Only Once: An End-to-end Framework for Fast Temporal Grounding in Long Videos},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {13721-13731},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237968}
}
@inproceedings{bb243055,
  author    = {Jang, J. and Park, J. and Kim, J. and Kwon, H. and Sohn, K.H.},
  title     = {Knowing Where to Focus: Event-aware Transformer for Video Grounding},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {13800-13810},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237969}
}
@inproceedings{bb243056,
  author    = {Cao, M. and Wei, F.Y. and Xu, C. and Geng, X. and Chen, L. and Zhang, C. and Zou, Y.X. and Shen, T. and Jiang, D.X.},
  title     = {Iterative Proposal Refinement for Weakly-Supervised Video Grounding},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {6524-6534},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237970}
}
@inproceedings{bb243057,
  author    = {Lu, Z.J. and Iftekhar, A.S.M. and Mittal, G. and Meng, T.J. and Wang, X. and Zhao, C. and Kukkala, R. and Elhamifar, E. and Chen, M.},
  title     = {{DeCafNet}: Delegate and Conquer for Efficient Temporal Grounding in Long Videos},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {24066-24076},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237971}
}
@inproceedings{bb243058,
  author    = {Wang, L. and Mittal, G. and Sajeev, S. and Yu, Y. and Hall, M. and Boddeti, V.N. and Chen, M.},
  title     = {{ProT{\'e}G{\'e}}: Untrimmed Pretraining for Video Temporal Grounding by Video Temporal Grounding},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {6575-6585},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237972}
}
@inproceedings{bb243059,
  author    = {Chen, J. and Gao, D.F. and Lin, K.Q.H. and Shou, M.Z.},
  title     = {Affordance Grounding from Demonstration Video to Target Image},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {6799-6808},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237973}
}
@inproceedings{bb243060,
  author    = {Zhang, Y.M. and Chen, X. and Jia, J.H. and Liu, S. and Ding, K.},
  title     = {Text-Visual Prompting for Efficient {2D} Temporal Video Grounding},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {14794-14804},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237974}
}
@inproceedings{bb243061,
  author    = {Li, M.Z. and Wang, H. and Zhang, W.Q. and Miao, J.X. and Zhao, Z. and Zhang, S.Y. and Ji, W. and Wu, F.},
  title     = {{WINNER}: Weakly-supervised {hIerarchical} {decompositioN} and {aligNment} for {spatio-tEmporal} video {gRounding}},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {23090-23099},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237975}
}
@inproceedings{bb243062,
  author    = {Lin, Z.H. and Tan, C.L. and Hu, J.F. and Jin, Z. and Ye, T. and Zheng, W.S.},
  title     = {Collaborative Static and Dynamic Vision-Language Streams for Spatio-Temporal Video Grounding},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {23100-23109},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237976}
}
@inproceedings{bb243063,
  author    = {Yang, L. and Kong, Q. and Yang, H.K. and Kehl, W. and Sato, Y. and Kobori, N.},
  title     = {{DeCo}: Decomposition and Reconstruction for Compositional Temporal Grounding via Coarse-to-Fine Contrastive Ranking},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {23130-23140},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237977}
}
@inproceedings{bb243064,
  author    = {Kim, D. and Park, J. and Lee, J.Y. and Park, S. and Sohn, K.H.},
  title     = {Language-free Training for Zero-shot Video Grounding},
  booktitle = WACV23,
  year      = {2023},
  pages     = {2538-2547},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237978}
}
@inproceedings{bb243065,
  author    = {Dvornik, N. and Hadji, I. and Pham, H. and Bhatt, D. and Martinez, B. and Fazly, A. and Jepson, A.D.},
  title     = {Flow Graph to Video Grounding for Weakly-Supervised Multi-step Localization},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {XXXV:319-335},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237979}
}
@inproceedings{bb243066,
  author    = {Xiong, Z. and Liu, D. and Zhou, P.},
  title     = {{Gaussian} Kernel-Based Cross Modal Network for Spatio-Temporal Video Grounding},
  booktitle = ICIP22,
  year      = {2022},
  pages     = {2481-2485},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237980}
}
@inproceedings{bb243067,
  author    = {Ding, X.P. and Wang, N.N. and Zhang, S.W. and Cheng, D. and Li, X.M. and Huang, Z.Y. and Tang, M.Q. and Gao, X.B.},
  title     = {Support-Set Based Cross-Supervision for Video Grounding},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {11553-11562},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237981}
}
@inproceedings{bb243068,
  author    = {Su, R. and Yu, Q. and Xu, D.},
  title     = {{STVGBert}: A Visual-linguistic Transformer based Framework for Spatio-temporal Video Grounding},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {1513-1522},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237982}
}
@inproceedings{bb243069,
  author    = {Soldan, M. and Xu, M.M. and Qu, S. and Tegner, J. and Ghanem, B.},
  title     = {{VLG-Net}: Video-Language Graph Matching Network for Video Grounding},
  booktitle = CVEU21,
  year      = {2021},
  pages     = {3217-3227},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237983}
}
@inproceedings{bb243070,
  author    = {Nan, G.S. and Qiao, R. and Xiao, Y. and Liu, J. and Leng, S.C. and Zhang, H. and Lu, W.},
  title     = {Interventional Video Grounding with Dual Contrastive Learning},
  booktitle = CVPR21,
  year      = {2021},
  pages     = {2764-2774},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237984}
}
@inproceedings{bb243071,
  author    = {Zhao, Y. and Zhao, Z. and Zhang, Z. and Lin, Z.J.},
  title     = {Cascaded Prediction Network via Segment Tree for Temporal Video Grounding},
  booktitle = CVPR21,
  year      = {2021},
  pages     = {4195-4204},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237985}
}
@inproceedings{bb243072,
  author    = {Zhang, Z. and Zhao, Z. and Zhao, Y. and Wang, Q. and Liu, H. and Gao, L.},
  title     = {Where Does It Exist: Spatio-Temporal Video Grounding for Multi-Form Sentences},
  booktitle = CVPR20,
  year      = {2020},
  pages     = {10665-10674},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237986}
}
@inproceedings{bb243073,
  author    = {Zeng, R.H. and Xu, H.M. and Huang, W.B. and Chen, P.H. and Tan, M.K. and Gan, C.},
  title     = {Dense Regression Network for Video Grounding},
  booktitle = CVPR20,
  year      = {2020},
  pages     = {10284-10293},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237987}
}
@inproceedings{bb243074,
  author    = {Shi, J. and Xu, J. and Gong, B.Q. and Xu, C.L.},
  title     = {Not All Frames Are Equal: Weakly-Supervised Video Grounding With Contextual Similarity and Visual Clustering Losses},
  booktitle = CVPR19,
  year      = {2019},
  pages     = {10436-10444},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237988}
}
@article{bb243075,
  author    = {Wang, Y.C. and Deng, J.J. and Zhou, W.G. and Li, H.Q.},
  title     = {Weakly Supervised Temporal Adjacent Network for Language Grounding},
  journal   = MultMed,
  volume    = {24},
  year      = {2022},
  pages     = {3276-3286},
  bibsource = {http://www.visionbib.com/bibliography/applicat803lagr3.html#TT237989}
}
@article{bb243076,
  author    = {Tang, H.Y. and Zhu, J. and Wang, L. and Zheng, Q.H. and Zhang, T.W.},
  title     = {Multi-Level Query Interaction for Temporal Language Grounding},
  journal   = ITS,
  volume    = {23},
  number    = {12},
  month     = {December},
  year      = {2022},
  pages     = {25479-25488},
  bibsource = {http://www.visionbib.com/bibliography/applicat803lagr3.html#TT237990}
}
@article{bb243077,
  author    = {Zeng, Y.W. and Han, N. and Pan, K.Y. and Jin, Q.},
  title     = {Temporally Language Grounding With Multi-Modal Multi-Prompt Tuning},
  journal   = MultMed,
  volume    = {26},
  year      = {2024},
  pages     = {3366-3377},
  bibsource = {http://www.visionbib.com/bibliography/applicat803lagr3.html#TT237991}
}
@article{bb243078,
  author    = {Zhang, T. and Lu, X.K. and Zhang, H. and Nie, X.S. and Yin, Y.L. and Shen, J.B.},
  title     = {Relational Network via Cascade {CRF} for Video Language Grounding},
  journal   = MultMed,
  volume    = {26},
  year      = {2024},
  pages     = {8297-8311},
  bibsource = {http://www.visionbib.com/bibliography/applicat803lagr3.html#TT237992}
}
@article{bb243079,
  author    = {Dong, J.X. and Yin, Z.Z.},
  title     = {Annotation-Efficient Hybrid Learning for Temporal Sentence Grounding},
  journal   = CirSysVideo,
  volume    = {36},
  number    = {2},
  month     = {February},
  year      = {2026},
  pages     = {2594-2606},
  bibsource = {http://www.visionbib.com/bibliography/applicat803lagr3.html#TT237993}
}
@inproceedings{bb243080,
  author    = {Shen, S. and Zhu, Z. and Fan, L.Q. and Zhang, H. and Wu, X.X.},
  title     = {{DiffCLIP}: Leveraging Stable Diffusion for Language Grounded {3D} Classification},
  booktitle = WACV24,
  year      = {2024},
  pages     = {3584-3593},
  bibsource = {http://www.visionbib.com/bibliography/applicat803lagr3.html#TT237994}
}
@inproceedings{bb243081,
  author    = {Dong, P.J. and Yang, X.F. and Wang, Q. and Li, Z.X. and Li, T. and Chu, X.W.},
  title     = {Multi-task Domain Adaptation for Language Grounding with {3D} Objects},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {XXXIV: 387-404},
  bibsource = {http://www.visionbib.com/bibliography/applicat803lagr3.html#TT237995}
}
% NOTE(review): the year field was absent in the original entry; 2023 is
% supplied on the assumption that this OpenSUN3D workshop was the one held
% with ICCV 2023 -- confirm against the proceedings.
@inproceedings{bb243082,
  author    = {Hegde, D. and Valanarasu, J.M.J. and Patel, V.M.},
  title     = {{CLIP} goes {3D}: Leveraging Prompt Tuning for Language Grounded {3D} Recognition},
  booktitle = OpenSUN3D,
  year      = {2023},
  pages     = {2020-2030},
  bibsource = {http://www.visionbib.com/bibliography/applicat803lagr3.html#TT237996}
}
@inproceedings{bb243083,
  author    = {Jain, A. and Gkanatsios, N. and Mediratta, I. and Fragkiadaki, K.},
  title     = {Bottom Up Top Down Detection Transformers for Language Grounding in Images and Point Clouds},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {XXXVI:417-433},
  bibsource = {http://www.visionbib.com/bibliography/applicat803lagr3.html#TT237997}
}
@inproceedings{bb243084,
  author    = {Heisler, M. and Banitalebi Dehkordi, A. and Zhang, Y.},
  title     = {{SemAug}: Semantically Meaningful Image Augmentations for Object Detection Through Language Grounding},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {XXXVI:610-626},
  bibsource = {http://www.visionbib.com/bibliography/applicat803lagr3.html#TT237998}
}
@inproceedings{bb243085,
  author    = {Soldan, M. and Pardo, A. and Alcazar, J.L. and Heilbron, F.C. and Zhao, C. and Giancola, S. and Ghanem, B.},
  title     = {{MAD}: A Scalable Dataset for Language Grounding in Videos from Movie Audio Descriptions},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {5016-5025},
  bibsource = {http://www.visionbib.com/bibliography/applicat803lagr3.html#TT237999}
}
@inproceedings{bb243086,
  author    = {Prabhudesai, M. and Tung, H.Y.F. and Javed, S.A. and Sieb, M. and Harley, A.W. and Fragkiadaki, K.},
  title     = {Embodied Language Grounding With {3D} Visual Feature Representations},
  booktitle = CVPR20,
  year      = {2020},
  pages     = {2217-2226},
  bibsource = {http://www.visionbib.com/bibliography/applicat803lagr3.html#TT238000}
}
@inproceedings{bb243087,
  author    = {Bajaj, M. and Wang, L. and Sigal, L.},
  title     = {{G3raphGround}: Graph-Based Language Grounding},
  booktitle = ICCV19,
  year      = {2019},
  pages     = {4280-4289},
  bibsource = {http://www.visionbib.com/bibliography/applicat803lagr3.html#TT238001}
}
@article{bb243088,
  author    = {Chen, Z.X. and Bie, Y. and Jin, H.B. and Chen, H.},
  title     = {Large Language Model With Region-Guided Referring and Grounding for {CT} Report Generation},
  journal   = MedImg,
  volume    = {44},
  number    = {8},
  month     = {August},
  year      = {2025},
  pages     = {3139-3150},
  bibsource = {http://www.visionbib.com/bibliography/applicat803llmgr4.html#TT238002}
}
@article{bb243089,
  author    = {Liu, Y. and Hou, H.W. and Ma, F. and Ni, S.G. and Yu, F.R.},
  title     = {{MLLM-TA}: Leveraging Multimodal Large Language Models for Precise Temporal Video Grounding},
  journal   = SPLetters,
  volume    = {32},
  year      = {2025},
  pages     = {281-285},
  bibsource = {http://www.visionbib.com/bibliography/applicat803llmgr4.html#TT238003}
}
@article{bb243090,
  author    = {Li, G.Z. and Ding, X.P. and Cheng, D. and Li, J. and Wang, N.N. and Gao, X.B.},
  title     = {{ETC}: Temporal Boundary Expand Then Clarify for Weakly Supervised Video Grounding With Multimodal Large Language Model},
  journal   = MultMed,
  volume    = {27},
  year      = {2025},
  pages     = {1772-1782},
  bibsource = {http://www.visionbib.com/bibliography/applicat803llmgr4.html#TT238004}
}
@article{bb243091,
  author    = {Wu, J.L. and Liu, W. and Liu, Y. and Liu, M. and Nie, L.Q. and Lin, Z.C. and Chen, C.W.},
  title     = {A Survey on Video Temporal Grounding With Multimodal Large Language Model},
  journal   = PAMI,
  volume    = {48},
  number    = {2},
  month     = {February},
  year      = {2026},
  pages     = {1521-1541},
  bibsource = {http://www.visionbib.com/bibliography/applicat803llmgr4.html#TT238005}
}
@article{bb243092,
  author    = {Wang, P. and Liang, Y.X. and Cen, Y.G. and Cen, L.H. and Qu, Z. and Liu, J.L. and Kan, S.C.},
  title     = {Integrating spatial features and dynamically learned temporal features via contrastive learning for video temporal grounding in {LLM}},
  journal   = IVC,
  volume    = {167},
  year      = {2026},
  pages     = {105895},
  bibsource = {http://www.visionbib.com/bibliography/applicat803llmgr4.html#TT238006}
}
@inproceedings{bb243093,
  author    = {Liu, Y. and Jiang, L. and Li, G.M. and Ye, X.Z. and Ouyang, Y.},
  title     = {{YOLO-VG}: Enhancing Multi-Stage Feature Interaction for Visual Grounding},
  booktitle = ICIP25,
  year      = {2025},
  pages     = {469-473},
  bibsource = {http://www.visionbib.com/bibliography/applicat803llmgr4.html#TT238007}
}
@inproceedings{bb243094,
  author    = {Gao, J. and Li, Y.Q. and Cao, Z.Q. and Li, W.J.},
  title     = {Interleaved-Modal Chain-of-Thought},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {19520-19529},
  bibsource = {http://www.visionbib.com/bibliography/applicat803llmgr4.html#TT238008}
}
@inproceedings{bb243095,
  author    = {Yu, C.L. and Wang, H.Q. and Shi, Y. and Luo, H.Y. and Yang, S. and Yu, J.Y. and Wang, J.Y.},
  title     = {{SeqAfford}: Sequential {3D} Affordance Reasoning via Multimodal Large Language Model},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {1691-1701},
  bibsource = {http://www.visionbib.com/bibliography/applicat803llmgr4.html#TT238009}
}
@inproceedings{bb243096,
  author    = {Huang, Y. and Gao, T.Y. and Xu, H.R. and Zhao, Q.H. and Song, Y. and Gui, Z.P. and Lv, T.C. and Chen, H. and Cui, L. and Li, S. and Wei, F.},
  title     = {{PEACE}: Empowering Geologic Map Holistic Understanding with {MLLMs}},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {3899-3908},
  bibsource = {http://www.visionbib.com/bibliography/applicat803llmgr4.html#TT238010}
}
@inproceedings{bb243097,
  author    = {Chen, W.B. and Xu, Z. and Xu, R. and Wu, S. and Wong, H.S.},
  title     = {Task-aware Cross-modal Feature Refinement Transformer with Large Language Models for Visual Grounding},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {3931-3941},
  bibsource = {http://www.visionbib.com/bibliography/applicat803llmgr4.html#TT238011}
}
@inproceedings{bb243098,
  author    = {Wu, S. and Jin, S. and Zhang, W.W. and Xu, L. and Liu, W.T. and Li, W. and Loy, C.C.},
  title     = {{F-LMM}: Grounding Frozen Large Multimodal Models},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {24710-24721},
  bibsource = {http://www.visionbib.com/bibliography/applicat803llmgr4.html#TT238012}
}
@inproceedings{bb243099,
  author    = {Qian, R. and Yin, X. and Dou, D.},
  title     = {Reasoning to Attend: Try to Understand How Token Works},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {24722-24731},
  bibsource = {http://www.visionbib.com/bibliography/applicat803llmgr4.html#TT238013}
}
Last update: May 24, 2026 at 14:46:09