@inproceedings{bb242700,
  author    = {Ge, Y.Y. and Ge, Y.X. and Liu, X.H. and Wang, J.P. and Wu, J.P. and Shan, Y. and Qie, X. and Luo, P.},
  title     = {{MILES}: Visual {BERT} Pre-training with Injected Language Semantics for Video-Text Retrieval},
  booktitle = ECCV22,
  pages     = {XXXV:691--708},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237614},
}
@inproceedings{bb242701,
  author    = {Wang, A.J.P. and Ge, Y.X. and Cai, G. and Yan, R. and Lin, X.D. and Shan, Y. and Qie, X. and Shou, M.Z.},
  title     = {Object-aware Video-language Pre-training for Retrieval},
  booktitle = CVPR22,
  pages     = {3303-3312},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237615},
}
@inproceedings{bb242702,
  author    = {Li, D.X. and Li, J.N. and Li, H.D. and Niebles, J.C. and Hoi, S.C.H.},
  title     = {Align and Prompt: Video-and-Language Pre-training with Entity Prompts},
  booktitle = CVPR22,
  pages     = {4943-4953},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237616},
}
@inproceedings{bb242703,
  author    = {Xue, H.W. and Hang, T. and Zeng, Y.H. and Sun, Y.C. and Liu, B. and Yang, H. and Fu, J.L. and Guo, B.N.},
  title     = {Advancing High-Resolution Video-Language Representation with Large-Scale Video Transcriptions},
  booktitle = CVPR22,
  pages     = {5026-5035},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237617},
}
@inproceedings{bb242704,
  author    = {Sammani, F. and Mukherjee, T. and Deligiannis, N.},
  title     = {{NLX-GPT}: A Model for Natural Language Explanations in Vision and Vision-Language Tasks},
  booktitle = CVPR22,
  pages     = {8312--8322},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237618},
}
@inproceedings{bb242705,
  author    = {Dou, Z.Y. and Xu, Y.C. and Gan, Z. and Wang, J.F. and Wang, S.H. and Wang, L.J. and Zhu, C.G. and Zhang, P.C. and Yuan, L. and Peng, N. and Liu, Z.C. and Zeng, M.},
  title     = {An Empirical Study of Training End-to-End Vision-and-Language Transformers},
  booktitle = CVPR22,
  pages     = {18145-18155},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237619},
}
@inproceedings{bb242706,
  author    = {Xu, Z.P. and Lin, T.W. and Tang, H. and Li, F. and He, D.L. and Sebe, N. and Timofte, R. and Van Gool, L.J. and Ding, E.},
  title     = {Predict, Prevent, and Evaluate: Disentangled Text-Driven Image Manipulation Empowered by Pre-Trained Vision-Language Model},
  booktitle = CVPR22,
  pages     = {18208-18217},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237620},
}
@inproceedings{bb242707,
  author    = {Du, Y. and Wei, F.Y. and Zhang, Z.H. and Shi, M.J. and Gao, Y. and Li, G.Q.},
  title     = {Learning to Prompt for Open-Vocabulary Object Detection with Vision-Language Model},
  booktitle = CVPR22,
  pages     = {14064-14073},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237621},
}
@inproceedings{bb242708,
  author    = {Chang, Y.S. and Cao, G.H. and Narang, M. and Gao, J.F. and Suzuki, H. and Bisk, Y.},
  title     = {{WebQA}: Multihop and Multimodal {QA}},
  booktitle = CVPR22,
  pages     = {16474--16483},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237622},
}
@inproceedings{bb242709,
  author    = {Zellers, R. and Lu, J. and Lu, X.M. and Yu, Y. and Zhao, Y.P. and Salehi, M. and Kusupati, A. and Hessel, J. and Farhadi, A. and Choi, Y.},
  title     = {{MERLOT RESERVE}: Neural Script Knowledge through Vision and Language and Sound},
  booktitle = CVPR22,
  pages     = {16354--16366},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237623},
}
@inproceedings{bb242710,
  author    = {Gupta, T. and Kamath, A. and Kembhavi, A. and Hoiem, D.},
  title     = {Towards General Purpose Vision Systems: An End-to-End Task-Agnostic Vision-Language Architecture},
  booktitle = CVPR22,
  pages     = {16378-16388},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237624},
}
@inproceedings{bb242711,
  author    = {Suris, D. and Epstein, D. and Vondrick, C.},
  title     = {Globetrotter: Connecting Languages by Connecting Images},
  booktitle = CVPR22,
  pages     = {16453-16463},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237625},
}
@inproceedings{bb242712,
  author    = {Sung, Y.L. and Cho, J. and Bansal, M.},
  title     = {{VL-ADAPTER}: Parameter-Efficient Transfer Learning for Vision-and-Language Tasks},
  booktitle = CVPR22,
  pages     = {5217--5227},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237626},
}
@inproceedings{bb242713,
  author    = {Wu, D.M. and Dong, X.P. and Shao, L. and Shen, J.B.},
  title     = {Multi-Level Representation Learning with Semantic Alignment for Referring Video Object Segmentation},
  booktitle = CVPR22,
  pages     = {4986-4995},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237627},
}
@inproceedings{bb242714,
  author    = {Gao, K. and Chen, L. and Niu, Y. and Shao, J. and Xiao, J.},
  title     = {Classification-Then-Grounding: Reformulating Video Scene Graphs as Temporal Bipartite Graphs},
  booktitle = CVPR22,
  pages     = {19475-19484},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237628},
}
@inproceedings{bb242715,
  author    = {Kesen, I. and Can, O.A. and Erdem, E. and Erdem, A. and Yuret, D.},
  title     = {Modulating Bottom-Up and Top-Down Visual Processing via Language-Conditional Filters},
  booktitle = MULA22,
  pages     = {4609-4619},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237629},
}
@inproceedings{bb242716,
  author    = {Nebbia, G. and Kovashka, A.},
  title     = {Doubling down: sparse grounding with an additional, almost-matching caption for detection-oriented multimodal pretraining},
  booktitle = MULA22,
  pages     = {4641-4650},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237630},
}
@inproceedings{bb242717,
  author    = {Ye, J. and Tian, J.F. and Yan, M. and Yang, X.S. and Wang, X. and Zhang, J. and He, L. and Lin, X.},
  title     = {Shifting More Attention to Visual Backbone: Query-modulated Refinement Networks for End-to-End Visual Grounding},
  booktitle = CVPR22,
  pages     = {15481-15491},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237631},
}
@inproceedings{bb242718,
  author    = {Jiang, H.J. and Lin, Y.Z. and Han, D.C. and Song, S. and Huang, G.},
  title     = {{Pseudo-Q}: Generating Pseudo Language Queries for Visual Grounding},
  booktitle = CVPR22,
  pages     = {15492--15502},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237632},
}
@inproceedings{bb242719,
  author    = {Huang, S. and Chen, Y.L. and Jia, J.Y. and Wang, L.W.},
  title     = {Multi-View Transformer for {3D} Visual Grounding},
  booktitle = CVPR22,
  pages     = {15503--15512},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237633},
}
@inproceedings{bb242720,
  author    = {Chen, S. and Li, B.},
  title     = {Multi-Modal Dynamic Graph Transformer for Visual Grounding},
  booktitle = CVPR22,
  pages     = {15513-15522},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237634},
}
@inproceedings{bb242721,
  author    = {Mavroudi, E. and Vidal, R.},
  title     = {Weakly-Supervised Generation and Grounding of Visual Descriptions with Conditional Generative Models},
  booktitle = CVPR22,
  pages     = {15523-15533},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237635},
}
@inproceedings{bb242722,
  author    = {Chen, S. and Zhao, Q.},
  title     = {{REX}: Reasoning-aware and Grounded Explanation},
  booktitle = CVPR22,
  pages     = {15565--15574},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237636},
}
@inproceedings{bb242723,
  author    = {Lou, C. and Han, W.J. and Lin, Y. and Zheng, Z.L.},
  title     = {Unsupervised Vision-Language Parsing: Seamlessly Bridging Visual Scene Graphs with Language Structures via Dependency Relationships},
  booktitle = CVPR22,
  pages     = {15586-15595},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237637},
}
@inproceedings{bb242724,
  author    = {Luo, J.Y. and Fu, J.H. and Kong, X.H. and Gao, C. and Ren, H.B. and Shen, H. and Xia, H.X. and Liu, S.},
  title     = {{3D-SPS}: Single-Stage {3D} Visual Grounding via Referred Point Progressive Selection},
  booktitle = CVPR22,
  pages     = {16433--16442},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237638},
}
@inproceedings{bb242725,
  author    = {Cai, D. and Zhao, L.C. and Zhang, J. and Sheng, L. and Xu, D.},
  title     = {{3DJCG}: A Unified Framework for Joint Dense Captioning and Visual Grounding on {3D} Point Clouds},
  booktitle = CVPR22,
  pages     = {16443--16452},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237639},
}
@inproceedings{bb242726,
  author    = {Luo, H.C. and Zhai, W. and Zhang, J. and Cao, Y. and Tao, D.C.},
  title     = {Learning Affordance Grounding from Exocentric Images},
  booktitle = CVPR22,
  pages     = {2242-2251},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237640},
}
@inproceedings{bb242727,
  author    = {Jiang, X. and Xu, X. and Zhang, J. and Shen, F.M. and Cao, Z. and Shen, H.T.},
  title     = {Semi-supervised Video Paragraph Grounding with Contrastive Encoder},
  booktitle = CVPR22,
  pages     = {2456-2465},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237641},
}
@inproceedings{bb242728,
  author    = {Yu, W. and Chen, W.X. and Yin, S. and Easterbrook, S. and Garg, A.},
  title     = {Modular Action Concept Grounding in Semantic Video Prediction},
  booktitle = CVPR22,
  pages     = {3595-3604},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237642},
}
@inproceedings{bb242729,
  author    = {Yang, L. and Xu, Y. and Yuan, C.F. and Liu, W. and Li, B. and Hu, W.M.},
  title     = {Improving Visual Grounding with Visual-Linguistic Verification and Iterative Reasoning},
  booktitle = CVPR22,
  pages     = {9489-9498},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237643},
}
@inproceedings{bb242730,
  author    = {Li, L.H. and Zhang, P.C. and Zhang, H.T. and Yang, J.W. and Li, C.Y. and Zhong, Y. and Wang, L.J. and Yuan, L. and Zhang, L. and Hwang, J.N. and Chang, K.W. and Gao, J.F.},
  title     = {Grounded Language-Image Pre-training},
  booktitle = CVPR22,
  pages     = {10955-10965},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237644},
}
@inproceedings{bb242731,
  author    = {Yang, Z.Y. and Zhang, S.Y. and Wang, L.W. and Luo, J.B.},
  title     = {{SAT}: {2D} Semantics Assisted Training for {3D} Visual Grounding},
  booktitle = ICCV21,
  pages     = {1836--1846},
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237645},
}
% NOTE(review): the second author was listed as "Golisano, Y.K.", which looks
% like an affiliation name merged into the author; presumed to be "Kong, Y."
% -- confirm against the publisher record.
@inproceedings{bb242732,
  author    = {Chen, J.W. and Kong, Y.},
  title     = {Explainable Video Entailment with Grounded Visual Evidence},
  booktitle = ICCV21,
  pages     = {2001--2010},
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237646},
}
@inproceedings{bb242733,
  author    = {Zhao, L.C. and Cai, D. and Sheng, L. and Xu, D.},
  title     = {{3DVG-Transformer}: Relation Modeling for Visual Grounding on Point Clouds},
  booktitle = ICCV21,
  pages     = {2908--2917},
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237647},
}
@inproceedings{bb242734,
  author    = {Feng, M.T. and Li, Z. and Li, Q. and Zhang, L. and Zhang, X.D. and Zhu, G.M. and Zhang, H. and Wang, Y.N. and Mian, A.},
  title     = {Free-form Description Guided {3D} Visual Graph Network for Object Grounding in Point Cloud},
  booktitle = ICCV21,
  pages     = {3702--3711},
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237648},
}
% NOTE(review): the page range was recorded as 15993-15943 (end before start).
% 15933 is the presumed start page (single-digit typo) -- confirm against the
% publisher record.
@inproceedings{bb242735,
  author    = {Patel, S. and Wani, S. and Jain, U. and Schwing, A. and Lazebnik, S. and Savva, M. and Chang, A.X.},
  title     = {Interpretation of Emergent Communication in Heterogeneous Collaborative Embodied Agents},
  booktitle = ICCV21,
  pages     = {15933--15943},
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237649},
}
@inproceedings{bb242736,
  author    = {Shi, J. and Zhong, Y. and Xu, N. and Li, Y. and Xu, C.L.},
  title     = {A Simple Baseline for Weakly-Supervised Scene Graph Generation},
  booktitle = ICCV21,
  pages     = {16373-16382},
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237650},
}
@inproceedings{bb242737,
  author    = {Cui, C.Y.Q. and Khandelwal, A. and Artzi, Y. and Snavely, N. and Averbuch Elor, H.},
  title     = {Who's {Waldo}? Linking People Across Text and Images},
  booktitle = ICCV21,
  pages     = {1354--1364},
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237651},
}
@inproceedings{bb242738,
  author    = {Gonzalez, C. and Ayobi, N. and Hernandez, I. and Hernandez, J. and Pont Tuset, J. and Arbelaez, P.},
  title     = {Panoptic Narrative Grounding},
  booktitle = ICCV21,
  pages     = {1344-1353},
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237652},
}
@inproceedings{bb242739,
  author    = {Hong, Y. and Li, Q. and Zhu, S.C. and Huang, S.Y.},
  title     = {{VLGrammar}: Grounded Grammar Induction of Vision and Language},
  booktitle = ICCV21,
  pages     = {1645--1654},
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237653},
}
@inproceedings{bb242740,
  author    = {Yuan, Z.H. and Yan, X. and Liao, Y.H. and Zhang, R.M. and Wang, S. and Li, Z. and Cui, S.G.},
  title     = {{InstanceRefer}: Cooperative Holistic Understanding for Visual Grounding on Point Clouds through Instance Multi-level Contextual Referring},
  booktitle = ICCV21,
  pages     = {1771--1780},
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237654},
}
@inproceedings{bb242741,
  author    = {Tian, Y.P. and Hu, D. and Xu, C.L.},
  title     = {Cyclic Co-Learning of Sounding Object Visual Grounding and Sound Separation},
  booktitle = CVPR21,
  pages     = {2744-2753},
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237655},
}
@inproceedings{bb242742,
  author    = {Liu, H.L. and Lin, A. and Han, X.G. and Yang, L. and Yu, Y.Z. and Cui, S.G.},
  title     = {{Refer-it-in-RGBD}: A Bottom-up Approach for {3D} Visual Grounding in {RGBD} Images},
  booktitle = CVPR21,
  pages     = {6028--6037},
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237656},
}
@inproceedings{bb242743,
  author    = {Lin, X.R. and Li, G.B. and Yu, Y.Z.},
  title     = {Scene-Intuitive Agent for Remote Embodied Visual Grounding},
  booktitle = CVPR21,
  pages     = {7032-7041},
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237657},
}
@inproceedings{bb242744,
  author    = {Liu, D.Z. and Qu, X.Y. and Dong, J.F. and Zhou, P. and Cheng, Y. and Wei, W. and Xu, Z. and Xie, Y.},
  title     = {Context-aware Biaffine Localizing Network for Temporal Sentence Grounding},
  booktitle = CVPR21,
  pages     = {11230-11239},
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237658},
}
@inproceedings{bb242745,
  author    = {Meng, Z.H. and Yu, L.C. and Zhang, N. and Berg, T. and Damavandi, B. and Singh, V. and Bearman, A.},
  title     = {Connecting What to Say With Where to Look by Modeling Human Attention Traces},
  booktitle = CVPR21,
  pages     = {12674-12683},
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237659},
}
@inproceedings{bb242746,
  author    = {Wang, L.W. and Huang, J. and Li, Y. and Xu, K. and Yang, Z.Y. and Yu, D.},
  title     = {Improving Weakly Supervised Visual Grounding by Contrastive Knowledge Distillation},
  booktitle = CVPR21,
  pages     = {14085-14095},
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237660},
}
@inproceedings{bb242747,
  author    = {Huang, B.B. and Lian, D.Z. and Luo, W.X. and Gao, S.H.},
  title     = {Look Before You Leap: Learning Landmark Features for One-Stage Visual Grounding},
  booktitle = CVPR21,
  pages     = {16883-16892},
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237661},
}
@inproceedings{bb242748,
  author    = {Zhou, H. and Zhang, C.Y. and Luo, Y. and Chen, Y.J. and Hu, C.P.},
  title     = {Embracing Uncertainty: Decoupling and De-bias for Robust Temporal Grounding},
  booktitle = CVPR21,
  pages     = {8441-8450},
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237662},
}
@inproceedings{bb242749,
  author    = {Zhang, S.Y. and Jiang, T. and Wang, T. and Kuang, K. and Zhao, Z. and Zhu, J. and Yu, J. and Yang, H.X. and Wu, F.},
  title     = {{DeVLBert}: Out-of-distribution Visio-Linguistic Pretraining with Causality},
  booktitle = CiV21,
  pages     = {1744--1747},
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237663},
}
@inproceedings{bb242750,
  author    = {Nguyen, A.T. and Richards, L.E. and Kebe, G.Y. and Raff, E. and Darvish, K. and Ferraro, F. and Matuszek, C.},
  title     = {Practical Cross-modal Manifold Alignment for Robotic Grounded Language Learning},
  booktitle = MULA21,
  pages     = {1613-1622},
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237664},
}
@inproceedings{bb242751,
  author    = {Shrestha, A. and Pugdeethosapol, K. and Fang, H.W. and Qiu, Q.R.},
  title     = {{MAGNet}: Multi-Region Attention-Assisted Grounding of Natural Language Queries at Phrase Level},
  booktitle = ICPR21,
  pages     = {8275--8282},
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237665},
}
@inproceedings{bb242752,
  author    = {Koh, J.Y. and Baldridge, J. and Lee, H.L. and Yang, Y.F.},
  title     = {Text-to-Image Generation Grounded by Fine-Grained User Attention},
  booktitle = WACV21,
  pages     = {237-246},
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237666},
}
@inproceedings{bb242753,
  author    = {Sadhu, A. and Chen, K. and Nevatia, R.},
  title     = {Video Object Grounding Using Semantic Roles in Language Description},
  booktitle = CVPR20,
  pages     = {10414-10424},
  year      = {2020},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237667},
}
@inproceedings{bb242754,
  author    = {Ma, C.Y. and Kalantidis, Y. and AlRegib, G. and Vajda, P. and Rohrbach, M. and Kira, Z.},
  title     = {Learning to Generate Grounded Visual Captions Without Localization Supervision},
  booktitle = ECCV20,
  pages     = {XVIII:353-370},
  year      = {2020},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237668},
}
@inproceedings{bb242755,
  author    = {Gupta, T. and Vahdat, A. and Chechik, G. and Yang, X.D. and Kautz, J. and Hoiem, D.},
  title     = {Contrastive Learning for Weakly Supervised Phrase Grounding},
  booktitle = ECCV20,
  pages     = {III:752-768},
  year      = {2020},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237669},
}
@inproceedings{bb242756,
  author    = {Yang, S. and Li, G.B. and Yu, Y.Z.},
  title     = {Propagating Over Phrase Relations for One-stage Visual Grounding},
  booktitle = ECCV20,
  pages     = {XIX:589-605},
  year      = {2020},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237670},
}
@inproceedings{bb242757,
  author    = {Xiao, J.B. and Shang, X. and Yang, X. and Tang, S. and Chua, T.S.},
  title     = {Visual Relation Grounding in Videos},
  booktitle = ECCV20,
  pages     = {VI:447-464},
  year      = {2020},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237671},
}
@inproceedings{bb242758,
  author    = {Mun, J. and Cho, M. and Han, B.},
  title     = {Local-Global Video-Text Interactions for Temporal Grounding},
  booktitle = CVPR20,
  pages     = {10807-10816},
  year      = {2020},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237672},
}
@inproceedings{bb242759,
  author    = {Wu, C. and Lin, Z. and Cohen, S. and Bui, T. and Maji, S.},
  title     = {{PhraseCut}: Language-Based Image Segmentation in the Wild},
  booktitle = CVPR20,
  pages     = {10213--10222},
  year      = {2020},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237673},
}
@inproceedings{bb242760,
  author    = {Chen, L. and Zhai, M.Y. and He, J.W. and Mori, G.},
  title     = {Object Grounding via Iterative Context Reasoning},
  booktitle = MDALC19,
  pages     = {1407-1415},
  year      = {2019},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237674},
}
@inproceedings{bb242761,
  author    = {Datta, S. and Sikka, K. and Roy, A. and Ahuja, K. and Parikh, D. and Divakaran, A.},
  title     = {{Align2Ground}: Weakly Supervised Phrase Grounding Guided by Image-Caption Alignment},
  booktitle = ICCV19,
  pages     = {2601--2610},
  year      = {2019},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237675},
}
@inproceedings{bb242762,
  author    = {Fang, Z.Y. and Kong, S. and Fowlkes, C.C. and Yang, Y.Z.},
  title     = {Modularized Textual Grounding for Counterfactual Resilience},
  booktitle = CVPR19,
  pages     = {6371-6381},
  year      = {2019},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237676},
}
@inproceedings{bb242763,
  author    = {Zhuang, B. and Wu, Q. and Shen, C. and Reid, I.D. and van den Hengel, A.J.},
  title     = {Parallel Attention: A Unified Framework for Visual Object Discovery Through Dialogs and Queries},
  booktitle = CVPR18,
  pages     = {4252-4261},
  year      = {2018},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237677},
}
@inproceedings{bb242764,
  author    = {Yang, Z.Y. and Chen, T.L. and Wang, L.W. and Luo, J.B.},
  title     = {Improving One-Stage Visual Grounding by Recursive Sub-query Construction},
  booktitle = ECCV20,
  pages     = {XIV:387-404},
  year      = {2020},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237678},
}
@inproceedings{bb242765,
  author    = {Liu, D.Q. and Zhang, H.W. and Zha, Z.J. and Wu, F.},
  title     = {Learning to Assemble Neural Module Tree Networks for Visual Grounding},
  booktitle = ICCV19,
  pages     = {4672-4681},
  year      = {2019},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237679},
}
@inproceedings{bb242766,
  author    = {Sadhu, A. and Chen, K. and Nevatia, R.},
  title     = {Zero-Shot Grounding of Objects From Natural Language Queries},
  booktitle = ICCV19,
  pages     = {4693-4702},
  year      = {2019},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237680},
}
@inproceedings{bb242767,
  author    = {Yang, Z.Y. and Gong, B.Q. and Wang, L.W. and Huang, W.B. and Yu, D. and Luo, J.B.},
  title     = {A Fast and Accurate One-Stage Approach to Visual Grounding},
  booktitle = ICCV19,
  pages     = {4682-4692},
  year      = {2019},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237681},
}
@inproceedings{bb242768,
  author    = {Rohrbach, A. and Rohrbach, M. and Tang, S. and Oh, S.J. and Schiele, B.},
  title     = {Generating Descriptions with Grounded and Co-referenced People},
  booktitle = CVPR17,
  pages     = {4196-4206},
  year      = {2017},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT237682},
}
@article{bb242769,
  author    = {Ding, X.P. and Wang, N.N. and Zhang, S.W. and Huang, Z.Y. and Li, X.M. and Tang, M.Q. and Liu, T.L. and Gao, X.B.},
  title     = {Exploring Language Hierarchy for Video Grounding},
  journal   = IP,
  volume    = {31},
  pages     = {4693-4706},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237683},
}
@article{bb242770,
  author    = {Xu, Z. and Chen, D. and Wei, K. and Deng, C. and Xue, H.},
  title     = {{HiSA}: Hierarchically Semantic Associating for Video Temporal Grounding},
  journal   = IP,
  volume    = {31},
  pages     = {5178--5188},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237684},
}
@article{bb242771,
  author    = {Gao, J.L. and Sun, X. and Ghanem, B. and Zhou, X. and Ge, S.M.},
  title     = {Efficient Video Grounding With Which-Where Reading Comprehension},
  journal   = CirSysVideo,
  volume    = {32},
  number    = {10},
  pages     = {6900--6913},
  month     = oct,
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237685},
}
@article{bb242772,
  author    = {Zhou, H. and Zhang, C.Y. and Luo, Y. and Hu, C.P. and Zhang, W.J.},
  title     = {Thinking Inside Uncertainty: Interest Moment Perception for Diverse Temporal Grounding},
  journal   = CirSysVideo,
  volume    = {32},
  number    = {10},
  pages     = {7190--7203},
  month     = oct,
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237686},
}
@article{bb242773,
  author    = {Tang, Z.H. and Liao, Y. and Liu, S. and Li, G.B. and Jin, X.J. and Jiang, H.X. and Yu, Q. and Xu, D.},
  title     = {Human-Centric Spatio-Temporal Video Grounding With Visual Transformers},
  journal   = CirSysVideo,
  volume    = {32},
  number    = {12},
  pages     = {8238--8249},
  month     = dec,
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237687},
}
@article{bb242774,
  author    = {Wang, W. and Gao, J.Y. and Xu, C.S.},
  title     = {Weakly-Supervised Video Object Grounding via Causal Intervention},
  journal   = PAMI,
  volume    = {45},
  number    = {3},
  pages     = {3933--3948},
  month     = mar,
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237688},
}
@article{bb242775,
  author    = {Wang, W. and Gao, J.Y. and Xu, C.S.},
  title     = {Weakly-Supervised Video Object Grounding via Learning Uni-Modal Associations},
  journal   = MultMed,
  volume    = {25},
  pages     = {6329-6340},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237689},
}
@article{bb242776,
  author    = {Xu, Z. and Wei, K. and Yang, X. and Deng, C.},
  title     = {Point-Supervised Video Temporal Grounding},
  journal   = MultMed,
  volume    = {25},
  pages     = {6121-6131},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237690},
}
@article{bb242777,
  author    = {Lu, Y. and Quan, R.J. and Zhu, L.C. and Yang, Y.},
  title     = {Zero-Shot Video Grounding With Pseudo Query Lookup and Verification},
  journal   = IP,
  volume    = {33},
  pages     = {1643-1654},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237691},
}
@article{bb242778,
  author    = {Shi, F.Y. and Huang, W.L. and Wang, L.M.},
  title     = {End-to-end dense video grounding via parallel regression},
  journal   = CVIU,
  volume    = {242},
  pages     = {103980},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237692},
}
@article{bb242779,
  author    = {Xiong, Z. and Liu, D.Z. and Fang, X. and Qu, X.Y. and Dong, J.F. and Zhu, J.H. and Tang, K. and Zhou, P.},
  title     = {Rethinking Video Sentence Grounding from a Tracking Perspective With Memory Network and Masked Attention},
  journal   = MultMed,
  volume    = {26},
  pages     = {11204-11218},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237693},
}
% Sixth author is spelled "Dong, J.F.", matching entries bb242744 and bb242779.
@inproceedings{bb242780,
  author    = {Fang, X. and Xiong, Z. and Fang, W.L. and Qu, X.Y. and Chen, C. and Dong, J.F. and Tang, K. and Zhou, P. and Cheng, Y. and Liu, D.Z.},
  title     = {Rethinking Weakly-supervised Video Temporal Grounding From a Game Perspective},
  booktitle = ECCV24,
  pages     = {XLV:290--311},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237694},
}
@article{bb242781,
  author    = {Wu, Q.Q. and Guo, L.J. and Zhang, R. and Qian, J.B. and Gao, S.},
  title     = {{QSMT-net}: A query-sensitive proposal and multi-temporal-span matching network for video grounding},
  journal   = IVC,
  volume    = {149},
  pages     = {105188},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237695},
}
@article{bb242782,
  author    = {Dong, J.X. and Yin, Z.Z.},
  title     = {Graph-based Dense Event Grounding with relative positional encoding},
  journal   = CVIU,
  volume    = {251},
  pages     = {104257},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237696},
}
@article{bb242783,
  author    = {Tang, K.F. and He, L.H. and Wang, N.N. and Gao, X.B.},
  title     = {Dual Semantic Reconstruction Network for Weakly Supervised Temporal Sentence Grounding},
  journal   = MultMed,
  volume    = {27},
  pages     = {95-107},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237697},
}
@article{bb242784,
  author    = {Liu, K. and Qu, M.X. and Liu, Y. and Wei, Y.C. and Zhe, W.M. and Zhao, Y. and Liu, W.},
  title     = {Single-Frame Supervision for Spatio-Temporal Video Grounding},
  journal   = PAMI,
  volume    = {47},
  number    = {7},
  pages     = {5177--5191},
  month     = jul,
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237698},
}
@article{bb242785,
  author    = {Hu, J.J. and Guo, D. and Li, K. and Si, Z. and Yang, X. and Chang, X.J. and Wang, M.},
  title     = {Unified Static and Dynamic Network: Efficient Temporal Filtering for Video Grounding},
  journal   = PAMI,
  volume    = {47},
  number    = {8},
  pages     = {6445--6462},
  month     = aug,
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237699},
}
@article{bb242786,
  author    = {Ran, R. and Wei, J. and He, S.Y. and Zhou, Y.Y. and Wang, P. and Yang, Y. and Shen, H.T.},
  title     = {Fine-Grained Alignment and Interaction for Video Grounding With Cross-Modal Semantic Hierarchical Graph},
  journal   = CirSysVideo,
  volume    = {35},
  number    = {11},
  pages     = {11641--11654},
  month     = nov,
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237700},
}
@article{bb242787,
  author    = {Wang, M.Z. and Li, H.F. and Zhang, Y.F. and Li, J.X. and Tao, D.P. and Yu, Z.T.},
  title     = {Disentangling Inter- and Intra-Video Relations for Multi-Event Video-Text Retrieval and Grounding},
  journal   = IP,
  volume    = {34},
  pages     = {7558-7571},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237701},
}
@article{bb242788,
  author    = {Yang, J. and Wei, P.},
  title     = {Learning unified patterns of multimodalities for video temporal grounding},
  journal   = PR,
  volume    = {172},
  pages     = {112484},
  year      = {2026},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237702},
}
@article{bb242789,
  author    = {Liu, Y. and Zheng, M.H. and Chen, Q.C. and Gong, S.G. and Peng, Y.X.},
  title     = {Large-Scale Pre-Trained Models Empowering Phrase Generalization in Temporal Sentence Localization},
  journal   = IJCV,
  volume    = {134},
  number    = {2},
  pages     = {53},
  month     = feb,
  year      = {2026},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237703},
}
@inproceedings{bb242790,
  author    = {Zheng, M.H. and Cai, X.H. and Chen, Q.C. and Peng, Y.X. and Liu, Y.},
  title     = {Training-Free Video Temporal Grounding Using Large-Scale Pre-Trained Models},
  booktitle = ECCV24,
  pages     = {LXXXII:20--37},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237704},
}
@article{bb242791,
  author    = {Li, A. and Liu, H.J. and Zhu, Y.Q. and Ge, Y.X.},
  title     = {Efficient Pre-Trained Semantics Refinement for Video Temporal Grounding},
  journal   = CirSysVideo,
  volume    = {36},
  number    = {2},
  pages     = {1406--1418},
  month     = feb,
  year      = {2026},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237705},
}
@article{bb242792,
  author    = {Moon, W.J. and Hyun, S. and Lee, S. and Heo, J.P.},
  title     = {Correlation-guided calibration of query dependency for video temporal grounding},
  journal   = PR,
  volume    = {174},
  pages     = {112984},
  year      = {2026},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237706},
}
@inproceedings{bb242793,
  author    = {Cao, Z. and Zhang, B.Q. and Du, H.M. and Yu, X. and Li, X. and Wang, S.},
  title     = {{FlashVTG}: Feature Layering and Adaptive Score Handling Network for Video Temporal Grounding},
  booktitle = WACV25,
  pages     = {9226--9236},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237707},
}
@inproceedings{bb242794,
  author    = {Weerakoon, D. and Subbaraju, V. and Lim, J.H. and Misra, A.},
  title     = {{NeuroViG}: Integrating Event Cameras for Resource-Efficient Video Grounding},
  booktitle = WACV25,
  pages     = {5781--5790},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237708},
}
@inproceedings{bb242795,
  author    = {Jin, Y. and Mu, Y.D.},
  title     = {Weakly-supervised Spatio-temporal Video Grounding with Variational Cross-modal Alignment},
  booktitle = ECCV24,
  pages     = {XLVIII:412--429},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237709},
}
@inproceedings{bb242796,
  author    = {Fujiwara, K. and Tanaka, M. and Yu, Q.},
  title     = {Chronologically Accurate Retrieval for Temporal Grounding of Motion-language Models},
  booktitle = ECCV24,
  pages     = {LVIII:323--339},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237710},
}
@inproceedings{bb242797,
  author    = {Bao, P.J. and Shao, Z. and Yang, W.H. and Ng, B.P. and Kot, A.C.},
  title     = {{E3m}: Zero-shot Spatio-temporal Video Grounding with Expectation-maximization Multimodal Modulation},
  booktitle = ECCV24,
  pages     = {LXXXIII:227--243},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237711},
}
@inproceedings{bb242798,
  author    = {Hannan, T. and Islam, M.M. and Seidl, T. and Bertasius, G.},
  title     = {{RGNET}: A Unified Clip Retrieval and Grounding Network for Long Videos},
  booktitle = ECCV24,
  pages     = {XXI:352--369},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237712},
}
@inproceedings{bb242799,
  author    = {Gu, X. and Fan, H. and Huang, Y. and Luo, T.J. and Zhang, L.B.},
  title     = {Context-Guided Spatio-Temporal Video Grounding},
  booktitle = CVPR24,
  pages     = {18330-18339},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vidgr3.html#TT237713},
}
Last update: Apr 23, 2026 at 15:05:02