@article{bb212100,
        AUTHOR = "Yang, W. F. and Zhang, T. Z. and Zhang, Y. D. and Wu, F.",
        TITLE = "Local Correspondence Network for Weakly Supervised Temporal Sentence Grounding",
        JOURNAL = IP,
        VOLUME = "30",
        YEAR = "2021",
        PAGES = "3252--3262",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207184"}

@inproceedings{bb212101,
        AUTHOR = "Luo, W. and Zhang, T. Z. and Yang, W. F. and Liu, J. G. and Mei, T. and Wu, F. and Zhang, Y. D.",
        TITLE = "Action Unit Memory Network for Weakly Supervised Temporal Action Localization",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "9964--9974",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207185"}

@article{bb212102,
        AUTHOR = "Hong, R. C. and Liu, D. and Mo, X. Y. and He, X. N. and Zhang, H. W.",
        TITLE = "Learning to Compose and Reason with Language Tree Structures for Visual Grounding",
        JOURNAL = PAMI,
        VOLUME = "44",
        YEAR = "2022",
        NUMBER = "2",
        MONTH = feb,
        PAGES = "684--696",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207186"}

@inproceedings{bb212103,
        AUTHOR = "Tang, K. H. and Zhang, H. W. and Wu, B. Y. and Luo, W. H. and Liu, W.",
        TITLE = "Learning to Compose Dynamic Tree Structures for Visual Contexts",
        BOOKTITLE = CVPR19,
        YEAR = "2019",
        PAGES = "6612--6621",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207187"}

@article{bb212104,
        AUTHOR = "Bin, Y. and Ding, Y. J. and Peng, B. and Peng, L. and Yang, Y. and Chua, T. S.",
        TITLE = "Entity Slot Filling for Visual Captioning",
        JOURNAL = CirSysVideo,
        VOLUME = "32",
        YEAR = "2022",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "52--62",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207188"}

@article{bb212105,
        AUTHOR = "Chu, C. and Oliveira, V. and Virgo, F. G. and Otani, M. and Garcia, N. and Nakashima, Y.",
        TITLE = "The semantic typology of visually grounded paraphrases",
        JOURNAL = CVIU,
        VOLUME = "215",
        YEAR = "2022",
        PAGES = "103333",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207189"}

@article{bb212106,
        AUTHOR = "Deng, C. R. and Wu, Q. and Wu, Q. Y. and Hu, F. Y. and Lyu, F. and Tan, M. K.",
        TITLE = "Visual Grounding Via Accumulated Attention",
        JOURNAL = PAMI,
        VOLUME = "44",
        YEAR = "2022",
        NUMBER = "3",
        MONTH = mar,
        PAGES = "1670--1684",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207190"}

% NOTE(review): the author list below is the exact reverse of the journal
% version bb212106 (same title), and both entries share one BIBSOURCE anchor.
% Verify the author order against the CVPR 2018 proceedings before citing.
@inproceedings{bb212107,
        AUTHOR = "Tan, M. K. and Lyu, F. and Hu, F. Y. and Wu, Q. Y. and Wu, Q. and Deng, C. R.",
        TITLE = "Visual Grounding Via Accumulated Attention",
        BOOKTITLE = CVPR18,
        YEAR = "2018",
        PAGES = "7746--7755",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207190"}

@article{bb212108,
        AUTHOR = "Plummer, B. A. and Shih, K. J. and Li, Y. C. and Xu, K. and Lazebnik, S. and Sclaroff, S. and Saenko, K.",
        TITLE = "Revisiting Image-Language Networks for Open-Ended Phrase Detection",
        JOURNAL = PAMI,
        VOLUME = "44",
        YEAR = "2022",
        NUMBER = "4",
        MONTH = apr,
        PAGES = "2155--2167",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207191"}

@inproceedings{bb212109,
        AUTHOR = "Burns, A. and Tan, R. and Saenko, K. and Sclaroff, S. and Plummer, B. A.",
        TITLE = "Language Features Matter: Effective Language Representations for Vision-Language Tasks",
        BOOKTITLE = ICCV19,
        YEAR = "2019",
        PAGES = "7473--7482",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207192"}

@inproceedings{bb212110,
        AUTHOR = "Arbelle, A. and Doveh, S. and Alfassy, A. and Shtok, J. and Lev, G. and Schwartz, E. and Kuehne, H. and Levi, H. B. and Sattigeri, P. and Panda, R. and Chen, C. F. and Bronstein, A. M. and Saenko, K. and Ullman, S. and Giryes, R. and Feris, R. S. and Karlinsky, L.",
        TITLE = "Detector-Free Weakly Supervised Grounding by Separation",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "1781--1792",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207193"}

@inproceedings{bb212111,
        AUTHOR = "Whitehead, S. and Wu, H. and Ji, H. and Feris, R. S. and Saenko, K.",
        TITLE = "Separating Skills and Concepts for Novel Visual Question Answering",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "5628--5637",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207194"}

@article{bb212112,
        AUTHOR = "Yu, X. T. and Zhang, H. M. and Hong, R. X. and Song, Y. Q. and Zhang, C. S.",
        TITLE = "{VD-PCR}: Improving visual dialog with pronoun coreference resolution",
        JOURNAL = PR,
        VOLUME = "125",
        YEAR = "2022",
        PAGES = "108540",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207195"}

@article{bb212113,
        AUTHOR = "Yuan, Y. T. and Ma, L. and Wang, J. W. and Liu, W. and Zhu, W. W.",
        TITLE = "Semantic Conditioned Dynamic Modulation for Temporal Sentence Grounding in Videos",
        JOURNAL = PAMI,
        VOLUME = "44",
        YEAR = "2022",
        NUMBER = "5",
        MONTH = may,
        PAGES = "2725--2741",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207196"}

@article{bb212114,
        AUTHOR = "He, S. and Yang, X. F. and Lin, G. S.",
        TITLE = "Learning language to symbol and language to vision mapping for visual grounding",
        JOURNAL = IVC,
        VOLUME = "122",
        YEAR = "2022",
        PAGES = "104451",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207197"}

@article{bb212115,
        AUTHOR = "Jiang, W. H. and Zhu, M. and Fang, Y. M. and Shi, G. M. and Zhao, X. W. and Liu, Y.",
        TITLE = "Visual Cluster Grounding for Image Captioning",
        JOURNAL = IP,
        VOLUME = "31",
        YEAR = "2022",
        PAGES = "3920--3934",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207198"}

@article{bb212116,
        AUTHOR = "Liao, Y. and Zhang, A. and Chen, Z. Y. and Hui, T. R. and Liu, S.",
        TITLE = "Progressive Language-Customized Visual Feature Learning for One-Stage Visual Grounding",
        JOURNAL = IP,
        VOLUME = "31",
        YEAR = "2022",
        PAGES = "4266--4277",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207199"}

@article{bb212117,
        AUTHOR = "Ding, X. P. and Wang, N. N. and Zhang, S. W. and Huang, Z. Y. and Li, X. M. and Tang, M. Q. and Liu, T. L. and Gao, X. B.",
        TITLE = "Exploring Language Hierarchy for Video Grounding",
        JOURNAL = IP,
        VOLUME = "31",
        YEAR = "2022",
        PAGES = "4693--4706",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207200"}

@article{bb212118,
        AUTHOR = "Wang, Y. and Deng, J. J. and Zhou, W. G. and Li, H. Q.",
        TITLE = "Weakly Supervised Temporal Adjacent Network for Language Grounding",
        JOURNAL = MultMed,
        VOLUME = "24",
        YEAR = "2022",
        PAGES = "3276--3286",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207201"}

@article{bb212119,
        AUTHOR = "Xu, Z. and Chen, D. and Wei, K. and Deng, C. and Xue, H.",
        TITLE = "{HiSA}: Hierarchically Semantic Associating for Video Temporal Grounding",
        JOURNAL = IP,
        VOLUME = "31",
        YEAR = "2022",
        PAGES = "5178--5188",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207202"}

@article{bb212120,
        AUTHOR = "Gao, J. L. and Sun, X. and Ghanem, B. and Zhou, X. and Ge, S. M.",
        TITLE = "Efficient Video Grounding With Which-Where Reading Comprehension",
        JOURNAL = CirSysVideo,
        VOLUME = "32",
        YEAR = "2022",
        NUMBER = "10",
        MONTH = oct,
        PAGES = "6900--6913",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207203"}

@article{bb212121,
        AUTHOR = "Zhou, H. and Zhang, C. Y. and Luo, Y. and Hu, C. P. and Zhang, W. J.",
        TITLE = "Thinking Inside Uncertainty: Interest Moment Perception for Diverse Temporal Grounding",
        JOURNAL = CirSysVideo,
        VOLUME = "32",
        YEAR = "2022",
        NUMBER = "10",
        MONTH = oct,
        PAGES = "7190--7203",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207204"}

@article{bb212122,
        AUTHOR = "Tang, Z. H. and Liao, Y. and Liu, S. and Li, G. B. and Jin, X. J. and Jiang, H. X. and Yu, Q. and Xu, D.",
        TITLE = "Human-Centric Spatio-Temporal Video Grounding With Visual Transformers",
        JOURNAL = CirSysVideo,
        VOLUME = "32",
        YEAR = "2022",
        NUMBER = "12",
        MONTH = dec,
        PAGES = "8238--8249",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207205"}

@article{bb212123,
        AUTHOR = "Tang, H. Y. and Zhu, J. and Wang, L. and Zheng, Q. H. and Zhang, T. W.",
        TITLE = "Multi-Level Query Interaction for Temporal Language Grounding",
        JOURNAL = ITS,
        VOLUME = "23",
        YEAR = "2022",
        NUMBER = "12",
        MONTH = dec,
        PAGES = "25479--25488",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207206"}

@article{bb212124,
        AUTHOR = "Wang, W. and Gao, J. Y. and Xu, C. S.",
        TITLE = "Weakly-Supervised Video Object Grounding via Causal Intervention",
        JOURNAL = PAMI,
        VOLUME = "45",
        YEAR = "2023",
        NUMBER = "3",
        MONTH = mar,
        PAGES = "3933--3948",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207207"}

@article{bb212125,
        AUTHOR = "Wang, W. and Gao, J. Y. and Xu, C. S.",
        TITLE = "Weakly-Supervised Video Object Grounding via Learning Uni-Modal Associations",
        JOURNAL = MultMed,
        VOLUME = "25",
        YEAR = "2023",
        PAGES = "6329--6340",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207208"}

@article{bb212126,
        AUTHOR = "Nayyeri, M. and Xu, C. J. and Alam, M. M. and Lehmann, J. and Yazdi, H. S.",
        TITLE = "{LogicENN}: A Neural Based Knowledge Graphs Embedding Model With Logical Rules",
        JOURNAL = PAMI,
        VOLUME = "45",
        YEAR = "2023",
        NUMBER = "6",
        MONTH = jun,
        PAGES = "7050--7062",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207209"}

@article{bb212127,
        AUTHOR = "Zhao, L. C. and Cai, D. G. and Zhang, J. and Sheng, L. and Xu, D. and Zheng, R. and Zhao, Y. J. and Wang, L. P. and Fan, X.",
        TITLE = "Toward Explainable {3D} Grounded Visual Question Answering: A New Benchmark and Strong Baseline",
        JOURNAL = CirSysVideo,
        VOLUME = "33",
        YEAR = "2023",
        NUMBER = "6",
        MONTH = jun,
        PAGES = "2935--2949",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207210"}

@article{bb212128,
        AUTHOR = "Zhu, L. J. and Peng, L. and Zhou, W. N. and Yang, J.",
        TITLE = "Dual-decoder transformer network for answer grounding in visual question answering",
        JOURNAL = PRL,
        VOLUME = "171",
        YEAR = "2023",
        PAGES = "53--60",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207211"}

@article{bb212129,
        AUTHOR = "Chen, T. and Wang, W. and Han, K. and Xu, H. J.",
        TITLE = "{SaGCN}: Semantic-Aware Graph Calibration Network for Temporal Sentence Grounding",
        JOURNAL = CirSysVideo,
        VOLUME = "33",
        YEAR = "2023",
        NUMBER = "6",
        MONTH = jun,
        PAGES = "3003--3016",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207212"}

@article{bb212130,
        AUTHOR = "Zhang, H. and Sun, A. and Jing, W. and Zhou, J. T. Y.",
        TITLE = "Temporal Sentence Grounding in Videos: A Survey and Future Directions",
        JOURNAL = PAMI,
        VOLUME = "45",
        YEAR = "2023",
        NUMBER = "8",
        MONTH = aug,
        PAGES = "10443--10465",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207213"}

@article{bb212131,
        AUTHOR = "Deng, J. J. and Yang, Z. Y. and Liu, D. and Chen, T. L. and Zhou, W. G. and Zhang, Y. and Li, H. Q. and Ouyang, W. L.",
        TITLE = "{TransVG++}: End-to-End Visual Grounding With Language Conditioned Vision Transformer",
        JOURNAL = PAMI,
        VOLUME = "45",
        YEAR = "2023",
        NUMBER = "11",
        MONTH = nov,
        PAGES = "13636--13652",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207214"}

@inproceedings{bb212132,
        AUTHOR = "Deng, J. J. and Yang, Z. Y. and Chen, T. L. and Zhou, W. G. and Li, H. Q.",
        TITLE = "{TransVG}: End-to-End Visual Grounding with Transformers",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "1749--1759",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207215"}

@article{bb212133,
        AUTHOR = "Li, J. C. and Tang, S. L. and Zhu, L. C. and Zhang, W. Q. and Yang, Y. and Chua, T. S. and Wu, F. and Zhuang, Y. T.",
        TITLE = "Variational Cross-Graph Reasoning and Adaptive Structured Semantics Learning for Compositional Temporal Grounding",
        JOURNAL = PAMI,
        VOLUME = "45",
        YEAR = "2023",
        NUMBER = "10",
        MONTH = oct,
        PAGES = "12601--12617",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207216"}

@inproceedings{bb212134,
        AUTHOR = "Li, J. C. and Xie, J. L. and Qian, L. and Zhu, L. C. and Tang, S. L. and Wu, F. and Yang, Y. and Zhuang, Y. T. and Wang, X. E.",
        TITLE = "Compositional Temporal Grounding with Structured Variational Cross-Graph Correspondence Learning",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "3022--3031",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207217"}

@article{bb212135,
        AUTHOR = "Gonzalez, C. and Ayobi, N. and Hernandez, I. and Pont Tuset, J. and Arbelaez, P.",
        TITLE = "{PiGLET}: Pixel-Level Grounding of Language Expressions With Transformers",
        JOURNAL = PAMI,
        VOLUME = "45",
        YEAR = "2023",
        NUMBER = "10",
        MONTH = oct,
        PAGES = "12206--12221",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207218"}

@article{bb212136,
        AUTHOR = "Zhang, R. S. and Wang, C. and Liu, C. L.",
        TITLE = "Cycle-Consistent Weakly Supervised Visual Grounding With Individual and Contextual Representations",
        JOURNAL = IP,
        VOLUME = "32",
        YEAR = "2023",
        PAGES = "5167--5180",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207219"}

@article{bb212137,
        AUTHOR = "Wang, Y. and Su, Y. T. and Li, W. H. and Xiao, J. and Li, X. Y. and Liu, A. A.",
        TITLE = "Dual-Path Rare Content Enhancement Network for Image and Text Matching",
        JOURNAL = CirSysVideo,
        VOLUME = "33",
        YEAR = "2023",
        NUMBER = "10",
        MONTH = oct,
        PAGES = "6144--6158",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207220"}

@article{bb212138,
        AUTHOR = "Xu, Z. and Wei, K. and Yang, X. and Deng, C.",
        TITLE = "Point-Supervised Video Temporal Grounding",
        JOURNAL = MultMed,
        VOLUME = "25",
        YEAR = "2023",
        PAGES = "6121--6131",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207221"}

@article{bb212139,
        AUTHOR = "Luo, F. and Chen, S. X. and Chen, J. J. and Wu, Z. and Jiang, Y. G.",
        TITLE = "Self-Supervised Learning for Semi-Supervised Temporal Language Grounding",
        JOURNAL = MultMed,
        VOLUME = "25",
        YEAR = "2023",
        PAGES = "7747--7757",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207222"}

@article{bb212140,
        AUTHOR = "Liu, D. Z. and Fang, X. and Hu, W. and Zhou, P.",
        TITLE = "Exploring Optical-Flow-Guided Motion and Detection-Based Appearance for Temporal Sentence Grounding",
        JOURNAL = MultMed,
        VOLUME = "25",
        YEAR = "2023",
        PAGES = "8539--8553",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207223"}

@article{bb212141,
        AUTHOR = "Yang, X. F. and Liu, F. and Lin, G. S.",
        TITLE = "Effective End-to-End Vision Language Pretraining With Semantic Visual Loss",
        JOURNAL = MultMed,
        VOLUME = "25",
        YEAR = "2023",
        PAGES = "8408--8417",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207224"}

@article{bb212142,
        AUTHOR = "Ma, G. Q. and Bai, Y. and Zhang, W. and Yao, T. and Shihada, B. and Mei, T.",
        TITLE = "Boosting Generic Visual-Linguistic Representation With Dynamic Contexts",
        JOURNAL = MultMed,
        VOLUME = "25",
        YEAR = "2023",
        PAGES = "8445--8457",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207225"}

@article{bb212143,
        AUTHOR = "Su, C. and Li, Z. and Lei, T. Y. and Peng, D. Z. and Wang, X.",
        TITLE = "{MetaVG}: A Meta-Learning Framework for Visual Grounding",
        JOURNAL = SPLetters,
        VOLUME = "31",
        YEAR = "2024",
        PAGES = "236--240",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207226"}

@article{bb212144,
        AUTHOR = "Zeng, Y. W. and Han, N. and Pan, K. Y. and Jin, Q.",
        TITLE = "Temporally Language Grounding With Multi-Modal Multi-Prompt Tuning",
        JOURNAL = MultMed,
        VOLUME = "26",
        YEAR = "2024",
        PAGES = "3366--3377",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207227"}

@article{bb212145,
        AUTHOR = "Fang, X. and Liu, D. and Zhou, P. and Xu, Z. and Li, R. X.",
        TITLE = "Hierarchical Local-Global Transformer for Temporal Sentence Grounding",
        JOURNAL = MultMed,
        VOLUME = "26",
        YEAR = "2024",
        PAGES = "3263--3277",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207228"}

@article{bb212146,
        AUTHOR = "Wang, Z. and Yang, C. and Jiang, B. and Yuan, J. S.",
        TITLE = "A Dual Reinforcement Learning Framework for Weakly Supervised Phrase Grounding",
        JOURNAL = MultMed,
        VOLUME = "26",
        YEAR = "2024",
        PAGES = "394--405",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207229"}

@inproceedings{bb212147,
        AUTHOR = "Gong, R. and Huang, J. and Zhao, Y. Z. and Geng, H. R. and Gao, X. F. and Wu, Q. Y. and Ai, W. and Zhou, Z. H. and Terzopoulos, D. and Zhu, S. C. and Jia, B. X. and Huang, S. Y.",
        TITLE = "{ARNOLD}: A Benchmark for Language-Grounded Task Learning With Continuous States in Realistic {3D} Scenes",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "20426--20438",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207230"}

@inproceedings{bb212148,
        AUTHOR = "Wu, Y. and Wei, Y. and Wang, H. Z. and Liu, Y. F. and Yang, S. and He, X. M.",
        TITLE = "Grounded Image Text Matching with Mismatched Relation Reasoning",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "2964--2975",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207231"}

@inproceedings{bb212149,
        AUTHOR = "Song, C. H. and Sadler, B. M. and Wu, J. and Chao, W. L. and Washington, C. and Su, Y.",
        TITLE = "{LLM-Planner}: Few-Shot Grounded Planning for Embodied Agents with Large Language Models",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "2986--2997",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207232"}

@inproceedings{bb212150,
        AUTHOR = "Lee, C. and Kumar, M. G. and Tan, C.",
        TITLE = "{DetermiNet}: A Large-Scale Diagnostic Dataset for Complex Visually-Grounded Referencing using Determiners",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "19962--19971",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207233"}

@inproceedings{bb212151,
        AUTHOR = "Lin, K. Q. and Zhang, P. and Chen, J. and Pramanick, S. and Gao, D. F. and Wang, A. J. P. and Yan, R. and Shou, M. Z.",
        TITLE = "{UniVTG}: Towards Unified Video-Language Temporal Grounding",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "2782--2792",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207234"}

@inproceedings{bb212152,
        AUTHOR = "Liu, Y. and Zhang, J. H. and Chen, Q. C. and Peng, Y. X.",
        TITLE = "Confidence-aware Pseudo-label Learning for Weakly Supervised Visual Grounding",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "2816--2826",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207235"}

@inproceedings{bb212153,
        AUTHOR = "Khoshsirat, S. and Kambhamettu, C.",
        TITLE = "Sentence Attention Blocks for Answer Grounding",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "6057--6067",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207236"}

@inproceedings{bb212154,
        AUTHOR = "Li, H. X. and Cao, M. and Cheng, X. and Li, Y. and Zhu, Z. H. and Zou, Y. X.",
        TITLE = "{G2L}: Semantically Aligned and Uniform Video Grounding via Geodesic and Game Theory",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "11998--12008",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207237"}

@inproceedings{bb212155,
        AUTHOR = "Li, H. and Shu, X. J. and He, S. and Qiao, R. Z. and Wen, W. and Guo, T. and Gan, B. and Sun, X.",
        TITLE = "{D3G}: Exploring {Gaussian} Prior for Temporal Sentence Grounding with Glance Annotation",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "13688--13700",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207238"}

@inproceedings{bb212156,
        AUTHOR = "Pan, Y. L. and He, X. T. and Gong, B. and Lv, Y. L. and Shen, Y. J. and Peng, Y. X. and Zhao, D. L.",
        TITLE = "Scanning Only Once: An End-to-end Framework for Fast Temporal Grounding in Long Videos",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "13721--13731",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207239"}

@inproceedings{bb212157,
        AUTHOR = "Jang, J. and Park, J. and Kim, J. and Kwon, H. and Sohn, K. H.",
        TITLE = "Knowing Where to Focus: Event-aware Transformer for Video Grounding",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "13800--13810",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207240"}

% NOTE(review): the page range below starts and ends on the same page (15179),
% which looks like a data-entry slip for a full conference paper.
% Verify the real first/last pages against the ICCV 2023 proceedings.
@inproceedings{bb212158,
        AUTHOR = "Zhang, Y. M. and Gong, Z. and Chang, A. X.",
        TITLE = "{Multi3DRefer}: Grounding Text Description to Multiple {3D} Objects",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "15179--15179",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207241"}

@inproceedings{bb212159,
        AUTHOR = "Chen, C. and Anjum, S. and Gurari, D.",
        TITLE = "{VQA} Therapy: Exploring Answer Differences by Visually Grounding Answers",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "15269--15279",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207242"}

@inproceedings{bb212160,
        AUTHOR = "Li, H. and Wei, P. and Ma, Z. and Zheng, N. N.",
        TITLE = "Inverse Compositional Learning for Weakly-supervised Relation Grounding",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "15431--15441",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207243"}

@inproceedings{bb212161,
        AUTHOR = "Chen, D. Z. Y. and Hu, R. and Chen, X. L. and Nie{\ss}ner, M. and Chang, A. X.",
        TITLE = "{UniT3D}: A Unified Transformer for {3D} Dense Captioning and Visual Grounding",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "18063--18073",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207244"}

@inproceedings{bb212162,
        AUTHOR = "de la Jara, I. M. and Rodriguez Opazo, C. and Marrese Taylor, E. and Bravo Marquez, F.",
        TITLE = "An empirical study of the effect of video encoders on Temporal Video Grounding",
        BOOKTITLE = CLVL23,
        YEAR = "2023",
        PAGES = "2842--2847",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207245"}

@inproceedings{bb212163,
        AUTHOR = "Wang, Z. and Huang, H. F. and Zhao, Y. and Li, L. J. and Cheng, X. and Zhu, Y. C. and Yin, A. and Zhao, Z.",
        TITLE = "Distilling Coarse-to-Fine Semantic Matching Knowledge for Weakly Supervised {3D} Visual Grounding",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "2662--2671",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207246"}

@inproceedings{bb212164,
        AUTHOR = "Guo, Z. and Tang, Y. and Zhang, R. and Wang, D. and Wang, Z. G. and Zhao, B. and Li, X. L.",
        TITLE = "{ViewRefer}: Grasp the Multi-view Knowledge for {3D} Visual Grounding",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "15326--15337",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207247"}

@inproceedings{bb212165,
        AUTHOR = "Li, M. and Wang, C. L. and Feng, W. and Lyu, S. C. and Cheng, G. L. and Li, X. T. and Liu, B. and Zhao, Q.",
        TITLE = "Iterative Robust Visual Grounding with Masked Reference based Centerpoint Supervision",
        BOOKTITLE = VLAR23,
        YEAR = "2023",
        PAGES = "4653--4658",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207248"}

@inproceedings{bb212166,
        AUTHOR = "Hsu, J. and Mao, J. Y. and Wu, J. J.",
        TITLE = "{NS3D}: Neuro-Symbolic Grounding of {3D} Objects and Relations",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "2614--2623",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207249"}

@inproceedings{bb212167,
        AUTHOR = "Uzkent, B. and Garg, A. and Zhu, W. T. and Doshi, K. and Yi, J. and Wang, X. L. and Omar, M.",
        TITLE = "Dynamic Inference with Grounding Based Vision and Language Models",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "2624--2633",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207250"}

@inproceedings{bb212168,
        AUTHOR = "Cao, M. and Wei, F. Y. and Xu, C. and Geng, X. and Chen, L. and Zhang, C. and Zou, Y. X. and Shen, T. and Jiang, D. X.",
        TITLE = "Iterative Proposal Refinement for Weakly-Supervised Video Grounding",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "6524--6534",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207251"}

@inproceedings{bb212169,
        AUTHOR = "Wang, L. and Mittal, G. and Sajeev, S. and Yu, Y. and Hall, M. and Boddeti, V. N. and Chen, M.",
        TITLE = "{ProT{\'e}G{\'e}}: Untrimmed Pretraining for Video Temporal Grounding by Video Temporal Grounding",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "6575--6585",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207252"}

@inproceedings{bb212170,
        AUTHOR = "Hwang, M. Y. and Jeong, J. Y. and Kim, M. S. and Oh, Y. and Oh, S. H.",
        TITLE = "{Meta-Explore}: Exploratory Hierarchical Vision-and-Language Navigation Using Scene Object Spectrum Grounding",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "6683--6693",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207253"}

@inproceedings{bb212171,
        AUTHOR = "Chen, J. and Gao, D. F. and Lin, K. Q. and Shou, M. Z.",
        TITLE = "Affordance Grounding from Demonstration Video to Target Image",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "6799--6808",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207254"}

@inproceedings{bb212172,
        AUTHOR = "Shao, R. and Wu, T. X. and Liu, Z. W.",
        TITLE = "Detecting and Grounding Multi-Modal Media Manipulation",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "6904--6913",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207255"}

@inproceedings{bb212173,
        AUTHOR = "Shaharabany, T. and Wolf, L.",
        TITLE = "Similarity Maps for Self-Training Weakly-Supervised Phrase Grounding",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "6925--6934",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207256"}

@inproceedings{bb212174,
        AUTHOR = "Su, W. and Miao, P. and Dou, H. Z. and Wang, G. and Qiao, L. and Li, Z. and Li, X.",
        TITLE = "Language Adaptive Weight Generation for Multi-Task Visual Grounding",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "10857--10866",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207257"}

@inproceedings{bb212175,
        AUTHOR = "Li, G. and Jampani, V. and Sun, D. Q. and Sevilla Lara, L.",
        TITLE = "{LOCATE}: Localize and Transfer Object Parts for Weakly Supervised Affordance Grounding",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "10922--10931",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207258"}

@inproceedings{bb212176,
        AUTHOR = "Kim, S. and Oh, J. and Lee, S. and Yu, S. and Do, J. and Taghavi, T.",
        TITLE = "Grounding Counterfactual Explanation of Image Classifiers to Textual Concept Space",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "10942--10950",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207259"}

@inproceedings{bb212177,
        AUTHOR = "Zhang, Y. M. and Chen, X. and Jia, J. H. and Liu, S. and Ding, K.",
        TITLE = "Text-Visual Prompting for Efficient {2D} Temporal Video Grounding",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "14794--14804",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207260"}

@inproceedings{bb212178,
        AUTHOR = "Chen, Z. H. and Zhang, R. and Song, Y. B. and Wan, X. and Li, G. B.",
        TITLE = "Advancing Visual Grounding with Scene Knowledge: Benchmark and Method",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "15039--15049",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207261"}

@inproceedings{bb212179,
        AUTHOR = "Huang, Y. F. and Yang, L. and Sato, Y.",
        TITLE = "Weakly Supervised Temporal Sentence Grounding with Uncertainty-Guided Self-training",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "18908--18918",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207262"}

@inproceedings{bb212180,
  author    = {Tan, C. and Lin, Z. and Hu, J.F. and Zheng, W.S. and Lai, J.H.},
  title     = {Hierarchical Semantic Correspondence Networks for Video Paragraph Grounding},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {18973-18982},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207263},
}

@inproceedings{bb212181,
  author    = {Yang, Z.Y. and Kafle, K. and Dernoncourt, F. and Ordonez, V.},
  title     = {Improving Visual Grounding by Encouraging Consistent Gradient-Based Explanations},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {19165-19174},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207264},
}

% CVPR 2023 paper. EDA and 3D are braced so sentence-casing styles keep
% them in capitals.
@inproceedings{bb212182,
        AUTHOR = "Wu, Y.M. and Cheng, X.H. and Zhang, R. and Cheng, Z. and Zhang, J.",
        TITLE = "{EDA}: Explicit Text-Decoupling and Dense Alignment for {3D} Visual
Grounding",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "19231-19242",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207265"}

% CVPR 2023 paper. The title spells out the WINNER acronym with capitals
% inside words; each such word is braced so sentence-casing styles cannot
% flatten that deliberate mixed case.
@inproceedings{bb212183,
        AUTHOR = "Li, M.Z. and Wang, H. and Zhang, W.Q. and Miao, J.X. and Zhao, Z. and Zhang, S.Y. and Ji, W. and Wu, F.",
        TITLE = "{WINNER}: Weakly-supervised {hIerarchical} {decompositioN} and {aligNment}
for {spatio-tEmporal} video {gRounding}",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "23090-23099",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207266"}

@inproceedings{bb212184,
  author    = {Lin, Z.H. and Tan, C.L. and Hu, J.F. and Jin, Z. and Ye, T. and Zheng, W.S.},
  title     = {Collaborative Static and Dynamic Vision-Language Streams for Spatio-Temporal Video Grounding},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {23100-23109},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207267},
}

% CVPR 2023 paper. DeCo is braced so sentence-casing styles keep its
% mixed-case spelling.
@inproceedings{bb212185,
        AUTHOR = "Yang, L. and Kong, Q. and Yang, H.K. and Kehl, W. and Sato, Y. and Kobori, N.",
        TITLE = "{DeCo}: Decomposition and Reconstruction for Compositional Temporal
Grounding via Coarse-to-Fine Contrastive Ranking",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "23130-23140",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207268"}

@inproceedings{bb212186,
  author    = {Zhou, L. and Zhou, Z. and Mao, K. and He, Z.Y.},
  title     = {Joint Visual Grounding and Tracking with Natural Language Specification},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {23151-23160},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207269},
}

% L3D-IVU 2023 workshop paper. GCN is braced so sentence-casing styles keep
% the acronym in capitals. The umlaut in the second author's surname is
% written as a LaTeX escape, so the AUTHOR value is brace-delimited.
@inproceedings{bb212187,
        AUTHOR = {Devaraj, C. and Ferm{\"u}ller, C. and Aloimonos, Y.F.},
        TITLE = "Incorporating Visual Grounding In {GCN} For Zero-shot Learning Of Human
Object Interaction Actions",
        BOOKTITLE = L3D-IVU23,
        YEAR = "2023",
        PAGES = "5008-5017",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207270"}

@inproceedings{bb212188,
  author    = {Fang, X. and Liu, D.Z. and Zhou, P. and Nan, G.S.},
  title     = {You Can Ground Earlier than See: An Effective and Efficient Pipeline for Temporal Sentence Grounding in Compressed Videos},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {2448-2460},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207271},
}

@inproceedings{bb212189,
  author    = {Fu, T.J. and Li, L.J. and Gan, Z. and Lin, K. and Wang, W.Y. and Wang, L.J. and Liu, Z.C.},
  title     = {An Empirical Study of End-to-End Video-Language Transformers with Masked Visual Modeling},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {22898-22909},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207272},
}

% CVPR 2023 paper. LAVENDER is braced so sentence-casing styles keep the
% model name in capitals.
@inproceedings{bb212190,
        AUTHOR = "Li, L.J. and Gan, Z. and Lin, K. and Lin, C.C. and Liu, Z.C. and Liu, C. and Wang, L.J.",
        TITLE = "{LAVENDER}: Unifying Video-Language Understanding as Masked Language
Modeling",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "23119-23129",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207273"}

@inproceedings{bb212191,
  author    = {Dong, J.X. and Yin, Z.Z.},
  title     = {Boundary-aware Temporal Sentence Grounding with Adaptive Proposal Refinement},
  booktitle = ACCV22,
  year      = {2022},
  pages     = {IV:641-657},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207274},
}

% ACCV 2022 paper. SST-VLM is braced so sentence-casing styles keep the
% acronym in capitals.
@inproceedings{bb212192,
        AUTHOR = "Gao, Y.Z. and Lu, Z.W.",
        TITLE = "{SST-VLM}: Sparse Sampling-twice Inspired Video-language Model",
        BOOKTITLE = ACCV22,
        YEAR = "2022",
        PAGES = "IV:537-553",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207275"}
% Second author's surname is Mayol Cuevas (published as Mayol-Cuevas); this
% file writes compound surnames without the hyphen.
@inproceedings{bb212193,
        AUTHOR = "Pacheco Ortega, A. and Mayol Cuevas, W.",
        TITLE = "One-shot Learning for Human Affordance Detection",
        BOOKTITLE = CVMeta22,
        YEAR = "2022",
        PAGES = "758-766",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207276"}
% YORO is braced so sentence-casing styles keep the acronym in capitals.
@inproceedings{bb212194,
        AUTHOR = "Ho, C.H. and Appalaraju, S. and Jasani, B. and Manmatha, R. and Vasconcelos, N.M.",
        TITLE = "{YORO} - Lightweight End to End Visual Grounding",
        BOOKTITLE = CMMP22,
        YEAR = "2022",
        PAGES = "3-23",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207277"}

@inproceedings{bb212195,
  author    = {Kim, D. and Park, J. and Lee, J.Y. and Park, S. and Sohn, K.H.},
  title     = {Language-free Training for Zero-shot Video Grounding},
  booktitle = WACV23,
  year      = {2023},
  pages     = {2538-2547},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207278},
}

@inproceedings{bb212196,
  author    = {Le, T.M. and Le, V. and Gupta, S.I. and Venkatesh, S. and Tran, T.},
  title     = {Guiding Visual Question Answering with Attention Priors},
  booktitle = WACV23,
  year      = {2023},
  pages     = {4370-4379},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207279},
}

@inproceedings{bb212197,
  author    = {Chou, S.H. and Fan, Z.C. and Little, J.J. and Sigal, L.},
  title     = {Semi-Supervised Grounding Alignment for Multi-Modal Feature Learning},
  booktitle = CRV22,
  year      = {2022},
  pages     = {48-57},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207280},
}

% cViL is braced so sentence-casing styles keep its mixed-case spelling.
% NOTE(review): BOOKTITLE is the quoted literal "ICPR22", whereas the other
% entries here use bare macros (e.g. CVPR23). Confirm whether an ICPR22
% string macro is defined before unquoting it; left unchanged here.
@inproceedings{bb212198,
        AUTHOR = "Gupta, K. and Gautam, D. and Mamidi, R.",
        TITLE = "{cViL}: Cross-Lingual Training of Vision-Language Models using
Knowledge Distillation",
        BOOKTITLE = "ICPR22",
        YEAR = "2022",
        PAGES = "1734-1741",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207281"}

% ECCV 2022 paper. The sharp s in the third author's surname is written as
% a LaTeX escape so classic 8-bit BibTeX can sort and label it. The model
% name (D cubed Net) is typeset with a math superscript and braced, as is
% 3D, to survive sentence-casing styles.
@inproceedings{bb212199,
        AUTHOR = "Chen, D.Z.Y. and Wu, Q.R. and Nie{\ss}ner, M. and Chang, A.X.",
        TITLE = "{D$^3$Net}: A Unified Speaker-Listener Architecture for
{3D} Dense Captioning and Visual Grounding",
        BOOKTITLE = ECCV22,
        YEAR = "2022",
        PAGES = "XXXII:487-505",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vgr2.html#TT207282"}

Last update: Feb 29, 2024 at 09:13:14