@article{bb246100,
  author    = {Ma, C.X. and Lyu, L. and Lu, G.L. and Lyu, C.},
  title     = {Adaptive Multiview Graph Difference Analysis for Video Summarization},
  journal   = CirSysVideo,
  volume    = {32},
  year      = {2022},
  number    = {12},
  month     = {December},
  pages     = {8795-8808},
  bibsource = {http://www.visionbib.com/bibliography/applicat825persum5.html#TT240947},
}

@article{bb246101,
  author    = {Zhu, Y. and Zhao, W.T. and Hua, R. and Wu, X.X.},
  title     = {Topic-aware video summarization using multimodal transformer},
  journal   = PR,
  volume    = {140},
  year      = {2023},
  pages     = {109578},
  bibsource = {http://www.visionbib.com/bibliography/applicat825persum5.html#TT240948},
}

@inproceedings{bb246102,
  author    = {Udhayanan, P. and Bv, S. and Laturia, P. and Chauhan, D. and Khandelwal, D. and Petrangeli, S. and Srinivasan, B.V.},
  title     = {Recipe2Video: Synthesizing Personalized Videos from Recipe Texts},
  booktitle = WACV23,
  year      = {2023},
  pages     = {2267-2276},
  bibsource = {http://www.visionbib.com/bibliography/applicat825persum5.html#TT240949},
}

@inproceedings{bb246103,
  author    = {Saquil, Y. and Chen, D. and He, Y. and Li, C. and Yang, Y.L.},
  title     = {Multiple Pairwise Ranking Networks for Personalized Video Summarization},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {1698-1707},
  bibsource = {http://www.visionbib.com/bibliography/applicat825persum5.html#TT240950},
}

% Five distinct authors; the fifth is Nascimento, E.R. (Neves, A.C. is listed once only).
@inproceedings{bb246104,
  author    = {Ramos, W.L.S. and Silva, M.M. and Araujo, E.R. and Neves, A.C. and Nascimento, E.R.},
  title     = {Personalizing Fast-Forward Videos Based on Visual and Textual Features from Social Network},
  booktitle = WACV20,
  year      = {2020},
  pages     = {3260-3269},
  bibsource = {http://www.visionbib.com/bibliography/applicat825persum5.html#TT240951},
}

@inproceedings{bb246105,
  author    = {Teraguchi, M. and Masumitsu, K. and Echigo, T. and Sekiguchi, S. and Etoh, M.},
  title     = {Rapid generation of event-based indexes for personalized video digests},
  booktitle = ICPR02,
  year      = {2002},
  pages     = {II: 1041-1044},
  bibsource = {http://www.visionbib.com/bibliography/applicat825persum5.html#TT240952},
}

@inproceedings{bb246106,
  author    = {Oh, T.H. and Joo, K. and Joshi, N. and Wang, B.Y. and Kweon, I.S. and Kang, S.B.},
  title     = {Personalized Cinemagraphs Using Semantic Understanding and Collaborative Learning},
  booktitle = ICCV17,
  year      = {2017},
  pages     = {5170-5179},
  bibsource = {http://www.visionbib.com/bibliography/applicat825persum5.html#TT240953},
}

@inproceedings{bb246107,
  author    = {Shafeian, H. and Bhanu, B.},
  title     = {Integrated personalized video summarization and retrieval},
  booktitle = ICPR12,
  year      = {2012},
  pages     = {996-999},
  bibsource = {http://www.visionbib.com/bibliography/applicat825persum5.html#TT240954},
}

@inproceedings{bb246108,
  author    = {Han, B.H. and Hamm, J. and Sim, J.},
  title     = {Personalized video summarization with human in the loop},
  booktitle = WACV11,
  year      = {2011},
  pages     = {51-57},
  bibsource = {http://www.visionbib.com/bibliography/applicat825persum5.html#TT240955},
}

@inproceedings{bb246109,
  author    = {Miyamori, H.},
  title     = {Automatic Generation of Personalized Digest Based on Context Flow and Distinctive Events},
  booktitle = CIVR04,
  year      = {2004},
  pages     = {179-188},
  bibsource = {http://www.visionbib.com/bibliography/applicat825persum5.html#TT240956},
}

@inproceedings{bb246110,
  author    = {Miyamori, H.},
  title     = {Automatic Generation of Personalized Video Summary Based on Context Flow and Distinctive Events},
  booktitle = VLBV03,
  year      = {2003},
  pages     = {111-121},
  bibsource = {http://www.visionbib.com/bibliography/applicat825persum5.html#TT240957},
}

@inproceedings{bb246111,
  author    = {Jaimes, A. and Echigo, T. and Teraguchi, M. and Satoh, F.},
  title     = {Learning personalized video highlights from detailed MPEG-7 metadata},
  booktitle = ICIP02,
  year      = {2002},
  pages     = {I: 133-136},
  bibsource = {http://www.visionbib.com/bibliography/applicat825persum5.html#TT240958},
}

@article{bb246112,
  author    = {Qiu, Z.F. and Yao, T. and Mei, T.},
  title     = {Learning Deep Spatio-Temporal Dependence for Semantic Video Segmentation},
  journal   = MultMed,
  volume    = {20},
  year      = {2018},
  number    = {4},
  month     = {April},
  pages     = {939-949},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT240959},
}

@inproceedings{bb246113,
  author    = {Qiu, Z.F. and Yao, T. and Mei, T.},
  title     = {Learning Spatio-Temporal Representation with Pseudo-3D Residual Networks},
  booktitle = ICCV17,
  year      = {2017},
  pages     = {5534-5542},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT240960},
}

@inproceedings{bb246114,
  author    = {Qiu, Z.F. and Yao, T. and Ngo, C.W. and Tian, X.M. and Mei, T.},
  title     = {Learning Spatio-Temporal Representation With Local and Global Diffusion},
  booktitle = CVPR19,
  year      = {2019},
  pages     = {12048-12057},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT240961},
}

@inproceedings{bb246115,
  author    = {Yao, T. and Pan, Y. and Li, Y. and Qiu, Z. and Mei, T.},
  title     = {Boosting Image Captioning with Attributes},
  booktitle = ICCV17,
  year      = {2017},
  pages     = {4904-4912},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT240962},
}

@inproceedings{bb246116,
  author    = {Pan, Y. and Yao, T. and Li, Y. and Mei, T.},
  title     = {Video Captioning with Transferred Semantic Attributes},
  booktitle = CVPR17,
  year      = {2017},
  pages     = {984-992},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT240963},
}

@article{bb246117,
  author    = {Zhao, B. and Li, X. and Lu, X.},
  title     = {CAM-RNN: Co-Attention Model Based RNN for Video Captioning},
  journal   = IP,
  volume    = {28},
  year      = {2019},
  number    = {11},
  month     = {November},
  pages     = {5552-5565},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT240964},
}

@article{bb246118,
  author    = {Yan, C. and Tu, Y. and Wang, X. and Zhang, Y. and Hao, X. and Zhang, Y. and Dai, Q.},
  title     = {STAT: Spatial-Temporal Attention Mechanism for Video Captioning},
  journal   = MultMed,
  volume    = {22},
  year      = {2020},
  number    = {1},
  month     = {January},
  pages     = {229-241},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT240965},
}

% One-page follow-up to bb246118 (same title, journal volume and BIBSOURCE
% anchor); the author list uses the same order as bb246118.
% NOTE(review): presumably the published correction notice -- confirm the
% exact title against the publisher record.
@article{bb246119,
  author    = {Yan, C. and Tu, Y. and Wang, X. and Zhang, Y. and Hao, X. and Zhang, Y. and Dai, Q.},
  title     = {STAT: Spatial-Temporal Attention Mechanism for Video Captioning},
  journal   = MultMed,
  volume    = {22},
  year      = {2020},
  number    = {3},
  month     = {March},
  pages     = {830-830},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT240965},
}

% This journal numbers papers by article rather than by running page, so
% PAGES is given as article:page (Article 115, 37 pages) instead of a
% placeholder. NOTE(review): confirm the article number against the ACM record.
@article{bb246120,
  author    = {Aafaq, N. and Mian, A. and Liu, W. and Gilani, S.Z. and Shah, M.},
  title     = {Video Description: A Survey of Methods, Datasets, and Evaluation Metrics},
  journal   = Surveys,
  volume    = {52},
  year      = {2019},
  number    = {6},
  month     = {October},
  pages     = {115:1-115:37},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT240966},
}

@article{bb246121,
  author    = {Zhang, Z. and Xu, D. and Ouyang, W. and Tan, C.},
  title     = {Show, Tell and Summarize: Dense Video Captioning Using Visual Cue Aided Sentence Summarization},
  journal   = CirSysVideo,
  volume    = {30},
  year      = {2020},
  number    = {9},
  month     = {September},
  pages     = {3130-3139},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT240967},
}

@article{bb246122,
  author    = {Zhang, W. and Wang, B.R. and Ma, L. and Liu, W.},
  title     = {Reconstruct and Represent Video Contents for Captioning via Reinforcement Learning},
  journal   = PAMI,
  volume    = {42},
  year      = {2020},
  number    = {12},
  month     = {December},
  pages     = {3088-3101},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT240968},
}

@article{bb246123,
  author    = {Lee, S. and Kim, I.},
  title     = {DVC-Net: A deep neural network model for dense video captioning},
  journal   = IET-CV,
  volume    = {15},
  year      = {2021},
  number    = {1},
  pages     = {12-23},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT240969},
}

@article{bb246124,
  author    = {Qi, S.S. and Yang, L.X.},
  title     = {Video captioning via a symmetric bidirectional decoder},
  journal   = IET-CV,
  volume    = {15},
  year      = {2021},
  number    = {4},
  pages     = {283-296},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT240970},
}

@article{bb246125,
  author    = {Li, L.H. and Zhang, Y.D. and Tang, S. and Xie, L.X. and Li, X.Y. and Tian, Q.},
  title     = {Adaptive Spatial Location With Balanced Loss for Video Captioning},
  journal   = CirSysVideo,
  volume    = {32},
  year      = {2022},
  number    = {1},
  month     = {January},
  pages     = {17-30},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT240971},
}

@article{bb246126,
  author    = {Zheng, Y. and Zhang, Y. and Feng, R. and Zhang, T. and Fan, W.G.},
  title     = {Stacked Multimodal Attention Network for Context-Aware Video Captioning},
  journal   = CirSysVideo,
  volume    = {32},
  year      = {2022},
  number    = {1},
  month     = {January},
  pages     = {31-42},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT240972},
}

@article{bb246127,
  author    = {Li, L. and Gao, X.Y. and Deng, J.C. and Tu, Y.B. and Zha, Z.J. and Huang, Q.M.},
  title     = {Long Short-Term Relation Transformer With Global Gating for Video Captioning},
  journal   = IP,
  volume    = {31},
  year      = {2022},
  pages     = {2726-2738},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT240973},
}

@article{bb246128,
  author    = {Munusamy, H. and Sekhar, C.C.},
  title     = {Video captioning using Semantically Contextual Generative Adversarial Network},
  journal   = CVIU,
  volume    = {221},
  year      = {2022},
  pages     = {103453},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT240974},
}

@article{bb246129,
  author    = {Wang, H. and Lin, G.S. and Hoi, S.C.H. and Miao, C.Y.},
  title     = {Cross-Modal Graph With Meta Concepts for Video Captioning},
  journal   = IP,
  volume    = {31},
  year      = {2022},
  pages     = {5150-5162},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT240975},
}

@article{bb246130,
  author    = {Xiao, H. and Shi, J.L.},
  title     = {Diverse video captioning through latent variable expansion},
  journal   = PRL,
  volume    = {160},
  year      = {2022},
  pages     = {19-25},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT240976},
}

@article{bb246131,
  author    = {Prudviraj, J. and Reddy, M.I. and Vishnu, C. and Mohan, C.K.},
  title     = {AAP-MIT: Attentive Atrous Pyramid Network and Memory Incorporated Transformer for Multisentence Video Description},
  journal   = IP,
  volume    = {31},
  year      = {2022},
  pages     = {5559-5569},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT240977},
}

@article{bb246132,
  author    = {Xu, W. and Miao, Z.J. and Yu, J. and Tian, Y. and Wan, L. and Ji, Q.},
  title     = {Bridging Video and Text: A Two-Step Polishing Transformer for Video Captioning},
  journal   = CirSysVideo,
  volume    = {32},
  year      = {2022},
  number    = {9},
  month     = {September},
  pages     = {6293-6307},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT240978},
}

@article{bb246133,
  author    = {Wu, B.F. and Niu, G.C. and Yu, J. and Xiao, X.Y. and Zhang, J. and Wu, H.},
  title     = {Towards Knowledge-Aware Video Captioning via Transitive Visual Relationship Detection},
  journal   = CirSysVideo,
  volume    = {32},
  year      = {2022},
  number    = {10},
  month     = {October},
  pages     = {6753-6765},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT240979},
}

@article{bb246134,
  author    = {Yan, L.Q. and Ma, S.Q. and Wang, Q.F. and Chen, Y.J. and Zhang, X.Y. and Savakis, A. and Liu, D.F.},
  title     = {Video Captioning Using Global-Local Representation},
  journal   = CirSysVideo,
  volume    = {32},
  year      = {2022},
  number    = {10},
  month     = {October},
  pages     = {6642-6656},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT240980},
}

@article{bb246135,
  author    = {Subramaniam, A. and Vaidya, J. and Ameen, M.A.M. and Nambiar, A. and Mittal, A.},
  title     = {Co-segmentation inspired attention module for video-based computer vision tasks},
  journal   = CVIU,
  volume    = {223},
  year      = {2022},
  pages     = {103532},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT240981},
}

@article{bb246136,
  author    = {Liu, F.L. and Wu, X. and You, C.Y. and Ge, S. and Zou, Y.X. and Sun, X.},
  title     = {Aligning Source Visual and Target Language Domains for Unpaired Video Captioning},
  journal   = PAMI,
  volume    = {44},
  year      = {2022},
  number    = {12},
  month     = {December},
  pages     = {9255-9268},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT240982},
}

@article{bb246137,
  author    = {Yuan, Y.T. and Ma, L. and Zhu, W.W.},
  title     = {Syntax Customized Video Captioning by Imitating Exemplar Sentences},
  journal   = PAMI,
  volume    = {44},
  year      = {2022},
  number    = {12},
  month     = {December},
  pages     = {10209-10221},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT240983},
}

@article{bb246138,
  author    = {Chen, H.R. and Li, J.M. and Frintrop, S. and Hu, X.L.},
  title     = {The MSR-Video to Text dataset with clean annotations},
  journal   = CVIU,
  volume    = {225},
  year      = {2022},
  pages     = {103581},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT240984},
}

@article{bb246139,
  author    = {Moctezuma, D. and Ramirez delReal, T. and Ruiz, G. and Gonzalez Chavez, O.},
  title     = {Video captioning: A comparative review of where we are and which could be the route},
  journal   = CVIU,
  volume    = {231},
  year      = {2023},
  pages     = {103671},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT240985},
}

@article{bb246140,
  author    = {Aafaq, N. and Mian, A. and Akhtar, N. and Liu, W. and Shah, M.},
  title     = {Dense Video Captioning With Early Linguistic Information Fusion},
  journal   = MultMed,
  volume    = {25},
  year      = {2023},
  pages     = {2309-2322},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT240986},
}

@inproceedings{bb246141,
  author    = {Wang, J.W. and Jiang, W.H. and Ma, L. and Liu, W. and Xu, Y.},
  title     = {Bidirectional Attentive Fusion with Context Gating for Dense Video Captioning},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {7190-7198},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT240987},
}

@article{bb246142,
  author    = {He, M.G. and Du, W.J. and Wen, Z.Q. and Du, Q. and Xie, Y.T. and Wu, Q.},
  title     = {Multi-Granularity Aggregation Transformer for Joint Video-Audio-Text Representation Learning},
  journal   = CirSysVideo,
  volume    = {33},
  year      = {2023},
  number    = {6},
  month     = {June},
  pages     = {2990-3002},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT240988},
}

@article{bb246143,
  author    = {Qian, Y. and Mao, Y.C. and Chen, Z.H. and Li, C. and Bloh, O.T. and Huang, Q.},
  title     = {Dense video captioning based on local attention},
  journal   = IET-IPR,
  volume    = {17},
  year      = {2023},
  number    = {9},
  pages     = {2673-2685},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT240989},
}

@article{bb246144,
  author    = {Tang, M.K. and Wang, Z.Y. and Zeng, Z.Y. and Li, X. and Zhou, L.P.},
  title     = {Stay in Grid: Improving Video Captioning via Fully Grid-Level Representation},
  journal   = CirSysVideo,
  volume    = {33},
  year      = {2023},
  number    = {7},
  month     = {July},
  pages     = {3319-3332},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT240990},
}

@article{bb246145,
  author    = {Velda, V. and Immanuel, S.A. and Hendria, W.F. and Jeong, C.},
  title     = {Improving distinctiveness in video captioning with text-video similarity},
  journal   = IVC,
  volume    = {136},
  year      = {2023},
  pages     = {104728},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT240991},
}

@article{bb246146,
  author    = {Zhu, J.K. and Zeng, P.P. and Gao, L.L. and Li, G.F. and Liao, D.L. and Song, J.K.},
  title     = {Complementarity-Aware Space Learning for Video-Text Retrieval},
  journal   = CirSysVideo,
  volume    = {33},
  year      = {2023},
  number    = {8},
  month     = {August},
  pages     = {4362-4374},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT240992},
}

@article{bb246147,
  author    = {Wang, H. and Zhang, L.B. and Fan, H. and Luo, T.J.},
  title     = {Collaborative three-stream transformers for video captioning},
  journal   = CVIU,
  volume    = {235},
  year      = {2023},
  pages     = {103799},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT240993},
}

@inproceedings{bb246148,
  author    = {Gu, X. and Chen, G. and Wang, Y.F. and Zhang, L.B. and Luo, T.J. and Wen, L.Y.},
  title     = {Text with Knowledge Graph Augmented Transformer for Video Captioning},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {18941-18951},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT240994},
}

@article{bb246149,
  author    = {Xu, T. and Cui, Y.Y. and He, X.Y. and Liu, C.H.},
  title     = {A latent topic-aware network for dense video captioning},
  journal   = IET-CV,
  volume    = {17},
  year      = {2023},
  number    = {7},
  pages     = {795-803},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT240995},
}

@inproceedings{bb246150,
  author    = {Lu, M. and Li, X.Y. and Liu, C.H.},
  title     = {Context Visual Information-based Deliberation Network for Video Captioning},
  booktitle = ICPR21,
  year      = {2021},
  pages     = {9812-9818},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT240996},
}

@article{bb246151,
  author    = {Wu, B. and Liu, B. and Huang, P. and Bao, J. and Xi, P. and Yu, J.},
  title     = {Concept Parser With Multimodal Graph Learning for Video Captioning},
  journal   = CirSysVideo,
  volume    = {33},
  year      = {2023},
  number    = {9},
  month     = {September},
  pages     = {4484-4495},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT240997},
}

@article{bb246152,
  author    = {Liu, S. and Li, A. and Wang, J.H. and Wang, Y.H.},
  title     = {Bidirectional Maximum Entropy Training With Word Co-Occurrence for Video Captioning},
  journal   = MultMed,
  volume    = {25},
  year      = {2023},
  pages     = {4494-4507},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT240998},
}

@article{bb246153,
  author    = {Yang, B. and Cao, M. and Zou, Y.X.},
  title     = {Concept-Aware Video Captioning: Describing Videos With Effective Prior Information},
  journal   = IP,
  volume    = {32},
  year      = {2023},
  pages     = {5366-5378},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT240999},
}

@article{bb246154,
  author    = {Luo, X.M. and Luo, X.T. and Wang, D. and Liu, J.H. and Wan, B. and Zhao, L.},
  title     = {Global semantic enhancement network for video captioning},
  journal   = PR,
  volume    = {145},
  year      = {2024},
  pages     = {109906},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT241000},
}

@article{bb246155,
  author    = {Liu, Z. and Wang, T. and Zhang, J. and Zheng, F. and Jiang, W.H. and Lu, K.},
  title     = {Show, Tell and Rephrase: Diverse Video Captioning via Two-Stage Progressive Training},
  journal   = MultMed,
  volume    = {25},
  year      = {2023},
  pages     = {7894-7905},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT241001},
}

@article{bb246156,
  author    = {Rao, Q. and Yu, X. and Li, G. and Zhu, L.C.},
  title     = {CMGNet: Collaborative multi-modal graph network for video captioning},
  journal   = CVIU,
  volume    = {238},
  year      = {2024},
  pages     = {103864},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT241002},
}

@article{bb246157,
  author    = {Li, G.R. and Ye, H.H. and Qi, Y.K. and Wang, S.H. and Qing, L.Y. and Huang, Q.M. and Yang, M.H.},
  title     = {Learning Hierarchical Modular Networks for Video Captioning},
  journal   = PAMI,
  volume    = {46},
  year      = {2024},
  number    = {2},
  month     = {February},
  pages     = {1049-1064},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT241003},
}

@inproceedings{bb246158,
  author    = {Ye, H.H. and Li, G.R. and Qi, Y.K. and Wang, S.H. and Huang, Q.M. and Yang, M.H.},
  title     = {Hierarchical Modular Network for Video Captioning},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {17918-17927},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT241004},
}

@article{bb246159,
  author    = {Xie, Y.L. and Niu, J.J. and Zhang, Y. and Ren, F.},
  title     = {Global-Shared Text Representation Based Multi-Stage Fusion Transformer Network for Multi-Modal Dense Video Captioning},
  journal   = MultMed,
  volume    = {26},
  year      = {2024},
  pages     = {3164-3179},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT241005},
}

@article{bb246160,
  author    = {Jing, S. and Zhang, H. and Zeng, P.P. and Gao, L.L. and Song, J.K. and Shen, H.T.},
  title     = {Memory-Based Augmentation Network for Video Captioning},
  journal   = MultMed,
  volume    = {26},
  year      = {2024},
  pages     = {2367-2379},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT241006},
}

@article{bb246161,
  author    = {Liang, Y.Z. and Zhu, L.C. and Wang, X.H. and Yang, Y.},
  title     = {IcoCap: Improving Video Captioning by Compounding Images},
  journal   = MultMed,
  volume    = {26},
  year      = {2024},
  pages     = {4389-4400},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT241007},
}

@article{bb246162,
  author    = {Wang, Z.H. and Li, L. and Xie, Z.W. and Liu, C.B.},
  title     = {Video Frame-wise Explanation Driven Contrastive Learning for Procedural Text Generation},
  journal   = CVIU,
  volume    = {241},
  year      = {2024},
  pages     = {103954},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT241008},
}

@article{bb246163,
  author    = {Chen, Y.X. and Zhang, Z.Q. and Qi, Z.A. and Yuan, C.F. and Wang, J. and Shan, Y. and Li, B. and Hu, W.M. and Qie, X. and Wu, J.P.},
  title     = {DARTScore: DuAl-Reconstruction Transformer for Video Captioning Evaluation},
  journal   = CirSysVideo,
  volume    = {34},
  year      = {2024},
  number    = {4},
  month     = {April},
  pages     = {2041-2055},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT241009},
}

@article{bb246164,
  author    = {Liu, C.S. and Zhang, X. and Chang, F. and Li, S. and Hao, P.H. and Lu, Y. and Wang, Y.H.},
  title     = {Traffic Scenario Understanding and Video Captioning via Guidance Attention Captioning Network},
  journal   = ITS,
  volume    = {25},
  year      = {2024},
  number    = {5},
  month     = {May},
  pages     = {3615-3627},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT241010},
}

% NOTE(review): the title ends in "and explicit" and looks truncated --
% confirm the full title against the publisher record.
@article{bb246165,
  author    = {Zhang, Y.J. and Xu, T.Y. and Song, X.N. and Zhu, X.F. and Feng, Z.H. and Wu, X.J.},
  title     = {Towards accurate unsupervised video captioning with implicit visual feature injection and explicit},
  journal   = PRL,
  volume    = {183},
  year      = {2024},
  pages     = {133-139},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT241011},
}

@article{bb246166,
  author    = {Im, S.K. and Chan, K.H.},
  title     = {Local feature-based video captioning with multiple classifier and CARU-attention},
  journal   = IET-IPR,
  volume    = {18},
  year      = {2024},
  number    = {9},
  pages     = {2304-2317},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT241012},
}

@article{bb246167,
  author    = {Putra, B.H.H. and Jeong, C.},
  title     = {Video captioning based on dual learning via multiple reconstruction blocks},
  journal   = IVC,
  volume    = {148},
  year      = {2024},
  pages     = {105119},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT241013},
}

@article{bb246168,
  author    = {Chou, S.H. and Little, J.J. and Sigal, L.},
  title     = {Implicit and explicit commonsense for multi-sentence video captioning},
  journal   = CVIU,
  volume    = {247},
  year      = {2024},
  pages     = {104064},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT241014},
}

@article{bb246169,
  author    = {Tian, M. and Li, G.R. and Qi, Y.K. and Wang, S.H. and Sheng, Q.Z. and Huang, Q.M.},
  title     = {Rethink video retrieval representation for video captioning},
  journal   = PR,
  volume    = {156},
  year      = {2024},
  pages     = {110744},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT241015},
}

@article{bb246170,
  author    = {Liu, S. and Li, A. and Zhao, Y.W. and Wang, J.H. and Wang, Y.H.},
  title     = {EvCap: Element-Aware Video Captioning},
  journal   = CirSysVideo,
  volume    = {34},
  year      = {2024},
  number    = {10},
  month     = {October},
  pages     = {9718-9731},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT241016},
}

@article{bb246171,
  author    = {Lou, Y. and Zhang, W.J. and Song, X.N. and Hua, Y. and Wu, X.J.},
  title     = {EDS: Exploring deeper into semantics for video captioning},
  journal   = PRL,
  volume    = {186},
  year      = {2024},
  pages     = {133-140},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT241017},
}

@article{bb246172,
  author    = {Yuan, F.N. and Gu, S. and Zhang, X.F. and Fang, Z.J.},
  title     = {Fully exploring object relation interaction and hidden state attention for video captioning},
  journal   = PR,
  volume    = {159},
  year      = {2025},
  pages     = {111138},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT241018},
}

@article{bb246173,
  author    = {Che, N. and Liu, J. and Yu, F. and Cheng, L.C. and Wang, Y.X. and Li, Y.H. and Liu, C.R.},
  title     = {Multimodality-guided Visual-Caption Semantic Enhancement},
  journal   = CVIU,
  volume    = {249},
  year      = {2024},
  pages     = {104139},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT241019},
}

@article{bb246174,
  author    = {Liu, Y.Y. and Zhu, H. and Wu, Z. and Du, S. and Wu, S. and Shi, J.},
  title     = {Adaptive semantic guidance network for video captioning},
  journal   = CVIU,
  volume    = {251},
  year      = {2025},
  pages     = {104255},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT241020},
}

@article{bb246175,
  author    = {Jin, P. and Li, H. and Yuan, L. and Yan, S.C. and Chen, J.},
  title     = {Hierarchical Banzhaf Interaction for General Video-Language Representation Learning},
  journal   = PAMI,
  volume    = {47},
  year      = {2025},
  number    = {3},
  month     = {March},
  pages     = {2125-2139},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT241021},
}

% NOTE(review): PAGES holds the placeholder xx-yy -- replace it with the real
% article number or page range once confirmed against the publisher record.
@article{bb246176,
  author    = {Qasim, I. and Horsch, A. and Prasad, D.},
  title     = {Dense Video Captioning: A Survey of Techniques, Datasets and Evaluation Protocols},
  journal   = Surveys,
  volume    = {57},
  year      = {2025},
  number    = {6},
  month     = {February},
  pages     = {xx-yy},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT241022},
}

@article{bb246177,
  author    = {Estevam, V. and Laroca, R. and Pedrini, H. and Menotti, D.},
  title     = {Dense video captioning using unsupervised semantic information},
  journal   = JVCIR,
  volume    = {107},
  year      = {2025},
  pages     = {104385},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT241023},
}

@article{bb246178,
  author    = {Verma, D. and Dutta, T.},
  title     = {Syntactically and semantically enhanced captioning network via hybrid attention and POS tagging prompt},
  journal   = CVIU,
  volume    = {255},
  year      = {2025},
  pages     = {104340},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT241024},
}

@article{bb246179,
  author    = {Han, T.T. and Xu, Y.C. and Yu, J. and Yu, Z. and Zhao, S.C.},
  title     = {Action-Driven Semantic Representation and Aggregation for Video Captioning},
  journal   = CirSysVideo,
  volume    = {35},
  year      = {2025},
  number    = {4},
  month     = {April},
  pages     = {3383-3395},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT241025},
}

@article{bb246180,
  author    = {Jiang, W.H. and Liu, L. and Fang, Y.M. and Cheng, Y. and Peng, Y.X. and Liu, Y.},
  title     = {Learning Comprehensive Visual Grounding for Video Captioning},
  journal   = CirSysVideo,
  volume    = {35},
  year      = {2025},
  number    = {4},
  month     = {April},
  pages     = {3355-3367},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT241026},
}

@article{bb246181,
  author    = {Ren, X. and Han, Y.F. and Wei, B. and Tang, X.S. and Hao, K.R.},
  title     = {From visual features to key concepts: A Dynamic and Static Concept-driven approach for video captioning},
  journal   = PRL,
  volume    = {193},
  year      = {2025},
  pages     = {64-70},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT241027}
}

@article{bb246182,
  author    = {Wang, Y. and Liu, Y.Y. and Zhou, S.P. and Huang, Y.X. and Tang, C. and Zhou, W. and Chen, Z.},
  title     = {Emotion-Oriented Cross-Modal Prompting and Alignment for Human-Centric Emotional Video Captioning},
  journal   = MultMed,
  volume    = {27},
  year      = {2025},
  pages     = {3766-3780},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT241028}
}

@article{bb246183,
  author    = {Luo, H.L. and Cai, X. and Shark, L.K.},
  title     = {Frame-by-Frame Multi-Object Tracking-Guided Video Captioning},
  journal   = CirSysVideo,
  volume    = {35},
  year      = {2025},
  number    = {7},
  month     = {July},
  pages     = {6357-6370},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT241029}
}

@article{bb246184,
  author    = {Choi, W. and Chen, J. and Yoon, J.W.},
  title     = {ADVC: Adversarial dense video captioning with unsupervised pretraining},
  journal   = IVC,
  volume    = {161},
  year      = {2025},
  pages     = {105595},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT241030}
}

@article{bb246185,
  author    = {Ma, Y.C. and Qing, L.Y. and Li, G.R. and Qi, Y.K. and Beheshti, A. and Sheng, Q.Z. and Huang, Q.M.},
  title     = {RETTA: Retrieval-enhanced test-time adaptation for zero-shot video captioning},
  journal   = PR,
  volume    = {171},
  year      = {2026},
  pages     = {112170},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT241031}
}

@article{bb246186,
  author    = {Verma, D. and Dutta, T.},
  title     = {Seeing the Rare: Meta-Aware Pointer Networks for Long-Tailed Video Captioning},
  journal   = SPLetters,
  volume    = {32},
  year      = {2025},
  pages     = {4269-4273},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT241032}
}

@inproceedings{bb246187,
  author    = {Wu, K. and Li, P. and Fu, J.W. and Li, Y.Z. and Wu, Y. and Liu, Y.H. and Wang, J.J. and Zhou, S.P.},
  title     = {Event-Equalized Dense Video Captioning},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {8417-8427},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT241033}
}

@inproceedings{bb246188,
  author    = {Xue, Z.H. and An, J.B. and Yang, X.T. and Grauman, K.},
  title     = {Progress-Aware Video Frame Captioning},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {13639-13650},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT241034}
}

@inproceedings{bb246189,
  author    = {Lee, S.H. and Wang, J. and Fan, D. and Zhang, Z.K. and Liu, L. and Hao, X. and Bhat, V. and Li, X.Y.},
  title     = {Now you see Me: Context-Aware Automatic Audio Description},
  booktitle = WACV25,
  year      = {2025},
  pages     = {5530-5539},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT241035}
}

@inproceedings{bb246190,
  author    = {Kainulainen, J. and Guo, Z.X. and Laaksonen, J.},
  title     = {Diffusion-based Multimodal Video Captioning},
  booktitle = ACCV24,
  year      = {2024},
  pages     = {III: 148-165},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT241036}
}

@inproceedings{bb246191,
  author    = {Perrett, T. and Han, T. and Damen, D. and Zisserman, A.},
  title     = {It's Just Another Day: Unique Video Captioning by Discriminative Prompting},
  booktitle = ACCV24,
  year      = {2024},
  pages     = {III: 275-293},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT241037}
}

@inproceedings{bb246192,
  author    = {Kim, B. and Hwang, D. and Cho, S.J. and Jang, Y.S. and Lee, H.L. and Lee, M.},
  title     = {Show, Think, and Tell: Thought-Augmented Fine-Tuning of Large Language Models for Video Captioning},
  booktitle = WhatNext24,
  year      = {2024},
  pages     = {1808-1817},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT241038}
}

@inproceedings{bb246193,
  author    = {Shen, Y.H. and Yang, L.J. and Wen, L.Y. and Yu, H.C. and Elhamifar, E. and Wang, H.},
  title     = {Exploring the Role of Audio in Video Captioning},
  booktitle = MULA24,
  year      = {2024},
  pages     = {2090-2100},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT241039}
}

@inproceedings{bb246194,
  author    = {Shoman, M. and Wang, D.D. and Aboah, A. and Abdel Aty, M.},
  title     = {Enhancing Traffic Safety with Parallel Dense Video Captioning for End-to-End Event Analysis},
  booktitle = AICity24,
  year      = {2024},
  pages     = {7125-7133},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT241040}
}

@inproceedings{bb246195,
  author    = {Wu, H. and Liu, H. and Qiao, Y. and Sun, X.},
  title     = {DIBS: Enhancing Dense Video Captioning with Unlabeled Videos via Pseudo Boundary Enrichment and Online Refinement},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {18699-18708},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT241041}
}

@inproceedings{bb246196,
  author    = {Zhou, X.Y. and Arnab, A. and Buch, S. and Yan, S. and Myers, A. and Xiong, X. and Nagrani, A. and Schmid, C.},
  title     = {Streaming Dense Video Captioning},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {18243-18252},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT241042}
}

@inproceedings{bb246197,
  author    = {Kim, M. and Kim, H.B. and Moon, J. and Choi, J. and Kim, S.T.},
  title     = {Do You Remember? Dense Video Captioning with Cross-Modal Memory Retrieval},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {13894-13904},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT241043}
}

@inproceedings{bb246198,
  author    = {Xu, J. and Huang, Y.F. and Hou, J.L. and Chen, G. and Zhang, Y. and Feng, R. and Xie, W.},
  title     = {Retrieval-Augmented Egocentric Video Captioning},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {13525-13536},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT241044}
}

@inproceedings{bb246199,
  author    = {Malakan, Z.M. and Hassan, G.M. and Mian, A.},
  title     = {Sequential Image Storytelling Model Based on Transformer Attention Pooling},
  booktitle = IVCNZ23,
  year      = {2023},
  pages     = {1-6},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vc4.html#TT241045}
}

Last update: Jan 8, 2026 at 12:52:16