@article{bb246100,
AUTHOR = "Ma, C.X. and Lyu, L. and Lu, G.L. and Lyu, C.",
TITLE = "Adaptive Multiview Graph Difference Analysis for Video Summarization",
JOURNAL = CirSysVideo,
VOLUME = "32",
YEAR = "2022",
NUMBER = "12",
MONTH = "December",
PAGES = "8795-8808",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825persum5.html#TT240947"}
@article{bb246101,
AUTHOR = "Zhu, Y. and Zhao, W.T. and Hua, R. and Wu, X.X.",
TITLE = "Topic-aware video summarization using multimodal transformer",
JOURNAL = PR,
VOLUME = "140",
YEAR = "2023",
PAGES = "109578",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825persum5.html#TT240948"}
@inproceedings{bb246102,
AUTHOR = "Udhayanan, P. and Bv, S. and Laturia, P. and Chauhan, D. and Khandelwal, D. and Petrangeli, S. and Srinivasan, B.V.",
TITLE = "Recipe2Video: Synthesizing Personalized Videos from Recipe Texts",
BOOKTITLE = WACV23,
YEAR = "2023",
PAGES = "2267-2276",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825persum5.html#TT240949"}
@inproceedings{bb246103,
AUTHOR = "Saquil, Y. and Chen, D. and He, Y. and Li, C. and Yang, Y.L.",
TITLE = "Multiple Pairwise Ranking Networks for Personalized Video
Summarization",
BOOKTITLE = ICCV21,
YEAR = "2021",
PAGES = "1698-1707",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825persum5.html#TT240950"}
@inproceedings{bb246104,
AUTHOR = "Ramos, W.L.S. and Silva, M.M. and Araujo, E.R. and Neves, A.C. and Nascimento, E.R.",
TITLE = "Personalizing Fast-Forward Videos Based on Visual and Textual
Features from Social Network",
BOOKTITLE = WACV20,
YEAR = "2020",
PAGES = "3260-3269",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825persum5.html#TT240951"}
@inproceedings{bb246105,
AUTHOR = "Teraguchi, M. and Masumitsu, K. and Echigo, T. and Sekiguchi, S. and Etoh, M.",
TITLE = "Rapid generation of event-based indexes for personalized video digests",
BOOKTITLE = ICPR02,
YEAR = "2002",
PAGES = "II: 1041-1044",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825persum5.html#TT240952"}
@inproceedings{bb246106,
AUTHOR = "Oh, T.H. and Joo, K. and Joshi, N. and Wang, B.Y. and Kweon, I.S. and Kang, S.B.",
TITLE = "Personalized Cinemagraphs Using Semantic Understanding and
Collaborative Learning",
BOOKTITLE = ICCV17,
YEAR = "2017",
PAGES = "5170-5179",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825persum5.html#TT240953"}
@inproceedings{bb246107,
AUTHOR = "Shafeian, H. and Bhanu, B.",
TITLE = "Integrated personalized video summarization and retrieval",
BOOKTITLE = ICPR12,
YEAR = "2012",
PAGES = "996-999",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825persum5.html#TT240954"}
@inproceedings{bb246108,
AUTHOR = "Han, B.H. and Hamm, J. and Sim, J.",
TITLE = "Personalized video summarization with human in the loop",
BOOKTITLE = WACV11,
YEAR = "2011",
PAGES = "51-57",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825persum5.html#TT240955"}
@inproceedings{bb246109,
AUTHOR = "Miyamori, H.",
TITLE = "Automatic Generation of Personalized Digest Based on Context Flow and
Distinctive Events",
BOOKTITLE = CIVR04,
YEAR = "2004",
PAGES = "179-188",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825persum5.html#TT240956"}
@inproceedings{bb246110,
AUTHOR = "Miyamori, H.",
TITLE = "Automatic Generation of Personalized Video Summary Based on Context
Flow and Distinctive Events",
BOOKTITLE = VLBV03,
YEAR = "2003",
PAGES = "111-121",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825persum5.html#TT240957"}
@inproceedings{bb246111,
AUTHOR = "Jaimes, A. and Echigo, T. and Teraguchi, M. and Satoh, F.",
TITLE = "Learning personalized video highlights from detailed MPEG-7 metadata",
BOOKTITLE = ICIP02,
YEAR = "2002",
PAGES = "I: 133-136",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825persum5.html#TT240958"}
@article{bb246112,
AUTHOR = "Qiu, Z.F. and Yao, T. and Mei, T.",
TITLE = "Learning Deep Spatio-Temporal Dependence for Semantic Video
Segmentation",
JOURNAL = MultMed,
VOLUME = "20",
YEAR = "2018",
NUMBER = "4",
MONTH = "April",
PAGES = "939-949",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT240959"}
@inproceedings{bb246113,
AUTHOR = "Qiu, Z.F. and Yao, T. and Mei, T.",
TITLE = "Learning Spatio-Temporal Representation with Pseudo-3D Residual
Networks",
BOOKTITLE = ICCV17,
YEAR = "2017",
PAGES = "5534-5542",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT240960"}
@inproceedings{bb246114,
AUTHOR = "Qiu, Z.F. and Yao, T. and Ngo, C.W. and Tian, X.M. and Mei, T.",
TITLE = "Learning Spatio-Temporal Representation With Local and Global Diffusion",
BOOKTITLE = CVPR19,
YEAR = "2019",
PAGES = "12048-12057",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT240961"}
@inproceedings{bb246115,
AUTHOR = "Yao, T. and Pan, Y. and Li, Y. and Qiu, Z. and Mei, T.",
TITLE = "Boosting Image Captioning with Attributes",
BOOKTITLE = ICCV17,
YEAR = "2017",
PAGES = "4904-4912",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT240962"}
@inproceedings{bb246116,
AUTHOR = "Pan, Y. and Yao, T. and Li, Y. and Mei, T.",
TITLE = "Video Captioning with Transferred Semantic Attributes",
BOOKTITLE = CVPR17,
YEAR = "2017",
PAGES = "984-992",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT240963"}
@article{bb246117,
AUTHOR = "Zhao, B. and Li, X. and Lu, X.",
TITLE = "CAM-RNN: Co-Attention Model Based RNN for Video Captioning",
JOURNAL = IP,
VOLUME = "28",
YEAR = "2019",
NUMBER = "11",
MONTH = "November",
PAGES = "5552-5565",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT240964"}
@article{bb246118,
AUTHOR = "Yan, C. and Tu, Y. and Wang, X. and Zhang, Y. and Hao, X. and Zhang, Y. and Dai, Q.",
TITLE = "STAT: Spatial-Temporal Attention Mechanism for Video Captioning",
JOURNAL = MultMed,
VOLUME = "22",
YEAR = "2020",
NUMBER = "1",
MONTH = "January",
PAGES = "229-241",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT240965"}
@article{bb246119,
AUTHOR = "Dai, Q. and Zhang, Y. and Hao, X. and Zhang, Y. and Wang, X. and Tu, Y. and Yan, C.",
TITLE = "STAT: Spatial-Temporal Attention Mechanism for Video Captioning",
JOURNAL = MultMed,
VOLUME = "22",
YEAR = "2020",
NUMBER = "3",
MONTH = "March",
PAGES = "830",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT240965"}
@article{bb246120,
AUTHOR = "Aafaq, N. and Mian, A. and Liu, W. and Gilani, S.Z. and Shah, M.",
TITLE = "Video Description:
A Survey of Methods, Datasets, and Evaluation Metrics",
JOURNAL = Surveys,
VOLUME = "52",
YEAR = "2019",
NUMBER = "6",
MONTH = "October",
PAGES = "xx-yy",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT240966"}
@article{bb246121,
AUTHOR = "Zhang, Z. and Xu, D. and Ouyang, W. and Tan, C.",
TITLE = "Show, Tell and Summarize: Dense Video Captioning Using Visual Cue
Aided Sentence Summarization",
JOURNAL = CirSysVideo,
VOLUME = "30",
YEAR = "2020",
NUMBER = "9",
MONTH = "September",
PAGES = "3130-3139",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT240967"}
@article{bb246122,
AUTHOR = "Zhang, W. and Wang, B.R. and Ma, L. and Liu, W.",
TITLE = "Reconstruct and Represent Video Contents for Captioning via
Reinforcement Learning",
JOURNAL = PAMI,
VOLUME = "42",
YEAR = "2020",
NUMBER = "12",
MONTH = "December",
PAGES = "3088-3101",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT240968"}
@article{bb246123,
AUTHOR = "Lee, S. and Kim, I.",
TITLE = "DVC-Net: A deep neural network model for dense video captioning",
JOURNAL = IET-CV,
VOLUME = "15",
YEAR = "2021",
NUMBER = "1",
PAGES = "12-23",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT240969"}
@article{bb246124,
AUTHOR = "Qi, S.S. and Yang, L.X.",
TITLE = "Video captioning via a symmetric bidirectional decoder",
JOURNAL = IET-CV,
VOLUME = "15",
YEAR = "2021",
NUMBER = "4",
PAGES = "283-296",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT240970"}
@article{bb246125,
AUTHOR = "Li, L.H. and Zhang, Y.D. and Tang, S. and Xie, L.X. and Li, X.Y. and Tian, Q.",
TITLE = "Adaptive Spatial Location With Balanced Loss for Video Captioning",
JOURNAL = CirSysVideo,
VOLUME = "32",
YEAR = "2022",
NUMBER = "1",
MONTH = "January",
PAGES = "17-30",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT240971"}
@article{bb246126,
AUTHOR = "Zheng, Y. and Zhang, Y. and Feng, R. and Zhang, T. and Fan, W.G.",
TITLE = "Stacked Multimodal Attention Network for Context-Aware Video
Captioning",
JOURNAL = CirSysVideo,
VOLUME = "32",
YEAR = "2022",
NUMBER = "1",
MONTH = "January",
PAGES = "31-42",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT240972"}
@article{bb246127,
AUTHOR = "Li, L. and Gao, X.Y. and Deng, J.C. and Tu, Y.B. and Zha, Z.J. and Huang, Q.M.",
TITLE = "Long Short-Term Relation Transformer With Global Gating for Video
Captioning",
JOURNAL = IP,
VOLUME = "31",
YEAR = "2022",
PAGES = "2726-2738",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT240973"}
@article{bb246128,
AUTHOR = "Munusamy, H. and Sekhar, C.C.",
TITLE = "Video captioning using Semantically Contextual Generative Adversarial
Network",
JOURNAL = CVIU,
VOLUME = "221",
YEAR = "2022",
PAGES = "103453",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT240974"}
@article{bb246129,
AUTHOR = "Wang, H. and Lin, G.S. and Hoi, S.C.H. and Miao, C.Y.",
TITLE = "Cross-Modal Graph With Meta Concepts for Video Captioning",
JOURNAL = IP,
VOLUME = "31",
YEAR = "2022",
PAGES = "5150-5162",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT240975"}
@article{bb246130,
AUTHOR = "Xiao, H. and Shi, J.L.",
TITLE = "Diverse video captioning through latent variable expansion",
JOURNAL = PRL,
VOLUME = "160",
YEAR = "2022",
PAGES = "19-25",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT240976"}
@article{bb246131,
AUTHOR = "Prudviraj, J. and Reddy, M.I. and Vishnu, C. and Mohan, C.K.",
TITLE = "AAP-MIT: Attentive Atrous Pyramid Network and Memory Incorporated
Transformer for Multisentence Video Description",
JOURNAL = IP,
VOLUME = "31",
YEAR = "2022",
PAGES = "5559-5569",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT240977"}
@article{bb246132,
AUTHOR = "Xu, W. and Miao, Z.J. and Yu, J. and Tian, Y. and Wan, L. and Ji, Q.",
TITLE = "Bridging Video and Text:
A Two-Step Polishing Transformer for Video Captioning",
JOURNAL = CirSysVideo,
VOLUME = "32",
YEAR = "2022",
NUMBER = "9",
MONTH = "September",
PAGES = "6293-6307",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT240978"}
@article{bb246133,
AUTHOR = "Wu, B.F. and Niu, G.C. and Yu, J. and Xiao, X.Y. and Zhang, J. and Wu, H.",
TITLE = "Towards Knowledge-Aware Video Captioning via Transitive Visual
Relationship Detection",
JOURNAL = CirSysVideo,
VOLUME = "32",
YEAR = "2022",
NUMBER = "10",
MONTH = "October",
PAGES = "6753-6765",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT240979"}
@article{bb246134,
AUTHOR = "Yan, L.Q. and Ma, S.Q. and Wang, Q.F. and Chen, Y.J. and Zhang, X.Y. and Savakis, A. and Liu, D.F.",
TITLE = "Video Captioning Using Global-Local Representation",
JOURNAL = CirSysVideo,
VOLUME = "32",
YEAR = "2022",
NUMBER = "10",
MONTH = "October",
PAGES = "6642-6656",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT240980"}
@article{bb246135,
AUTHOR = "Subramaniam, A. and Vaidya, J. and Ameen, M.A.M. and Nambiar, A. and Mittal, A.",
TITLE = "Co-segmentation inspired attention module for video-based computer
vision tasks",
JOURNAL = CVIU,
VOLUME = "223",
YEAR = "2022",
PAGES = "103532",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT240981"}
@article{bb246136,
AUTHOR = "Liu, F.L. and Wu, X. and You, C.Y. and Ge, S. and Zou, Y.X. and Sun, X.",
TITLE = "Aligning Source Visual and Target Language Domains for Unpaired Video
Captioning",
JOURNAL = PAMI,
VOLUME = "44",
YEAR = "2022",
NUMBER = "12",
MONTH = "December",
PAGES = "9255-9268",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT240982"}
@article{bb246137,
AUTHOR = "Yuan, Y.T. and Ma, L. and Zhu, W.W.",
TITLE = "Syntax Customized Video Captioning by Imitating Exemplar Sentences",
JOURNAL = PAMI,
VOLUME = "44",
YEAR = "2022",
NUMBER = "12",
MONTH = "December",
PAGES = "10209-10221",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT240983"}
@article{bb246138,
AUTHOR = "Chen, H.R. and Li, J.M. and Frintrop, S. and Hu, X.L.",
TITLE = "The MSR-Video to Text dataset with clean annotations",
JOURNAL = CVIU,
VOLUME = "225",
YEAR = "2022",
PAGES = "103581",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT240984"}
@article{bb246139,
AUTHOR = "Moctezuma, D. and Ramirez delReal, T. and Ruiz, G. and Gonzalez Chavez, O.",
TITLE = "Video captioning: A comparative review of where we are and which
could be the route",
JOURNAL = CVIU,
VOLUME = "231",
YEAR = "2023",
PAGES = "103671",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT240985"}
@article{bb246140,
AUTHOR = "Aafaq, N. and Mian, A. and Akhtar, N. and Liu, W. and Shah, M.",
TITLE = "Dense Video Captioning With Early Linguistic Information Fusion",
JOURNAL = MultMed,
VOLUME = "25",
YEAR = "2023",
PAGES = "2309-2322",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT240986"}
@inproceedings{bb246141,
AUTHOR = "Wang, J.W. and Jiang, W.H. and Ma, L. and Liu, W. and Xu, Y.",
TITLE = "Bidirectional Attentive Fusion with Context Gating for Dense Video
Captioning",
BOOKTITLE = CVPR18,
YEAR = "2018",
PAGES = "7190-7198",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT240987"}
@article{bb246142,
AUTHOR = "He, M.G. and Du, W.J. and Wen, Z.Q. and Du, Q. and Xie, Y.T. and Wu, Q.",
TITLE = "Multi-Granularity Aggregation Transformer for Joint Video-Audio-Text
Representation Learning",
JOURNAL = CirSysVideo,
VOLUME = "33",
YEAR = "2023",
NUMBER = "6",
MONTH = "June",
PAGES = "2990-3002",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT240988"}
@article{bb246143,
AUTHOR = "Qian, Y. and Mao, Y.C. and Chen, Z.H. and Li, C. and Bloh, O.T. and Huang, Q.",
TITLE = "Dense video captioning based on local attention",
JOURNAL = IET-IPR,
VOLUME = "17",
YEAR = "2023",
NUMBER = "9",
PAGES = "2673-2685",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT240989"}
@article{bb246144,
AUTHOR = "Tang, M.K. and Wang, Z.Y. and Zeng, Z.Y. and Li, X. and Zhou, L.P.",
TITLE = "Stay in Grid: Improving Video Captioning via Fully Grid-Level
Representation",
JOURNAL = CirSysVideo,
VOLUME = "33",
YEAR = "2023",
NUMBER = "7",
MONTH = "July",
PAGES = "3319-3332",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT240990"}
@article{bb246145,
AUTHOR = "Velda, V. and Immanuel, S.A. and Hendria, W.F. and Jeong, C.",
TITLE = "Improving distinctiveness in video captioning with text-video
similarity",
JOURNAL = IVC,
VOLUME = "136",
YEAR = "2023",
PAGES = "104728",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT240991"}
@article{bb246146,
AUTHOR = "Zhu, J.K. and Zeng, P.P. and Gao, L.L. and Li, G.F. and Liao, D.L. and Song, J.K.",
TITLE = "Complementarity-Aware Space Learning for Video-Text Retrieval",
JOURNAL = CirSysVideo,
VOLUME = "33",
YEAR = "2023",
NUMBER = "8",
MONTH = "August",
PAGES = "4362-4374",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT240992"}
@article{bb246147,
AUTHOR = "Wang, H. and Zhang, L.B. and Fan, H. and Luo, T.J.",
TITLE = "Collaborative three-stream transformers for video captioning",
JOURNAL = CVIU,
VOLUME = "235",
YEAR = "2023",
PAGES = "103799",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT240993"}
@inproceedings{bb246148,
AUTHOR = "Gu, X. and Chen, G. and Wang, Y.F. and Zhang, L.B. and Luo, T.J. and Wen, L.Y.",
TITLE = "Text with Knowledge Graph Augmented Transformer for Video Captioning",
BOOKTITLE = CVPR23,
YEAR = "2023",
PAGES = "18941-18951",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT240994"}
@article{bb246149,
AUTHOR = "Xu, T. and Cui, Y.Y. and He, X.Y. and Liu, C.H.",
TITLE = "A latent topic-aware network for dense video captioning",
JOURNAL = IET-CV,
VOLUME = "17",
YEAR = "2023",
NUMBER = "7",
PAGES = "795-803",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT240995"}
@inproceedings{bb246150,
AUTHOR = "Lu, M. and Li, X.Y. and Liu, C.H.",
TITLE = "Context Visual Information-based Deliberation Network for Video
Captioning",
BOOKTITLE = ICPR21,
YEAR = "2021",
PAGES = "9812-9818",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT240996"}
@article{bb246151,
AUTHOR = "Wu, B. and Liu, B. and Huang, P. and Bao, J. and Xi, P. and Yu, J.",
TITLE = "Concept Parser With Multimodal Graph Learning for Video Captioning",
JOURNAL = CirSysVideo,
VOLUME = "33",
YEAR = "2023",
NUMBER = "9",
MONTH = "September",
PAGES = "4484-4495",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT240997"}
@article{bb246152,
AUTHOR = "Liu, S. and Li, A. and Wang, J.H. and Wang, Y.H.",
TITLE = "Bidirectional Maximum Entropy Training With Word Co-Occurrence for
Video Captioning",
JOURNAL = MultMed,
VOLUME = "25",
YEAR = "2023",
PAGES = "4494-4507",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT240998"}
@article{bb246153,
AUTHOR = "Yang, B. and Cao, M. and Zou, Y.X.",
TITLE = "Concept-Aware Video Captioning:
Describing Videos With Effective Prior Information",
JOURNAL = IP,
VOLUME = "32",
YEAR = "2023",
PAGES = "5366-5378",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT240999"}
@article{bb246154,
AUTHOR = "Luo, X.M. and Luo, X.T. and Wang, D. and Liu, J.H. and Wan, B. and Zhao, L.",
TITLE = "Global semantic enhancement network for video captioning",
JOURNAL = PR,
VOLUME = "145",
YEAR = "2024",
PAGES = "109906",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT241000"}
@article{bb246155,
AUTHOR = "Liu, Z. and Wang, T. and Zhang, J. and Zheng, F. and Jiang, W.H. and Lu, K.",
TITLE = "Show, Tell and Rephrase: Diverse Video Captioning via Two-Stage
Progressive Training",
JOURNAL = MultMed,
VOLUME = "25",
YEAR = "2023",
PAGES = "7894-7905",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT241001"}
@article{bb246156,
AUTHOR = "Rao, Q. and Yu, X. and Li, G. and Zhu, L.C.",
TITLE = "CMGNet: Collaborative multi-modal graph network for video captioning",
JOURNAL = CVIU,
VOLUME = "238",
YEAR = "2024",
PAGES = "103864",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT241002"}
@article{bb246157,
AUTHOR = "Li, G.R. and Ye, H.H. and Qi, Y.K. and Wang, S.H. and Qing, L.Y. and Huang, Q.M. and Yang, M.H.",
TITLE = "Learning Hierarchical Modular Networks for Video Captioning",
JOURNAL = PAMI,
VOLUME = "46",
YEAR = "2024",
NUMBER = "2",
MONTH = "February",
PAGES = "1049-1064",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT241003"}
@inproceedings{bb246158,
AUTHOR = "Ye, H.H. and Li, G.R. and Qi, Y.K. and Wang, S.H. and Huang, Q.M. and Yang, M.H.",
TITLE = "Hierarchical Modular Network for Video Captioning",
BOOKTITLE = CVPR22,
YEAR = "2022",
PAGES = "17918-17927",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT241004"}
@article{bb246159,
AUTHOR = "Xie, Y.L. and Niu, J.J. and Zhang, Y. and Ren, F.",
TITLE = "Global-Shared Text Representation Based Multi-Stage Fusion
Transformer Network for Multi-Modal Dense Video Captioning",
JOURNAL = MultMed,
VOLUME = "26",
YEAR = "2024",
PAGES = "3164-3179",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT241005"}
@article{bb246160,
AUTHOR = "Jing, S. and Zhang, H. and Zeng, P.P. and Gao, L.L. and Song, J.K. and Shen, H.T.",
TITLE = "Memory-Based Augmentation Network for Video Captioning",
JOURNAL = MultMed,
VOLUME = "26",
YEAR = "2024",
PAGES = "2367-2379",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT241006"}
@article{bb246161,
AUTHOR = "Liang, Y.Z. and Zhu, L.C. and Wang, X.H. and Yang, Y.",
TITLE = "IcoCap: Improving Video Captioning by Compounding Images",
JOURNAL = MultMed,
VOLUME = "26",
YEAR = "2024",
PAGES = "4389-4400",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT241007"}
@article{bb246162,
AUTHOR = "Wang, Z.H. and Li, L. and Xie, Z.W. and Liu, C.B.",
TITLE = "Video Frame-wise Explanation Driven Contrastive Learning for
Procedural Text Generation",
JOURNAL = CVIU,
VOLUME = "241",
YEAR = "2024",
PAGES = "103954",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT241008"}
@article{bb246163,
AUTHOR = "Chen, Y.X. and Zhang, Z.Q. and Qi, Z.A. and Yuan, C.F. and Wang, J. and Shan, Y. and Li, B. and Hu, W.M. and Qie, X. and Wu, J.P.",
TITLE = "DARTScore: DuAl-Reconstruction Transformer for Video Captioning
Evaluation",
JOURNAL = CirSysVideo,
VOLUME = "34",
YEAR = "2024",
NUMBER = "4",
MONTH = "April",
PAGES = "2041-2055",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT241009"}
@article{bb246164,
AUTHOR = "Liu, C.S. and Zhang, X. and Chang, F. and Li, S. and Hao, P.H. and Lu, Y. and Wang, Y.H.",
TITLE = "Traffic Scenario Understanding and Video Captioning via Guidance
Attention Captioning Network",
JOURNAL = ITS,
VOLUME = "25",
YEAR = "2024",
NUMBER = "5",
MONTH = "May",
PAGES = "3615-3627",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT241010"}
@article{bb246165,
AUTHOR = "Zhang, Y.J. and Xu, T.Y. and Song, X.N. and Zhu, X.F. and Feng, Z.H. and Wu, X.J.",
TITLE = "Towards accurate unsupervised video captioning with implicit visual
feature injection and explicit",
JOURNAL = PRL,
VOLUME = "183",
YEAR = "2024",
PAGES = "133-139",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT241011"}
@article{bb246166,
AUTHOR = "Im, S.K. and Chan, K.H.",
TITLE = "Local feature-based video captioning with multiple classifier and
CARU-attention",
JOURNAL = IET-IPR,
VOLUME = "18",
YEAR = "2024",
NUMBER = "9",
PAGES = "2304-2317",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT241012"}
@article{bb246167,
AUTHOR = "Putra, B.H.H. and Jeong, C.",
TITLE = "Video captioning based on dual learning via multiple reconstruction
blocks",
JOURNAL = IVC,
VOLUME = "148",
YEAR = "2024",
PAGES = "105119",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT241013"}
@article{bb246168,
AUTHOR = "Chou, S.H. and Little, J.J. and Sigal, L.",
TITLE = "Implicit and explicit commonsense for multi-sentence video captioning",
JOURNAL = CVIU,
VOLUME = "247",
YEAR = "2024",
PAGES = "104064",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT241014"}
@article{bb246169,
AUTHOR = "Tian, M. and Li, G.R. and Qi, Y.K. and Wang, S.H. and Sheng, Q.Z. and Huang, Q.M.",
TITLE = "Rethink video retrieval representation for video captioning",
JOURNAL = PR,
VOLUME = "156",
YEAR = "2024",
PAGES = "110744",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT241015"}
@article{bb246170,
AUTHOR = "Liu, S. and Li, A. and Zhao, Y.W. and Wang, J.H. and Wang, Y.H.",
TITLE = "EvCap: Element-Aware Video Captioning",
JOURNAL = CirSysVideo,
VOLUME = "34",
YEAR = "2024",
NUMBER = "10",
MONTH = "October",
PAGES = "9718-9731",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT241016"}
@article{bb246171,
AUTHOR = "Lou, Y. and Zhang, W.J. and Song, X.N. and Hua, Y. and Wu, X.J.",
TITLE = "EDS: Exploring deeper into semantics for video captioning",
JOURNAL = PRL,
VOLUME = "186",
YEAR = "2024",
PAGES = "133-140",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT241017"}
@article{bb246172,
AUTHOR = "Yuan, F.N. and Gu, S. and Zhang, X.F. and Fang, Z.J.",
TITLE = "Fully exploring object relation interaction and hidden state
attention for video captioning",
JOURNAL = PR,
VOLUME = "159",
YEAR = "2025",
PAGES = "111138",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT241018"}
@article{bb246173,
AUTHOR = "Che, N. and Liu, J. and Yu, F. and Cheng, L.C. and Wang, Y.X. and Li, Y.H. and Liu, C.R.",
TITLE = "Multimodality-guided Visual-Caption Semantic Enhancement",
JOURNAL = CVIU,
VOLUME = "249",
YEAR = "2024",
PAGES = "104139",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT241019"}
@article{bb246174,
AUTHOR = "Liu, Y.Y. and Zhu, H. and Wu, Z. and Du, S. and Wu, S. and Shi, J.",
TITLE = "Adaptive semantic guidance network for video captioning",
JOURNAL = CVIU,
VOLUME = "251",
YEAR = "2025",
PAGES = "104255",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT241020"}
@article{bb246175,
AUTHOR = "Jin, P. and Li, H. and Yuan, L. and Yan, S.C. and Chen, J.",
TITLE = "Hierarchical Banzhaf Interaction for General Video-Language
Representation Learning",
JOURNAL = PAMI,
VOLUME = "47",
YEAR = "2025",
NUMBER = "3",
MONTH = "March",
PAGES = "2125-2139",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT241021"}
@article{bb246176,
AUTHOR = "Qasim, I. and Horsch, A. and Prasad, D.",
TITLE = "Dense Video Captioning: A Survey of Techniques, Datasets and
Evaluation Protocols",
JOURNAL = Surveys,
VOLUME = "57",
YEAR = "2025",
NUMBER = "6",
MONTH = "February",
PAGES = "xx-yy",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT241022"}
@article{bb246177,
AUTHOR = "Estevam, V. and Laroca, R. and Pedrini, H. and Menotti, D.",
TITLE = "Dense video captioning using unsupervised semantic information",
JOURNAL = JVCIR,
VOLUME = "107",
YEAR = "2025",
PAGES = "104385",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT241023"}
@article{bb246178,
AUTHOR = "Verma, D. and Dutta, T.",
TITLE = "Syntactically and semantically enhanced captioning network via hybrid
attention and POS tagging prompt",
JOURNAL = CVIU,
VOLUME = "255",
YEAR = "2025",
PAGES = "104340",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT241024"}
@article{bb246179,
AUTHOR = "Han, T.T. and Xu, Y.C. and Yu, J. and Yu, Z. and Zhao, S.C.",
TITLE = "Action-Driven Semantic Representation and Aggregation for Video
Captioning",
JOURNAL = CirSysVideo,
VOLUME = "35",
YEAR = "2025",
NUMBER = "4",
MONTH = "April",
PAGES = "3383-3395",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT241025"}
@article{bb246180,
AUTHOR = "Jiang, W.H. and Liu, L. and Fang, Y.M. and Cheng, Y. and Peng, Y.X. and Liu, Y.",
TITLE = "Learning Comprehensive Visual Grounding for Video Captioning",
JOURNAL = CirSysVideo,
VOLUME = "35",
YEAR = "2025",
NUMBER = "4",
MONTH = "April",
PAGES = "3355-3367",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT241026"}
@article{bb246181,
AUTHOR = "Ren, X. and Han, Y.F. and Wei, B. and Tang, X.S. and Hao, K.R.",
TITLE = "From visual features to key concepts: A Dynamic and Static
Concept-driven approach for video captioning",
JOURNAL = PRL,
VOLUME = "193",
YEAR = "2025",
PAGES = "64-70",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT241027"}
@article{bb246182,
AUTHOR = "Wang, Y. and Liu, Y.Y. and Zhou, S.P. and Huang, Y.X. and Tang, C. and Zhou, W. and Chen, Z.",
TITLE = "Emotion-Oriented Cross-Modal Prompting and Alignment for
Human-Centric Emotional Video Captioning",
JOURNAL = MultMed,
VOLUME = "27",
YEAR = "2025",
PAGES = "3766-3780",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT241028"}
@article{bb246183,
AUTHOR = "Luo, H.L. and Cai, X. and Shark, L.K.",
TITLE = "Frame-by-Frame Multi-Object Tracking-Guided Video Captioning",
JOURNAL = CirSysVideo,
VOLUME = "35",
YEAR = "2025",
NUMBER = "7",
MONTH = "July",
PAGES = "6357-6370",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT241029"}
@article{bb246184,
AUTHOR = "Choi, W. and Chen, J. and Yoon, J.W.",
TITLE = "ADVC: Adversarial dense video captioning with unsupervised
pretraining",
JOURNAL = IVC,
VOLUME = "161",
YEAR = "2025",
PAGES = "105595",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT241030"}
@article{bb246185,
AUTHOR = "Ma, Y.C. and Qing, L.Y. and Li, G.R. and Qi, Y.K. and Beheshti, A. and Sheng, Q.Z. and Huang, Q.M.",
TITLE = "RETTA: Retrieval-enhanced test-time adaptation for zero-shot video
captioning",
JOURNAL = PR,
VOLUME = "171",
YEAR = "2026",
PAGES = "112170",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT241031"}
@article{bb246186,
AUTHOR = "Verma, D. and Dutta, T.",
TITLE = "Seeing the Rare: Meta-Aware Pointer Networks for Long-Tailed Video
Captioning",
JOURNAL = SPLetters,
VOLUME = "32",
YEAR = "2025",
PAGES = "4269-4273",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT241032"}
@inproceedings{bb246187,
AUTHOR = "Wu, K. and Li, P. and Fu, J.W. and Li, Y.Z. and Wu, Y. and Liu, Y.H. and Wang, J.J. and Zhou, S.P.",
TITLE = "Event-Equalized Dense Video Captioning",
BOOKTITLE = CVPR25,
YEAR = "2025",
PAGES = "8417-8427",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT241033"}
@inproceedings{bb246188,
AUTHOR = "Xue, Z.H. and An, J.B. and Yang, X.T. and Grauman, K.",
TITLE = "Progress-Aware Video Frame Captioning",
BOOKTITLE = CVPR25,
YEAR = "2025",
PAGES = "13639-13650",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT241034"}
@inproceedings{bb246189,
AUTHOR = "Lee, S.H. and Wang, J. and Fan, D. and Zhang, Z.K. and Liu, L. and Hao, X. and Bhat, V. and Li, X.Y.",
TITLE = "Now You See Me: Context-Aware Automatic Audio Description",
BOOKTITLE = WACV25,
YEAR = "2025",
PAGES = "5530-5539",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT241035"}
@inproceedings{bb246190,
AUTHOR = "Kainulainen, J. and Guo, Z.X. and Laaksonen, J.",
TITLE = "Diffusion-based Multimodal Video Captioning",
BOOKTITLE = ACCV24,
YEAR = "2024",
PAGES = "III: 148-165",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT241036"}
@inproceedings{bb246191,
AUTHOR = "Perrett, T. and Han, T. and Damen, D. and Zisserman, A.",
TITLE = "It's Just Another Day:
Unique Video Captioning by Discriminative Prompting",
BOOKTITLE = ACCV24,
YEAR = "2024",
PAGES = "III: 275-293",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT241037"}
@inproceedings{bb246192,
AUTHOR = "Kim, B. and Hwang, D. and Cho, S.J. and Jang, Y.S. and Lee, H.L. and Lee, M.",
TITLE = "Show, Think, and Tell: Thought-Augmented Fine-Tuning of Large
Language Models for Video Captioning",
BOOKTITLE = WhatNext24,
YEAR = "2024",
PAGES = "1808-1817",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT241038"}
@inproceedings{bb246193,
AUTHOR = "Shen, Y.H. and Yang, L.J. and Wen, L.Y. and Yu, H.C. and Elhamifar, E. and Wang, H.",
TITLE = "Exploring the Role of Audio in Video Captioning",
BOOKTITLE = MULA24,
YEAR = "2024",
PAGES = "2090-2100",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT241039"}
@inproceedings{bb246194,
AUTHOR = "Shoman, M. and Wang, D.D. and Aboah, A. and Abdel Aty, M.",
TITLE = "Enhancing Traffic Safety with Parallel Dense Video Captioning for
End-to-End Event Analysis",
BOOKTITLE = AICity24,
YEAR = "2024",
PAGES = "7125-7133",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT241040"}
@inproceedings{bb246195,
AUTHOR = "Wu, H. and Liu, H. and Qiao, Y. and Sun, X.",
TITLE = "DIBS: Enhancing Dense Video Captioning with Unlabeled Videos via
Pseudo Boundary Enrichment and Online Refinement",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "18699-18708",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT241041"}
@inproceedings{bb246196,
AUTHOR = "Zhou, X.Y. and Arnab, A. and Buch, S. and Yan, S. and Myers, A. and Xiong, X. and Nagrani, A. and Schmid, C.",
TITLE = "Streaming Dense Video Captioning",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "18243-18252",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT241042"}
@inproceedings{bb246197,
AUTHOR = "Kim, M. and Kim, H.B. and Moon, J. and Choi, J. and Kim, S.T.",
TITLE = "Do You Remember? Dense Video Captioning with Cross-Modal Memory
Retrieval",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "13894-13904",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT241043"}
@inproceedings{bb246198,
AUTHOR = "Xu, J. and Huang, Y.F. and Hou, J.L. and Chen, G. and Zhang, Y. and Feng, R. and Xie, W.",
TITLE = "Retrieval-Augmented Egocentric Video Captioning",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "13525-13536",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT241044"}
@inproceedings{bb246199,
AUTHOR = "Malakan, Z.M. and Hassan, G.M. and Mian, A.",
TITLE = "Sequential Image Storytelling Model Based on Transformer Attention
Pooling",
BOOKTITLE = IVCNZ23,
YEAR = "2023",
PAGES = "1-6",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vc4.html#TT241045"}
Last update: Jan 8, 2026 at 12:52:16