% Journal article. JOURNAL is the string macro MultMed, which must be defined elsewhere.
@article{bb133300,
        AUTHOR = "Xiao, X. and Wang, L. and Ding, K. and Xiang, S. and Pan, C.",
        TITLE = "Deep Hierarchical Encoder-Decoder Network for Image Captioning",
        JOURNAL = MultMed,
        VOLUME = "21",
        YEAR = "2019",
        NUMBER = "11",
        MONTH = nov,
        PAGES = "2942--2956",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129337"}

% Journal article. JOURNAL is the string macro VC, which must be defined elsewhere.
@article{bb133301,
        AUTHOR = "Jiang, T. and Zhang, Z. and Yang, Y.",
        TITLE = "Modeling coverage with semantic embedding for image caption generation",
        JOURNAL = VC,
        VOLUME = "35",
        YEAR = "2018",
        NUMBER = "11",
        MONTH = nov,
        PAGES = "1655--1665",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129338"}

% Journal article. JOURNAL is the string macro PRL, which must be defined elsewhere.
@article{bb133302,
        AUTHOR = "Chen, X. H. and Zhang, M. X. and Wang, Z. and Zuo, L. and Li, B. and Yang, Y.",
        TITLE = "Leveraging unpaired out-of-domain data for image captioning",
        JOURNAL = PRL,
        VOLUME = "132",
        YEAR = "2020",
        PAGES = "132--140",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129339"}

% Journal article. JOURNAL is the string macro MultMed, which must be defined elsewhere.
@article{bb133303,
        AUTHOR = "Xu, N. and Zhang, H. and Liu, A. and Nie, W. and Su, Y. and Nie, J. and Zhang, Y.",
        TITLE = "Multi-Level Policy and Reward-Based Deep Reinforcement Learning
Framework for Image Captioning",
        JOURNAL = MultMed,
        VOLUME = "22",
        YEAR = "2020",
        NUMBER = "5",
        MONTH = may,
        PAGES = "1372--1383",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129340"}

% Journal article. JOURNAL is the string macro MultMed, which must be defined elsewhere.
@article{bb133304,
        AUTHOR = "Guo, L. and Liu, J. and Lu, S. and Lu, H.",
        TITLE = "Show, Tell, and Polish: Ruminant Decoding for Image Captioning",
        JOURNAL = MultMed,
        VOLUME = "22",
        YEAR = "2020",
        NUMBER = "8",
        MONTH = aug,
        PAGES = "2149--2162",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129341"}

% Journal article. JOURNAL is the string macro CirSysVideo, which must be defined elsewhere.
@article{bb133305,
        AUTHOR = "Feng, Q. and Wu, Y. and Fan, H. and Yan, C. and Xu, M. and Yang, Y.",
        TITLE = "Cascaded Revision Network for Novel Object Captioning",
        JOURNAL = CirSysVideo,
        VOLUME = "30",
        YEAR = "2020",
        NUMBER = "10",
        MONTH = oct,
        PAGES = "3413--3421",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129342"}

% Journal article. JOURNAL is the string macro IET-IPR, which must be defined elsewhere.
@article{bb133306,
        AUTHOR = "Shilpa, M. and He, J. and Zhao, Y. J. and Sun, B. and Yu, L. J.",
        TITLE = "Feedback evaluations to promote image captioning",
        JOURNAL = IET-IPR,
        VOLUME = "14",
        YEAR = "2020",
        NUMBER = "13",
        MONTH = nov,
        PAGES = "3021--3027",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129343"}

% Journal article. JOURNAL is the string macro IP, which must be defined elsewhere.
@article{bb133307,
        AUTHOR = "Liu, H. and Zhang, S. and Lin, K. and Wen, J. and Li, J. and Hu, X.",
        TITLE = "Vocabulary-Wide Credit Assignment for Training Image Captioning
Models",
        JOURNAL = IP,
        VOLUME = "30",
        YEAR = "2021",
        PAGES = "2450--2460",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129344"}

% Journal article. JOURNAL is the string macro PR, which must be defined elsewhere.
% PAGES is a single number with no range (presumably an article id).
@article{bb133308,
        AUTHOR = "Xu, N. and Tian, H. S. and Wang, Y. H. and Nie, W. Z. and Song, D. and Liu, A. A. and Liu, W.",
        TITLE = "Coupled-dynamic learning for vision and language:
Exploring Interaction between different tasks",
        JOURNAL = PR,
        VOLUME = "113",
        YEAR = "2021",
        PAGES = "107829",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129345"}

% Journal article. JOURNAL is the string macro MultMed, which must be defined elsewhere.
% The coined model name in the title is braced so sentence-casing styles keep its capitals.
@article{bb133309,
        AUTHOR = "Yang, L. and Wang, H. and Tang, P. and Li, Q.",
        TITLE = "{CaptionNet}: A Tailor-made Recurrent Neural Network for Generating
Image Descriptions",
        JOURNAL = MultMed,
        VOLUME = "23",
        YEAR = "2021",
        PAGES = "835--845",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129346"}

% Journal article. JOURNAL is the string macro PRL, which must be defined elsewhere.
@article{bb133310,
        AUTHOR = "Liu, A. A. and Wang, Y. H. and Xu, N. and Liu, S. and Li, X. Y.",
        TITLE = "Scene-Graph-Guided message passing network for dense captioning",
        JOURNAL = PRL,
        VOLUME = "145",
        YEAR = "2021",
        PAGES = "187--193",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129347"}

% Journal article. JOURNAL is the string macro IVC, which must be defined elsewhere.
% PAGES is a single number with no range (presumably an article id).
@article{bb133311,
        AUTHOR = "Zhang, L. and Zhang, Y. S. and Zhao, X. and Zou, Z. X.",
        TITLE = "Image captioning via proximal policy optimization",
        JOURNAL = IVC,
        VOLUME = "108",
        YEAR = "2021",
        PAGES = "104126",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129348"}

% Journal article. JOURNAL is the string macro MultMed, which must be defined elsewhere.
@article{bb133312,
        AUTHOR = "Wu, J. and Chen, T. S. and Wu, H. F. and Yang, Z. and Luo, G. C. and Lin, L.",
        TITLE = "Fine-Grained Image Captioning With Global-Local Discriminative
Objective",
        JOURNAL = MultMed,
        VOLUME = "23",
        YEAR = "2021",
        PAGES = "2413--2427",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129349"}

% Journal article. JOURNAL is the string macro CirSysVideo, which must be defined elsewhere.
@article{bb133313,
        AUTHOR = "Wu, L. X. and Xu, M. and Sang, L. and Yao, T. and Mei, T.",
        TITLE = "Noise Augmented Double-Stream Graph Convolutional Networks for Image
Captioning",
        JOURNAL = CirSysVideo,
        VOLUME = "31",
        YEAR = "2021",
        NUMBER = "8",
        MONTH = aug,
        PAGES = "3118--3127",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129350"}

% Journal article. JOURNAL is the string macro IJIG, which must be defined elsewhere.
% PAGES is a single number with no range (presumably an article id).
@article{bb133314,
        AUTHOR = "Nivedita, M. and Chandrashekar, P. and Mahapatra, S. and Phamila, Y. A. V. and Selvaperumal, S. K.",
        TITLE = "Image Captioning for Video Surveillance System using Neural Networks",
        JOURNAL = IJIG,
        VOLUME = "21",
        YEAR = "2021",
        NUMBER = "4",
        MONTH = oct,
        PAGES = "2150044",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129351"}

% Journal article. JOURNAL is the string macro PAMI, which must be defined elsewhere.
@article{bb133315,
        AUTHOR = "Zha, Z. J. and Liu, D. and Zhang, H. W. and Zhang, Y. D. and Wu, F.",
        TITLE = "Context-Aware Visual Policy Network for Fine-Grained Image Captioning",
        JOURNAL = PAMI,
        VOLUME = "44",
        YEAR = "2022",
        NUMBER = "2",
        MONTH = feb,
        PAGES = "710--722",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129352"}

% Journal article. JOURNAL is the string macro IET-IPR, which must be defined elsewhere.
@article{bb133316,
        AUTHOR = "Luo, G. F. and Cheng, L. J. and Jing, C. and Zhao, C. and Song, G. Z.",
        TITLE = "A thorough review of models, evaluation metrics, and datasets on
image captioning",
        JOURNAL = IET-IPR,
        VOLUME = "16",
        YEAR = "2022",
        NUMBER = "2",
        PAGES = "311--332",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129353"}

% Journal article. JOURNAL is the string macro MultMed, which must be defined elsewhere.
@article{bb133317,
        AUTHOR = "Ben, H. X. and Pan, Y. W. and Li, Y. and Yao, T. and Hong, R. C. and Wang, M. and Mei, T.",
        TITLE = "Unpaired Image Captioning With Semantic-Constrained Self-Learning",
        JOURNAL = MultMed,
        VOLUME = "24",
        YEAR = "2022",
        PAGES = "904--916",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129354"}

% Journal article. JOURNAL is the string macro Cyber, which must be defined elsewhere.
% The acronym in the title is braced so sentence-casing styles keep its capitals.
@article{bb133318,
        AUTHOR = "Song, P. P. and Guo, D. and Zhou, J. X. and Xu, M. L. and Wang, M.",
        TITLE = "Memorial {GAN} With Joint Semantic Optimization for Unpaired Image
Captioning",
        JOURNAL = Cyber,
        VOLUME = "53",
        YEAR = "2023",
        NUMBER = "7",
        MONTH = jul,
        PAGES = "4388--4399",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129355"}

% Conference paper. BOOKTITLE is the string macro CVPR19, which must be defined elsewhere.
@inproceedings{bb133319,
        AUTHOR = "Li, Y. and Yao, T. and Pan, Y. W. and Chao, H. Y. and Mei, T.",
        TITLE = "Pointing Novel Objects in Image Captioning",
        BOOKTITLE = CVPR19,
        YEAR = "2019",
        PAGES = "12489--12498",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129356"}

% Journal article. JOURNAL is the string macro PAMI, which must be defined elsewhere.
@article{bb133320,
        AUTHOR = "Yang, X. and Zhang, H. W. and Cai, J. F.",
        TITLE = "Auto-Encoding and Distilling Scene Graphs for Image Captioning",
        JOURNAL = PAMI,
        VOLUME = "44",
        YEAR = "2022",
        NUMBER = "5",
        MONTH = may,
        PAGES = "2313--2327",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129357"}

% Journal article. JOURNAL is the string macro PAMI, which must be defined elsewhere.
@article{bb133321,
        AUTHOR = "Yang, X. and Zhang, H. W. and Cai, J. F.",
        TITLE = "Deconfounded Image Captioning: A Causal Retrospect",
        JOURNAL = PAMI,
        VOLUME = "45",
        YEAR = "2023",
        NUMBER = "11",
        MONTH = nov,
        PAGES = "12996--13010",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129358"}

% Conference paper. BOOKTITLE is the string macro CVPR19, which must be defined elsewhere.
@inproceedings{bb133322,
        AUTHOR = "Yang, X. and Tang, K. and Zhang, H. W. and Cai, J. F.",
        TITLE = "Auto-Encoding Scene Graphs for Image Captioning",
        BOOKTITLE = CVPR19,
        YEAR = "2019",
        PAGES = "10677--10686",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129359"}

% Journal article. JOURNAL is the string macro PR, which must be defined elsewhere.
% PAGES is a single number with no range (presumably an article id).
@article{bb133323,
        AUTHOR = "Yang, Z. P. and Wang, P. B. and Chu, T. S. and Yang, J.",
        TITLE = "Human-Centric Image Captioning",
        JOURNAL = PR,
        VOLUME = "126",
        YEAR = "2022",
        PAGES = "108545",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129360"}

% Journal article. JOURNAL is the string macro IET-CV, which must be defined elsewhere.
@article{bb133324,
        AUTHOR = "Zhang, M. and Chen, J. X. and Li, P. F. and Jiang, M. and Zhou, Z.",
        TITLE = "Topic scene graphs for image captioning",
        JOURNAL = IET-CV,
        VOLUME = "16",
        YEAR = "2022",
        NUMBER = "4",
        PAGES = "364--375",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129361"}

% Journal article. JOURNAL is the string macro PRL, which must be defined elsewhere.
@article{bb133325,
        AUTHOR = "Yu, Q. and Zhang, C. X. and Weng, L. and Xiang, S. M. and Pan, C. H.",
        TITLE = "Scene captioning with deep fusion of images and point clouds",
        JOURNAL = PRL,
        VOLUME = "158",
        YEAR = "2022",
        PAGES = "9--15",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129362"}

% Journal article. JOURNAL is the string macro IJIG, which must be defined elsewhere.
% PAGES is a single number with no range (presumably an article id).
@article{bb133326,
        AUTHOR = "Chaudhari, C. P. and Devane, S.",
        TITLE = "Improved Framework using Rider Optimization Algorithm for Precise Image
Caption Generation",
        JOURNAL = IJIG,
        VOLUME = "22",
        YEAR = "2022",
        NUMBER = "2",
        MONTH = apr,
        PAGES = "2250021",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129363"}

% Journal article. JOURNAL is the string macro ITS, which must be defined elsewhere.
@article{bb133327,
        AUTHOR = "Li, Y. C. and Wu, C. and Li, L. and Liu, Y. H. and Zhu, J.",
        TITLE = "Caption Generation From Road Images for Traffic Scene Modeling",
        JOURNAL = ITS,
        VOLUME = "23",
        YEAR = "2022",
        NUMBER = "7",
        MONTH = jul,
        PAGES = "7805--7816",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129364"}

% Journal article. JOURNAL is the string macro CirSysVideo, which must be defined elsewhere.
@article{bb133328,
        AUTHOR = "Wang, Y. H. and Xu, N. and Liu, A. A. and Li, W. H. and Zhang, Y. D.",
        TITLE = "High-Order Interaction Learning for Image Captioning",
        JOURNAL = CirSysVideo,
        VOLUME = "32",
        YEAR = "2022",
        NUMBER = "7",
        MONTH = jul,
        PAGES = "4417--4430",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129365"}

% Journal article. JOURNAL is the string macro IJCV, which must be defined elsewhere.
@article{bb133329,
        AUTHOR = "Guo, D. D. and Lu, R. Y. and Chen, B. and Zeng, Z. Q. and Zhou, M. Y.",
        TITLE = "Matching Visual Features to Hierarchical Semantic Topics for Image
Paragraph Captioning",
        JOURNAL = IJCV,
        VOLUME = "130",
        YEAR = "2022",
        NUMBER = "8",
        MONTH = aug,
        PAGES = "1920--1937",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129366"}

% Journal article. JOURNAL is the string macro IVC, which must be defined elsewhere.
% PAGES is a single number with no range (presumably an article id).
@article{bb133330,
        AUTHOR = "Demirel, B. and Cinbis, R. G.",
        TITLE = "Caption generation on scenes with seen and unseen object categories",
        JOURNAL = IVC,
        VOLUME = "124",
        YEAR = "2022",
        PAGES = "104515",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129367"}

% Journal article. JOURNAL is the string macro IJCV, which must be defined elsewhere.
@article{bb133331,
        AUTHOR = "Wu, X. X. and Zhao, W. T. and Luo, J. B.",
        TITLE = "Learning Cooperative Neural Modules for Stylized Image Captioning",
        JOURNAL = IJCV,
        VOLUME = "130",
        YEAR = "2022",
        NUMBER = "9",
        MONTH = sep,
        PAGES = "2305--2320",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129368"}

% Journal article. JOURNAL is the string macro PAMI, which must be defined elsewhere.
@article{bb133332,
        AUTHOR = "Stefanini, M. and Cornia, M. and Baraldi, L. and Cascianelli, S. and Fiameni, G. and Cucchiara, R.",
        TITLE = "From Show to Tell: A Survey on Deep Learning-Based Image Captioning",
        JOURNAL = PAMI,
        VOLUME = "45",
        YEAR = "2023",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "539--559",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129369"}

% Journal article. JOURNAL is the string macro PAMI, which must be defined elsewhere.
@article{bb133333,
        AUTHOR = "Wu, Y. and Jiang, L. and Yang, Y.",
        TITLE = "Switchable Novel Object Captioner",
        JOURNAL = PAMI,
        VOLUME = "45",
        YEAR = "2023",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "1162--1173",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129370"}

% Journal article. JOURNAL is the string macro IJCV, which must be defined elsewhere.
@article{bb133334,
        AUTHOR = "Yang, X. and Zhang, H. W. and Gao, C. Y. and Cai, J. F.",
        TITLE = "Learning to Collocate Visual-Linguistic Neural Modules for Image
Captioning",
        JOURNAL = IJCV,
        VOLUME = "131",
        YEAR = "2023",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "82--100",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129371"}

% Conference paper. BOOKTITLE is the string macro ICCV19, which must be defined elsewhere.
@inproceedings{bb133335,
        AUTHOR = "Yang, X. and Zhang, H. W. and Cai, J. F.",
        TITLE = "Learning to Collocate Neural Modules for Image Captioning",
        BOOKTITLE = ICCV19,
        YEAR = "2019",
        PAGES = "4249--4259",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129372"}

% Journal article. JOURNAL is the string macro IEICE, which must be defined elsewhere.
@article{bb133336,
        AUTHOR = "Feng, J. L. and Zhao, J. P.",
        TITLE = "Effectively Utilizing the Category Labels for Image Captioning",
        JOURNAL = IEICE,
        VOLUME = "E106-D",
        YEAR = "2023",
        NUMBER = "5",
        MONTH = may,
        PAGES = "617--624",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129373"}

% Journal article. JOURNAL is the string macro MultMed, which must be defined elsewhere.
@article{bb133337,
        AUTHOR = "Wang, D. P. and Hu, Z. Z. and Zhou, Y. and Hong, R. C. and Wang, M.",
        TITLE = "A Text-Guided Generation and Refinement Model for Image Captioning",
        JOURNAL = MultMed,
        VOLUME = "25",
        YEAR = "2023",
        PAGES = "2966--2977",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129374"}

% Journal article. JOURNAL is the string macro MultMed, which must be defined elsewhere.
@article{bb133338,
        AUTHOR = "Al Qatf, M. and Wang, X. and Hawbani, A. and Abdussalam, A. and Alsamhi, S. H.",
        TITLE = "Image Captioning With Novel Topics Guidance and Retrieval-Based
Topics Re-Weighting",
        JOURNAL = MultMed,
        VOLUME = "25",
        YEAR = "2023",
        PAGES = "5984--5999",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129375"}

% Journal article. JOURNAL is the string macro MultMed, which must be defined elsewhere.
@article{bb133339,
        AUTHOR = "Zhu, P. P. and Wang, X. and Luo, Y. and Sun, Z. L. and Zheng, W. S. and Wang, Y. W. and Chen, C.",
        TITLE = "Unpaired Image Captioning by Image-Level Weakly-Supervised Visual
Concept Recognition",
        JOURNAL = MultMed,
        VOLUME = "25",
        YEAR = "2023",
        PAGES = "6702--6716",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129376"}

% Journal article. JOURNAL is the string macro MultMed, which must be defined elsewhere.
% The coined model name in the title is braced so sentence-casing styles keep its capitals.
% NOTE(review): "Triple-Steam" is kept as found; confirm against the publisher record before changing it.
@article{bb133340,
        AUTHOR = "Hu, N. N. and Ming, Y. and Fan, C. X. and Feng, F. and Lyu, B. Y.",
        TITLE = "{TSFNet}: Triple-Steam Image Captioning",
        JOURNAL = MultMed,
        VOLUME = "25",
        YEAR = "2023",
        PAGES = "6904--6916",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129377"}

% Journal article; the journal name comes from the SP:IC string macro, which must be defined elsewhere.
% The pages field is a single number with no range (presumably an article id).
@article{bb133341,
  author    = {Gonzalez Chavez, O. and Ruiz, G. and Moctezuma, D. and Ramirez delReal, T.},
  title     = {Are metrics measuring what they should? An evaluation of Image Captioning task metrics},
  journal   = SP:IC,
  volume    = {120},
  year      = {2024},
  pages     = {117071},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT129378},
}

% Journal article. JOURNAL is the string macro IJIG, which must be defined elsewhere.
% PAGES is a single number with no range (presumably an article id).
% NUMBER holds only the issue number; the publication year belongs in YEAR.
@article{bb133342,
        AUTHOR = "Padate, R. and Jain, A. and Kalla, M. and Sharma, A.",
        TITLE = "A Widespread Assessment and Open Issues on Image Captioning Models",
        JOURNAL = IJIG,
        VOLUME = "23",
        YEAR = "2023",
        NUMBER = "6",
        PAGES = "2350057",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129379"}

% Journal article. JOURNAL is the string macro MultMed, which must be defined elsewhere.
@article{bb133343,
        AUTHOR = "Shao, Z. and Han, J. G. and Debattista, K. and Pang, Y. W.",
        TITLE = "Textual Context-Aware Dense Captioning With Diverse Words",
        JOURNAL = MultMed,
        VOLUME = "25",
        YEAR = "2023",
        PAGES = "8753--8766",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129380"}

% Journal article. JOURNAL is the string macro MultMed, which must be defined elsewhere.
% The coined model name in the title is braced so sentence-casing styles keep its capitals.
@article{bb133344,
        AUTHOR = "Cheng, J. and Wu, F. and Liu, L. and Zhang, Q. and Rutkowski, L. and Tao, D. C.",
        TITLE = "{InDecGAN}: Learning to Generate Complex Images From Captions via
Independent Object-Level Decomposition and Enhancement",
        JOURNAL = MultMed,
        VOLUME = "25",
        YEAR = "2023",
        PAGES = "8279--8293",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129381"}

% Journal article. JOURNAL is the string macro PAMI, which must be defined elsewhere.
@article{bb133345,
        AUTHOR = "Ding, N. and Deng, C. R. and Tan, M. K. and Du, Q. and Ge, Z. W. and Wu, Q.",
        TITLE = "Image Captioning With Controllable and Adaptive Length Levels",
        JOURNAL = PAMI,
        VOLUME = "46",
        YEAR = "2024",
        NUMBER = "2",
        MONTH = feb,
        PAGES = "764--779",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129382"}

% Conference paper. BOOKTITLE is the string macro CVPR21, which must be defined elsewhere.
@inproceedings{bb133346,
        AUTHOR = "Xu, G. H. and Niu, S. C. and Tan, M. K. and Luo, Y. C. and Du, Q. and Wu, Q.",
        TITLE = "Towards Accurate Text-based Image Captioning with Content Diversity
Exploration",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "12632--12641",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129383"}

% Journal article. JOURNAL is the string macro MultMed, which must be defined elsewhere.
@article{bb133347,
        AUTHOR = "Zhu, P. P. and Wang, X. and Zhu, L. and Sun, Z. L. and Zheng, W. S. and Wang, Y. W. and Chen, C. W.",
        TITLE = "Prompt-Based Learning for Unpaired Image Captioning",
        JOURNAL = MultMed,
        VOLUME = "26",
        YEAR = "2024",
        PAGES = "379--393",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129384"}

% Journal article. JOURNAL is the string macro MultMed, which must be defined elsewhere.
@article{bb133348,
        AUTHOR = "Liu, A. A. and Zhai, Y. C. and Xu, N. and Tian, H. and Nie, W. Z. and Zhang, Y. D.",
        TITLE = "Event-Aware Retrospective Learning for Knowledge-Based Image
Captioning",
        JOURNAL = MultMed,
        VOLUME = "26",
        YEAR = "2024",
        PAGES = "4898--4911",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129385"}

% Journal article. JOURNAL is the string macro MultMedMag, which must be defined elsewhere.
@article{bb133349,
        AUTHOR = "Ajankar, S. and Dutta, T.",
        TITLE = "Image-Relevant Entities Knowledge-Aware News Image Captioning",
        JOURNAL = MultMedMag,
        VOLUME = "31",
        YEAR = "2024",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "88--98",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129386"}

% Journal article. JOURNAL is the string macro PRL, which must be defined elsewhere.
% The coined names in the title are braced so sentence-casing styles keep their capitals.
@article{bb133350,
        AUTHOR = "Dai, Z. Z. and Tran, V. and Markham, A. and Trigoni, N. and Rahman, M. A. and Wijayasingha, L. N. S. and Stankovic, J. and Li, C.",
        TITLE = "{EgoCap} and {EgoFormer}:
First-person image captioning with context fusion",
        JOURNAL = PRL,
        VOLUME = "181",
        YEAR = "2024",
        PAGES = "50--56",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129387"}

% Journal article. JOURNAL is the string macro MultMed, which must be defined elsewhere.
% The acronym in the title is braced so sentence-casing styles keep its capitals.
@article{bb133351,
        AUTHOR = "Shao, Z. and Han, J. G. and Debattista, K. and Pang, Y. W.",
        TITLE = "{DCMSTRD}: End-to-end Dense Captioning via Multi-Scale Transformer
Decoding",
        JOURNAL = MultMed,
        VOLUME = "26",
        YEAR = "2024",
        PAGES = "7581--7593",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129388"}

% Journal article. JOURNAL is the string macro IJCV, which must be defined elsewhere.
@article{bb133352,
        AUTHOR = "Cornia, M. and Baraldi, L. and Fiameni, G. and Cucchiara, R.",
        TITLE = "Generating More Pertinent Captions by Leveraging Semantics and Style on
Multi-Source Datasets",
        JOURNAL = IJCV,
        VOLUME = "132",
        YEAR = "2024",
        NUMBER = "5",
        MONTH = may,
        PAGES = "1701--1720",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129389"}

% Conference paper. BOOKTITLE is the string macro ICCV23, which must be defined elsewhere.
@inproceedings{bb133353,
        AUTHOR = "Barraco, M. and Sarto, S. and Cornia, M. and Baraldi, L. and Cucchiara, R.",
        TITLE = "With a Little Help from your own Past: Prototypical Memory Networks
for Image Captioning",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "3009--3019",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129390"}

% Conference paper. BOOKTITLE here is a quoted literal, not a macro reference, so it prints verbatim.
% NOTE(review): the other conference entries in this chunk use unquoted macros; switch to one only if an ICPR22 macro exists.
% The coined name in the title is braced so sentence-casing styles keep its capitals.
@inproceedings{bb133354,
        AUTHOR = "Barraco, M. and Stefanini, M. and Cornia, M. and Cascianelli, S. and Baraldi, L. and Cucchiara, R.",
        TITLE = "{CaMEL}: Mean Teacher Learning for Image Captioning",
        BOOKTITLE = "ICPR22",
        YEAR = "2022",
        PAGES = "4087--4094",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129391"}

% Conference paper. BOOKTITLE is the string macro CVPR19, which must be defined elsewhere.
@inproceedings{bb133355,
        AUTHOR = "Cornia, M. and Baraldi, L. and Cucchiara, R.",
        TITLE = "Show, Control and Tell: A Framework for Generating Controllable and
Grounded Captions",
        BOOKTITLE = CVPR19,
        YEAR = "2019",
        PAGES = "8299--8308",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129392"}

% Journal article. JOURNAL is the string macro CirSysVideo, which must be defined elsewhere.
% The coined model name in the title is braced so sentence-casing styles keep its capitals.
@article{bb133356,
        AUTHOR = "Wang, L. X. and Qiu, H. Q. and Qiu, B. and Meng, F. M. and Wu, Q. B. and Li, H. L.",
        TITLE = "{TridentCap}: Image-Fact-Style Trident Semantic Framework for Stylized
Image Captioning",
        JOURNAL = CirSysVideo,
        VOLUME = "34",
        YEAR = "2024",
        NUMBER = "5",
        MONTH = may,
        PAGES = "3563--3575",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129393"}

% Journal article. JOURNAL is the string macro CirSysVideo, which must be defined elsewhere.
% The acronym in the title is braced so sentence-casing styles keep its capitals.
@article{bb133357,
        AUTHOR = "Zhang, H. and Zeng, P. P. and Gao, L. and Lyu, X. Y. and Song, J. K. and Shen, H. T.",
        TITLE = "{SPT}: Spatial Pyramid Transformer for Image Captioning",
        JOURNAL = CirSysVideo,
        VOLUME = "34",
        YEAR = "2024",
        NUMBER = "6",
        MONTH = jun,
        PAGES = "4829--4842",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129394"}

% Journal article. JOURNAL is the string macro IVC, which must be defined elsewhere.
% PAGES is a single number with no range (presumably an article id).
% Mixed-case names in the title are braced so sentence-casing styles keep their capitals.
@article{bb133358,
        AUTHOR = "Wang, H. Y. and Song, K. and Jiang, X. and He, Z. Q.",
        TITLE = "{ragBERT}: Relationship-aligned and grammar-wise {BERT} model for image
captioning",
        JOURNAL = IVC,
        VOLUME = "148",
        YEAR = "2024",
        PAGES = "105105",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129395"}

% Journal article. JOURNAL is the string macro CirSysVideo, which must be defined elsewhere.
@article{bb133359,
        AUTHOR = "Li, J. Y. and Zhang, L. and Zhang, K. and Hu, B. and Xie, H. T. and Mao, Z. D.",
        TITLE = "Cascade Semantic Prompt Alignment Network for Image Captioning",
        JOURNAL = CirSysVideo,
        VOLUME = "34",
        YEAR = "2024",
        NUMBER = "7",
        MONTH = jul,
        PAGES = "5266--5281",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129396"}

% Journal article. JOURNAL is the string macro IET-IPR, which must be defined elsewhere.
@article{bb133360,
        AUTHOR = "Zou, Y. and Liao, S. Y. and Wang, Q. F.",
        TITLE = "Chinese image captioning with fusion encoder and visual keyword
search",
        JOURNAL = IET-IPR,
        VOLUME = "18",
        YEAR = "2024",
        NUMBER = "11",
        PAGES = "3055--3069",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129397"}

% Journal article. JOURNAL is the string macro PAMI, which must be defined elsewhere.
% The model name and "3D" in the title are braced so sentence-casing styles keep their capitals.
@article{bb133361,
        AUTHOR = "Chen, S. J. and Zhu, H. Y. and Li, M. S. and Chen, X. and Guo, P. and Lei, Y. J. and Yu, G. and Li, T. and Chen, T.",
        TITLE = "{Vote2Cap-DETR++}: Decoupling Localization and Describing for
End-to-End {3D} Dense Captioning",
        JOURNAL = PAMI,
        VOLUME = "46",
        YEAR = "2024",
        NUMBER = "11",
        MONTH = nov,
        PAGES = "7331--7347",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129398"}

% Conference paper. BOOKTITLE is the string macro CVPR23, which must be defined elsewhere.
% The model name and "3D" in the title are braced so sentence-casing styles keep their capitals.
@inproceedings{bb133362,
        AUTHOR = "Chen, S. J. and Zhu, H. Y. and Chen, X. and Lei, Y. J. and Yu, G. and Chen, T.",
        TITLE = "End-to-End {3D} Dense Captioning with {Vote2Cap-DETR}",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "11124--11133",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129399"}

% Journal article. JOURNAL is the string macro IET-CV, which must be defined elsewhere.
% The acronym in the title is braced so sentence-casing styles keep its capitals.
@article{bb133363,
        AUTHOR = "Lv, F. X. and Wang, R. and Jing, L. H. and Dai, P. W.",
        TITLE = "{HIST}: Hierarchical and sequential transformer for image captioning",
        JOURNAL = IET-CV,
        VOLUME = "18",
        YEAR = "2024",
        NUMBER = "7",
        PAGES = "1043--1056",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129400"}

% Journal article. JOURNAL is the string macro SP:IC, which must be defined elsewhere.
% PAGES is a single number with no range (presumably an article id).
@article{bb133364,
        AUTHOR = "Yang, X. B. and He, S. and Zhang, J. and Ma, S. and Hou, Z. Q. and Sun, W.",
        TITLE = "Memory positional encoding for image captioning",
        JOURNAL = SP:IC,
        VOLUME = "130",
        YEAR = "2025",
        PAGES = "117201",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129401"}

% Journal article. JOURNAL is the string macro CirSysVideo, which must be defined elsewhere.
@article{bb133365,
        AUTHOR = "Wang, L. and Chen, H. P. and Liu, Y. and Lyu, Y. D.",
        TITLE = "Regular Constrained Multimodal Fusion for Image Captioning",
        JOURNAL = CirSysVideo,
        VOLUME = "34",
        YEAR = "2024",
        NUMBER = "11",
        MONTH = nov,
        PAGES = "11900--11913",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129402"}

% Journal article. JOURNAL is the string macro PR, which must be defined elsewhere.
% PAGES is a single number with no range (presumably an article id).
@article{bb133366,
        AUTHOR = "Yang, Z. and Han, B. and Gao, X. B. and Zhan, Z. H.",
        TITLE = "Eye-movement-prompted large image captioning model",
        JOURNAL = PR,
        VOLUME = "159",
        YEAR = "2025",
        PAGES = "111097",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129403"}

% Journal article. JOURNAL is the string macro CVIU, which must be defined elsewhere.
% PAGES is a single number with no range (presumably an article id).
@article{bb133367,
        AUTHOR = "Liang, X. and Li, C. and Tian, L. H.",
        TITLE = "Generative adversarial network for semi-supervised image captioning",
        JOURNAL = CVIU,
        VOLUME = "249",
        YEAR = "2024",
        PAGES = "104199",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129404"}

% Journal article. JOURNAL is the string macro CVIU, which must be defined elsewhere.
% PAGES is a single number with no range (presumably an article id).
@article{bb133368,
        AUTHOR = "Zheng, Q. and Wang, C. Y. and Wang, D. D.",
        TITLE = "Bypass network for semantics driven image paragraph captioning",
        JOURNAL = CVIU,
        VOLUME = "249",
        YEAR = "2024",
        PAGES = "104154",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129405"}

% Journal article. JOURNAL is the string macro CirSysVideo, which must be defined elsewhere.
% "RGB-D" in the title is braced so sentence-casing styles keep its capitals.
@article{bb133369,
        AUTHOR = "Meena, P. and Kumar, H. and Yadav, S. K.",
        TITLE = "A Volumetric Saliency Guided Image Summarization for {RGB-D} Indoor
Scene Classification",
        JOURNAL = CirSysVideo,
        VOLUME = "34",
        YEAR = "2024",
        NUMBER = "11",
        MONTH = nov,
        PAGES = "10917--10929",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129406"}

% Journal article. JOURNAL is the string macro IET-IPR, which must be defined elsewhere.
@article{bb133370,
        AUTHOR = "Rahman, M. M. and Uzzaman, A. and Sami, S. I. and Khatun, F. and Bhuiyan, M. A. A.",
        TITLE = "A comprehensive construction of deep neural network-based
encoder-decoder framework for automatic image captioning systems",
        JOURNAL = IET-IPR,
        VOLUME = "18",
        YEAR = "2024",
        NUMBER = "14",
        PAGES = "4778--4798",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129407"}

% Journal article. JOURNAL is the string macro IJCV, which must be defined elsewhere.
@article{bb133371,
        AUTHOR = "Wang, Z. and Xiao, J. and Zhuang, Y. T. and Gao, F. and Shao, J. and Chen, L.",
        TITLE = "Learning Combinatorial Prompts for Universal Controllable Image
Captioning",
        JOURNAL = IJCV,
        VOLUME = "133",
        YEAR = "2025",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "129--150",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129408"}

% Journal article. JOURNAL is the string macro IEICE, which must be defined elsewhere.
% The coined dataset name in the title is braced so sentence-casing styles keep its capitals.
@article{bb133372,
        AUTHOR = "Zhang, C. and Tao, L. and Yamasaki, T.",
        TITLE = "{UTStyleCap4K}: Generating Image Captions with Sentimental Styles",
        JOURNAL = IEICE,
        VOLUME = "E108-D",
        YEAR = "2025",
        NUMBER = "3",
        MONTH = mar,
        PAGES = "266--276",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129409"}

% Journal article. JOURNAL is the string macro IVC, which must be defined elsewhere.
% PAGES is a single number with no range (presumably an article id).
@article{bb133373,
        AUTHOR = "Wei, J. H. and Li, Z. X. and Zhang, C. and Ma, H. F.",
        TITLE = "Fusing grid and adaptive region features for image captioning",
        JOURNAL = IVC,
        VOLUME = "157",
        YEAR = "2025",
        PAGES = "105513",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129410"}

% Journal article. JOURNAL is the string macro PR, which must be defined elsewhere.
% PAGES is a single number with no range (presumably an article id).
% Acronyms in the title are braced so sentence-casing styles keep their capitals.
@article{bb133374,
        AUTHOR = "Xu, D. S. and Huang, Q. and Zhang, X. and Cheng, H. and Shuang, F. and Cai, Y.",
        TITLE = "{DEVICE}: Depth and Visual Concepts Aware Transformer for {OCR}-based
image captioning",
        JOURNAL = PR,
        VOLUME = "164",
        YEAR = "2025",
        PAGES = "111522",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129411"}

% Journal article. JOURNAL is the string macro CirSysVideo, which must be defined elsewhere.
@article{bb133375,
        AUTHOR = "Liu, A. A. and Wu, Q. and Xu, N. and Tian, H. S. and Wang, L.",
        TITLE = "Enriched Image Captioning Based on Knowledge Divergence and Focus",
        JOURNAL = CirSysVideo,
        VOLUME = "35",
        YEAR = "2025",
        NUMBER = "5",
        MONTH = may,
        PAGES = "4937--4948",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129412"}

% Journal article. JOURNAL is the string macro MultMed, which must be defined elsewhere.
% "3D" in the title is braced so sentence-casing styles keep its capital.
@article{bb133376,
        AUTHOR = "Shi, L. and Kan, S. C. and Jin, Y. and Zhang, L. and Cen, Y. G.",
        TITLE = "Multi-Modal Self-Perception Enhanced Large Language Model for {3D}
Region-of-Interest Captioning With Limited Data",
        JOURNAL = MultMed,
        VOLUME = "27",
        YEAR = "2025",
        PAGES = "2935--2948",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129413"}

% Journal article. JOURNAL is the string macro MultMed, which must be defined elsewhere.
% The coined model name in the title is braced so sentence-casing styles keep its capitals.
@article{bb133377,
        AUTHOR = "Wang, B. and Zhang, Z. and Zhao, M. B. and Jin, X. J. and Xu, M. L. and Wang, M.",
        TITLE = "{SeaCap}: Multi-Sight Embedding and Alignment for One-Stage Image
Captioner",
        JOURNAL = MultMed,
        VOLUME = "27",
        YEAR = "2025",
        PAGES = "3411--3425",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129414"}

% Journal article. JOURNAL is the string macro CirSysVideo, which must be defined elsewhere.
@article{bb133378,
        AUTHOR = "Zhang, J. and Zhang, K. X. and Xie, Y. and Wang, Z.",
        TITLE = "Deep Reciprocal Learning for Image Captioning",
        JOURNAL = CirSysVideo,
        VOLUME = "35",
        YEAR = "2025",
        NUMBER = "7",
        MONTH = jul,
        PAGES = "6684--6697",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129415"}

% Journal article. JOURNAL is the string macro CirSysVideo, which must be defined elsewhere.
@article{bb133379,
        AUTHOR = "Gao, J. C. and Zhang, L. and Li, J. Y. and Mao, Z. D.",
        TITLE = "Fully Semantic Gap Recovery for End-to-End Image Captioning",
        JOURNAL = CirSysVideo,
        VOLUME = "35",
        YEAR = "2025",
        NUMBER = "9",
        MONTH = sep,
        PAGES = "9365--9383",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129416"}

@article{bb133380,
  author    = {Gao, Y.Q. and Suo, W. and Sun, M.Y. and Liu, L. and Wang, P.},
  title     = {More video-relevant paragraph captioning via Perturbed Attention Self-Distillation},
  journal   = PR,
  volume    = {169},
  pages     = {111871},
  year      = {2026},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT129417},
}

@article{bb133381,
  author    = {Li, H. and Xiao, J. and Sun, M.J. and Lim, E.G. and Zhao, Y.},
  title     = {Auxiliary captioning: Bridging image-text matching and image captioning},
  journal   = SP:IC,
  volume    = {138},
  pages     = {117337},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT129418},
}

@article{bb133382,
  author    = {Bao, Q.Y. and Liu, F. and Jiao, L.C. and Liu, Y. and Li, S. and Li, L.L. and Liu, X. and Chen, P.},
  title     = {Visual-Language Scene-Relation-Aware Zero-Shot Captioner},
  journal   = PAMI,
  volume    = {47},
  number    = {10},
  pages     = {8725-8739},
  month     = {October},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT129419},
}

@inproceedings{bb133383,
  author    = {Samira, D. and Habler, E. and Elovici, Y. and Shabtai, A.},
  title     = {Variance-Based Membership Inference Attacks Against Large-Scale Image Captioning Models},
  booktitle = CVPR25,
  pages     = {9210-9219},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT129420},
}

% Braces keep the acronym BACON uppercase under styles that sentence-case titles.
@inproceedings{bb133384,
        AUTHOR = "Yang, Z.T. and Feng, R. and Yan, K.Y. and Wang, H.J. and Wang, Z.C. and Zhu, S.W. and Zhang, H. and Xiao, J. and Wu, P.Y. and Zhu, K. and Chen, J. and Xie, C.W. and Yang, Y. and Zhang, H.Y. and Liu, Y. and Cheng, F.",
        TITLE = "{BACON}: Improving Clarity of Image Captions via Bag-of-Concept Graphs",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "14380-14389",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129421"}

% Braces keep the CamelCase system name FineCaption intact under styles that sentence-case titles.
@inproceedings{bb133385,
        AUTHOR = "Hua, H. and Liu, Q. and Zhang, L.Z. and Shi, J. and Kim, S.Y. and Zhang, Z.F. and Wang, Y.L. and Zhang, J.M. and Lin, Z. and Luo, J.B.",
        TITLE = "{FineCaption}: Compositional Image Captioning Focusing on Wherever You
Want at Any Granularity",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "24763-24773",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129422"}

@inproceedings{bb133386,
  author    = {Ye, A. and Santy, S. and Hwang, J.D. and Zhang, A.X. and Krishna, R.},
  title     = {Semantic and Expressive Variation in Image Captions Across Languages},
  booktitle = CVPR25,
  pages     = {29667-29679},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT129423},
}

@inproceedings{bb133387,
  author    = {Lee, J.R. and Shin, Y. and Son, G. and Hwang, D.},
  title     = {Diffusion Bridge: Leveraging Diffusion Model to Reduce the Modality Gap Between Text and Vision for Zero-Shot Image Captioning},
  booktitle = CVPR25,
  pages     = {4050-4059},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT129424},
}

% Braces keep the acronym G-DPPD uppercase under styles that sentence-case titles.
@inproceedings{bb133388,
        AUTHOR = "Fan, X.Y. and Tao, Q. and Tang, Y.F.",
        TITLE = "{G-DPPD}: Gated Data-dependent Prior Probability Distribution for
Unsupervised Image Captioning",
        BOOKTITLE = ICIVC24,
        YEAR = "2024",
        PAGES = "467-472",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129425"}

@inproceedings{bb133389,
  author    = {Wu, J.L. and Wang, J.F. and Yang, Z.Y. and Gan, Z. and Liu, Z.C. and Yuan, J.S. and Wang, L.J.},
  title     = {Grit: A Generative Region-to-text Transformer for Object Understanding},
  booktitle = ECCV24,
  pages     = {LXXX: 207-224},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT129426},
}

% Braces keep the acronym IGNORE uppercase under styles that sentence-case titles.
@inproceedings{bb133390,
        AUTHOR = "Song, G.R. and Kim, N.R. and Lee, J.S. and Lee, J.H.",
        TITLE = "{IGNORE}: Information Gap-based False Negative Loss Rejection for Single
Positive Multi-label Learning",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "XXXIV: 472-488",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129427"}

% Braces keep the acronym CIC-BART-SSA uppercase under styles that sentence-case titles.
@inproceedings{bb133391,
        AUTHOR = "Basioti, K. and Abdelsalam, M.A. and Fancellu, F. and Pavlovic, V. and Fazly, A.",
        TITLE = "{CIC-BART-SSA}: Controllable Image Captioning with Structured Semantic
Augmentation",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "LXVI: 444-461",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129428"}

@inproceedings{bb133392,
  author    = {Udo, H. and Koshinaka, T.},
  title     = {Reading is Believing: Revisiting Language Bottleneck Models for Image Classification},
  booktitle = ICIP24,
  pages     = {943-949},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT129429},
}

@inproceedings{bb133393,
  author    = {Das, S. and Sekhar, C.C.},
  title     = {Leveraging Generated Image Captions for Visual Commonsense Reasoning},
  booktitle = ICIP24,
  pages     = {2508-2514},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT129430},
}

% CLIP is written in capitals in the other entries of this file; braces keep it uppercase
% under styles that sentence-case titles. NOTE(review): the source had Clip here -
% confirm against the published title.
@inproceedings{bb133394,
        AUTHOR = "Chaffin, A. and Kijak, E. and Claveau, V.",
        TITLE = "Distinctive Image Captioning: Leveraging Ground Truth Captions in
{CLIP} Guided Reinforcement Learning",
        BOOKTITLE = ICIP24,
        YEAR = "2024",
        PAGES = "2550-2556",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129431"}

% Braces keep the acronyms NICE, CVPR and CLIP uppercase under styles that sentence-case titles.
@inproceedings{bb133395,
        AUTHOR = "Jeong, K. and Lee, W. and Nam, W. and Ma, M. and Kang, P.",
        TITLE = "Technical Report of {NICE} Challenge at {CVPR} 2024: Caption Re-ranking
Evaluation Using Ensembled {CLIP} and Consensus Scores",
        BOOKTITLE = NICE24,
        YEAR = "2024",
        PAGES = "7366-7372",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129432"}

@inproceedings{bb133396,
  author    = {Kim, T. and Marsden, M. and Ahn, P. and Kim, S. and Lee, S. and Sala, A. and Kim, S.H.},
  title     = {Large-Scale Bidirectional Training for Zero-Shot Image Captioning},
  booktitle = NICE24,
  pages     = {7373-7383},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT129433},
}

% Braces keep the acronyms NICE and CVPR uppercase under styles that sentence-case titles.
@inproceedings{bb133397,
        AUTHOR = "Kim, T. and Ahn, P. and Kim, S. and Lee, S. and Marsden, M. and Sala, A. and Kim, S.H. and Han, B.H. and Lee, K.M. and Lee, H.L. and Bae, K. and Wu, X.Y. and Gao, Y. and Zhang, H.L. and Yang, Y. and Guo, W. and Lu, J.F. and Oh, Y. and Cho, J.W. and Kim, D.J. and Kweon, I.S. and Kim, J. and Kang, W. and Jhoo, W.Y. and Roh, B. and Mun, J. and Oh, S. and Ak, K.E. and Lee, G.G. and Xu, Y. and Shen, M.W. and Hwang, K. and Shin, W.S. and Lee, K. and Park, W. and Lee, D. and Kwak, N. and Wang, Y.J. and Wang, Y. and Gu, T.C. and Lv, X.C. and Sun, M.",
        TITLE = "{NICE}: {CVPR} 2023 Challenge on Zero-shot Image Captioning",
        BOOKTITLE = NICE24,
        YEAR = "2024",
        PAGES = "7356-7365",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129434"}

% Braces keep the acronym CLIP uppercase under styles that sentence-case titles.
@inproceedings{bb133398,
        AUTHOR = "Urbanek, J. and Bordes, F. and Astolfi, P. and Williamson, M. and Sharma, V. and Romero Soriano, A.",
        TITLE = "A Picture is Worth More Than 77 Text Tokens: Evaluating {CLIP}-Style
Models on Dense Captions",
        BOOKTITLE = CVPR24,
        YEAR = "2024",
        PAGES = "26690-26699",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT129435"}

@inproceedings{bb133399,
  author    = {Nebbia, G. and Kovashka, A.},
  title     = {Image-caption difficulty for efficient weakly-supervised object detection from in-the-wild data},
  booktitle = L3D-IVU24,
  pages     = {2596-2605},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT129436},
}

Last update: Nov 26, 2025 at 20:24:09