@article{bb123600,
        AUTHOR = "Zhou, Y. J. and Long, J. F. and Xu, S. P. and Shang, L.",
        TITLE = "Attribute-driven image captioning via soft-switch pointer",
        JOURNAL = PRL,
        VOLUME = "152",
        YEAR = "2021",
        PAGES = "34--41",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119695"}

@article{bb123601,
        AUTHOR = "Zha, Z. J. and Liu, D. and Zhang, H. W. and Zhang, Y. D. and Wu, F.",
        TITLE = "Context-Aware Visual Policy Network for Fine-Grained Image Captioning",
        JOURNAL = PAMI,
        VOLUME = "44",
        YEAR = "2022",
        NUMBER = "2",
        MONTH = feb,
        PAGES = "710--722",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119696"}

@article{bb123602,
        AUTHOR = "Wang, Q. Z. and Wan, J. and Chan, A. B.",
        TITLE = "On Diversity in Image Captioning: Metrics and Methods",
        JOURNAL = PAMI,
        VOLUME = "44",
        YEAR = "2022",
        NUMBER = "2",
        MONTH = feb,
        PAGES = "1035--1049",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119697"}

@inproceedings{bb123603,
        AUTHOR = "Wang, J. and Xu, W. J. and Wang, Q. Z. and Chan, A. B.",
        TITLE = "Compare and Reweight:
Distinctive Image Captioning Using Similar Images Sets",
        BOOKTITLE = ECCV20,
        YEAR = "2020",
        PAGES = "I:370--386",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119698"}

@article{bb123604,
        AUTHOR = "Luo, G. F. and Cheng, L. J. and Jing, C. and Zhao, C. and Song, G. Z.",
        TITLE = "A thorough review of models, evaluation metrics, and datasets on
image captioning",
        JOURNAL = IET-IPR,
        VOLUME = "16",
        YEAR = "2022",
        NUMBER = "2",
        PAGES = "311--332",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119699"}

@article{bb123605,
        AUTHOR = "Ben, H. X. and Pan, Y. W. and Li, Y. and Yao, T. and Hong, R. C. and Wang, M. and Mei, T.",
        TITLE = "Unpaired Image Captioning With Semantic-Constrained Self-Learning",
        JOURNAL = MultMed,
        VOLUME = "24",
        YEAR = "2022",
        PAGES = "904--916",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119700"}

@article{bb123606,
        AUTHOR = "Song, P. P. and Guo, D. and Zhou, J. X. and Xu, M. L. and Wang, M.",
        TITLE = "Memorial {GAN} With Joint Semantic Optimization for Unpaired Image
Captioning",
        JOURNAL = Cyber,
        VOLUME = "53",
        YEAR = "2023",
        NUMBER = "7",
        MONTH = jul,
        PAGES = "4388--4399",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119701"}

@inproceedings{bb123607,
        AUTHOR = "Li, Y. and Yao, T. and Pan, Y. W. and Chao, H. Y. and Mei, T.",
        TITLE = "Pointing Novel Objects in Image Captioning",
        BOOKTITLE = CVPR19,
        YEAR = "2019",
        PAGES = "12489--12498",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119702"}

@article{bb123608,
        AUTHOR = "Liu, M. F. and Hu, H. J. and Li, L. J. and Yu, Y. and Guan, W. L.",
        TITLE = "Chinese Image Caption Generation via Visual Attention and Topic
Modeling",
        JOURNAL = Cyber,
        VOLUME = "52",
        YEAR = "2022",
        NUMBER = "2",
        MONTH = feb,
        PAGES = "1247--1257",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119703"}

@article{bb123609,
        AUTHOR = "Yang, Q. Q. and Ni, Z. H. and Ren, P.",
        TITLE = "Meta captioning:
A meta learning based remote sensing image captioning framework",
        JOURNAL = PandRS,
        VOLUME = "186",
        YEAR = "2022",
        PAGES = "190--200",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119704"}

@article{bb123610,
        AUTHOR = "Yang, X. and Zhang, H. W. and Cai, J. F.",
        TITLE = "Auto-Encoding and Distilling Scene Graphs for Image Captioning",
        JOURNAL = PAMI,
        VOLUME = "44",
        YEAR = "2022",
        NUMBER = "5",
        MONTH = may,
        PAGES = "2313--2327",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119705"}

@article{bb123611,
        AUTHOR = "Yang, X. and Zhang, H. W. and Cai, J. F.",
        TITLE = "Deconfounded Image Captioning: A Causal Retrospect",
        JOURNAL = PAMI,
        VOLUME = "45",
        YEAR = "2023",
        NUMBER = "11",
        MONTH = nov,
        PAGES = "12996--13010",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119706"}

@inproceedings{bb123612,
        AUTHOR = "Yang, X. and Tang, K. and Zhang, H. W. and Cai, J. F.",
        TITLE = "Auto-Encoding Scene Graphs for Image Captioning",
        BOOKTITLE = CVPR19,
        YEAR = "2019",
        PAGES = "10677--10686",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119707"}

@article{bb123613,
        AUTHOR = "Yang, Z. P. and Wang, P. B. and Chu, T. S. and Yang, J.",
        TITLE = "Human-Centric Image Captioning",
        JOURNAL = PR,
        VOLUME = "126",
        YEAR = "2022",
        PAGES = "108545",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119708"}

@article{bb123614,
        AUTHOR = "Li, X. and Zhang, W. K. and Sun, X. and Gao, X.",
        TITLE = "Without detection: Two-step clustering features with local-global
attention for image captioning",
        JOURNAL = IET-CV,
        VOLUME = "16",
        YEAR = "2022",
        NUMBER = "3",
        PAGES = "280--294",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119709"}

% NOTE(review): the source record had NUMBER = "2022", which merely repeats
% YEAR and is not an issue number; the field is omitted rather than guessed.
@article{bb123615,
        AUTHOR = "Yu, L. T. and Zhang, J. and Wu, Q.",
        TITLE = "Dual Attention on Pyramid Feature Maps for Image Captioning",
        JOURNAL = MultMed,
        VOLUME = "24",
        YEAR = "2022",
        PAGES = "1775--1786",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119710"}

@article{bb123616,
        AUTHOR = "Zhang, M. and Chen, J. X. and Li, P. F. and Jiang, M. and Zhou, Z.",
        TITLE = "Topic scene graphs for image captioning",
        JOURNAL = IET-CV,
        VOLUME = "16",
        YEAR = "2022",
        NUMBER = "4",
        PAGES = "364--375",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119711"}

@article{bb123617,
        AUTHOR = "Yu, Q. and Zhang, C. X. and Weng, L. and Xiang, S. M. and Pan, C. H.",
        TITLE = "Scene captioning with deep fusion of images and point clouds",
        JOURNAL = PRL,
        VOLUME = "158",
        YEAR = "2022",
        PAGES = "9--15",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119712"}

@article{bb123618,
        AUTHOR = "Chaudhari, C. P. and Devane, S.",
        TITLE = "Improved Framework using Rider Optimization Algorithm for Precise Image
Caption Generation",
        JOURNAL = IJIG,
        VOLUME = "22",
        YEAR = "2022",
        NUMBER = "2",
        MONTH = apr,
        PAGES = "2250021",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119713"}

@article{bb123619,
        AUTHOR = "Shao, X. J. and Xiang, Z. L. and Li, Y. X. and Zhang, M. J.",
        TITLE = "Variational joint self-attention for image captioning",
        JOURNAL = IET-IPR,
        VOLUME = "16",
        YEAR = "2022",
        NUMBER = "8",
        PAGES = "2075--2086",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119714"}

@article{bb123620,
        AUTHOR = "Li, Y. C. and Wu, C. and Li, L. and Liu, Y. H. and Zhu, J.",
        TITLE = "Caption Generation From Road Images for Traffic Scene Modeling",
        JOURNAL = ITS,
        VOLUME = "23",
        YEAR = "2022",
        NUMBER = "7",
        MONTH = jul,
        PAGES = "7805--7816",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119715"}

@article{bb123621,
        AUTHOR = "Wang, Y. H. and Xu, N. and Liu, A. A. and Li, W. H. and Zhang, Y. D.",
        TITLE = "High-Order Interaction Learning for Image Captioning",
        JOURNAL = CirSysVideo,
        VOLUME = "32",
        YEAR = "2022",
        NUMBER = "7",
        MONTH = jul,
        PAGES = "4417--4430",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119716"}

@article{bb123622,
        AUTHOR = "Guo, D. D. and Lu, R. Y. and Chen, B. and Zeng, Z. Q. and Zhou, M. Y.",
        TITLE = "Matching Visual Features to Hierarchical Semantic Topics for Image
Paragraph Captioning",
        JOURNAL = IJCV,
        VOLUME = "130",
        YEAR = "2022",
        NUMBER = "8",
        MONTH = aug,
        PAGES = "1920--1937",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119717"}

@article{bb123623,
        AUTHOR = "Demirel, B. and Cinbis, R. G.",
        TITLE = "Caption generation on scenes with seen and unseen object categories",
        JOURNAL = IVC,
        VOLUME = "124",
        YEAR = "2022",
        PAGES = "104515",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119718"}

@article{bb123624,
        AUTHOR = "Liu, Z. Y. and Dong, A. M. and Yu, J. G. and Han, Y. B. and Zhou, Y. and Zhao, K.",
        TITLE = "Scene classification for remote sensing images with self-attention
augmented {CNN}",
        JOURNAL = IET-IPR,
        VOLUME = "16",
        YEAR = "2022",
        NUMBER = "11",
        PAGES = "3085--3096",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119719"}

@article{bb123625,
        AUTHOR = "Wu, X. X. and Zhao, W. T. and Luo, J. B.",
        TITLE = "Learning Cooperative Neural Modules for Stylized Image Captioning",
        JOURNAL = IJCV,
        VOLUME = "130",
        YEAR = "2022",
        NUMBER = "9",
        MONTH = sep,
        PAGES = "2305--2320",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119720"}

% NOTE(review): the source record carried the placeholder page range "xx-yy";
% it is dropped so that no bogus pages are printed. Presumably the article
% number is 4606 -- verify against the publisher record before adding PAGES.
@article{bb123626,
        AUTHOR = "Zhou, H. and Du, X. P. and Xia, L. and Li, S.",
        TITLE = "Self-Learning for Few-Shot Remote Sensing Image Captioning",
        JOURNAL = RS,
        VOLUME = "14",
        YEAR = "2022",
        NUMBER = "18",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119721"}

@article{bb123627,
        AUTHOR = "Stefanini, M. and Cornia, M. and Baraldi, L. and Cascianelli, S. and Fiameni, G. and Cucchiara, R.",
        TITLE = "From Show to Tell: A Survey on Deep Learning-Based Image Captioning",
        JOURNAL = PAMI,
        VOLUME = "45",
        YEAR = "2023",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "539--559",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119722"}

@article{bb123628,
        AUTHOR = "Wu, Y. and Jiang, L. and Yang, Y.",
        TITLE = "Switchable Novel Object Captioner",
        JOURNAL = PAMI,
        VOLUME = "45",
        YEAR = "2023",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "1162--1173",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119723"}

@article{bb123629,
        AUTHOR = "Yang, X. and Zhang, H. W. and Gao, C. Y. and Cai, J. F.",
        TITLE = "Learning to Collocate Visual-Linguistic Neural Modules for Image
Captioning",
        JOURNAL = IJCV,
        VOLUME = "131",
        YEAR = "2023",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "82--100",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119724"}

@inproceedings{bb123630,
        AUTHOR = "Yang, X. and Zhang, H. W. and Cai, J. F.",
        TITLE = "Learning to Collocate Neural Modules for Image Captioning",
        BOOKTITLE = ICCV19,
        YEAR = "2019",
        PAGES = "4249--4259",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119725"}

@article{bb123631,
        AUTHOR = "Ma, Y. W. and Ji, J. Y. and Sun, X. S. and Zhou, Y. and Ji, R. R.",
        TITLE = "Towards local visual modeling for image captioning",
        JOURNAL = PR,
        VOLUME = "138",
        YEAR = "2023",
        PAGES = "109420",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119726"}

@article{bb123632,
        AUTHOR = "Barati, A. and Farsi, H. and Mohamadzadeh, S.",
        TITLE = "Integration of the latent variable knowledge into deep image
captioning with {Bayesian} modeling",
        JOURNAL = IET-IPR,
        VOLUME = "17",
        YEAR = "2023",
        NUMBER = "7",
        PAGES = "2256--2271",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119727"}

@article{bb123633,
        AUTHOR = "Feng, J. L. and Zhao, J. P.",
        TITLE = "Effectively Utilizing the Category Labels for Image Captioning",
        JOURNAL = IEICE,
        VOLUME = "E106-D",
        YEAR = "2023",
        NUMBER = "5",
        MONTH = may,
        PAGES = "617--624",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119728"}

@article{bb123634,
        AUTHOR = "Wang, D. P. and Hu, Z. Z. and Zhou, Y. and Hong, R. C. and Wang, M.",
        TITLE = "A Text-Guided Generation and Refinement Model for Image Captioning",
        JOURNAL = MultMed,
        VOLUME = "25",
        YEAR = "2023",
        PAGES = "2966--2977",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119729"}

@article{bb123635,
        AUTHOR = "Wang, Q. and Huang, W. and Zhang, X. T. and Li, X. L.",
        TITLE = "{GLCM}: Global-Local Captioning Model for Remote Sensing Image
Captioning",
        JOURNAL = Cyber,
        VOLUME = "53",
        YEAR = "2023",
        NUMBER = "11",
        MONTH = nov,
        PAGES = "6910--6922",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119730"}

@article{bb123636,
        AUTHOR = "Ji, J. Y. and Huang, X. Y. and Sun, X. S. and Zhou, Y. and Luo, G. and Cao, L. J. and Liu, J. Z. and Shao, L. and Ji, R. R.",
        TITLE = "Multi-Branch Distance-Sensitive Self-Attention Network for Image
Captioning",
        JOURNAL = MultMed,
        VOLUME = "25",
        YEAR = "2023",
        PAGES = "3962--3974",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119731"}

@article{bb123637,
  author    = {Cornia, M. and Baraldi, L. and Tal, A. and Cucchiara, R.},
  title     = {Fully-attentive iterative networks for region-based controllable image and video captioning},
  journal   = CVIU,
  volume    = {237},
  year      = {2023},
  pages     = {103857},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT119732}
}

@article{bb123638,
        AUTHOR = "Al Qatf, M. and Wang, X. and Hawbani, A. and Abdussalam, A. and Alsamhi, S. H.",
        TITLE = "Image Captioning With Novel Topics Guidance and Retrieval-Based
Topics Re-Weighting",
        JOURNAL = MultMed,
        VOLUME = "25",
        YEAR = "2023",
        PAGES = "5984--5999",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119733"}

@article{bb123639,
        AUTHOR = "Zhu, P. P. and Wang, X. and Luo, Y. and Sun, Z. L. and Zheng, W. S. and Wang, Y. and Chen, C.",
        TITLE = "Unpaired Image Captioning by Image-Level Weakly-Supervised Visual
Concept Recognition",
        JOURNAL = MultMed,
        VOLUME = "25",
        YEAR = "2023",
        PAGES = "6702--6716",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119734"}

@article{bb123640,
        AUTHOR = "Hu, N. N. and Ming, Y. and Fan, C. X. and Feng, F. and Lyu, B. Y.",
        TITLE = "{TSFNet}: Triple-Steam Image Captioning",
        JOURNAL = MultMed,
        VOLUME = "25",
        YEAR = "2023",
        PAGES = "6904--6916",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119735"}

@article{bb123641,
  author    = {Gonzalez Chavez, O. and Ruiz, G. and Moctezuma, D. and Ramirez delReal, T.},
  title     = {Are metrics measuring what they should? An evaluation of Image Captioning task metrics},
  journal   = SP:IC,
  volume    = {120},
  year      = {2024},
  pages     = {117071},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT119736}
}

% NOTE(review): the source record had NUMBER = "6 2023" (year fused into the
% issue number); only the issue number is kept.
@article{bb123642,
        AUTHOR = "Padate, R. and Jain, A. and Kalla, M. and Sharma, A.",
        TITLE = "A Widespread Assessment and Open Issues on Image Captioning Models",
        JOURNAL = IJIG,
        VOLUME = "23",
        YEAR = "2023",
        NUMBER = "6",
        PAGES = "2350057",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119737"}

@article{bb123643,
        AUTHOR = "Shao, Z. and Han, J. G. and Debattista, K. and Pang, Y. W.",
        TITLE = "Textual Context-Aware Dense Captioning With Diverse Words",
        JOURNAL = MultMed,
        VOLUME = "25",
        YEAR = "2023",
        PAGES = "8753--8766",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119738"}

@article{bb123644,
        AUTHOR = "Cheng, J. and Wu, F. and Liu, L. and Zhang, Q. and Rutkowski, L. and Tao, D. C.",
        TITLE = "{InDecGAN}: Learning to Generate Complex Images From Captions via
Independent Object-Level Decomposition and Enhancement",
        JOURNAL = MultMed,
        VOLUME = "25",
        YEAR = "2023",
        PAGES = "8279--8293",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119739"}

@article{bb123645,
        AUTHOR = "Ding, N. and Deng, C. R. and Tan, M. K. and Du, Q. and Ge, Z. W. and Wu, Q.",
        TITLE = "Image Captioning With Controllable and Adaptive Length Levels",
        JOURNAL = PAMI,
        VOLUME = "46",
        YEAR = "2024",
        NUMBER = "2",
        MONTH = feb,
        PAGES = "764--779",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119740"}

@inproceedings{bb123646,
        AUTHOR = "Xu, G. H. and Niu, S. C. and Tan, M. K. and Luo, Y. C. and Du, Q. and Wu, Q.",
        TITLE = "Towards Accurate Text-based Image Captioning with Content Diversity
Exploration",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "12632--12641",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119741"}

@article{bb123647,
        AUTHOR = "Zhu, P. P. and Wang, X. and Zhu, L. and Sun, Z. L. and Zheng, W. S. and Wang, Y. and Chen, C. W.",
        TITLE = "Prompt-Based Learning for Unpaired Image Captioning",
        JOURNAL = MultMed,
        VOLUME = "26",
        YEAR = "2024",
        PAGES = "379--393",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119742"}

@article{bb123648,
        AUTHOR = "Liu, A. A. and Zhai, Y. C. and Xu, N. and Tian, H. and Nie, W. Z. and Zhang, Y. D.",
        TITLE = "Event-Aware Retrospective Learning for Knowledge-Based Image
Captioning",
        JOURNAL = MultMed,
        VOLUME = "26",
        YEAR = "2024",
        PAGES = "4898--4911",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119743"}

@article{bb123649,
        AUTHOR = "Song, L. F. and Li, F. and Wang, Y. and Liu, Y. and Wang, Y. and Xiang, S. M.",
        TITLE = "Image captioning: Semantic selection unit with stacked residual
attention",
        JOURNAL = IVC,
        VOLUME = "144",
        YEAR = "2024",
        PAGES = "104965",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119744"}

@article{bb123650,
        AUTHOR = "Ajankar, S. and Dutta, T.",
        TITLE = "Image-Relevant Entities Knowledge-Aware News Image Captioning",
        JOURNAL = MultMedMag,
        VOLUME = "31",
        YEAR = "2024",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "88--98",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119745"}

@article{bb123651,
        AUTHOR = "Dai, Z. Z. and Tran, V. and Markham, A. and Trigoni, N. and Rahman, M. A. and Wijayasingha, L. N. S. and Stankovic, J. and Li, C.",
        TITLE = "{EgoCap} and {EgoFormer}:
First-person image captioning with context fusion",
        JOURNAL = PRL,
        VOLUME = "181",
        YEAR = "2024",
        PAGES = "50--56",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119746"}

@article{bb123652,
        AUTHOR = "Shao, Z. and Han, J. G. and Debattista, K. and Pang, Y. W.",
        TITLE = "{DCMSTRD}: End-to-end Dense Captioning via Multi-Scale Transformer
Decoding",
        JOURNAL = MultMed,
        VOLUME = "26",
        YEAR = "2024",
        PAGES = "7581--7593",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119747"}

@article{bb123653,
        AUTHOR = "Cornia, M. and Baraldi, L. and Fiameni, G. and Cucchiara, R.",
        TITLE = "Generating More Pertinent Captions by Leveraging Semantics and Style on
Multi-Source Datasets",
        JOURNAL = IJCV,
        VOLUME = "132",
        YEAR = "2024",
        NUMBER = "5",
        MONTH = may,
        PAGES = "1701--1720",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119748"}

@inproceedings{bb123654,
        AUTHOR = "Barraco, M. and Sarto, S. and Cornia, M. and Baraldi, L. and Cucchiara, R.",
        TITLE = "With a Little Help from your own Past: Prototypical Memory Networks
for Image Captioning",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "3009--3019",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119749"}

% NOTE(review): the source record had the quoted literal "ICPR22" as BOOKTITLE,
% which would be printed verbatim; it is spelled out here because no ICPR22
% macro is visible in this part of the file. Switch to the macro if one exists.
@inproceedings{bb123655,
        AUTHOR = "Barraco, M. and Stefanini, M. and Cornia, M. and Cascianelli, S. and Baraldi, L. and Cucchiara, R.",
        TITLE = "{CaMEL}: Mean Teacher Learning for Image Captioning",
        BOOKTITLE = "International Conference on Pattern Recognition ({ICPR})",
        YEAR = "2022",
        PAGES = "4087--4094",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119750"}

@inproceedings{bb123656,
        AUTHOR = "Cornia, M. and Baraldi, L. and Cucchiara, R.",
        TITLE = "Show, Control and Tell: A Framework for Generating Controllable and
Grounded Captions",
        BOOKTITLE = CVPR19,
        YEAR = "2019",
        PAGES = "8299--8308",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119751"}

@article{bb123657,
        AUTHOR = "Wang, L. X. and Qiu, H. Q. and Qiu, B. and Meng, F. M. and Wu, Q. B. and Li, H. L.",
        TITLE = "{TridentCap}: Image-Fact-Style Trident Semantic Framework for Stylized
Image Captioning",
        JOURNAL = CirSysVideo,
        VOLUME = "34",
        YEAR = "2024",
        NUMBER = "5",
        MONTH = may,
        PAGES = "3563--3575",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119752"}

@article{bb123658,
        AUTHOR = "Zhang, H. and Zeng, P. P. and Gao, L. and Lyu, X. Y. and Song, J. K. and Shen, H. T.",
        TITLE = "{SPT}: Spatial Pyramid Transformer for Image Captioning",
        JOURNAL = CirSysVideo,
        VOLUME = "34",
        YEAR = "2024",
        NUMBER = "6",
        MONTH = jun,
        PAGES = "4829--4842",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119753"}

@article{bb123659,
        AUTHOR = "Wang, H. Y. and Song, K. and Jiang, X. and He, Z. Q.",
        TITLE = "{ragBERT}: Relationship-aligned and grammar-wise {BERT} model for image
captioning",
        JOURNAL = IVC,
        VOLUME = "148",
        YEAR = "2024",
        PAGES = "105105",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119754"}

@article{bb123660,
        AUTHOR = "Li, J. Y. and Zhang, L. and Zhang, K. and Hu, B. and Xie, H. T. and Mao, Z. D.",
        TITLE = "Cascade Semantic Prompt Alignment Network for Image Captioning",
        JOURNAL = CirSysVideo,
        VOLUME = "34",
        YEAR = "2024",
        NUMBER = "7",
        MONTH = jul,
        PAGES = "5266--5281",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119755"}

@article{bb123661,
        AUTHOR = "Cheng, Q. and Xu, Y. Q. and Huang, Z. Y.",
        TITLE = "{VCC-DiffNet}: Visual Conditional Control Diffusion Network for Remote
Sensing Image Captioning",
        JOURNAL = RS,
        VOLUME = "16",
        YEAR = "2024",
        NUMBER = "16",
        PAGES = "2961",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119756"}

@article{bb123662,
        AUTHOR = "Zou, Y. and Liao, S. Y. and Wang, Q. F.",
        TITLE = "Chinese image captioning with fusion encoder and visual keyword
search",
        JOURNAL = IET-IPR,
        VOLUME = "18",
        YEAR = "2024",
        NUMBER = "11",
        PAGES = "3055--3069",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119757"}

@inproceedings{bb123663,
        AUTHOR = "Ruan, J. and Wu, Y. and Wan, X. J. and Zhu, Y. S.",
        TITLE = "Describe Images in a Boring Way:
Towards Cross-Modal Sarcasm Generation",
        BOOKTITLE = WACV24,
        YEAR = "2024",
        PAGES = "5689--5698",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119758"}

@inproceedings{bb123664,
        AUTHOR = "Hirsch, E. and Tal, A.",
        TITLE = "{CLID}: Controlled-Length Image Descriptions with Limited Data",
        BOOKTITLE = WACV24,
        YEAR = "2024",
        PAGES = "5519--5529",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119759"}

@inproceedings{bb123665,
        AUTHOR = "Rotstein, N. and Bensaid, D. and Brody, S. and Ganz, R. and Kimmel, R.",
        TITLE = "{FuseCap}: Leveraging Large Language Models for Enriched Fused Image
Captions",
        BOOKTITLE = WACV24,
        YEAR = "2024",
        PAGES = "5677--5688",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119760"}

@inproceedings{bb123666,
        AUTHOR = "Petryk, S. and Whitehead, S. and Gonzalez, J. E. and Darrell, T. J. and Rohrbach, A. and Rohrbach, M.",
        TITLE = "Simple Token-Level Confidence Improves Caption Correctness",
        BOOKTITLE = WACV24,
        YEAR = "2024",
        PAGES = "5730--5740",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119761"}

@inproceedings{bb123667,
        AUTHOR = "Sabir, A.",
        TITLE = "Word to Sentence Visual Semantic Similarity for Caption Generation:
Lessons Learned",
        BOOKTITLE = MVA23,
        YEAR = "2023",
        PAGES = "1--5",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119762"}

@inproceedings{bb123668,
        AUTHOR = "Verma, A. and Agarwal, S. and Arya, K. V. and Petrlik, I. and Esparza, R. and Rodriguez, C.",
        TITLE = "Image Captioning with Reinforcement Learning",
        BOOKTITLE = ICCVMI23,
        YEAR = "2023",
        PAGES = "1--7",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119763"}

@inproceedings{bb123669,
        AUTHOR = "Wei, Y. C. and Li, L. and Geng, S. L.",
        TITLE = "Remote Sensing Image Captioning Using {Hire-MLP}",
        BOOKTITLE = CVIDL23,
        YEAR = "2023",
        PAGES = "109--112",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119764"}

@inproceedings{bb123670,
        AUTHOR = "Fan, J. and Liang, Y. and Liu, L. and Huang, S. and Zhang, L.",
        TITLE = "{RCA-NOC}: Relative Contrastive Alignment for Novel Object Captioning",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "15464--15474",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119765"}

@inproceedings{bb123671,
        AUTHOR = "Li, R. and Sun, S. Y. and Elhoseiny, M. and Torr, P.",
        TITLE = "{OxfordTVG-HIC}: Can Machine Make Humorous Captions from Images?",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "20236--20246",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119766"}

@inproceedings{bb123672,
        AUTHOR = "Hu, A. and Chen, S. Z. and Zhang, L. and Jin, Q.",
        TITLE = "Explore and Tell: Embodied Visual Captioning in {3D} Environments",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "2482--2491",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119767"}

@inproceedings{bb123673,
        AUTHOR = "Kang, W. and Mun, J. and Lee, S. J. and Roh, B.",
        TITLE = "Noise-aware Learning from Web-crawled Image-Text Data for Image
Captioning",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "2930--2940",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119768"}

@inproceedings{bb123674,
        AUTHOR = "Fei, J. J. and Wang, T. and Zhang, J. and He, Z. Y. and Wang, C. J. and Zheng, F.",
        TITLE = "Transferable Decoding with Visual Entities for Zero-Shot Image
Captioning",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "3113--3123",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119769"}

@inproceedings{bb123675,
        AUTHOR = "Kornblith, S. and Li, L. and Wang, Z. and Nguyen, T.",
        TITLE = "Guiding image captioning models toward more specific captions",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "15213--15223",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119770"}

@inproceedings{bb123676,
        AUTHOR = "Kim, Y. and Kim, J. and Lee, B. K. and Shin, S. and Ro, Y. M.",
        TITLE = "Mitigating Dataset Bias in Image Captioning Through {CLIP}
Confounder-Free Captioning Network",
        BOOKTITLE = ICIP23,
        YEAR = "2023",
        PAGES = "1720--1724",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119771"}

@inproceedings{bb123677,
        AUTHOR = "Dessi, R. and Bevilacqua, M. and Gualdoni, E. and Rakotonirina, N. C. and Franzon, F. and Baroni, M.",
        TITLE = "Cross-Domain Image Captioning with Discriminative Finetuning",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "6935--6944",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119772"}

@inproceedings{bb123678,
        AUTHOR = "Vo, D. M. and Luong, Q. A. and Sugimoto, A. and Nakayama, H.",
        TITLE = "{A-CAP}: Anticipation Captioning with Commonsense Knowledge",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "10824--10833",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119773"}

@inproceedings{bb123679,
        AUTHOR = "Kuo, C. W. and Kira, Z.",
        TITLE = "{HAAV}: Hierarchical Aggregation of Augmented Views for Image
Captioning",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "11039--11049",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119774"}

@inproceedings{bb123680,
        AUTHOR = "Ramos, R. and Martins, B. and Elliott, D. and Kementchedjhieva, Y.",
        TITLE = "{SmallCap}: Lightweight Image Captioning Prompted with Retrieval
Augmentation",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "2840--2849",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119775"}

@inproceedings{bb123681,
        AUTHOR = "Hirota, Y. and Nakashima, Y. and Garcia, N.",
        TITLE = "Model-Agnostic Gender Debiased Image Captioning",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "15191--15200",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119776"}

@inproceedings{bb123682,
        AUTHOR = "Chen, S. J. and Zhu, H. Y. and Chen, X. and Lei, Y. J. and Yu, G. and Chen, T.",
        TITLE = "End-to-End {3D} Dense Captioning with {Vote2Cap-DETR}",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "11124--11133",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119777"}

% Tran and Okatani (2022). The pages value carries a volume prefix ("IV:")
% ahead of the page range, following this file's convention.
% booktitle is the unquoted macro ACCV22; its string definition is not in this chunk.
@inproceedings{bb123683,
  author    = {Tran, H.T.T. and Okatani, T.},
  title     = {Bright as the Sun: In-depth Analysis of Imagination-driven Image Captioning},
  booktitle = ACCV22,
  year      = {2022},
  pages     = {IV:675-691},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT119778}
}

% PAGES uses this file's "<volume>:<first>-<last>" form. The stray space after
% the volume prefix ("I: 178-190") was removed so the value matches the other
% volume-prefixed entries (e.g. "IV:675-691"). BOOKTITLE is the bare macro
% MMMod23, which must be defined by a string definition outside this chunk.
@inproceedings{bb123684,
        AUTHOR = "Phueaksri, I. and Kastner, M.A. and Kawanishi, Y. and Komamizu, T. and Ide, I.",
        TITLE = "Towards Captioning an Image Collection from a Combined Scene Graph
Representation Approach",
        BOOKTITLE = MMMod23,
        YEAR = "2023",
        PAGES = "I:178-190",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119779"}

% "Clip" in the title is restored to the model acronym CLIP and brace-protected
% so sentence-casing styles keep it upper-case.
% NOTE(review): confirm the capitalisation against the published title.
% BOOKTITLE is the bare macro CMHRI22, which must be defined by a string
% definition outside this chunk.
@inproceedings{bb123685,
        AUTHOR = "Zhang, Y. and Wang, J. and Wu, H. and Xu, W.J.",
        TITLE = "Distinctive Image Captioning via {CLIP} Guided Group Optimization",
        BOOKTITLE = CMHRI22,
        YEAR = "2022",
        PAGES = "223-238",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119780"}

% The leading "3D" is brace-protected so that styles which sentence-case titles
% do not render it as "3d". BOOKTITLE is the bare macro WACV23, which must be
% defined by a string definition outside this chunk.
@inproceedings{bb123686,
        AUTHOR = "Qiu, Y. and Yamamoto, S. and Yamada, R. and Suzuki, R. and Kataoka, H. and Iwata, K. and Satoh, Y.",
        TITLE = "{3D} Change Localization and Captioning from Dynamic Scans of Indoor
Scenes",
        BOOKTITLE = WACV23,
        YEAR = "2023",
        PAGES = "1176-1185",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119781"}

% Honda, Watanabe, Matsumoto (2023).
% booktitle is the unquoted macro WACV23; its string definition is not in this chunk.
@inproceedings{bb123687,
  author    = {Honda, U. and Watanabe, T. and Matsumoto, Y.},
  title     = {Switching to Discriminative Image Captioning by Relieving a Bottleneck of Reinforcement Learning},
  booktitle = WACV23,
  year      = {2023},
  pages     = {1124-1134},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT119782}
}

% Sui, Yu, Liang, Ping (2022).
% booktitle is the unquoted macro ICIVC22; its string definition is not in this chunk.
@inproceedings{bb123688,
  author    = {Sui, J.H. and Yu, H.M. and Liang, X.Y. and Ping, P.},
  title     = {Image Caption Method Based on Graph Attention Network with Global Context},
  booktitle = ICIVC22,
  year      = {2022},
  pages     = {480-487},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT119783}
}

% Arguello, Lopez, Hinojosa, Arguello (2022).
% booktitle is the unquoted macro ICIP22; its string definition is not in this chunk.
@inproceedings{bb123689,
  author    = {Arguello, P. and Lopez, J. and Hinojosa, C. and Arguello, H.},
  title     = {Optics Lens Design for Privacy-Preserving Scene Captioning},
  booktitle = ICIP22,
  year      = {2022},
  pages     = {3551-3555},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT119784}
}

% Meng, Yang, Cao, Shah, Lim (2022). The pages value carries a volume prefix
% ("XXXVI:") ahead of the page range, following this file's convention.
% booktitle is the unquoted macro ECCV22; its string definition is not in this chunk.
@inproceedings{bb123690,
  author    = {Meng, Z.H. and Yang, D. and Cao, X.F. and Shah, A. and Lim, S.N.},
  title     = {Object-Centric Unsupervised Image Captioning},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {XXXVI:219-235},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT119785}
}

% Wang, Chen, Ma, Han, Niu, Shao, Xiao (2022). The pages value carries a volume
% prefix ("XXXVI:") ahead of the page range, following this file's convention.
% booktitle is the unquoted macro ECCV22; its string definition is not in this chunk.
@inproceedings{bb123691,
  author    = {Wang, Z. and Chen, L. and Ma, W.B. and Han, G.X. and Niu, Y. and Shao, J. and Xiao, J.},
  title     = {Explicit Image Caption Editing},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {XXXVI:113-129},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT119786}
}

% The acronym MORE, the deliberately mixed-case word "RElation" (it spells out
% the acronym) and "3D" are brace-protected so that styles which sentence-case
% titles keep their capitalisation. BOOKTITLE is the bare macro ECCV22, which
% must be defined by a string definition outside this chunk.
@inproceedings{bb123692,
        AUTHOR = "Jiao, Y. and Chen, S.X. and Jie, Z.Q. and Chen, J.J. and Ma, L. and Jiang, Y.G.",
        TITLE = "{MORE}: Multi-Order {RElation} Mining for Dense Captioning in {3D} Scenes",
        BOOKTITLE = ECCV22,
        YEAR = "2022",
        PAGES = "XXXV:528-545",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119787"}

% Nagrani, Seo, Seybold, Hauth, Manen, Sun, Schmid (2022). The pages value
% carries a volume prefix ("XIV:") ahead of the page range, following this
% file's convention.
% booktitle is the unquoted macro ECCV22; its string definition is not in this chunk.
@inproceedings{bb123693,
  author    = {Nagrani, A. and Seo, P.H. and Seybold, B. and Hauth, A. and Manen, S. and Sun, C. and Schmid, C.},
  title     = {Learning Audio-Video Modalities from Image Captions},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {XIV:407-426},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT119788}
}

% The camel-case method name "ZeroCap" is brace-protected so that styles which
% sentence-case titles do not render it as "Zerocap". BOOKTITLE is the bare
% macro CVPR22, which must be defined by a string definition outside this chunk.
@inproceedings{bb123694,
        AUTHOR = "Tewel, Y. and Shalev, Y. and Schwartz, I. and Wolf, L.B.",
        TITLE = "{ZeroCap}: Zero-Shot Image-to-Text Generation for Visual-Semantic
Arithmetic",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "17897-17907",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119789"}

% Truong, Danelljan, Yu, Van Gool (2022). The last author is written in
% "Last, First" form, so "Van Gool" is parsed as the surname.
% booktitle is the unquoted macro CVPR22; its string definition is not in this chunk.
@inproceedings{bb123695,
  author    = {Truong, P. and Danelljan, M. and Yu, F. and Van Gool, L.J.},
  title     = {Probabilistic Warp Consistency for Weakly-Supervised Semantic Correspondences},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {8698-8708},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT119790}
}

% Chan, Myers, Vijayanarasimhan, Ross, Seybold, Canny (2022).
% booktitle is the unquoted macro VDU22; its string definition is not in this chunk.
@inproceedings{bb123696,
  author    = {Chan, D.M. and Myers, A. and Vijayanarasimhan, S. and Ross, D.A. and Seybold, B. and Canny, J.F.},
  title     = {What's in a Caption? Dataset-Specific Linguistic Diversity and Its Effect on Visual Description Models and Metrics},
  booktitle = VDU22,
  year      = {2022},
  pages     = {4739-4748},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT119791}
}

% Popattia, Rafi, Qureshi, Nawaz (2022).
% booktitle is the unquoted macro MULA22; its string definition is not in this chunk.
@inproceedings{bb123697,
  author    = {Popattia, M. and Rafi, M. and Qureshi, R. and Nawaz, S.},
  title     = {Guiding Attention using Partial-Order Relationships for Image Captioning},
  booktitle = MULA22,
  year      = {2022},
  pages     = {4670-4679},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT119792}
}

% Mohamed, Khan, Haydarov, Elhoseiny (2022).
% booktitle is the unquoted macro CVPR22; its string definition is not in this chunk.
@inproceedings{bb123698,
  author    = {Mohamed, Y. and Khan, F.F. and Haydarov, K. and Elhoseiny, M.},
  title     = {It is Okay to Not Be Okay: Overcoming Emotional Bias in Affective Image Captioning by Contrastive Data Collection},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {21231-21240},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT119793}
}

% The model name "VisualGPT" is brace-protected so that styles which
% sentence-case titles do not render it as "Visualgpt". BOOKTITLE is the bare
% macro CVPR22, which must be defined by a string definition outside this chunk.
@inproceedings{bb123699,
        AUTHOR = "Chen, J. and Guo, H. and Yi, K. and Li, B.Y. and Elhoseiny, M.",
        TITLE = "{VisualGPT}: Data-efficient Adaptation of Pretrained Language Models
for Image Captioning",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "18009-18019",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT119794"}

Last update: Sep 28, 2024 at 17:47:54