Keith Price Bibliography Bibtex Entry (ANCHOR 135000 URL http://dx.doi.org/10.1016/j.patcog.2024.111097 PAGES 111097 YEAR 2025 MONTH NIL BIBSOURCE http://www.visionbib.com/bibliography/match607ic1.html#TT130994 VOLUME 159 JOURNAL PR AUTHOR Yang, Z. and Han, B. and Gao, X.B. and Zhan, Z.H. TITLE Eye-movement-prompted large image captioning model)


@article{bb135000,
  author    = {Yang, Z. and Han, B. and Gao, X.B. and Zhan, Z.H.},
  title     = {Eye-movement-prompted large image captioning model},
  journal   = PR,
  year      = {2025},
  volume    = {159},
  pages     = {111097},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130994},
}

@article{bb135001,
  author    = {Liang, X. and Li, C. and Tian, L.H.},
  title     = {Generative adversarial network for semi-supervised image captioning},
  journal   = CVIU,
  year      = {2024},
  volume    = {249},
  pages     = {104199},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130995},
}

@article{bb135002,
  author    = {Zheng, Q. and Wang, C.Y. and Wang, D.D.},
  title     = {Bypass network for semantics driven image paragraph captioning},
  journal   = CVIU,
  year      = {2024},
  volume    = {249},
  pages     = {104154},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130996},
}

@article{bb135003,
        AUTHOR = "Meena, P. and Kumar, H. and Yadav, S.K.",
        TITLE = "A Volumetric Saliency Guided Image Summarization for {RGB-D} Indoor
Scene Classification",
        JOURNAL = CirSysVideo,
        VOLUME = "34",
        YEAR = "2024",
        NUMBER = "11",
        MONTH = nov,
        PAGES = "10917--10929",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT130997"}

@article{bb135004,
        AUTHOR = "Rahman, M.M. and Uzzaman, A. and Sami, S.I. and Khatun, F. and Bhuiyan, M.A.A.",
        TITLE = "A comprehensive construction of deep neural network-based
encoder-decoder framework for automatic image captioning systems",
        JOURNAL = IET-IPR,
        VOLUME = "18",
        YEAR = "2024",
        NUMBER = "14",
        PAGES = "4778--4798",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT130998"}

@article{bb135005,
        AUTHOR = "Wang, Z. and Xiao, J. and Zhuang, Y.T. and Gao, F. and Shao, J. and Chen, L.",
        TITLE = "Learning Combinatorial Prompts for Universal Controllable Image
Captioning",
        JOURNAL = IJCV,
        VOLUME = "133",
        YEAR = "2025",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "129--150",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT130999"}

@article{bb135006,
        AUTHOR = "Zhang, C. and Tao, L. and Yamasaki, T.",
        TITLE = "{UTStyleCap4K}: Generating Image Captions with Sentimental Styles",
        JOURNAL = IEICE,
        VOLUME = "E108-D",
        YEAR = "2025",
        NUMBER = "3",
        MONTH = mar,
        PAGES = "266--276",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131000"}

@article{bb135007,
  author    = {Wei, J.H. and Li, Z.X. and Zhang, C. and Ma, H.F.},
  title     = {Fusing grid and adaptive region features for image captioning},
  journal   = IVC,
  year      = {2025},
  volume    = {157},
  pages     = {105513},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT131001},
}

@article{bb135008,
        AUTHOR = "Xu, D.S. and Huang, Q. and Zhang, X. and Cheng, H.N. and Shuang, F. and Cai, Y.",
        TITLE = "{DEVICE}: Depth and Visual Concepts Aware Transformer for {OCR}-based
image captioning",
        JOURNAL = PR,
        VOLUME = "164",
        YEAR = "2025",
        PAGES = "111522",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131002"}

@article{bb135009,
        AUTHOR = "Liu, A.A. and Wu, Q. and Xu, N. and Tian, H.S. and Wang, L.",
        TITLE = "Enriched Image Captioning Based on Knowledge Divergence and Focus",
        JOURNAL = CirSysVideo,
        VOLUME = "35",
        YEAR = "2025",
        NUMBER = "5",
        MONTH = may,
        PAGES = "4937--4948",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131003"}

@article{bb135010,
        AUTHOR = "Shi, L. and Kan, S.C. and Jin, Y. and Zhang, L. and Cen, Y.G.",
        TITLE = "Multi-Modal Self-Perception Enhanced Large Language Model for {3D}
Region-of-Interest Captioning With Limited Data",
        JOURNAL = MultMed,
        VOLUME = "27",
        YEAR = "2025",
        PAGES = "2935--2948",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131004"}

@article{bb135011,
        AUTHOR = "Wang, B. and Zhang, Z. and Zhao, M.B. and Jin, X.J. and Xu, M.L. and Wang, M.",
        TITLE = "{SeaCap}: Multi-Sight Embedding and Alignment for One-Stage Image
Captioner",
        JOURNAL = MultMed,
        VOLUME = "27",
        YEAR = "2025",
        PAGES = "3411--3425",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131005"}

@article{bb135012,
        AUTHOR = "Zhang, J. and Zhang, K.X. and Xie, Y. and Wang, Z.",
        TITLE = "Deep Reciprocal Learning for Image Captioning",
        JOURNAL = CirSysVideo,
        VOLUME = "35",
        YEAR = "2025",
        NUMBER = "7",
        MONTH = jul,
        PAGES = "6684--6697",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131006"}

@article{bb135013,
        AUTHOR = "Gao, J.C. and Zhang, L. and Li, J.Y. and Mao, Z.D.",
        TITLE = "Fully Semantic Gap Recovery for End-to-End Image Captioning",
        JOURNAL = CirSysVideo,
        VOLUME = "35",
        YEAR = "2025",
        NUMBER = "9",
        MONTH = sep,
        PAGES = "9365--9383",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131007"}

@article{bb135014,
  author    = {Gao, Y.Q. and Suo, W. and Sun, M.Y. and Liu, L. and Wang, P.},
  title     = {More video-relevant paragraph captioning via Perturbed Attention Self-Distillation},
  journal   = PR,
  year      = {2026},
  volume    = {169},
  pages     = {111871},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT131008},
}

@article{bb135015,
  author    = {Li, H. and Xiao, J. and Sun, M.J. and Lim, E.G. and Zhao, Y.},
  title     = {Auxiliary captioning: Bridging image-text matching and image captioning},
  journal   = SP:IC,
  year      = {2025},
  volume    = {138},
  pages     = {117337},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT131009},
}

@article{bb135016,
        AUTHOR = "Bao, Q.Y. and Liu, F. and Jiao, L.C. and Liu, Y. and Li, S. and Li, L.L. and Liu, X. and Chen, P.",
        TITLE = "Visual-Language Scene-Relation-Aware Zero-Shot Captioner",
        JOURNAL = PAMI,
        VOLUME = "47",
        YEAR = "2025",
        NUMBER = "10",
        MONTH = oct,
        PAGES = "8725--8739",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131010"}

@article{bb135017,
        AUTHOR = "Bui, D.C. and Nguyen, N.H. and Nguyen, K.",
        TITLE = "{UIT-OpenViIC}: An open-domain benchmark for evaluating image
captioning in {Vietnamese}",
        JOURNAL = SP:IC,
        VOLUME = "140",
        YEAR = "2026",
        PAGES = "117430",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131011"}

@article{bb135018,
        AUTHOR = "Zhao, S.S. and Wang, T. and Zhang, J. and Wang, X.C. and Zheng, F.",
        TITLE = "{MCoCa}: Towards fine-grained multimodal control in image captioning",
        JOURNAL = PR,
        VOLUME = "172",
        YEAR = "2026",
        PAGES = "112381",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131012"}

@article{bb135019,
        AUTHOR = "Wan, J. and Gan, M. and Zhang, L. and Zhou, J. and Liu, J. and Du, B. and Chen, C.L.P.",
        TITLE = "Fine-Grained Image Captioning by Ranking Diffusion Transformer",
        JOURNAL = IP,
        VOLUME = "34",
        YEAR = "2025",
        PAGES = "8332--8344",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131013"}

@article{bb135020,
  author    = {Wang, L. and Xue, R. and Yu, Z.T. and Zhang, R. and Pan, T. and Li, Y.},
  title     = {A dynamic hybrid network with attention and mamba for image captioning},
  journal   = CVIU,
  year      = {2026},
  volume    = {263},
  pages     = {104617},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT131014},
}

@article{bb135021,
        AUTHOR = "Li, H.Y. and Li, L. and Wang, H. and Zhang, W.B. and Ren, P.",
        TITLE = "Large Foundation Model Empowered Region-aware Underwater Image
Captioning",
        JOURNAL = IJCV,
        VOLUME = "134",
        YEAR = "2026",
        NUMBER = "2",
        MONTH = feb,
        PAGES = "66",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131015"}

@inproceedings{bb135022,
        AUTHOR = "Yildiz, S. and Memis, A.",
        TITLE = "Automatic {Turkish} Image Captioning Using Non-Native Deep Caption
Generator Models and Neural Machine Translators",
        BOOKTITLE = IPTA25,
        YEAR = "2025",
        PAGES = "1--6",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131016"}

@inproceedings{bb135023,
        AUTHOR = "Rubel, A.S. and Shih, F.Y. and Deek, F.P.",
        TITLE = "Frequency-Guided Contextual Image Captioning",
        BOOKTITLE = ICIP25,
        YEAR = "2025",
        PAGES = "1229--1234",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131017"}

@inproceedings{bb135024,
        AUTHOR = "Joya, S. and Fatemifar, S. and Akbari, A. and Sheppard, A. and Alder, C.",
        TITLE = "{SLICE}: Synthetic Caption-Trained Lightweight Image Captioner for Edge
Devices",
        BOOKTITLE = ICIP25,
        YEAR = "2025",
        PAGES = "1864--1869",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131018"}

@inproceedings{bb135025,
        AUTHOR = "Kolouju, P. and Xing, E. and Pless, R. and Jacobs, N. and Stylianou, A.",
        TITLE = "Good4cir: Generating Detailed Synthetic Captions for Composed Image
Retrieval",
        BOOKTITLE = SyntaGen25,
        YEAR = "2025",
        PAGES = "3139--3148",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131019"}

@inproceedings{bb135026,
        AUTHOR = "Samira, D. and Habler, E. and Elovici, Y. and Shabtai, A.",
        TITLE = "Variance-Based Membership Inference Attacks Against Large-Scale Image
Captioning Models",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "9210--9219",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131020"}

@inproceedings{bb135027,
        AUTHOR = "Yang, Z.T. and Feng, R. and Yan, K.Y. and Wang, H.J. and Wang, Z.C. and Zhu, S.W. and Zhang, H. and Xiao, J. and Wu, P.Y. and Zhu, K. and Chen, J. and Xie, C.W. and Yang, Y. and Zhang, H.Y. and Liu, Y. and Cheng, F.",
        TITLE = "{BACON}: Improving Clarity of Image Captions via Bag-of-Concept Graphs",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "14380--14389",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131021"}

@inproceedings{bb135028,
        AUTHOR = "Hua, H. and Liu, Q. and Zhang, L.Z. and Shi, J. and Kim, S.Y. and Zhang, Z.F. and Wang, Y.L. and Zhang, J.M. and Lin, Z. and Luo, J.B.",
        TITLE = "{FineCaption}: Compositional Image Captioning Focusing on Wherever You
Want at Any Granularity",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "24763--24773",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131022"}

@inproceedings{bb135029,
        AUTHOR = "Ye, A. and Santy, S. and Hwang, J.D. and Zhang, A.X. and Krishna, R.",
        TITLE = "Semantic and Expressive Variation in Image Captions Across Languages",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "29667--29679",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131023"}

@inproceedings{bb135030,
        AUTHOR = "Lee, J.R. and Shin, Y. and Son, G. and Hwang, D.",
        TITLE = "Diffusion Bridge: Leveraging Diffusion Model to Reduce the Modality
Gap Between Text and Vision for Zero-Shot Image Captioning",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "4050--4059",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131024"}

@inproceedings{bb135031,
        AUTHOR = "Fan, X.Y. and Tao, Q. and Tang, Y.F.",
        TITLE = "{G-DPPD}: Gated Data-dependent Prior Probability Distribution for
Unsupervised Image Captioning",
        BOOKTITLE = ICIVC24,
        YEAR = "2024",
        PAGES = "467--472",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131025"}

@inproceedings{bb135032,
        AUTHOR = "Wu, J.L. and Wang, J.F. and Yang, Z.Y. and Gan, Z. and Liu, Z.C. and Yuan, J.S. and Wang, L.J.",
        TITLE = "Grit: A Generative Region-to-text Transformer for Object Understanding",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "LXXX: 207--224",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131026"}

@inproceedings{bb135033,
        AUTHOR = "Song, G.R. and Kim, N.R. and Lee, J.S. and Lee, J.H.",
        TITLE = "{IGNORE}: Information Gap-based False Negative Loss Rejection for Single
Positive Multi-label Learning",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "XXXIV: 472--488",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131027"}

@inproceedings{bb135034,
        AUTHOR = "Basioti, K. and Abdelsalam, M.A. and Fancellu, F. and Pavlovic, V. and Fazly, A.",
        TITLE = "{CIC-BART-SSA}: Controllable Image Captioning with Structured Semantic
Augmentation",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "LXVI: 444--461",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131028"}

@inproceedings{bb135035,
        AUTHOR = "Udo, H. and Koshinaka, T.",
        TITLE = "Reading is Believing: Revisiting Language Bottleneck Models for Image
Classification",
        BOOKTITLE = ICIP24,
        YEAR = "2024",
        PAGES = "943--949",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131029"}

@inproceedings{bb135036,
        AUTHOR = "Das, S. and Sekhar, C.C.",
        TITLE = "Leveraging Generated Image Captions for Visual Commonsense Reasoning",
        BOOKTITLE = ICIP24,
        YEAR = "2024",
        PAGES = "2508--2514",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131030"}

@inproceedings{bb135037,
        AUTHOR = "Chaffin, A. and Kijak, E. and Claveau, V.",
        TITLE = "Distinctive Image Captioning: Leveraging Ground Truth Captions in
Clip Guided Reinforcement Learning",
        BOOKTITLE = ICIP24,
        YEAR = "2024",
        PAGES = "2550--2556",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131031"}

@inproceedings{bb135038,
        AUTHOR = "Jeong, K. and Lee, W. and Nam, W. and Ma, M. and Kang, P.",
        TITLE = "Technical Report of {NICE} Challenge at {CVPR} 2024: Caption Re-ranking
Evaluation Using Ensembled {CLIP} and Consensus Scores",
        BOOKTITLE = NICE24,
        YEAR = "2024",
        PAGES = "7366--7372",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131032"}

@inproceedings{bb135039,
        AUTHOR = "Kim, T. and Marsden, M. and Ahn, P. and Kim, S. and Lee, S. and Sala, A. and Kim, S.H.",
        TITLE = "Large-Scale Bidirectional Training for Zero-Shot Image Captioning",
        BOOKTITLE = NICE24,
        YEAR = "2024",
        PAGES = "7373--7383",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131033"}

@inproceedings{bb135040,
        AUTHOR = "Kim, T. and Ahn, P. and Kim, S. and Lee, S. and Marsden, M. and Sala, A. and Kim, S.H. and Han, B.H. and Lee, K.M. and Lee, H.L. and Bae, K. and Wu, X.Y. and Gao, Y. and Zhang, H.L. and Yang, Y. and Guo, W. and Lu, J.F. and Oh, Y. and Cho, J.W. and Kim, D.J. and Kweon, I.S. and Kim, J. and Kang, W. and Jhoo, W.Y. and Roh, B. and Mun, J. and Oh, S. and Ak, K.E. and Lee, G.G. and Xu, Y. and Shen, M.W. and Hwang, K. and Shin, W.S. and Lee, K. and Park, W. and Lee, D. and Kwak, N. and Wang, Y.J. and Wang, Y. and Gu, T.C. and Lv, X.C. and Sun, M.",
        TITLE = "{NICE}: {CVPR} 2023 Challenge on Zero-shot Image Captioning",
        BOOKTITLE = NICE24,
        YEAR = "2024",
        PAGES = "7356--7365",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131034"}

@inproceedings{bb135041,
        AUTHOR = "Urbanek, J. and Bordes, F. and Astolfi, P. and Williamson, M. and Sharma, V. and Romero Soriano, A.",
        TITLE = "A Picture is Worth More Than 77 Text Tokens: Evaluating {CLIP}-Style
Models on Dense Captions",
        BOOKTITLE = CVPR24,
        YEAR = "2024",
        PAGES = "26690--26699",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131035"}

@inproceedings{bb135042,
        AUTHOR = "Nebbia, G. and Kovashka, A.",
        TITLE = "Image-caption difficulty for efficient weakly-supervised object
detection from in-the-wild data",
        BOOKTITLE = L3D-IVU24,
        YEAR = "2024",
        PAGES = "2596--2605",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131036"}

@inproceedings{bb135043,
        AUTHOR = "Sakaino, H. and Phuong, T.N. and Duy, V.N.",
        TITLE = "{PV-Cap}: {3D} Dynamic Scene Understanding Through Open Physics-based
Vocabulary",
        BOOKTITLE = AICity24,
        YEAR = "2024",
        PAGES = "7932--7942",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131037"}

@inproceedings{bb135044,
        AUTHOR = "Kong, F. and Chen, Y.B. and Cai, J.R. and Modolo, D.",
        TITLE = "Hyperbolic Learning with Synthetic Captions for Open-World Detection",
        BOOKTITLE = CVPR24,
        YEAR = "2024",
        PAGES = "16762--16771",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131038"}

@inproceedings{bb135045,
        AUTHOR = "Zeng, Z.Q. and Xie, Y. and Zhang, H. and Chen, C. and Chen, B. and Wang, Z.J.",
        TITLE = "{MeaCap}: Memory-Augmented Zero-shot Image Captioning",
        BOOKTITLE = CVPR24,
        YEAR = "2024",
        PAGES = "14100--14110",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131039"}

@inproceedings{bb135046,
        AUTHOR = "Wada, Y. and Kaneda, K. and Saito, D. and Sugiura, K.",
        TITLE = "Polos: Multimodal Metric Learning from Human Feedback for Image
Captioning",
        BOOKTITLE = CVPR24,
        YEAR = "2024",
        PAGES = "13559--13568",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131040"}

@inproceedings{bb135047,
        AUTHOR = "Huang, X.K. and Wang, J.F. and Tang, Y.S. and Zhang, Z. and Hu, H. and Lu, J.W. and Wang, L.J. and Liu, Z.C.",
        TITLE = "Segment and Caption Anything",
        BOOKTITLE = CVPR24,
        YEAR = "2024",
        PAGES = "13405--13417",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131041"}

@inproceedings{bb135048,
        AUTHOR = "Ge, Y.H. and Zeng, X.H. and Huffman, J.S. and Lin, T.Y. and Liu, M.Y. and Cui, Y.",
        TITLE = "Visual Fact Checker: Enabling High-Fidelity Detailed Caption
Generation",
        BOOKTITLE = CVPR24,
        YEAR = "2024",
        PAGES = "14033--14042",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131042"}

@inproceedings{bb135049,
        AUTHOR = "Ruan, J. and Wu, Y. and Wan, X.J. and Zhu, Y.S.",
        TITLE = "Describe Images in a Boring Way:
Towards Cross-Modal Sarcasm Generation",
        BOOKTITLE = WACV24,
        YEAR = "2024",
        PAGES = "5689--5698",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131043"}

@inproceedings{bb135050,
        AUTHOR = "Hirsch, E. and Tal, A.",
        TITLE = "{CLID}: Controlled-Length Image Descriptions with Limited Data",
        BOOKTITLE = WACV24,
        YEAR = "2024",
        PAGES = "5519--5529",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131044"}

@inproceedings{bb135051,
        AUTHOR = "Petryk, S. and Whitehead, S. and Gonzalez, J.E. and Darrell, T.J. and Rohrbach, A. and Rohrbach, M.",
        TITLE = "Simple Token-Level Confidence Improves Caption Correctness",
        BOOKTITLE = WACV24,
        YEAR = "2024",
        PAGES = "5730--5740",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131045"}

@inproceedings{bb135052,
        AUTHOR = "Sabir, A.",
        TITLE = "Word to Sentence Visual Semantic Similarity for Caption Generation:
Lessons Learned",
        BOOKTITLE = MVA23,
        YEAR = "2023",
        PAGES = "1--5",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131046"}

@inproceedings{bb135053,
        AUTHOR = "Verma, A. and Agarwal, S. and Arya, K.V. and Petrlik, I. and Esparza, R. and Rodriguez, C.",
        TITLE = "Image Captioning with Reinforcement Learning",
        BOOKTITLE = ICCVMI23,
        YEAR = "2023",
        PAGES = "1--7",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131047"}

@inproceedings{bb135054,
        AUTHOR = "Fan, J.S. and Liang, Y.Y. and Liu, L. and Huang, S.L. and Zhang, L.",
        TITLE = "{RCA-NOC}: Relative Contrastive Alignment for Novel Object Captioning",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "15464--15474",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131048"}

@inproceedings{bb135055,
        AUTHOR = "Li, R. and Sun, S.Y. and Elhoseiny, M. and Torr, P.H.S.",
        TITLE = "{OxfordTVG-HIC}: Can Machine Make Humorous Captions from Images?",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "20236--20246",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131049"}

@inproceedings{bb135056,
        AUTHOR = "Hu, A. and Chen, S.Z. and Zhang, L. and Jin, Q.",
        TITLE = "Explore and Tell: Embodied Visual Captioning in {3D} Environments",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "2482--2491",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131050"}

@inproceedings{bb135057,
        AUTHOR = "Kang, W. and Mun, J. and Lee, S.J. and Roh, B.",
        TITLE = "Noise-aware Learning from Web-crawled Image-Text Data for Image
Captioning",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "2930--2940",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131051"}

@inproceedings{bb135058,
        AUTHOR = "Fei, J.J. and Wang, T. and Zhang, J. and He, Z.Y. and Wang, C.J. and Zheng, F.",
        TITLE = "Transferable Decoding with Visual Entities for Zero-Shot Image
Captioning",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "3113--3123",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131052"}

@inproceedings{bb135059,
        AUTHOR = "Kornblith, S. and Li, L. and Wang, Z. and Nguyen, T.",
        TITLE = "Guiding image captioning models toward more specific captions",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "15213--15223",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131053"}

@inproceedings{bb135060,
        AUTHOR = "Kim, Y. and Kim, J.H. and Lee, B.K. and Shin, S. and Ro, Y.M.",
        TITLE = "Mitigating Dataset Bias in Image Captioning Through Clip
Confounder-Free Captioning Network",
        BOOKTITLE = ICIP23,
        YEAR = "2023",
        PAGES = "1720--1724",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131054"}

@inproceedings{bb135061,
        AUTHOR = "Dessi, R. and Bevilacqua, M. and Gualdoni, E. and Rakotonirina, N.C. and Franzon, F. and Baroni, M.",
        TITLE = "Cross-Domain Image Captioning with Discriminative Finetuning",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "6935--6944",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131055"}

@inproceedings{bb135062,
        AUTHOR = "Vo, D.M. and Luong, Q.A. and Sugimoto, A. and Nakayama, H.",
        TITLE = "{A-CAP}: Anticipation Captioning with Commonsense Knowledge",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "10824--10833",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131056"}

@inproceedings{bb135063,
        AUTHOR = "Kuo, C.W. and Kira, Z.",
        TITLE = "{HAAV}: Hierarchical Aggregation of Augmented Views for Image
Captioning",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "11039--11049",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131057"}

@inproceedings{bb135064,
        AUTHOR = "Ramos, R. and Martins, B. and Elliott, D. and Kementchedjhieva, Y.",
        TITLE = "Smallcap: Lightweight Image Captioning Prompted with Retrieval
Augmentation",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "2840--2849",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131058"}

@inproceedings{bb135065,
        AUTHOR = "Hirota, Y. and Nakashima, Y. and Garcia, N.",
        TITLE = "Model-Agnostic Gender Debiased Image Captioning",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "15191--15200",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131059"}

@inproceedings{bb135066,
        AUTHOR = "Tran, H.T.T. and Okatani, T.",
        TITLE = "Bright as the Sun: In-depth Analysis of Imagination-driven Image
Captioning",
        BOOKTITLE = ACCV22,
        YEAR = "2022",
        PAGES = "IV:675--691",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131060"}

@inproceedings{bb135067,
        AUTHOR = "Phueaksri, I. and Kastner, M.A. and Kawanishi, Y. and Komamizu, T. and Ide, I.",
        TITLE = "Towards Captioning an Image Collection from a Combined Scene Graph
Representation Approach",
        BOOKTITLE = MMMod23,
        YEAR = "2023",
        PAGES = "I: 178--190",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131061"}

@inproceedings{bb135068,
        AUTHOR = "Honda, U. and Watanabe, T. and Matsumoto, Y.",
        TITLE = "Switching to Discriminative Image Captioning by Relieving a
Bottleneck of Reinforcement Learning",
        BOOKTITLE = WACV23,
        YEAR = "2023",
        PAGES = "1124--1134",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131062"}

@inproceedings{bb135069,
        AUTHOR = "Zhang, Y.Y. and Wang, J.N. and Wu, H. and Xu, W.J.",
        TITLE = "Distinctive Image Captioning via Clip Guided Group Optimization",
        BOOKTITLE = CMHRI22,
        YEAR = "2022",
        PAGES = "223--238",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131063"}

@inproceedings{bb135070,
        AUTHOR = "Arguello, P. and Lopez, J. and Hinojosa, C. and Arguello, H.",
        TITLE = "Optics Lens Design for Privacy-Preserving Scene Captioning",
        BOOKTITLE = ICIP22,
        YEAR = "2022",
        PAGES = "3551--3555",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131064"}

@inproceedings{bb135071,
        AUTHOR = "Meng, Z.H. and Yang, D. and Cao, X.F. and Shah, A. and Lim, S.N.",
        TITLE = "Object-Centric Unsupervised Image Captioning",
        BOOKTITLE = ECCV22,
        YEAR = "2022",
        PAGES = "XXXVI:219--235",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131065"}

@inproceedings{bb135072,
        AUTHOR = "Wang, Z. and Chen, L. and Ma, W.B. and Han, G.X. and Niu, Y. and Shao, J. and Xiao, J.",
        TITLE = "Explicit Image Caption Editing",
        BOOKTITLE = ECCV22,
        YEAR = "2022",
        PAGES = "XXXVI:113--129",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131066"}

@inproceedings{bb135073,
        AUTHOR = "Jiao, Y. and Chen, S.X. and Jie, Z.Q. and Chen, J.J. and Ma, L. and Jiang, Y.G.",
        TITLE = "{MORE}: Multi-Order {RElation} Mining for Dense Captioning in {3D} Scenes",
        BOOKTITLE = ECCV22,
        YEAR = "2022",
        PAGES = "XXXV:528--545",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131067"}

@inproceedings{bb135074,
        AUTHOR = "Nagrani, A. and Seo, P.H. and Seybold, B. and Hauth, A. and Manen, S. and Sun, C. and Schmid, C.",
        TITLE = "Learning Audio-Video Modalities from Image Captions",
        BOOKTITLE = ECCV22,
        YEAR = "2022",
        PAGES = "XIV:407--426",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131068"}

@inproceedings{bb135075,
        AUTHOR = "Tewel, Y. and Shalev, Y. and Schwartz, I. and Wolf, L.B.",
        TITLE = "{ZeroCap}: Zero-Shot Image-to-Text Generation for Visual-Semantic
Arithmetic",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "17897--17907",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131069"}

@inproceedings{bb135076,
        AUTHOR = "Truong, P. and Danelljan, M. and Yu, F. and Van Gool, L.J.",
        TITLE = "Probabilistic Warp Consistency for Weakly-Supervised Semantic
Correspondences",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "8698--8708",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131070"}

@inproceedings{bb135077,
        AUTHOR = "Chan, D.M. and Myers, A. and Vijayanarasimhan, S. and Ross, D.A. and Seybold, B. and Canny, J.F.",
        TITLE = "What's in a Caption? Dataset-Specific Linguistic Diversity and Its
Effect on Visual Description Models and Metrics",
        BOOKTITLE = VDU22,
        YEAR = "2022",
        PAGES = "4739--4748",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131071"}

@inproceedings{bb135078,
        AUTHOR = "Mohamed, Y. and Khan, F.F. and Haydarov, K. and Elhoseiny, M.",
        TITLE = "It is Okay to Not Be Okay: Overcoming Emotional Bias in Affective
Image Captioning by Contrastive Data Collection",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "21231--21240",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131072"}

@inproceedings{bb135079,
        AUTHOR = "Chen, J. and Guo, H. and Yi, K. and Li, B.Y. and Elhoseiny, M.",
        TITLE = "{VisualGPT}: Data-efficient Adaptation of Pretrained Language Models
for Image Captioning",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "18009--18019",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131073"}

@inproceedings{bb135080,
        AUTHOR = "Chen, S. and Song, Z.H. and Haque, M. and Liu, C. and Yang, W.",
        TITLE = "{NICGSlowDown}: Evaluating the Efficiency Robustness of Neural Image
Caption Generation Models",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "15344--15353",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131074"}

@inproceedings{bb135081,
        AUTHOR = "Hirota, Y. and Nakashima, Y. and Garcia, N.",
        TITLE = "Quantifying Societal Bias Amplification in Image Captioning",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "13440--13449",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131075"}

@inproceedings{bb135082,
        AUTHOR = "Beddiar, D. and Oussalah, M. and Tapio, S.",
        TITLE = "Explainability for Medical Image Captioning",
        BOOKTITLE = IPTA22,
        YEAR = "2022",
        PAGES = "1--6",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131076"}

@inproceedings{bb135083,
        AUTHOR = "Bounab, Y. and Oussalah, M. and Ferdenache, A.",
        TITLE = "Reconciling Image Captioning and User's Comments for Urban Tourism",
        BOOKTITLE = IPTA20,
        YEAR = "2020",
        PAGES = "1--6",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131077"}

@inproceedings{bb135084,
        AUTHOR = "Zha, Z. W. and Zhou, P. F. and Bai, C.",
        TITLE = "Exploring Implicit and Explicit Relations with the Dual Relation-Aware Network for Image Captioning",
        BOOKTITLE = MMMod22,
        YEAR = "2022",
        PAGES = "II:97--108",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131078"}

@inproceedings{bb135085,
        AUTHOR = "Ruta, D. and Motiian, S. and Faieta, B. and Lin, Z. and Jin, H. L. and Filipkowski, A. and Gilbert, A. and Collomosse, J.",
        TITLE = "{ALADIN}: All Layer Adaptive Instance Normalization for Fine-grained Style Similarity",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "11906--11915",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131079"}

@inproceedings{bb135086,
        AUTHOR = "Nguyen, K. and Tripathi, S. and Du, B. and Guha, T. and Nguyen, T. Q.",
        TITLE = "In Defense of Scene Graphs for Image Captioning",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "1387--1396",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131080"}

@inproceedings{bb135087,
        AUTHOR = "Shi, J. and Li, Y. and Wang, S. J.",
        TITLE = "Partial Off-policy Learning: Balance Accuracy and Diversity for Human-Oriented Image Captioning",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "2167--2176",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131081"}

@inproceedings{bb135088,
        AUTHOR = "Alahmadi, R. and Hahn, J.",
        TITLE = "Improve Image Captioning by Estimating the Gazing Patterns from the Caption",
        BOOKTITLE = WACV22,
        YEAR = "2022",
        PAGES = "2453--2462",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131082"}

@inproceedings{bb135089,
        AUTHOR = "Biten, A. F. and Gomez, L. and Karatzas, D.",
        TITLE = "Let there be a clock on the beach: Reducing Object Hallucination in Image Captioning",
        BOOKTITLE = WACV22,
        YEAR = "2022",
        PAGES = "2473--2482",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131083"}

@inproceedings{bb135090,
        AUTHOR = "Sharif, N. and White, L. and Bennamoun, M. and Liu, W. and Shah, S. A. A.",
        TITLE = "{WEmbSim}: A Simple yet Effective Metric for Image Captioning",
        BOOKTITLE = DICTA20,
        YEAR = "2020",
        PAGES = "1--8",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131084"}

@inproceedings{bb135091,
        AUTHOR = "Qiu, J. Y. and Yang, Y. D. and Wang, X. C. and Tao, D. C.",
        TITLE = "Scene Essence",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "8318--8329",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131085"}

@inproceedings{bb135092,
        AUTHOR = "Chen, L. and Jiang, Z. H. and Xiao, J. and Liu, W.",
        TITLE = "Human-like Controllable Image Captioning with Verb-specific Semantic Roles",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "16841--16851",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131086"}

@inproceedings{bb135093,
        AUTHOR = "Chen, D. Z. Y. and Gholami, A. and Nie{\ss}ner, M. and Chang, A. X.",
        TITLE = "{Scan2Cap}: Context-aware Dense Captioning in {RGB-D} Scans",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "3192--3202",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131087"}

@inproceedings{bb135094,
        AUTHOR = "Luong, Q. A. and Vo, D. M. and Sugimoto, A.",
        TITLE = "Saliency based Subject Selection for Diverse Image Captioning",
        BOOKTITLE = MVA21,
        YEAR = "2021",
        PAGES = "1--5",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131088"}

@inproceedings{bb135095,
        AUTHOR = "Sharif, N. and Bennamoun, M. and Liu, W. and Shah, S. A. A.",
        TITLE = "{SubICap}: Towards Subword-informed Image Captioning",
        BOOKTITLE = WACV21,
        YEAR = "2021",
        PAGES = "3539--3540",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131089"}

@inproceedings{bb135096,
        AUTHOR = "Umemura, K. and Kastner, M. A. and Ide, I. and Kawanishi, Y. and Hirayama, T. and Doman, K. and Deguchi, D. and Murase, H.",
        TITLE = "Tell as You Imagine: Sentence Imageability-aware Image Captioning",
        BOOKTITLE = MMMod21,
        YEAR = "2021",
        PAGES = "II:62--73",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131090"}

@inproceedings{bb135097,
        AUTHOR = "Hallonquist, N. and Geman, D. and Younes, L.",
        TITLE = "Graph Discovery for Visual Test Generation",
        BOOKTITLE = ICPR21,
        YEAR = "2021",
        PAGES = "7500--7507",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131091"}

@inproceedings{bb135098,
        AUTHOR = "Li, X. J. and Yang, C. and Chen, S. L. and Zhu, C. and Yin, X. C.",
        TITLE = "Semantic Bilinear Pooling for Fine-Grained Recognition",
        BOOKTITLE = ICPR21,
        YEAR = "2021",
        PAGES = "3660--3666",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131092"}

@inproceedings{bb135099,
        AUTHOR = "Kalimuthu, M. and Mogadala, A. and Mosbach, M. and Klakow, D.",
        TITLE = "Fusion Models for Improved Image Captioning",
        BOOKTITLE = MMDLCA20,
        YEAR = "2020",
        PAGES = "381--395",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131093"}
Last update: Feb 17, 2026 at 20:06:16