@inproceedings{bb130400,
        AUTHOR = "Basioti, K. and Abdelsalam, M. A. and Fancellu, F. and Pavlovic, V. and Fazly, A.",
        TITLE = "{CIC-BART-SSA}: Controllable Image Captioning with Structured Semantic Augmentation",
        BOOKTITLE = ECCV24,
        VOLUME = "LXVI",
        YEAR = "2024",
        PAGES = "444--461",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126444"}

@inproceedings{bb130401,
        AUTHOR = "Udo, H. and Koshinaka, T.",
        TITLE = "Reading is Believing: Revisiting Language Bottleneck Models for Image Classification",
        BOOKTITLE = ICIP24,
        YEAR = "2024",
        PAGES = "943--949",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126445"}

@inproceedings{bb130402,
        AUTHOR = "Das, S. and Sekhar, C. C.",
        TITLE = "Leveraging Generated Image Captions for Visual Commonsense Reasoning",
        BOOKTITLE = ICIP24,
        YEAR = "2024",
        PAGES = "2508--2514",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126446"}

@inproceedings{bb130403,
        AUTHOR = "Chaffin, A. and Kijak, E. and Claveau, V.",
        TITLE = "Distinctive Image Captioning: Leveraging Ground Truth Captions in {CLIP} Guided Reinforcement Learning",
        BOOKTITLE = ICIP24,
        YEAR = "2024",
        PAGES = "2550--2556",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126447"}

@inproceedings{bb130404,
        AUTHOR = "Jeong, K. and Lee, W. and Nam, W. and Ma, M. and Kang, P.",
        TITLE = "Technical Report of {NICE} Challenge at {CVPR} 2024: Caption Re-ranking Evaluation Using Ensembled {CLIP} and Consensus Scores",
        BOOKTITLE = NICE24,
        YEAR = "2024",
        PAGES = "7366--7372",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126448"}

@inproceedings{bb130405,
        AUTHOR = "Kim, T. and Marsden, M. and Ahn, P. and Kim, S. and Lee, S. and Sala, A. and Kim, S. H.",
        TITLE = "Large-Scale Bidirectional Training for Zero-Shot Image Captioning",
        BOOKTITLE = NICE24,
        YEAR = "2024",
        PAGES = "7373--7383",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126449"}

@inproceedings{bb130406,
        AUTHOR = "Kim, T. and Ahn, P. and Kim, S. and Lee, S. and Marsden, M. and Sala, A. and Kim, S. H. and Han, B. H. and Lee, K. M. and Lee, H. L. and Bae, K. and Wu, X. Y. and Gao, Y. and Zhang, H. L. and Yang, Y. and Guo, W. and Lu, J. F. and Oh, Y. and Cho, J. W. and Kim, D. J. and Kweon, I. S. and Kim, J. and Kang, W. and Jhoo, W. Y. and Roh, B. and Mun, J. and Oh, S. and Ak, K. E. and Lee, G. G. and Xu, Y. and Shen, M. W. and Hwang, K. and Shin, W. S. and Lee, K. and Park, W. and Lee, D. and Kwak, N. and Wang, Y. J. and Wang, Y. and Gu, T. C. and Lv, X. C. and Sun, M.",
        TITLE = "{NICE}: {CVPR} 2023 Challenge on Zero-shot Image Captioning",
        BOOKTITLE = NICE24,
        YEAR = "2024",
        PAGES = "7356--7365",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126450"}

@inproceedings{bb130407,
        AUTHOR = "Urbanek, J. and Bordes, F. and Astolfi, P. and Williamson, M. and Sharma, V. and Romero Soriano, A.",
        TITLE = "A Picture is Worth More Than 77 Text Tokens: Evaluating {CLIP}-Style Models on Dense Captions",
        BOOKTITLE = CVPR24,
        YEAR = "2024",
        PAGES = "26690--26699",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126451"}

@inproceedings{bb130408,
        AUTHOR = "Nebbia, G. and Kovashka, A.",
        TITLE = "Image-caption difficulty for efficient weakly-supervised object detection from in-the-wild data",
        BOOKTITLE = L3D-IVU24,
        YEAR = "2024",
        PAGES = "2596--2605",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126452"}

@inproceedings{bb130409,
        AUTHOR = "Sakaino, H. and Phuong, T. N. and Duy, V. N.",
        TITLE = "{PV-Cap}: {3D} Dynamic Scene Understanding Through Open Physics-based Vocabulary",
        BOOKTITLE = AICity24,
        YEAR = "2024",
        PAGES = "7932--7942",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126453"}

@inproceedings{bb130410,
        AUTHOR = "Kong, F. and Chen, Y. B. and Cai, J. R. and Modolo, D.",
        TITLE = "Hyperbolic Learning with Synthetic Captions for Open-World Detection",
        BOOKTITLE = CVPR24,
        YEAR = "2024",
        PAGES = "16762--16771",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126454"}

@inproceedings{bb130411,
        AUTHOR = "Zeng, Z. Q. and Xie, Y. and Zhang, H. and Chen, C. and Chen, B. and Wang, Z. J.",
        TITLE = "{MeaCap}: Memory-Augmented Zero-shot Image Captioning",
        BOOKTITLE = CVPR24,
        YEAR = "2024",
        PAGES = "14100--14110",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126455"}

@inproceedings{bb130412,
        AUTHOR = "Wada, Y. and Kaneda, K. and Saito, D. and Sugiura, K.",
        TITLE = "Polos: Multimodal Metric Learning from Human Feedback for Image Captioning",
        BOOKTITLE = CVPR24,
        YEAR = "2024",
        PAGES = "13559--13568",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126456"}

@inproceedings{bb130413,
        AUTHOR = "Huang, X. K. and Wang, J. F. and Tang, Y. S. and Zhang, Z. and Hu, H. and Lu, J. W. and Wang, L. J. and Liu, Z. C.",
        TITLE = "Segment and Caption Anything",
        BOOKTITLE = CVPR24,
        YEAR = "2024",
        PAGES = "13405--13417",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126457"}

@inproceedings{bb130414,
        AUTHOR = "Ge, Y. H. and Zeng, X. H. and Huffman, J. S. and Lin, T. Y. and Liu, M. Y. and Cui, Y.",
        TITLE = "Visual Fact Checker: Enabling High-Fidelity Detailed Caption Generation",
        BOOKTITLE = CVPR24,
        YEAR = "2024",
        PAGES = "14033--14042",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126458"}

@inproceedings{bb130415,
        AUTHOR = "Ruan, J. and Wu, Y. and Wan, X. J. and Zhu, Y. S.",
        TITLE = "Describe Images in a Boring Way: Towards Cross-Modal Sarcasm Generation",
        BOOKTITLE = WACV24,
        YEAR = "2024",
        PAGES = "5689--5698",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126459"}

@inproceedings{bb130416,
        AUTHOR = "Hirsch, E. and Tal, A.",
        TITLE = "{CLID}: Controlled-Length Image Descriptions with Limited Data",
        BOOKTITLE = WACV24,
        YEAR = "2024",
        PAGES = "5519--5529",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126460"}

@inproceedings{bb130417,
        AUTHOR = "Petryk, S. and Whitehead, S. and Gonzalez, J. E. and Darrell, T. J. and Rohrbach, A. and Rohrbach, M.",
        TITLE = "Simple Token-Level Confidence Improves Caption Correctness",
        BOOKTITLE = WACV24,
        YEAR = "2024",
        PAGES = "5730--5740",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126461"}

@inproceedings{bb130418,
        AUTHOR = "Sabir, A.",
        TITLE = "Word to Sentence Visual Semantic Similarity for Caption Generation: Lessons Learned",
        BOOKTITLE = MVA23,
        YEAR = "2023",
        PAGES = "1--5",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126462"}

@inproceedings{bb130419,
        AUTHOR = "Verma, A. and Agarwal, S. and Arya, K. V. and Petrlik, I. and Esparza, R. and Rodriguez, C.",
        TITLE = "Image Captioning with Reinforcement Learning",
        BOOKTITLE = ICCVMI23,
        YEAR = "2023",
        PAGES = "1--7",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126463"}

@inproceedings{bb130420,
        AUTHOR = "Fan, J. and Liang, Y. and Liu, L. and Huang, S. and Zhang, L.",
        TITLE = "{RCA-NOC}: Relative Contrastive Alignment for Novel Object Captioning",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "15464--15474",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126464"}

@inproceedings{bb130421,
        AUTHOR = "Li, R. and Sun, S. Y. and Elhoseiny, M. and Torr, P.",
        TITLE = "{OxfordTVG-HIC}: Can Machine Make Humorous Captions from Images?",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "20236--20246",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126465"}

@inproceedings{bb130422,
        AUTHOR = "Hu, A. and Chen, S. Z. and Zhang, L. and Jin, Q.",
        TITLE = "Explore and Tell: Embodied Visual Captioning in {3D} Environments",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "2482--2491",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126466"}

@inproceedings{bb130423,
        AUTHOR = "Kang, W. and Mun, J. and Lee, S. J. and Roh, B.",
        TITLE = "Noise-aware Learning from Web-crawled Image-Text Data for Image Captioning",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "2930--2940",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126467"}

@inproceedings{bb130424,
        AUTHOR = "Fei, J. J. and Wang, T. and Zhang, J. and He, Z. Y. and Wang, C. J. and Zheng, F.",
        TITLE = "Transferable Decoding with Visual Entities for Zero-Shot Image Captioning",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "3113--3123",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126468"}

@inproceedings{bb130425,
        AUTHOR = "Kornblith, S. and Li, L. and Wang, Z. and Nguyen, T.",
        TITLE = "Guiding image captioning models toward more specific captions",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "15213--15223",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126469"}

@inproceedings{bb130426,
        AUTHOR = "Kim, Y. and Kim, J. and Lee, B. K. and Shin, S. and Ro, Y. M.",
        TITLE = "Mitigating Dataset Bias in Image Captioning Through {CLIP} Confounder-Free Captioning Network",
        BOOKTITLE = ICIP23,
        YEAR = "2023",
        PAGES = "1720--1724",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126470"}

@inproceedings{bb130427,
        AUTHOR = "Dessi, R. and Bevilacqua, M. and Gualdoni, E. and Rakotonirina, N. C. and Franzon, F. and Baroni, M.",
        TITLE = "Cross-Domain Image Captioning with Discriminative Finetuning",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "6935--6944",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126471"}

@inproceedings{bb130428,
        AUTHOR = "Vo, D. M. and Luong, Q. A. and Sugimoto, A. and Nakayama, H.",
        TITLE = "{A-CAP}: Anticipation Captioning with Commonsense Knowledge",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "10824--10833",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126472"}

@inproceedings{bb130429,
        AUTHOR = "Kuo, C. W. and Kira, Z.",
        TITLE = "{HAAV}: Hierarchical Aggregation of Augmented Views for Image Captioning",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "11039--11049",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126473"}

@inproceedings{bb130430,
        AUTHOR = "Ramos, R. and Martins, B. and Elliott, D. and Kementchedjhieva, Y.",
        TITLE = "{SmallCap}: Lightweight Image Captioning Prompted with Retrieval Augmentation",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "2840--2849",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126474"}

@inproceedings{bb130431,
        AUTHOR = "Hirota, Y. and Nakashima, Y. and Garcia, N.",
        TITLE = "Model-Agnostic Gender Debiased Image Captioning",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "15191--15200",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126475"}

@inproceedings{bb130432,
        AUTHOR = "Tran, H. T. T. and Okatani, T.",
        TITLE = "Bright as the Sun: In-depth Analysis of Imagination-driven Image Captioning",
        BOOKTITLE = ACCV22,
        VOLUME = "IV",
        YEAR = "2022",
        PAGES = "675--691",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126476"}

@inproceedings{bb130433,
        AUTHOR = "Phueaksri, I. and Kastner, M. A. and Kawanishi, Y. and Komamizu, T. and Ide, I.",
        TITLE = "Towards Captioning an Image Collection from a Combined Scene Graph Representation Approach",
        BOOKTITLE = MMMod23,
        VOLUME = "I",
        YEAR = "2023",
        PAGES = "178--190",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126477"}

@inproceedings{bb130434,
        AUTHOR = "Honda, U. and Watanabe, T. and Matsumoto, Y.",
        TITLE = "Switching to Discriminative Image Captioning by Relieving a Bottleneck of Reinforcement Learning",
        BOOKTITLE = WACV23,
        YEAR = "2023",
        PAGES = "1124--1134",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126478"}

@inproceedings{bb130435,
        AUTHOR = "Zhang, Y. Y. and Wang, J. N. and Wu, H. and Xu, W. J.",
        TITLE = "Distinctive Image Captioning via {CLIP} Guided Group Optimization",
        BOOKTITLE = CMHRI22,
        YEAR = "2022",
        PAGES = "223--238",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126479"}

@inproceedings{bb130436,
        AUTHOR = "Sui, J. H. and Yu, H. M. and Liang, X. Y. and Ping, P.",
        TITLE = "Image Caption Method Based on Graph Attention Network with Global Context",
        BOOKTITLE = ICIVC22,
        YEAR = "2022",
        PAGES = "480--487",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126480"}

@inproceedings{bb130437,
        AUTHOR = "Arguello, P. and Lopez, J. and Hinojosa, C. and Arguello, H.",
        TITLE = "Optics Lens Design for Privacy-Preserving Scene Captioning",
        BOOKTITLE = ICIP22,
        YEAR = "2022",
        PAGES = "3551--3555",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126481"}

@inproceedings{bb130438,
        AUTHOR = "Meng, Z. H. and Yang, D. and Cao, X. F. and Shah, A. and Lim, S. N.",
        TITLE = "Object-Centric Unsupervised Image Captioning",
        BOOKTITLE = ECCV22,
        VOLUME = "XXXVI",
        YEAR = "2022",
        PAGES = "219--235",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126482"}

@inproceedings{bb130439,
        AUTHOR = "Wang, Z. and Chen, L. and Ma, W. B. and Han, G. X. and Niu, Y. and Shao, J. and Xiao, J.",
        TITLE = "Explicit Image Caption Editing",
        BOOKTITLE = ECCV22,
        VOLUME = "XXXVI",
        YEAR = "2022",
        PAGES = "113--129",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126483"}

@inproceedings{bb130440,
        AUTHOR = "Jiao, Y. and Chen, S. X. and Jie, Z. Q. and Chen, J. J. and Ma, L. and Jiang, Y. G.",
        TITLE = "{MORE}: Multi-Order {RElation} Mining for Dense Captioning in {3D} Scenes",
        BOOKTITLE = ECCV22,
        VOLUME = "XXXV",
        YEAR = "2022",
        PAGES = "528--545",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126484"}

@inproceedings{bb130441,
        AUTHOR = "Nagrani, A. and Seo, P. H. and Seybold, B. and Hauth, A. and Manen, S. and Sun, C. and Schmid, C.",
        TITLE = "Learning Audio-Video Modalities from Image Captions",
        BOOKTITLE = ECCV22,
        VOLUME = "XIV",
        YEAR = "2022",
        PAGES = "407--426",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126485"}

@inproceedings{bb130442,
        AUTHOR = "Tewel, Y. and Shalev, Y. and Schwartz, I. and Wolf, L. B.",
        TITLE = "{ZeroCap}: Zero-Shot Image-to-Text Generation for Visual-Semantic Arithmetic",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "17897--17907",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126486"}

@inproceedings{bb130443,
        AUTHOR = "Truong, P. and Danelljan, M. and Yu, F. and Van Gool, L. J.",
        TITLE = "Probabilistic Warp Consistency for Weakly-Supervised Semantic Correspondences",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "8698--8708",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126487"}

@inproceedings{bb130444,
        AUTHOR = "Chan, D. M. and Myers, A. and Vijayanarasimhan, S. and Ross, D. A. and Seybold, B. and Canny, J. F.",
        TITLE = "What's in a Caption? Dataset-Specific Linguistic Diversity and Its Effect on Visual Description Models and Metrics",
        BOOKTITLE = VDU22,
        YEAR = "2022",
        PAGES = "4739--4748",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126488"}

@inproceedings{bb130445,
        AUTHOR = "Popattia, M. and Rafi, M. and Qureshi, R. and Nawaz, S.",
        TITLE = "Guiding Attention using Partial-Order Relationships for Image Captioning",
        BOOKTITLE = MULA22,
        YEAR = "2022",
        PAGES = "4670--4679",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126489"}

@inproceedings{bb130446,
        AUTHOR = "Mohamed, Y. and Khan, F. F. and Haydarov, K. and Elhoseiny, M.",
        TITLE = "It is Okay to Not Be Okay: Overcoming Emotional Bias in Affective Image Captioning by Contrastive Data Collection",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "21231--21240",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126490"}

@inproceedings{bb130447,
        AUTHOR = "Chen, J. and Guo, H. and Yi, K. and Li, B. Y. and Elhoseiny, M.",
        TITLE = "{VisualGPT}: Data-efficient Adaptation of Pretrained Language Models for Image Captioning",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "18009--18019",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126491"}

@inproceedings{bb130448,
        AUTHOR = "Chen, S. and Song, Z. H. and Haque, M. and Liu, C. and Yang, W.",
        TITLE = "{NICGSlowDown}: Evaluating the Efficiency Robustness of Neural Image Caption Generation Models",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "15344--15353",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126492"}

@inproceedings{bb130449,
        AUTHOR = "Hirota, Y. and Nakashima, Y. and Garcia, N.",
        TITLE = "Quantifying Societal Bias Amplification in Image Captioning",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "13440--13449",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126493"}

@inproceedings{bb130450,
        AUTHOR = "Beddiar, D. and Oussalah, M. and Sepp{\"a}nen, T.",
        TITLE = "Explainability for Medical Image Captioning",
        BOOKTITLE = IPTA22,
        YEAR = "2022",
        PAGES = "1--6",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126494"}

@inproceedings{bb130451,
        AUTHOR = "Bounab, Y. and Oussalah, M. and Ferdenache, A.",
        TITLE = "Reconciling Image Captioning and User's Comments for Urban Tourism",
        BOOKTITLE = IPTA20,
        YEAR = "2020",
        PAGES = "1--6",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126495"}

@inproceedings{bb130452,
        AUTHOR = "Zha, Z. W. and Zhou, P. F. and Bai, C.",
        TITLE = "Exploring Implicit and Explicit Relations with the Dual Relation-Aware Network for Image Captioning",
        BOOKTITLE = MMMod22,
        VOLUME = "II",
        YEAR = "2022",
        PAGES = "97--108",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126496"}

@inproceedings{bb130453,
        AUTHOR = "Ruta, D. and Motiian, S. and Faieta, B. and Lin, Z. and Jin, H. L. and Filipkowski, A. and Gilbert, A. and Collomosse, J.",
        TITLE = "{ALADIN}: All Layer Adaptive Instance Normalization for Fine-grained Style Similarity",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "11906--11915",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126497"}

@inproceedings{bb130454,
        AUTHOR = "Nguyen, K. and Tripathi, S. and Du, B. and Guha, T. and Nguyen, T. Q.",
        TITLE = "In Defense of Scene Graphs for Image Captioning",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "1387--1396",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126498"}

@inproceedings{bb130455,
        AUTHOR = "Shi, J. and Li, Y. and Wang, S. J.",
        TITLE = "Partial Off-policy Learning: Balance Accuracy and Diversity for Human-Oriented Image Captioning",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "2167--2176",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126499"}

@inproceedings{bb130456,
        AUTHOR = "Alahmadi, R. and Hahn, J.",
        TITLE = "Improve Image Captioning by Estimating the Gazing Patterns from the Caption",
        BOOKTITLE = WACV22,
        YEAR = "2022",
        PAGES = "2453--2462",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126500"}

@inproceedings{bb130457,
        AUTHOR = "Biten, A. F. and Gomez, L. and Karatzas, D.",
        TITLE = "Let there be a clock on the beach: Reducing Object Hallucination in Image Captioning",
        BOOKTITLE = WACV22,
        YEAR = "2022",
        PAGES = "2473--2482",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126501"}

@inproceedings{bb130458,
        AUTHOR = "Deb, T. and Sadmanee, A. and Bhaumik, K. K. and Ali, A. A. and Amin, M. A. and Rahman, A. K. M. M.",
        TITLE = "Variational Stacked Local Attention Networks for Diverse Video Captioning",
        BOOKTITLE = WACV22,
        YEAR = "2022",
        PAGES = "2493--2502",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126502"}

@inproceedings{bb130459,
        AUTHOR = "Sharif, N. and White, L. and Bennamoun, M. and Liu, W. and Shah, S. A. A.",
        TITLE = "{WEmbSim}: A Simple yet Effective Metric for Image Captioning",
        BOOKTITLE = DICTA20,
        YEAR = "2020",
        PAGES = "1--8",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126503"}

@inproceedings{bb130460,
        AUTHOR = "Qiu, J. Y. and Yang, Y. D. and Wang, X. and Tao, D. C.",
        TITLE = "Scene Essence",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "8318--8329",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126504"}

@inproceedings{bb130461,
        AUTHOR = "Chen, L. and Jiang, Z. H. and Xiao, J. and Liu, W.",
        TITLE = "Human-like Controllable Image Captioning with Verb-specific Semantic Roles",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "16841--16851",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126505"}

@inproceedings{bb130462,
        AUTHOR = "Chen, D. Z. Y. and Gholami, A. and Nie{\ss}ner, M. and Chang, A. X.",
        TITLE = "{Scan2Cap}: Context-aware Dense Captioning in {RGB-D} Scans",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "3192--3202",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126506"}

@inproceedings{bb130463,
        AUTHOR = "Luong, Q. A. and Vo, D. M. and Sugimoto, A.",
        TITLE = "Saliency based Subject Selection for Diverse Image Captioning",
        BOOKTITLE = MVA21,
        YEAR = "2021",
        PAGES = "1--5",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126507"}

@inproceedings{bb130464,
        AUTHOR = "Sharif, N. and Bennamoun, M. and Liu, W. and Shah, S. A. A.",
        TITLE = "{SubICap}: Towards Subword-informed Image Captioning",
        BOOKTITLE = WACV21,
        YEAR = "2021",
        PAGES = "3539--3540",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126508"}

@inproceedings{bb130465,
        AUTHOR = "Umemura, K. and Kastner, M. A. and Ide, I. and Kawanishi, Y. and Hirayama, T. and Doman, K. and Deguchi, D. and Murase, H.",
        TITLE = "Tell as You Imagine: Sentence Imageability-aware Image Captioning",
        BOOKTITLE = MMMod21,
        VOLUME = "II",
        YEAR = "2021",
        PAGES = "62--73",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126509"}

@inproceedings{bb130466,
        AUTHOR = "Hallonquist, N. and Geman, D. and Younes, L.",
        TITLE = "Graph Discovery for Visual Test Generation",
        BOOKTITLE = ICPR21,
        YEAR = "2021",
        PAGES = "7500--7507",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126510"}

@inproceedings{bb130467,
        AUTHOR = "Li, X. J. and Yang, C. and Chen, S. L. and Zhu, C. and Yin, X. C.",
        TITLE = "Semantic Bilinear Pooling for Fine-Grained Recognition",
        BOOKTITLE = ICPR21,
        YEAR = "2021",
        PAGES = "3660--3666",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126511"}

@inproceedings{bb130468,
        AUTHOR = "Kalimuthu, M. and Mogadala, A. and Mosbach, M. and Klakow, D.",
        TITLE = "Fusion Models for Improved Image Captioning",
        BOOKTITLE = MMDLCA20,
        YEAR = "2020",
        PAGES = "381--395",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126512"}

@inproceedings{bb130469,
        AUTHOR = "Cetinic, E.",
        TITLE = "Iconographic Image Captioning for Artworks",
        BOOKTITLE = FAPER20,
        YEAR = "2020",
        PAGES = "502--516",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126513"}

@inproceedings{bb130470,
        AUTHOR = "Huang, Y. Q. and Chen, J. S.",
        TITLE = "Show, Conceive and Tell: Image Captioning with Prospective Linguistic Information",
        BOOKTITLE = ACCV20,
        VOLUME = "VI",
        YEAR = "2020",
        PAGES = "478--494",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126514"}

@inproceedings{bb130471,
        AUTHOR = "Deng, C. R. and Ding, N. and Tan, M. K. and Wu, Q.",
        TITLE = "Length-controllable Image Captioning",
        BOOKTITLE = ECCV20,
        VOLUME = "XIII",
        YEAR = "2020",
        PAGES = "712--729",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126515"}

@inproceedings{bb130472,
        AUTHOR = "Gurari, D. and Zhao, Y. N. and Zhang, M. and Bhattacharya, N.",
        TITLE = "Captioning Images Taken by People Who Are Blind",
        BOOKTITLE = ECCV20,
        VOLUME = "XVII",
        YEAR = "2020",
        PAGES = "417--434",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126516"}

@inproceedings{bb130473,
        AUTHOR = "Zhong, Y. W. and Wang, L. W. and Chen, J. S. and Yu, D. and Li, Y.",
        TITLE = "Comprehensive Image Captioning via Scene Graph Decomposition",
        BOOKTITLE = ECCV20,
        VOLUME = "XIV",
        YEAR = "2020",
        PAGES = "211--229",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126517"}

@inproceedings{bb130474,
        AUTHOR = "Wang, Z. and Feng, B. and Narasimhan, K. and Russakovsky, O.",
        TITLE = "Towards Unique and Informative Captioning of Images",
        BOOKTITLE = ECCV20,
        VOLUME = "VII",
        YEAR = "2020",
        PAGES = "629--644",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126518"}

@inproceedings{bb130475,
        AUTHOR = "Sidorov, O. and Hu, R. H. and Rohrbach, M. and Singh, A.",
        TITLE = "{TextCaps}: A Dataset for Image Captioning with Reading Comprehension",
        BOOKTITLE = ECCV20,
        VOLUME = "II",
        YEAR = "2020",
        PAGES = "742--758",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126519"}

@inproceedings{bb130476,
        AUTHOR = "Durand, T.",
        TITLE = "Learning User Representations for Open Vocabulary Image Hashtag Prediction",
        BOOKTITLE = CVPR20,
        YEAR = "2020",
        PAGES = "9766--9775",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126520"}

@inproceedings{bb130477,
        AUTHOR = "Prabhudesai, M. and Tung, H. Y. F. and Javed, S. A. and Sieb, M. and Harley, A. W. and Fragkiadaki, K.",
        TITLE = "Embodied Language Grounding With {3D} Visual Feature Representations",
        BOOKTITLE = CVPR20,
        YEAR = "2020",
        PAGES = "2217--2226",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126521"}

@inproceedings{bb130478,
        AUTHOR = "Li, Z. and Tran, Q. and Mai, L. and Lin, Z. and Yuille, A. L.",
        TITLE = "Context-Aware Group Captioning via Self-Attention and Contrastive Features",
        BOOKTITLE = CVPR20,
        YEAR = "2020",
        PAGES = "3437--3447",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126522"}

@inproceedings{bb130479,
        AUTHOR = "Zhou, Y. and Wang, M. and Liu, D. and Hu, Z. and Zhang, H.",
        TITLE = "More Grounded Image Captioning by Distilling Image-Text Matching Model",
        BOOKTITLE = CVPR20,
        YEAR = "2020",
        PAGES = "4776--4785",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126523"}

@inproceedings{bb130480,
        AUTHOR = "Sammani, F. and Melas Kyriazi, L.",
        TITLE = "Show, Edit and Tell: A Framework for Editing Image Captions",
        BOOKTITLE = CVPR20,
        YEAR = "2020",
        PAGES = "4807--4815",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126524"}

@inproceedings{bb130481,
        AUTHOR = "Chen, S. and Jin, Q. and Wang, P. and Wu, Q.",
        TITLE = "Say As You Wish: Fine-Grained Control of Image Caption Generation With Abstract Scene Graphs",
        BOOKTITLE = CVPR20,
        YEAR = "2020",
        PAGES = "9959--9968",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126525"}

@inproceedings{bb130482,
        AUTHOR = "Guo, L. and Liu, J. and Zhu, X. and Yao, P. and Lu, S. and Lu, H.",
        TITLE = "Normalized and Geometry-Aware Self-Attention Network for Image Captioning",
        BOOKTITLE = CVPR20,
        YEAR = "2020",
        PAGES = "10324--10333",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126526"}

@inproceedings{bb130483,
        AUTHOR = "Chen, J. and Jin, Q.",
        TITLE = "Better Captioning With Sequence-Level Exploration",
        BOOKTITLE = CVPR20,
        YEAR = "2020",
        PAGES = "10887--10896",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126527"}

@inproceedings{bb130484,
        AUTHOR = "Pan, Y. and Yao, T. and Li, Y. and Mei, T.",
        TITLE = "{X-Linear} Attention Networks for Image Captioning",
        BOOKTITLE = CVPR20,
        YEAR = "2020",
        PAGES = "10968--10977",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126528"}

@inproceedings{bb130485,
        AUTHOR = "Park, G. and Han, C. and Kim, D. and Yoon, W. J.",
        TITLE = "{MHSAN}: Multi-Head Self-Attention Network for Visual Semantic Embedding",
        BOOKTITLE = WACV20,
        YEAR = "2020",
        PAGES = "1507--1515",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126529"}

@inproceedings{bb130486,
        AUTHOR = "Chen, C. and Zhang, R. and Koh, E. and Kim, S. and Cohen, S. and Rossi, R.",
        TITLE = "Figure Captioning with Relation Maps for Reasoning",
        BOOKTITLE = WACV20,
        YEAR = "2020",
        PAGES = "1526--1534",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126530"}

@inproceedings{bb130487,
        AUTHOR = "He, S. and Tavakoli, H. R. and Borji, A. and Pugeault, N.",
        TITLE = "Human Attention in Image Captioning: Dataset and Analysis",
        BOOKTITLE = ICCV19,
        YEAR = "2019",
        PAGES = "8528--8537",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126531"}

@inproceedings{bb130488,
  author    = {Huang, L. and Wang, W. and Chen, J. and Wei, X.},
  title     = {Attention on Attention for Image Captioning},
  booktitle = ICCV19,
  year      = {2019},
  pages     = {4633-4642},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT126532}
}

@inproceedings{bb130489,
  author    = {Yao, T. and Pan, Y. and Li, Y. and Mei, T.},
  title     = {Hierarchy Parsing for Image Captioning},
  booktitle = ICCV19,
  year      = {2019},
  pages     = {2621-2629},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT126533}
}

@inproceedings{bb130490,
  author    = {Liu, L. and Tang, J. and Wan, X. and Guo, Z.},
  title     = {Generating Diverse and Descriptive Image Captions Using Visual
Paraphrases},
  booktitle = ICCV19,
  year      = {2019},
  pages     = {4239-4248},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT126534}
}

@inproceedings{bb130491,
  author    = {Ke, L. and Pei, W. and Li, R. and Shen, X. and Tai, Y.},
  title     = {Reflective Decoding Network for Image Captioning},
  booktitle = ICCV19,
  year      = {2019},
  pages     = {8887-8896},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT126535}
}

@inproceedings{bb130492,
  author    = {Vered, G. and Oren, G. and Atzmon, Y. and Chechik, G.},
  title     = {Joint Optimization for Cooperative Image Captioning},
  booktitle = ICCV19,
  year      = {2019},
  pages     = {8897-8906},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT126536}
}

@inproceedings{bb130493,
  author    = {Ge, H. and Yan, Z. and Zhang, K. and Zhao, M. and Sun, L.},
  title     = {Exploring Overall Contextual Information for Image Captioning in
Human-Like Cognitive Style},
  booktitle = ICCV19,
  year      = {2019},
  pages     = {1754-1763},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT126537}
}

@inproceedings{bb130494,
  author    = {Agrawal, H. and Desai, K. and Wang, Y. and Chen, X. and Jain, R. and Johnson, M. and Batra, D. and Parikh, D. and Lee, S. and Anderson, P.},
  title     = {nocaps: novel object captioning at scale},
  booktitle = ICCV19,
  year      = {2019},
  pages     = {8947-8956},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT126538}
}

% Initials are space-separated ("Q. D.", "D. G.", "N. G.") so BibTeX parses each
% one as its own given-name token; written glued, abbreviating styles print only
% the first initial of each author.
@inproceedings{bb130495,
        AUTHOR = "Nguyen, A. and Tran, Q. D. and Do, T. and Reid, I. and Caldwell, D. G. and Tsagarakis, N. G.",
        TITLE = "Object Captioning and Retrieval with Natural Language",
        BOOKTITLE = ACVR19,
        YEAR = "2019",
        PAGES = "2584-2592",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126539"}

@inproceedings{bb130496,
  author    = {Gu, J. and Joty, S. and Cai, J. and Zhao, H. and Yang, X. and Wang, G.},
  title     = {Unpaired Image Captioning via Scene Graph Alignments},
  booktitle = ICCV19,
  year      = {2019},
  pages     = {10322-10331},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT126540}
}

@inproceedings{bb130497,
  author    = {Shen, T. and Kar, A. and Fidler, S.},
  title     = {Learning to Caption Images Through a Lifetime by Asking Questions},
  booktitle = ICCV19,
  year      = {2019},
  pages     = {10392-10401},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT126541}
}

% Initials are space-separated ("A. G.") so BibTeX parses each one as its own
% given-name token; written glued, abbreviating styles print only the first.
@inproceedings{bb130498,
        AUTHOR = "Aneja, J. and Agrawal, H. and Batra, D. and Schwing, A. G.",
        TITLE = "Sequential Latent Spaces for Modeling the Intention During Diverse
Image Captioning",
        BOOKTITLE = ICCV19,
        YEAR = "2019",
        PAGES = "4260-4269",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126542"}

% Initials are space-separated ("L. W.", "A. G.", "D. A.") so BibTeX parses each
% one as its own given-name token; written glued, abbreviating styles print only
% the first initial of each author.
@inproceedings{bb130499,
        AUTHOR = "Deshpande, A. and Aneja, J. and Wang, L. W. and Schwing, A. G. and Forsyth, D. A.",
        TITLE = "Fast, Diverse and Accurate Image Captioning Guided by Part-Of-Speech",
        BOOKTITLE = CVPR19,
        YEAR = "2019",
        PAGES = "10687-10696",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT126543"}

Last update: Jun 23, 2025 at 20:00:30