% Image-captioning conference papers, keys bb130400 through bb130499.
% BOOKTITLE values (ECCV24, CVPR24, ...) are bare string macros; they are
% presumably defined in a companion strings file -- confirm it is loaded
% before this file, otherwise BibTeX reports undefined macros.
% VOLUME holds the proceedings part number that the export had prefixed
% to PAGES (e.g. "LXVI: 444-461").

@inproceedings{bb130400,
  AUTHOR    = {Basioti, K. and Abdelsalam, M. A. and Fancellu, F. and Pavlovic, V. and Fazly, A.},
  TITLE     = {{CIC-BART-SSA}: Controllable Image Captioning with Structured Semantic Augmentation},
  BOOKTITLE = ECCV24,
  YEAR      = {2024},
  VOLUME    = {LXVI},
  PAGES     = {444--461},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126444},
}

@inproceedings{bb130401,
  AUTHOR    = {Udo, H. and Koshinaka, T.},
  TITLE     = {Reading is Believing: Revisiting Language Bottleneck Models for Image Classification},
  BOOKTITLE = ICIP24,
  YEAR      = {2024},
  PAGES     = {943--949},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126445},
}

@inproceedings{bb130402,
  AUTHOR    = {Das, S. and Sekhar, C. C.},
  TITLE     = {Leveraging Generated Image Captions for Visual Commonsense Reasoning},
  BOOKTITLE = ICIP24,
  YEAR      = {2024},
  PAGES     = {2508--2514},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126446},
}

@inproceedings{bb130403,
  AUTHOR    = {Chaffin, A. and Kijak, E. and Claveau, V.},
  TITLE     = {Distinctive Image Captioning: Leveraging Ground Truth Captions in {Clip} Guided Reinforcement Learning},
  BOOKTITLE = ICIP24,
  YEAR      = {2024},
  PAGES     = {2550--2556},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126447},
}

@inproceedings{bb130404,
  AUTHOR    = {Jeong, K. and Lee, W. and Nam, W. and Ma, M. and Kang, P.},
  TITLE     = {Technical Report of {NICE} Challenge at {CVPR} 2024: Caption Re-ranking Evaluation Using Ensembled {CLIP} and Consensus Scores},
  BOOKTITLE = NICE24,
  YEAR      = {2024},
  PAGES     = {7366--7372},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126448},
}

@inproceedings{bb130405,
  AUTHOR    = {Kim, T. and Marsden, M. and Ahn, P. and Kim, S. and Lee, S. and Sala, A. and Kim, S. H.},
  TITLE     = {Large-Scale Bidirectional Training for Zero-Shot Image Captioning},
  BOOKTITLE = NICE24,
  YEAR      = {2024},
  PAGES     = {7373--7383},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126449},
}

@inproceedings{bb130406,
  AUTHOR    = {Kim, T. and Ahn, P. and Kim, S. and Lee, S. and Marsden, M. and Sala, A. and Kim, S. H. and Han, B. H. and Lee, K. M. and Lee, H. L. and Bae, K. and Wu, X. Y. and Gao, Y. and Zhang, H. L. and Yang, Y. and Guo, W. and Lu, J. F. and Oh, Y. and Cho, J. W. and Kim, D. J. and Kweon, I. S. and Kim, J. and Kang, W. and Jhoo, W. Y. and Roh, B. and Mun, J. and Oh, S. and Ak, K. E. and Lee, G. G. and Xu, Y. and Shen, M. W. and Hwang, K. and Shin, W. S. and Lee, K. and Park, W. and Lee, D. and Kwak, N. and Wang, Y. J. and Wang, Y. and Gu, T. C. and Lv, X. C. and Sun, M.},
  TITLE     = {{NICE}: {CVPR} 2023 Challenge on Zero-shot Image Captioning},
  BOOKTITLE = NICE24,
  YEAR      = {2024},
  PAGES     = {7356--7365},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126450},
}

@inproceedings{bb130407,
  AUTHOR    = {Urbanek, J. and Bordes, F. and Astolfi, P. and Williamson, M. and Sharma, V. and Romero Soriano, A.},
  TITLE     = {A Picture is Worth More Than 77 Text Tokens: Evaluating {CLIP}-Style Models on Dense Captions},
  BOOKTITLE = CVPR24,
  YEAR      = {2024},
  PAGES     = {26690--26699},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126451},
}

@inproceedings{bb130408,
  AUTHOR    = {Nebbia, G. and Kovashka, A.},
  TITLE     = {Image-caption difficulty for efficient weakly-supervised object detection from in-the-wild data},
  BOOKTITLE = L3D-IVU24,
  YEAR      = {2024},
  PAGES     = {2596--2605},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126452},
}

@inproceedings{bb130409,
  AUTHOR    = {Sakaino, H. and Phuong, T. N. and Duy, V. N.},
  TITLE     = {{PV-Cap}: {3D} Dynamic Scene Understanding Through Open Physics-based Vocabulary},
  BOOKTITLE = AICity24,
  YEAR      = {2024},
  PAGES     = {7932--7942},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126453},
}

@inproceedings{bb130410,
  AUTHOR    = {Kong, F. and Chen, Y. B. and Cai, J. R. and Modolo, D.},
  TITLE     = {Hyperbolic Learning with Synthetic Captions for Open-World Detection},
  BOOKTITLE = CVPR24,
  YEAR      = {2024},
  PAGES     = {16762--16771},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126454},
}

@inproceedings{bb130411,
  AUTHOR    = {Zeng, Z. Q. and Xie, Y. and Zhang, H. and Chen, C. and Chen, B. and Wang, Z. J.},
  TITLE     = {{MeaCap}: Memory-Augmented Zero-shot Image Captioning},
  BOOKTITLE = CVPR24,
  YEAR      = {2024},
  PAGES     = {14100--14110},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126455},
}

@inproceedings{bb130412,
  AUTHOR    = {Wada, Y. and Kaneda, K. and Saito, D. and Sugiura, K.},
  TITLE     = {Polos: Multimodal Metric Learning from Human Feedback for Image Captioning},
  BOOKTITLE = CVPR24,
  YEAR      = {2024},
  PAGES     = {13559--13568},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126456},
}

@inproceedings{bb130413,
  AUTHOR    = {Huang, X. K. and Wang, J. F. and Tang, Y. S. and Zhang, Z. and Hu, H. and Lu, J. W. and Wang, L. J. and Liu, Z. C.},
  TITLE     = {Segment and Caption Anything},
  BOOKTITLE = CVPR24,
  YEAR      = {2024},
  PAGES     = {13405--13417},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126457},
}

@inproceedings{bb130414,
  AUTHOR    = {Ge, Y. H. and Zeng, X. H. and Huffman, J. S. and Lin, T. Y. and Liu, M. Y. and Cui, Y.},
  TITLE     = {Visual Fact Checker: Enabling High-Fidelity Detailed Caption Generation},
  BOOKTITLE = CVPR24,
  YEAR      = {2024},
  PAGES     = {14033--14042},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126458},
}

@inproceedings{bb130415,
  AUTHOR    = {Ruan, J. and Wu, Y. and Wan, X. J. and Zhu, Y. S.},
  TITLE     = {Describe Images in a Boring Way: Towards Cross-Modal Sarcasm Generation},
  BOOKTITLE = WACV24,
  YEAR      = {2024},
  PAGES     = {5689--5698},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126459},
}

@inproceedings{bb130416,
  AUTHOR    = {Hirsch, E. and Tal, A.},
  TITLE     = {{CLID}: Controlled-Length Image Descriptions with Limited Data},
  BOOKTITLE = WACV24,
  YEAR      = {2024},
  PAGES     = {5519--5529},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126460},
}

@inproceedings{bb130417,
  AUTHOR    = {Petryk, S. and Whitehead, S. and Gonzalez, J. E. and Darrell, T. J. and Rohrbach, A. and Rohrbach, M.},
  TITLE     = {Simple Token-Level Confidence Improves Caption Correctness},
  BOOKTITLE = WACV24,
  YEAR      = {2024},
  PAGES     = {5730--5740},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126461},
}

@inproceedings{bb130418,
  AUTHOR    = {Sabir, A.},
  TITLE     = {Word to Sentence Visual Semantic Similarity for Caption Generation: Lessons Learned},
  BOOKTITLE = MVA23,
  YEAR      = {2023},
  PAGES     = {1--5},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126462},
}

@inproceedings{bb130419,
  AUTHOR    = {Verma, A. and Agarwal, S. and Arya, K. V. and Petrlik, I. and Esparza, R. and Rodriguez, C.},
  TITLE     = {Image Captioning with Reinforcement Learning},
  BOOKTITLE = ICCVMI23,
  YEAR      = {2023},
  PAGES     = {1--7},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126463},
}

@inproceedings{bb130420,
  AUTHOR    = {Fan, J. and Liang, Y. and Liu, L. and Huang, S. and Zhang, L.},
  TITLE     = {{RCA-NOC}: Relative Contrastive Alignment for Novel Object Captioning},
  BOOKTITLE = ICCV23,
  YEAR      = {2023},
  PAGES     = {15464--15474},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126464},
}

@inproceedings{bb130421,
  AUTHOR    = {Li, R. and Sun, S. Y. and Elhoseiny, M. and Torr, P.},
  TITLE     = {{OxfordTVG-HIC}: Can Machine Make Humorous Captions from Images?},
  BOOKTITLE = ICCV23,
  YEAR      = {2023},
  PAGES     = {20236--20246},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126465},
}

@inproceedings{bb130422,
  AUTHOR    = {Hu, A. and Chen, S. Z. and Zhang, L. and Jin, Q.},
  TITLE     = {Explore and Tell: Embodied Visual Captioning in {3D} Environments},
  BOOKTITLE = ICCV23,
  YEAR      = {2023},
  PAGES     = {2482--2491},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126466},
}

@inproceedings{bb130423,
  AUTHOR    = {Kang, W. and Mun, J. and Lee, S. J. and Roh, B.},
  TITLE     = {Noise-aware Learning from Web-crawled Image-Text Data for Image Captioning},
  BOOKTITLE = ICCV23,
  YEAR      = {2023},
  PAGES     = {2930--2940},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126467},
}

@inproceedings{bb130424,
  AUTHOR    = {Fei, J. J. and Wang, T. and Zhang, J. and He, Z. Y. and Wang, C. J. and Zheng, F.},
  TITLE     = {Transferable Decoding with Visual Entities for Zero-Shot Image Captioning},
  BOOKTITLE = ICCV23,
  YEAR      = {2023},
  PAGES     = {3113--3123},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126468},
}

@inproceedings{bb130425,
  AUTHOR    = {Kornblith, S. and Li, L. and Wang, Z. and Nguyen, T.},
  TITLE     = {Guiding image captioning models toward more specific captions},
  BOOKTITLE = ICCV23,
  YEAR      = {2023},
  PAGES     = {15213--15223},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126469},
}

@inproceedings{bb130426,
  AUTHOR    = {Kim, Y. and Kim, J. and Lee, B. K. and Shin, S. and Ro, Y. M.},
  TITLE     = {Mitigating Dataset Bias in Image Captioning Through {Clip} Confounder-Free Captioning Network},
  BOOKTITLE = ICIP23,
  YEAR      = {2023},
  PAGES     = {1720--1724},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126470},
}

@inproceedings{bb130427,
  AUTHOR    = {Dessi, R. and Bevilacqua, M. and Gualdoni, E. and Rakotonirina, N. C. and Franzon, F. and Baroni, M.},
  TITLE     = {Cross-Domain Image Captioning with Discriminative Finetuning},
  BOOKTITLE = CVPR23,
  YEAR      = {2023},
  PAGES     = {6935--6944},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126471},
}

@inproceedings{bb130428,
  AUTHOR    = {Vo, D. M. and Luong, Q. A. and Sugimoto, A. and Nakayama, H.},
  TITLE     = {{A-CAP}: Anticipation Captioning with Commonsense Knowledge},
  BOOKTITLE = CVPR23,
  YEAR      = {2023},
  PAGES     = {10824--10833},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126472},
}

@inproceedings{bb130429,
  AUTHOR    = {Kuo, C. W. and Kira, Z.},
  TITLE     = {{HAAV}: Hierarchical Aggregation of Augmented Views for Image Captioning},
  BOOKTITLE = CVPR23,
  YEAR      = {2023},
  PAGES     = {11039--11049},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126473},
}

@inproceedings{bb130430,
  AUTHOR    = {Ramos, R. and Martins, B. and Elliott, D. and Kementchedjhieva, Y.},
  TITLE     = {Smallcap: Lightweight Image Captioning Prompted with Retrieval Augmentation},
  BOOKTITLE = CVPR23,
  YEAR      = {2023},
  PAGES     = {2840--2849},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126474},
}

@inproceedings{bb130431,
  AUTHOR    = {Hirota, Y. and Nakashima, Y. and Garcia, N.},
  TITLE     = {Model-Agnostic Gender Debiased Image Captioning},
  BOOKTITLE = CVPR23,
  YEAR      = {2023},
  PAGES     = {15191--15200},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126475},
}

@inproceedings{bb130432,
  AUTHOR    = {Tran, H. T. T. and Okatani, T.},
  TITLE     = {Bright as the Sun: In-depth Analysis of Imagination-driven Image Captioning},
  BOOKTITLE = ACCV22,
  YEAR      = {2022},
  VOLUME    = {IV},
  PAGES     = {675--691},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126476},
}

@inproceedings{bb130433,
  AUTHOR    = {Phueaksri, I. and Kastner, M. A. and Kawanishi, Y. and Komamizu, T. and Ide, I.},
  TITLE     = {Towards Captioning an Image Collection from a Combined Scene Graph Representation Approach},
  BOOKTITLE = MMMod23,
  YEAR      = {2023},
  VOLUME    = {I},
  PAGES     = {178--190},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126477},
}

@inproceedings{bb130434,
  AUTHOR    = {Honda, U. and Watanabe, T. and Matsumoto, Y.},
  TITLE     = {Switching to Discriminative Image Captioning by Relieving a Bottleneck of Reinforcement Learning},
  BOOKTITLE = WACV23,
  YEAR      = {2023},
  PAGES     = {1124--1134},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126478},
}

@inproceedings{bb130435,
  AUTHOR    = {Zhang, Y. Y. and Wang, J. N. and Wu, H. and Xu, W. J.},
  TITLE     = {Distinctive Image Captioning via {Clip} Guided Group Optimization},
  BOOKTITLE = CMHRI22,
  YEAR      = {2022},
  PAGES     = {223--238},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126479},
}

@inproceedings{bb130436,
  AUTHOR    = {Sui, J. H. and Yu, H. M. and Liang, X. Y. and Ping, P.},
  TITLE     = {Image Caption Method Based on Graph Attention Network with Global Context},
  BOOKTITLE = ICIVC22,
  YEAR      = {2022},
  PAGES     = {480--487},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126480},
}

@inproceedings{bb130437,
  AUTHOR    = {Arguello, P. and Lopez, J. and Hinojosa, C. and Arguello, H.},
  TITLE     = {Optics Lens Design for Privacy-Preserving Scene Captioning},
  BOOKTITLE = ICIP22,
  YEAR      = {2022},
  PAGES     = {3551--3555},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126481},
}

@inproceedings{bb130438,
  AUTHOR    = {Meng, Z. H. and Yang, D. and Cao, X. F. and Shah, A. and Lim, S. N.},
  TITLE     = {Object-Centric Unsupervised Image Captioning},
  BOOKTITLE = ECCV22,
  YEAR      = {2022},
  VOLUME    = {XXXVI},
  PAGES     = {219--235},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126482},
}

@inproceedings{bb130439,
  AUTHOR    = {Wang, Z. and Chen, L. and Ma, W. B. and Han, G. X. and Niu, Y. and Shao, J. and Xiao, J.},
  TITLE     = {Explicit Image Caption Editing},
  BOOKTITLE = ECCV22,
  YEAR      = {2022},
  VOLUME    = {XXXVI},
  PAGES     = {113--129},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126483},
}

@inproceedings{bb130440,
  AUTHOR    = {Jiao, Y. and Chen, S. X. and Jie, Z. Q. and Chen, J. J. and Ma, L. and Jiang, Y. G.},
  TITLE     = {{MORE}: Multi-Order {RElation} Mining for Dense Captioning in {3D} Scenes},
  BOOKTITLE = ECCV22,
  YEAR      = {2022},
  VOLUME    = {XXXV},
  PAGES     = {528--545},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126484},
}

@inproceedings{bb130441,
  AUTHOR    = {Nagrani, A. and Seo, P. H. and Seybold, B. and Hauth, A. and Manen, S. and Sun, C. and Schmid, C.},
  TITLE     = {Learning Audio-Video Modalities from Image Captions},
  BOOKTITLE = ECCV22,
  YEAR      = {2022},
  VOLUME    = {XIV},
  PAGES     = {407--426},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126485},
}

@inproceedings{bb130442,
  AUTHOR    = {Tewel, Y. and Shalev, Y. and Schwartz, I. and Wolf, L. B.},
  TITLE     = {{ZeroCap}: Zero-Shot Image-to-Text Generation for Visual-Semantic Arithmetic},
  BOOKTITLE = CVPR22,
  YEAR      = {2022},
  PAGES     = {17897--17907},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126486},
}

@inproceedings{bb130443,
  AUTHOR    = {Truong, P. and Danelljan, M. and Yu, F. and Van Gool, L. J.},
  TITLE     = {Probabilistic Warp Consistency for Weakly-Supervised Semantic Correspondences},
  BOOKTITLE = CVPR22,
  YEAR      = {2022},
  PAGES     = {8698--8708},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126487},
}

@inproceedings{bb130444,
  AUTHOR    = {Chan, D. M. and Myers, A. and Vijayanarasimhan, S. and Ross, D. A. and Seybold, B. and Canny, J. F.},
  TITLE     = {What's in a Caption? Dataset-Specific Linguistic Diversity and Its Effect on Visual Description Models and Metrics},
  BOOKTITLE = VDU22,
  YEAR      = {2022},
  PAGES     = {4739--4748},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126488},
}

@inproceedings{bb130445,
  AUTHOR    = {Popattia, M. and Rafi, M. and Qureshi, R. and Nawaz, S.},
  TITLE     = {Guiding Attention using Partial-Order Relationships for Image Captioning},
  BOOKTITLE = MULA22,
  YEAR      = {2022},
  PAGES     = {4670--4679},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126489},
}

@inproceedings{bb130446,
  AUTHOR    = {Mohamed, Y. and Khan, F. F. and Haydarov, K. and Elhoseiny, M.},
  TITLE     = {It is Okay to Not Be Okay: Overcoming Emotional Bias in Affective Image Captioning by Contrastive Data Collection},
  BOOKTITLE = CVPR22,
  YEAR      = {2022},
  PAGES     = {21231--21240},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126490},
}

@inproceedings{bb130447,
  AUTHOR    = {Chen, J. and Guo, H. and Yi, K. and Li, B. Y. and Elhoseiny, M.},
  TITLE     = {{VisualGPT}: Data-efficient Adaptation of Pretrained Language Models for Image Captioning},
  BOOKTITLE = CVPR22,
  YEAR      = {2022},
  PAGES     = {18009--18019},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126491},
}

@inproceedings{bb130448,
  AUTHOR    = {Chen, S. and Song, Z. H. and Haque, M. and Liu, C. and Yang, W.},
  TITLE     = {{NICGSlowDown}: Evaluating the Efficiency Robustness of Neural Image Caption Generation Models},
  BOOKTITLE = CVPR22,
  YEAR      = {2022},
  PAGES     = {15344--15353},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126492},
}

@inproceedings{bb130449,
  AUTHOR    = {Hirota, Y. and Nakashima, Y. and Garcia, N.},
  TITLE     = {Quantifying Societal Bias Amplification in Image Captioning},
  BOOKTITLE = CVPR22,
  YEAR      = {2022},
  PAGES     = {13440--13449},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126493},
}

@inproceedings{bb130450,
  AUTHOR    = {Beddiar, D. and Oussalah, M. and Tapio, S.},
  TITLE     = {Explainability for Medical Image Captioning},
  BOOKTITLE = IPTA22,
  YEAR      = {2022},
  PAGES     = {1--6},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126494},
}

@inproceedings{bb130451,
  AUTHOR    = {Bounab, Y. and Oussalah, M. and Ferdenache, A.},
  TITLE     = {Reconciling Image Captioning and User's Comments for Urban Tourism},
  BOOKTITLE = IPTA20,
  YEAR      = {2020},
  PAGES     = {1--6},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126495},
}

@inproceedings{bb130452,
  AUTHOR    = {Zha, Z. W. and Zhou, P. F. and Bai, C.},
  TITLE     = {Exploring Implicit and Explicit Relations with the Dual Relation-Aware Network for Image Captioning},
  BOOKTITLE = MMMod22,
  YEAR      = {2022},
  VOLUME    = {II},
  PAGES     = {97--108},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126496},
}

@inproceedings{bb130453,
  AUTHOR    = {Ruta, D. and Motiian, S. and Faieta, B. and Lin, Z. and Jin, H. L. and Filipkowski, A. and Gilbert, A. and Collomosse, J.},
  TITLE     = {{ALADIN}: All Layer Adaptive Instance Normalization for Fine-grained Style Similarity},
  BOOKTITLE = ICCV21,
  YEAR      = {2021},
  PAGES     = {11906--11915},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126497},
}

@inproceedings{bb130454,
  AUTHOR    = {Nguyen, K. and Tripathi, S. and Du, B. and Guha, T. and Nguyen, T. Q.},
  TITLE     = {In Defense of Scene Graphs for Image Captioning},
  BOOKTITLE = ICCV21,
  YEAR      = {2021},
  PAGES     = {1387--1396},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126498},
}

@inproceedings{bb130455,
  AUTHOR    = {Shi, J. and Li, Y. and Wang, S. J.},
  TITLE     = {Partial Off-policy Learning: Balance Accuracy and Diversity for Human-Oriented Image Captioning},
  BOOKTITLE = ICCV21,
  YEAR      = {2021},
  PAGES     = {2167--2176},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126499},
}

@inproceedings{bb130456,
  AUTHOR    = {Alahmadi, R. and Hahn, J.},
  TITLE     = {Improve Image Captioning by Estimating the Gazing Patterns from the Caption},
  BOOKTITLE = WACV22,
  YEAR      = {2022},
  PAGES     = {2453--2462},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126500},
}

@inproceedings{bb130457,
  AUTHOR    = {Biten, A. F. and Gomez, L. and Karatzas, D.},
  TITLE     = {Let there be a clock on the beach: Reducing Object Hallucination in Image Captioning},
  BOOKTITLE = WACV22,
  YEAR      = {2022},
  PAGES     = {2473--2482},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126501},
}

@inproceedings{bb130458,
  AUTHOR    = {Deb, T. and Sadmanee, A. and Bhaumik, K. K. and Ali, A. A. and Amin, M. A. and Rahman, A. K. M. M.},
  TITLE     = {Variational Stacked Local Attention Networks for Diverse Video Captioning},
  BOOKTITLE = WACV22,
  YEAR      = {2022},
  PAGES     = {2493--2502},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126502},
}

@inproceedings{bb130459,
  AUTHOR    = {Sharif, N. and White, L. and Bennamoun, M. and Liu, W. and Shah, S. A. A.},
  TITLE     = {{WEmbSim}: A Simple yet Effective Metric for Image Captioning},
  BOOKTITLE = DICTA20,
  YEAR      = {2020},
  PAGES     = {1--8},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126503},
}

@inproceedings{bb130460,
  AUTHOR    = {Qiu, J. Y. and Yang, Y. D. and Wang, X. and Tao, D. C.},
  TITLE     = {Scene Essence},
  BOOKTITLE = CVPR21,
  YEAR      = {2021},
  PAGES     = {8318--8329},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126504},
}

@inproceedings{bb130461,
  AUTHOR    = {Chen, L. and Jiang, Z. H. and Xiao, J. and Liu, W.},
  TITLE     = {Human-like Controllable Image Captioning with Verb-specific Semantic Roles},
  BOOKTITLE = CVPR21,
  YEAR      = {2021},
  PAGES     = {16841--16851},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126505},
}

@inproceedings{bb130462,
  AUTHOR    = {Chen, D. Z. Y. and Gholami, A. and Nie{\ss}ner, M. and Chang, A. X.},
  TITLE     = {{Scan2Cap}: Context-aware Dense Captioning in {RGB-D} Scans},
  BOOKTITLE = CVPR21,
  YEAR      = {2021},
  PAGES     = {3192--3202},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126506},
}

@inproceedings{bb130463,
  AUTHOR    = {Luong, Q. A. and Vo, D. M. and Sugimoto, A.},
  TITLE     = {Saliency based Subject Selection for Diverse Image Captioning},
  BOOKTITLE = MVA21,
  YEAR      = {2021},
  PAGES     = {1--5},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126507},
}

@inproceedings{bb130464,
  AUTHOR    = {Sharif, N. and Bennamoun, M. and Liu, W. and Shah, S. A. A.},
  TITLE     = {{SubICap}: Towards Subword-informed Image Captioning},
  BOOKTITLE = WACV21,
  YEAR      = {2021},
  PAGES     = {3539--3540},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126508},
}

@inproceedings{bb130465,
  AUTHOR    = {Umemura, K. and Kastner, M. A. and Ide, I. and Kawanishi, Y. and Hirayama, T. and Doman, K. and Deguchi, D. and Murase, H.},
  TITLE     = {Tell as You Imagine: Sentence Imageability-aware Image Captioning},
  BOOKTITLE = MMMod21,
  YEAR      = {2021},
  VOLUME    = {II},
  PAGES     = {62--73},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126509},
}

@inproceedings{bb130466,
  AUTHOR    = {Hallonquist, N. and German, D. and Younes, L.},
  TITLE     = {Graph Discovery for Visual Test Generation},
  BOOKTITLE = ICPR21,
  YEAR      = {2021},
  PAGES     = {7500--7507},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126510},
}

@inproceedings{bb130467,
  AUTHOR    = {Li, X. J. and Yang, C. and Chen, S. L. and Zhu, C. and Yin, X. C.},
  TITLE     = {Semantic Bilinear Pooling for Fine-Grained Recognition},
  BOOKTITLE = ICPR21,
  YEAR      = {2021},
  PAGES     = {3660--3666},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126511},
}

@inproceedings{bb130468,
  AUTHOR    = {Kalimuthu, M. and Mogadala, A. and Mosbach, M. and Klakow, D.},
  TITLE     = {Fusion Models for Improved Image Captioning},
  BOOKTITLE = MMDLCA20,
  YEAR      = {2020},
  PAGES     = {381--395},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126512},
}

@inproceedings{bb130469,
  AUTHOR    = {Cetinic, E.},
  TITLE     = {Iconographic Image Captioning for Artworks},
  BOOKTITLE = FAPER20,
  YEAR      = {2020},
  PAGES     = {502--516},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126513},
}

@inproceedings{bb130470,
  AUTHOR    = {Huang, Y. Q. and Chen, J. S.},
  TITLE     = {Show, Conceive and Tell: Image Captioning with Prospective Linguistic Information},
  BOOKTITLE = ACCV20,
  YEAR      = {2020},
  VOLUME    = {VI},
  PAGES     = {478--494},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126514},
}

@inproceedings{bb130471,
  AUTHOR    = {Deng, C. R. and Ding, N. and Tan, M. K. and Wu, Q.},
  TITLE     = {Length-controllable Image Captioning},
  BOOKTITLE = ECCV20,
  YEAR      = {2020},
  VOLUME    = {XIII},
  PAGES     = {712--729},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126515},
}

@inproceedings{bb130472,
  AUTHOR    = {Gurari, D. and Zhao, Y. N. and Zhang, M. and Bhattacharya, N.},
  TITLE     = {Captioning Images Taken by People Who Are Blind},
  BOOKTITLE = ECCV20,
  YEAR      = {2020},
  VOLUME    = {XVII},
  PAGES     = {417--434},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126516},
}

@inproceedings{bb130473,
  AUTHOR    = {Zhong, Y. W. and Wang, L. W. and Chen, J. S. and Yu, D. and Li, Y.},
  TITLE     = {Comprehensive Image Captioning via Scene Graph Decomposition},
  BOOKTITLE = ECCV20,
  YEAR      = {2020},
  VOLUME    = {XIV},
  PAGES     = {211--229},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126517},
}

@inproceedings{bb130474,
  AUTHOR    = {Wang, Z. and Feng, B. and Narasimhan, K. and Russakovsky, O.},
  TITLE     = {Towards Unique and Informative Captioning of Images},
  BOOKTITLE = ECCV20,
  YEAR      = {2020},
  VOLUME    = {VII},
  PAGES     = {629--644},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126518},
}

@inproceedings{bb130475,
  AUTHOR    = {Sidorov, O. and Hu, R. H. and Rohrbach, M. and Singh, A.},
  TITLE     = {Textcaps: A Dataset for Image Captioning with Reading Comprehension},
  BOOKTITLE = ECCV20,
  YEAR      = {2020},
  VOLUME    = {II},
  PAGES     = {742--758},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126519},
}

@inproceedings{bb130476,
  AUTHOR    = {Durand, T.},
  TITLE     = {Learning User Representations for Open Vocabulary Image Hashtag Prediction},
  BOOKTITLE = CVPR20,
  YEAR      = {2020},
  PAGES     = {9766--9775},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126520},
}

@inproceedings{bb130477,
  AUTHOR    = {Prabhudesai, M. and Tung, H. Y. F. and Javed, S. A. and Sieb, M. and Harley, A. W. and Fragkiadaki, K.},
  TITLE     = {Embodied Language Grounding With {3D} Visual Feature Representations},
  BOOKTITLE = CVPR20,
  YEAR      = {2020},
  PAGES     = {2217--2226},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126521},
}

@inproceedings{bb130478,
  AUTHOR    = {Li, Z. and Tran, Q. and Mai, L. and Lin, Z. and Yuille, A. L.},
  TITLE     = {Context-Aware Group Captioning via Self-Attention and Contrastive Features},
  BOOKTITLE = CVPR20,
  YEAR      = {2020},
  PAGES     = {3437--3447},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126522},
}

@inproceedings{bb130479,
  AUTHOR    = {Zhou, Y. and Wang, M. and Liu, D. and Hu, Z. and Zhang, H.},
  TITLE     = {More Grounded Image Captioning by Distilling Image-Text Matching Model},
  BOOKTITLE = CVPR20,
  YEAR      = {2020},
  PAGES     = {4776--4785},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126523},
}

@inproceedings{bb130480,
  AUTHOR    = {Sammani, F. and Melas Kyriazi, L.},
  TITLE     = {Show, Edit and Tell: A Framework for Editing Image Captions},
  BOOKTITLE = CVPR20,
  YEAR      = {2020},
  PAGES     = {4807--4815},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126524},
}

@inproceedings{bb130481,
  AUTHOR    = {Chen, S. and Jin, Q. and Wang, P. and Wu, Q.},
  TITLE     = {Say As You Wish: Fine-Grained Control of Image Caption Generation With Abstract Scene Graphs},
  BOOKTITLE = CVPR20,
  YEAR      = {2020},
  PAGES     = {9959--9968},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126525},
}

@inproceedings{bb130482,
  AUTHOR    = {Guo, L. and Liu, J. and Zhu, X. and Yao, P. and Lu, S. and Lu, H.},
  TITLE     = {Normalized and Geometry-Aware Self-Attention Network for Image Captioning},
  BOOKTITLE = CVPR20,
  YEAR      = {2020},
  PAGES     = {10324--10333},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126526},
}

@inproceedings{bb130483,
  AUTHOR    = {Chen, J. and Jin, Q.},
  TITLE     = {Better Captioning With Sequence-Level Exploration},
  BOOKTITLE = CVPR20,
  YEAR      = {2020},
  PAGES     = {10887--10896},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126527},
}

@inproceedings{bb130484,
  AUTHOR    = {Pan, Y. and Yao, T. and Li, Y. and Mei, T.},
  TITLE     = {{X-Linear} Attention Networks for Image Captioning},
  BOOKTITLE = CVPR20,
  YEAR      = {2020},
  PAGES     = {10968--10977},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126528},
}

@inproceedings{bb130485,
  AUTHOR    = {Park, G. and Han, C. and Kim, D. and Yoon, W. J.},
  TITLE     = {{MHSAN}: Multi-Head Self-Attention Network for Visual Semantic Embedding},
  BOOKTITLE = WACV20,
  YEAR      = {2020},
  PAGES     = {1507--1515},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126529},
}

@inproceedings{bb130486,
  AUTHOR    = {Chen, C. and Zhang, R. and Koh, E. and Kim, S. and Cohen, S. and Rossi, R.},
  TITLE     = {Figure Captioning with Relation Maps for Reasoning},
  BOOKTITLE = WACV20,
  YEAR      = {2020},
  PAGES     = {1526--1534},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126530},
}

@inproceedings{bb130487,
  AUTHOR    = {He, S. and Tavakoli, H. R. and Borji, A. and Pugeault, N.},
  TITLE     = {Human Attention in Image Captioning: Dataset and Analysis},
  BOOKTITLE = ICCV19,
  YEAR      = {2019},
  PAGES     = {8528--8537},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126531},
}

@inproceedings{bb130488,
  AUTHOR    = {Huang, L. and Wang, W. and Chen, J. and Wei, X.},
  TITLE     = {Attention on Attention for Image Captioning},
  BOOKTITLE = ICCV19,
  YEAR      = {2019},
  PAGES     = {4633--4642},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126532},
}

@inproceedings{bb130489,
  AUTHOR    = {Yao, T. and Pan, Y. and Li, Y. and Mei, T.},
  TITLE     = {Hierarchy Parsing for Image Captioning},
  BOOKTITLE = ICCV19,
  YEAR      = {2019},
  PAGES     = {2621--2629},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126533},
}

@inproceedings{bb130490,
  AUTHOR    = {Liu, L. and Tang, J. and Wan, X. and Guo, Z.},
  TITLE     = {Generating Diverse and Descriptive Image Captions Using Visual Paraphrases},
  BOOKTITLE = ICCV19,
  YEAR      = {2019},
  PAGES     = {4239--4248},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126534},
}

@inproceedings{bb130491,
  AUTHOR    = {Ke, L. and Pei, W. and Li, R. and Shen, X. and Tai, Y.},
  TITLE     = {Reflective Decoding Network for Image Captioning},
  BOOKTITLE = ICCV19,
  YEAR      = {2019},
  PAGES     = {8887--8896},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126535},
}

@inproceedings{bb130492,
  AUTHOR    = {Vered, G. and Oren, G. and Atzmon, Y. and Chechik, G.},
  TITLE     = {Joint Optimization for Cooperative Image Captioning},
  BOOKTITLE = ICCV19,
  YEAR      = {2019},
  PAGES     = {8897--8906},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126536},
}

@inproceedings{bb130493,
  AUTHOR    = {Ge, H. and Yan, Z. and Zhang, K. and Zhao, M. and Sun, L.},
  TITLE     = {Exploring Overall Contextual Information for Image Captioning in Human-Like Cognitive Style},
  BOOKTITLE = ICCV19,
  YEAR      = {2019},
  PAGES     = {1754--1763},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126537},
}

@inproceedings{bb130494,
  AUTHOR    = {Agrawal, H. and Desai, K. and Wang, Y. and Chen, X. and Jain, R. and Johnson, M. and Batra, D. and Parikh, D. and Lee, S. and Anderson, P.},
  TITLE     = {nocaps: novel object captioning at scale},
  BOOKTITLE = ICCV19,
  YEAR      = {2019},
  PAGES     = {8947--8956},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126538},
}

@inproceedings{bb130495,
  AUTHOR    = {Nguyen, A. and Tran, Q. D. and Do, T. and Reid, I. and Caldwell, D. G. and Tsagarakis, N. G.},
  TITLE     = {Object Captioning and Retrieval with Natural Language},
  BOOKTITLE = ACVR19,
  YEAR      = {2019},
  PAGES     = {2584--2592},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126539},
}

@inproceedings{bb130496,
  AUTHOR    = {Gu, J. and Joty, S. and Cai, J. and Zhao, H. and Yang, X. and Wang, G.},
  TITLE     = {Unpaired Image Captioning via Scene Graph Alignments},
  BOOKTITLE = ICCV19,
  YEAR      = {2019},
  PAGES     = {10322--10331},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126540},
}

@inproceedings{bb130497,
  AUTHOR    = {Shen, T. and Kar, A. and Fidler, S.},
  TITLE     = {Learning to Caption Images Through a Lifetime by Asking Questions},
  BOOKTITLE = ICCV19,
  YEAR      = {2019},
  PAGES     = {10392--10401},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126541},
}

@inproceedings{bb130498,
  AUTHOR    = {Aneja, J. and Agrawal, H. and Batra, D. and Schwing, A. G.},
  TITLE     = {Sequential Latent Spaces for Modeling the Intention During Diverse Image Captioning},
  BOOKTITLE = ICCV19,
  YEAR      = {2019},
  PAGES     = {4260--4269},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126542},
}

@inproceedings{bb130499,
  AUTHOR    = {Deshpande, A. and Aneja, J. and Wang, L. W. and Schwing, A. G. and Forsyth, D. A.},
  TITLE     = {Fast, Diverse and Accurate Image Captioning Guided by Part-Of-Speech},
  BOOKTITLE = CVPR19,
  YEAR      = {2019},
  PAGES     = {10687--10696},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT126543},
}