Keith Price Bibliography Bibtex Entry (ANCHOR 134900 URL http://dx.doi.org/10.1007/978-3-031-20059-5_32 TYPE CONFERENCE PAGES XXXVI:558-575 YEAR 2022 MONTH NIL BIBSOURCE http://www.visionbib.com/bibliography/match605.html#TT130895 VOLUME NIL JOURNAL ECCV22 AUTHOR Hessel, J. and Hwang, J.D. and Park, J.S. and Zellers, R. and Bhagavatula, C. and Rohrbach, A. and Saenko, K. and Choi, Y. TITLE The Abduction of Sherlock Holmes: A Dataset for Visual Abductive Reasoning)


% Part number "XXXVI" moved out of PAGES into VOLUME; DOI copied from the
% header record that precedes this entry in the file.
@inproceedings{bb134900,
  author    = {Hessel, J. and Hwang, J. D. and Park, J. S. and Zellers, R. and Bhagavatula, C. and Rohrbach, A. and Saenko, K. and Choi, Y.},
  title     = {The Abduction of {Sherlock Holmes}: A Dataset for Visual Abductive Reasoning},
  booktitle = ECCV22,
  volume    = {XXXVI},
  pages     = {558--575},
  year      = {2022},
  doi       = {10.1007/978-3-031-20059-5_32},
  bibsource = {http://www.visionbib.com/bibliography/match605.html#TT130895},
}

% Single-page conference record. VMV04 is a string macro that is not defined
% in the visible part of this file.
@inproceedings{bb134901,
  author    = {Perona, P.},
  title     = {A taxonomy of visual recognition},
  booktitle = VMV04,
  pages     = {187},
  year      = {2004},
  bibsource = {http://www.visionbib.com/bibliography/match605.html#TT130896},
}

% Proceedings volume ("Vol II") split out of PAGES into VOLUME.
@inproceedings{bb134902,
  author    = {Takahashi, T. and Nakanishi, S. and Kuno, Y. and Shirai, Y.},
  title     = {Helping Computer Vision by Verbal and Nonverbal Communication},
  booktitle = ICPR98,
  volume    = {II},
  pages     = {1216--1218},
  year      = {1998},
  bibsource = {http://www.visionbib.com/bibliography/match605.html#TT130897},
}

@inproceedings{bb134903,
  author    = {Satoh, S. and Nakamura, Y. and Kanade, T.},
  title     = {{Name-It}: Naming and Detecting Faces in Video by the Integration of Image and Natural Language Processing},
  booktitle = IJCAI97,
  pages     = {1488--1495},
  year      = {1997},
  bibsource = {http://www.visionbib.com/bibliography/match605.html#TT130898},
}

@inproceedings{bb134904,
  author    = {Satoh, S. and Kanade, T.},
  title     = {{Name-It}: Association Of Face And Name In Video},
  booktitle = CVPR97,
  pages     = {368--373},
  year      = {1997},
  bibsource = {http://www.visionbib.com/bibliography/match605.html#TT130899},
}

% Proceedings volume ("II") split out of PAGES into VOLUME.
@inproceedings{bb134905,
  author    = {Socher, G. and Sagerer, G. F. and Kummert, F. and Fuhr, T.},
  title     = {Talking About {3D} Scenes: Integration of Image and Speech Understanding in a Hybrid Distributed System},
  booktitle = ICIP96,
  volume    = {II},
  pages     = {809--812},
  year      = {1996},
  bibsource = {http://www.visionbib.com/bibliography/match605.html#TT130900},
}

@article{bb134906,
  author    = {Kuniyoshi, Y. and Inaba, M. and Inoue, H.},
  title     = {Learning by Watching: Extracting Reusable Task Knowledge from Visual Observation of Human Performance},
  journal   = RA,
  volume    = {10},
  pages     = {799--822},
  year      = {1994},
  bibsource = {http://www.visionbib.com/bibliography/match606.html#TT130901},
}

@inproceedings{bb134907,
  author    = {Kuniyoshi, Y. and Inoue, H.},
  title     = {Indexicality and dynamic attention control in qualitative recognition of assembly actions},
  booktitle = ECCV92,
  pages     = {874--878},
  year      = {1992},
  bibsource = {http://www.visionbib.com/bibliography/match606.html#TT130902},
}

% PAGES held the placeholder "xx-yy" and has been dropped so it is never
% typeset. TODO(review): fill in the real range (believed to be 254--283;
% confirm against the publisher record).
@article{bb134908,
  author    = {Porway, J. and Wang, Q. C. and Zhu, S. C.},
  title     = {A Hierarchical and Contextual Model for Aerial Image Parsing},
  journal   = IJCV,
  volume    = {88},
  number    = {2},
  month     = jun,
  year      = {2010},
  bibsource = {http://www.visionbib.com/bibliography/match606.html#TT130903},
}

@inproceedings{bb134909,
  author    = {Porway, J. and Wang, K. and Yao, B. and Zhu, S. C.},
  title     = {A hierarchical and contextual model for aerial image understanding},
  booktitle = CVPR08,
  pages     = {1--8},
  year      = {2008},
  bibsource = {http://www.visionbib.com/bibliography/match606.html#TT130904},
}

@inproceedings{bb134910,
  author    = {Si, Z. Z. and Gong, H. F. and Wu, Y. N. and Zhu, S. C.},
  title     = {Learning mixed templates for object recognition},
  booktitle = CVPR09,
  pages     = {272--279},
  year      = {2009},
  bibsource = {http://www.visionbib.com/bibliography/match606.html#TT130905},
}

@article{bb134911,
  author    = {Tu, Z. W. and Bai, X.},
  title     = {Auto-Context and Its Application to High-Level Vision Tasks and {3D} Brain Image Segmentation},
  journal   = PAMI,
  volume    = {32},
  number    = {10},
  pages     = {1744--1757},
  month     = oct,
  year      = {2010},
  bibsource = {http://www.visionbib.com/bibliography/match606.html#TT130906},
}

@inproceedings{bb134912,
  author    = {Tu, Z. W.},
  title     = {Auto-context and its application to high-level vision tasks},
  booktitle = CVPR08,
  pages     = {1--8},
  year      = {2008},
  bibsource = {http://www.visionbib.com/bibliography/match606.html#TT130907},
}

@inproceedings{bb134913,
  author    = {Jones, J. and Hager, G. D. and Khudanpur, S.},
  title     = {Toward Computer Vision Systems That Understand Real-World Assembly Processes},
  booktitle = WACV19,
  pages     = {426--434},
  year      = {2019},
  bibsource = {http://www.visionbib.com/bibliography/match606.html#TT130908},
}

@inproceedings{bb134914,
  author    = {Lampert, C. H.},
  title     = {Partitioning of image datasets using discriminative context information},
  booktitle = CVPR08,
  pages     = {1--8},
  year      = {2008},
  bibsource = {http://www.visionbib.com/bibliography/match606.html#TT130909},
}

@inproceedings{bb134915,
  author    = {Hansen, C. and Henderson, T. C.},
  title     = {Towards the Automatic Generation of Recognition Strategies},
  booktitle = ICCV88,
  pages     = {275--279},
  year      = {1988},
  bibsource = {http://www.visionbib.com/bibliography/match606.html#TT130910},
}

% Proceedings volume ("I") split out of PAGES into VOLUME.
@inproceedings{bb134916,
  author    = {Cantoni, V. and Cei, U. and Ferretti, M. and Lombardi, L.},
  title     = {Towards an Automatic Construction of Object Recognition Strategies},
  booktitle = ICPR88,
  volume    = {I},
  pages     = {371--374},
  year      = {1988},
  bibsource = {http://www.visionbib.com/bibliography/match606.html#TT130911},
}

@inproceedings{bb134917,
  author    = {Cova, G. and Griffini, A. and Lombardi, L.},
  title     = {Object Recognition Strategy in a Multi-Resolution System},
  booktitle = CIAP89,
  pages     = {729--733},
  year      = {1989},
  bibsource = {http://www.visionbib.com/bibliography/match606.html#TT130912},
}

@article{bb134918,
  author    = {Feng, Y. S. and Lapata, M.},
  title     = {Automatic Caption Generation for News Images},
  journal   = PAMI,
  volume    = {35},
  number    = {4},
  pages     = {797--812},
  month     = apr,
  year      = {2013},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130913},
}

@article{bb134919,
  author    = {Vinyals, O. and Toshev, A. and Bengio, S. and Erhan, D.},
  title     = {Show and Tell: Lessons Learned from the 2015 {MSCOCO} Image Captioning Challenge},
  journal   = PAMI,
  volume    = {39},
  number    = {4},
  pages     = {652--663},
  month     = apr,
  year      = {2017},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130914},
}

@inproceedings{bb134920,
  author    = {Vinyals, O. and Toshev, A. and Bengio, S. and Erhan, D.},
  title     = {Show and tell: A neural image caption generator},
  booktitle = CVPR15,
  pages     = {3156--3164},
  year      = {2015},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130915},
}

@article{bb134921,
  author    = {Wang, J. Y. and Zhu, X. T. and Gong, S. G.},
  title     = {Discovering visual concept structure with sparse and incomplete tags},
  journal   = AI,
  volume    = {250},
  number    = {1},
  pages     = {16--36},
  year      = {2017},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130916},
}

@article{bb134922,
  author    = {Kilickaya, M. and Akkus, B. K. and Cakici, R. and Erdem, A. and Erdem, E. and Ikizler Cinbis, N.},
  title     = {Data-driven image captioning via salient region discovery},
  journal   = IET-CV,
  volume    = {11},
  number    = {6},
  pages     = {398--406},
  month     = sep,
  year      = {2017},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130917},
}

@article{bb134923,
  author    = {He, X. D. and Deng, L.},
  title     = {Deep Learning for Image-to-Text Generation: A Technical Overview},
  journal   = SPMag,
  volume    = {34},
  number    = {6},
  pages     = {109--116},
  month     = nov,
  year      = {2017},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130918},
}

% NOTE(review): same title and BIBSOURCE anchor as bb134923, with the author
% order swapped and a single page -- presumably a correction notice; confirm.
@article{bb134924,
  author    = {Deng, L. and He, X. D.},
  title     = {Deep Learning for Image-to-Text Generation: A Technical Overview},
  journal   = SPMag,
  volume    = {35},
  number    = {1},
  pages     = {178},
  month     = jan,
  year      = {2018},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130918},
}

@article{bb134925,
  author    = {Zhang, M. and Yang, Y. and Zhang, H. and Ji, Y. and Shen, H. T. and Chua, T.},
  title     = {More is Better: Precise and Detailed Image Captioning Using Online Positive Recall and Missing Concepts Mining},
  journal   = IP,
  volume    = {28},
  number    = {1},
  pages     = {32--44},
  month     = jan,
  year      = {2019},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130919},
}

@article{bb134926,
  author    = {Gella, S. and Keller, F. and Lapata, M.},
  title     = {Disambiguating Visual Verbs},
  journal   = PAMI,
  volume    = {41},
  number    = {2},
  pages     = {311--322},
  month     = feb,
  year      = {2019},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130920},
}

@article{bb134927,
  author    = {He, X. W. and Shi, B. G. and Bai, X. and Xia, G. S. and Zhang, Z. X. and Dong, W. S.},
  title     = {Image Caption Generation with Part of Speech Guidance},
  journal   = PRL,
  volume    = {119},
  pages     = {229--237},
  year      = {2019},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130921},
}

@article{bb134928,
  author    = {Xiao, X. Y. and Wang, L. F. and Ding, K. and Xiang, S. M. and Pan, C. H.},
  title     = {Dense semantic embedding network for image captioning},
  journal   = PR,
  volume    = {90},
  pages     = {285--296},
  year      = {2019},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130922},
}

@article{bb134929,
  author    = {Liu, X. X. and Xu, Q. Y. and Wang, N.},
  title     = {A survey on deep neural network-based image captioning},
  journal   = VC,
  volume    = {35},
  number    = {3},
  pages     = {445--470},
  month     = mar,
  year      = {2019},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130923},
}

% PAGES is an article number rather than a page range; kept verbatim.
@article{bb134930,
  author    = {Hossain, M. Z. and Sohel, F. and Shiratuddin, M. F. and Laga, H.},
  title     = {A Comprehensive Survey of Deep Learning for Image Captioning},
  journal   = Surveys,
  volume    = {51},
  number    = {6},
  pages     = {Article No 118},
  month     = feb,
  year      = {2019},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130924},
}

@article{bb134931,
  author    = {Li, X. and Jiang, S.},
  title     = {Know More Say Less: Image Captioning Based on Scene Graphs},
  journal   = MultMed,
  volume    = {21},
  number    = {8},
  pages     = {2117--2130},
  month     = aug,
  year      = {2019},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130925},
}

@article{bb134932,
  author    = {Sharif, N. and White, L. and Bennamoun, M. and Liu, W. and Shah, S. A. A.},
  title     = {{LCEval}: Learned Composite Metric for Caption Evaluation},
  journal   = IJCV,
  volume    = {127},
  number    = {10},
  pages     = {1586--1610},
  month     = oct,
  year      = {2019},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130926},
}

@article{bb134933,
  author    = {Zhou, L. and Zhang, Y. and Jiang, Y. and Zhang, T. and Fan, W.},
  title     = {{Re-Caption}: Saliency-Enhanced Image Captioning Through Two-Phase Learning},
  journal   = IP,
  volume    = {29},
  number    = {1},
  pages     = {694--709},
  year      = {2020},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130927},
}

@article{bb134934,
  author    = {Xiao, X. and Wang, L. and Ding, K. and Xiang, S. and Pan, C.},
  title     = {Deep Hierarchical Encoder-Decoder Network for Image Captioning},
  journal   = MultMed,
  volume    = {21},
  number    = {11},
  pages     = {2942--2956},
  month     = nov,
  year      = {2019},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130928},
}

% YEAR changed 2018 -> 2019: bb134929 in this file dates volume 35, number 3
% of the same journal to March 2019, so number 11 (November) cannot be 2018.
% NOTE(review): confirm against the publisher record.
@article{bb134935,
  author    = {Jiang, T. and Zhang, Z. and Yang, Y.},
  title     = {Modeling coverage with semantic embedding for image caption generation},
  journal   = VC,
  volume    = {35},
  number    = {11},
  pages     = {1655--1665},
  month     = nov,
  year      = {2019},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130929},
}

@article{bb134936,
  author    = {Chen, X. H. and Zhang, M. X. and Wang, Z. and Zuo, L. and Li, B. and Yang, Y.},
  title     = {Leveraging unpaired out-of-domain data for image captioning},
  journal   = PRL,
  volume    = {132},
  pages     = {132--140},
  year      = {2020},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130930},
}

@article{bb134937,
  author    = {Xu, N. and Zhang, H. and Liu, A. and Nie, W. and Su, Y. and Nie, J. and Zhang, Y.},
  title     = {Multi-Level Policy and Reward-Based Deep Reinforcement Learning Framework for Image Captioning},
  journal   = MultMed,
  volume    = {22},
  number    = {5},
  pages     = {1372--1383},
  month     = may,
  year      = {2020},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130931},
}

@article{bb134938,
  author    = {Guo, L. and Liu, J. and Lu, S. and Lu, H.},
  title     = {Show, Tell, and Polish: Ruminant Decoding for Image Captioning},
  journal   = MultMed,
  volume    = {22},
  number    = {8},
  pages     = {2149--2162},
  month     = aug,
  year      = {2020},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130932},
}

@article{bb134939,
  author    = {Feng, Q. and Wu, Y. and Fan, H. and Yan, C. and Xu, M. and Yang, Y.},
  title     = {Cascaded Revision Network for Novel Object Captioning},
  journal   = CirSysVideo,
  volume    = {30},
  number    = {10},
  pages     = {3413--3421},
  month     = oct,
  year      = {2020},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130933},
}

@article{bb134940,
  author    = {Shilpa, M. and He, J. and Zhao, Y. J. and Sun, B. and Yu, L. J.},
  title     = {Feedback evaluations to promote image captioning},
  journal   = IET-IPR,
  volume    = {14},
  number    = {13},
  pages     = {3021--3027},
  month     = nov,
  year      = {2020},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130934},
}

@article{bb134941,
  author    = {Liu, H. and Zhang, S. and Lin, K. and Wen, J. and Li, J. and Hu, X.},
  title     = {Vocabulary-Wide Credit Assignment for Training Image Captioning Models},
  journal   = IP,
  volume    = {30},
  pages     = {2450--2460},
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130935},
}

@article{bb134942,
  author    = {Xu, N. and Tian, H. S. and Wang, Y. H. and Nie, W. Z. and Song, D. and Liu, A. A. and Liu, W.},
  title     = {Coupled-dynamic learning for vision and language: Exploring Interaction between different tasks},
  journal   = PR,
  volume    = {113},
  pages     = {107829},
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130936},
}

@article{bb134943,
  author    = {Yang, L. and Wang, H. and Tang, P. and Li, Q.},
  title     = {{CaptionNet}: A Tailor-made Recurrent Neural Network for Generating Image Descriptions},
  journal   = MultMed,
  volume    = {23},
  pages     = {835--845},
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130937},
}

@article{bb134944,
  author    = {Liu, A. A. and Wang, Y. H. and Xu, N. and Liu, S. and Li, X. Y.},
  title     = {Scene-Graph-Guided message passing network for dense captioning},
  journal   = PRL,
  volume    = {145},
  pages     = {187--193},
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130938},
}

@article{bb134945,
  author    = {Zhang, L. and Zhang, Y. S. and Zhao, X. and Zou, Z. X.},
  title     = {Image captioning via proximal policy optimization},
  journal   = IVC,
  volume    = {108},
  pages     = {104126},
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130939},
}

@article{bb134946,
  author    = {Wu, J. and Chen, T. S. and Wu, H. F. and Yang, Z. and Luo, G. C. and Lin, L.},
  title     = {Fine-Grained Image Captioning With Global-Local Discriminative Objective},
  journal   = MultMed,
  volume    = {23},
  pages     = {2413--2427},
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130940},
}

@article{bb134947,
  author    = {Wu, L. X. and Xu, M. and Sang, L. and Yao, T. and Mei, T.},
  title     = {Noise Augmented Double-Stream Graph Convolutional Networks for Image Captioning},
  journal   = CirSysVideo,
  volume    = {31},
  number    = {8},
  pages     = {3118--3127},
  month     = aug,
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130941},
}

@article{bb134948,
  author    = {Nivedita, M. and Chandrashekar, P. and Mahapatra, S. and Phamila, Y. A. V. and Selvaperumal, S. K.},
  title     = {Image Captioning for Video Surveillance System using Neural Networks},
  journal   = IJIG,
  volume    = {21},
  number    = {4},
  pages     = {2150044},
  month     = oct,
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130942},
}

@article{bb134949,
  author    = {Zha, Z. J. and Liu, D. and Zhang, H. W. and Zhang, Y. D. and Wu, F.},
  title     = {Context-Aware Visual Policy Network for Fine-Grained Image Captioning},
  journal   = PAMI,
  volume    = {44},
  number    = {2},
  pages     = {710--722},
  month     = feb,
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130943},
}

@article{bb134950,
  author    = {Luo, G. F. and Cheng, L. J. and Jing, C. and Zhao, C. and Song, G. Z.},
  title     = {A thorough review of models, evaluation metrics, and datasets on image captioning},
  journal   = IET-IPR,
  volume    = {16},
  number    = {2},
  pages     = {311--332},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130944},
}

% Title casing typo fixed: "semantic-Constrained" -> "Semantic-Constrained".
@article{bb134951,
  author    = {Ben, H. X. and Pan, Y. W. and Li, Y. and Yao, T. and Hong, R. C. and Wang, M. and Mei, T.},
  title     = {Unpaired Image Captioning With Semantic-Constrained Self-Learning},
  journal   = MultMed,
  volume    = {24},
  pages     = {904--916},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130945},
}

@article{bb134952,
  author    = {Song, P. P. and Guo, D. and Zhou, J. X. and Xu, M. L. and Wang, M.},
  title     = {Memorial {GAN} With Joint Semantic Optimization for Unpaired Image Captioning},
  journal   = Cyber,
  volume    = {53},
  number    = {7},
  pages     = {4388--4399},
  month     = jul,
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130946},
}

@inproceedings{bb134953,
  author    = {Li, Y. and Yao, T. and Pan, Y. W. and Chao, H. Y. and Mei, T.},
  title     = {Pointing Novel Objects in Image Captioning},
  booktitle = CVPR19,
  pages     = {12489--12498},
  year      = {2019},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130947},
}

@article{bb134954,
  author    = {Yang, X. and Zhang, H. W. and Cai, J. F.},
  title     = {Auto-Encoding and Distilling Scene Graphs for Image Captioning},
  journal   = PAMI,
  volume    = {44},
  number    = {5},
  pages     = {2313--2327},
  month     = may,
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130948},
}

@article{bb134955,
  author    = {Yang, X. and Zhang, H. W. and Cai, J. F.},
  title     = {Deconfounded Image Captioning: A Causal Retrospect},
  journal   = PAMI,
  volume    = {45},
  number    = {11},
  pages     = {12996--13010},
  month     = nov,
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130949},
}

@inproceedings{bb134956,
  author    = {Yang, X. and Tang, K. and Zhang, H. W. and Cai, J. F.},
  title     = {Auto-Encoding Scene Graphs for Image Captioning},
  booktitle = CVPR19,
  pages     = {10677--10686},
  year      = {2019},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130950},
}

@article{bb134957,
  author    = {Yang, Z. P. and Wang, P. B. and Chu, T. S. and Yang, J.},
  title     = {Human-Centric Image Captioning},
  journal   = PR,
  volume    = {126},
  pages     = {108545},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130951},
}

@article{bb134958,
  author    = {Zhang, M. and Chen, J. X. and Li, P. F. and Jiang, M. and Zhou, Z.},
  title     = {Topic scene graphs for image captioning},
  journal   = IET-CV,
  volume    = {16},
  number    = {4},
  pages     = {364--375},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130952},
}

@article{bb134959,
  author    = {Yu, Q. and Zhang, C. X. and Weng, L. and Xiang, S. M. and Pan, C. H.},
  title     = {Scene captioning with deep fusion of images and point clouds},
  journal   = PRL,
  volume    = {158},
  pages     = {9--15},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130953},
}

@article{bb134960,
  author    = {Chaudhari, C. P. and Devane, S.},
  title     = {Improved Framework using Rider Optimization Algorithm for Precise Image Caption Generation},
  journal   = IJIG,
  volume    = {22},
  number    = {2},
  pages     = {2250021},
  month     = apr,
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130954},
}

@article{bb134961,
  author    = {Li, Y. C. and Wu, C. and Li, L. and Liu, Y. H. and Zhu, J.},
  title     = {Caption Generation From Road Images for Traffic Scene Modeling},
  journal   = ITS,
  volume    = {23},
  number    = {7},
  pages     = {7805--7816},
  month     = jul,
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130955},
}

@article{bb134962,
  author    = {Wang, Y. H. and Xu, N. and Liu, A. A. and Li, W. H. and Zhang, Y. D.},
  title     = {High-Order Interaction Learning for Image Captioning},
  journal   = CirSysVideo,
  volume    = {32},
  number    = {7},
  pages     = {4417--4430},
  month     = jul,
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130956},
}

@article{bb134963,
  author    = {Guo, D. D. and Lu, R. Y. and Chen, B. and Zeng, Z. Q. and Zhou, M. Y.},
  title     = {Matching Visual Features to Hierarchical Semantic Topics for Image Paragraph Captioning},
  journal   = IJCV,
  volume    = {130},
  number    = {8},
  pages     = {1920--1937},
  month     = aug,
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130957},
}

@article{bb134964,
  author    = {Demirel, B. and Cinbis, R. G.},
  title     = {Caption generation on scenes with seen and unseen object categories},
  journal   = IVC,
  volume    = {124},
  pages     = {104515},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130958},
}

@article{bb134965,
  author    = {Wu, X. X. and Zhao, W. T. and Luo, J. B.},
  title     = {Learning Cooperative Neural Modules for Stylized Image Captioning},
  journal   = IJCV,
  volume    = {130},
  number    = {9},
  pages     = {2305--2320},
  month     = sep,
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130959},
}

@article{bb134966,
  author    = {Stefanini, M. and Cornia, M. and Baraldi, L. and Cascianelli, S. and Fiameni, G. and Cucchiara, R.},
  title     = {From Show to Tell: A Survey on Deep Learning-Based Image Captioning},
  journal   = PAMI,
  volume    = {45},
  number    = {1},
  pages     = {539--559},
  month     = jan,
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130960},
}

@article{bb134967,
  author    = {Wu, Y. and Jiang, L. and Yang, Y.},
  title     = {Switchable Novel Object Captioner},
  journal   = PAMI,
  volume    = {45},
  number    = {1},
  pages     = {1162--1173},
  month     = jan,
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130961},
}

@article{bb134968,
  author    = {Yang, X. and Zhang, H. W. and Gao, C. Y. and Cai, J. F.},
  title     = {Learning to Collocate Visual-Linguistic Neural Modules for Image Captioning},
  journal   = IJCV,
  volume    = {131},
  number    = {1},
  pages     = {82--100},
  month     = jan,
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130962},
}

@inproceedings{bb134969,
  author    = {Yang, X. and Zhang, H. W. and Cai, J. F.},
  title     = {Learning to Collocate Neural Modules for Image Captioning},
  booktitle = ICCV19,
  pages     = {4249--4259},
  year      = {2019},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130963},
}

@article{bb134970,
  author    = {Feng, J. L. and Zhao, J. P.},
  title     = {Effectively Utilizing the Category Labels for Image Captioning},
  journal   = IEICE,
  volume    = {E106-D},
  number    = {5},
  pages     = {617--624},
  month     = may,
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130964},
}

@article{bb134971,
  author    = {Wang, D. P. and Hu, Z. Z. and Zhou, Y. and Hong, R. C. and Wang, M.},
  title     = {A Text-Guided Generation and Refinement Model for Image Captioning},
  journal   = MultMed,
  volume    = {25},
  pages     = {2966--2977},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130965},
}

@article{bb134972,
  author    = {Al Qatf, M. and Wang, X. and Hawbani, A. and Abdussalam, A. and Alsamhi, S. H.},
  title     = {Image Captioning With Novel Topics Guidance and Retrieval-Based Topics Re-Weighting},
  journal   = MultMed,
  volume    = {25},
  pages     = {5984--5999},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130966},
}

@article{bb134973,
  author    = {Zhu, P. P. and Wang, X. and Luo, Y. and Sun, Z. L. and Zheng, W. S. and Wang, Y. W. and Chen, C.},
  title     = {Unpaired Image Captioning by Image-Level Weakly-Supervised Visual Concept Recognition},
  journal   = MultMed,
  volume    = {25},
  pages     = {6702--6716},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130967},
}

% NOTE(review): "Triple-Steam" may be a typo for "Triple-Stream"; left as
% recorded until checked against the publisher record.
@article{bb134974,
  author    = {Hu, N. N. and Ming, Y. and Fan, C. X. and Feng, F. and Lyu, B. Y.},
  title     = {{TSFNet}: Triple-Steam Image Captioning},
  journal   = MultMed,
  volume    = {25},
  pages     = {6904--6916},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130968},
}

% PAGES carries a single identifier rather than a range. SP:IC is a string
% macro that is not defined in the visible part of this file.
@article{bb134975,
  author    = {Gonzalez Chavez, O. and Ruiz, G. and Moctezuma, D. and Ramirez delReal, T.},
  title     = {Are metrics measuring what they should? An evaluation of Image Captioning task metrics},
  journal   = SP:IC,
  volume    = {120},
  pages     = {117071},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130969},
}

% NUMBER was "6 2023"; the trailing year duplicated YEAR and is removed.
@article{bb134976,
  author    = {Padate, R. and Jain, A. and Kalla, M. and Sharma, A.},
  title     = {A Widespread Assessment and Open Issues on Image Captioning Models},
  journal   = IJIG,
  volume    = {23},
  number    = {6},
  pages     = {2350057},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130970},
}

@article{bb134977,
  author    = {Shao, Z. and Han, J. G. and Debattista, K. and Pang, Y. W.},
  title     = {Textual Context-Aware Dense Captioning With Diverse Words},
  journal   = MultMed,
  volume    = {25},
  pages     = {8753--8766},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130971},
}

@article{bb134978,
  author    = {Cheng, J. and Wu, F. and Liu, L. and Zhang, Q. and Rutkowski, L. and Tao, D. C.},
  title     = {{InDecGAN}: Learning to Generate Complex Images From Captions via Independent Object-Level Decomposition and Enhancement},
  journal   = MultMed,
  volume    = {25},
  pages     = {8279--8293},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130972},
}

@article{bb134979,
  author    = {Ding, N. and Deng, C. R. and Tan, M. K. and Du, Q. and Ge, Z. W. and Wu, Q.},
  title     = {Image Captioning With Controllable and Adaptive Length Levels},
  journal   = PAMI,
  volume    = {46},
  number    = {2},
  pages     = {764--779},
  month     = feb,
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130973},
}

@inproceedings{bb134980,
  author    = {Xu, G. H. and Niu, S. C. and Tan, M. K. and Luo, Y. C. and Du, Q. and Wu, Q.},
  title     = {Towards Accurate Text-based Image Captioning with Content Diversity Exploration},
  booktitle = CVPR21,
  pages     = {12632--12641},
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130974},
}

@article{bb134981,
  author    = {Zhu, P. P. and Wang, X. and Zhu, L. and Sun, Z. L. and Zheng, W. S. and Wang, Y. W. and Chen, C. W.},
  title     = {Prompt-Based Learning for Unpaired Image Captioning},
  journal   = MultMed,
  volume    = {26},
  pages     = {379--393},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT130975},
}

@article{bb134982,
  AUTHOR    = {Liu, A.A. and Zhai, Y.C. and Xu, N. and Tian, H. and Nie, W.Z. and Zhang, Y.D.},
  TITLE     = {Event-Aware Retrospective Learning for Knowledge-Based Image Captioning},
  JOURNAL   = MultMed,
  VOLUME    = {26},
  YEAR      = {2024},
  PAGES     = {4898-4911},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT130976}
}

@article{bb134983,
  AUTHOR    = {Ajankar, S. and Dutta, T.},
  TITLE     = {Image-Relevant Entities Knowledge-Aware News Image Captioning},
  JOURNAL   = MultMedMag,
  VOLUME    = {31},
  YEAR      = {2024},
  NUMBER    = {1},
  MONTH     = {January},
  PAGES     = {88-98},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT130977}
}

@article{bb134984,
  AUTHOR    = {Dai, Z.Z. and Tran, V. and Markham, A. and Trigoni, N. and Rahman, M.A. and Wijayasingha, L.N.S. and Stankovic, J. and Li, C.},
  TITLE     = {EgoCap and EgoFormer: First-person image captioning with context fusion},
  JOURNAL   = PRL,
  VOLUME    = {181},
  YEAR      = {2024},
  PAGES     = {50-56},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT130978}
}

@article{bb134985,
  AUTHOR    = {Shao, Z. and Han, J.G. and Debattista, K. and Pang, Y.W.},
  TITLE     = {DCMSTRD: End-to-end Dense Captioning via Multi-Scale Transformer Decoding},
  JOURNAL   = MultMed,
  VOLUME    = {26},
  YEAR      = {2024},
  PAGES     = {7581-7593},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT130979}
}

@article{bb134986,
  AUTHOR    = {Cornia, M. and Baraldi, L. and Fiameni, G. and Cucchiara, R.},
  TITLE     = {Generating More Pertinent Captions by Leveraging Semantics and Style on Multi-Source Datasets},
  JOURNAL   = IJCV,
  VOLUME    = {132},
  YEAR      = {2024},
  NUMBER    = {5},
  MONTH     = {May},
  PAGES     = {1701-1720},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT130980}
}

@inproceedings{bb134987,
  AUTHOR    = {Barraco, M. and Sarto, S. and Cornia, M. and Baraldi, L. and Cucchiara, R.},
  TITLE     = {With a Little Help from your own Past: Prototypical Memory Networks for Image Captioning},
  BOOKTITLE = ICCV23,
  YEAR      = {2023},
  PAGES     = {3009-3019},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT130981}
}

% NOTE(review): BOOKTITLE references the ICPR22 venue string macro, matching
% the unquoted macro form used by the other proceedings entries. The macro is
% assumed to be defined with the other venue abbreviations; confirm.
@inproceedings{bb134988,
        AUTHOR = "Barraco, M. and Stefanini, M. and Cornia, M. and Cascianelli, S. and Baraldi, L. and Cucchiara, R.",
        TITLE = "CaMEL: Mean Teacher Learning for Image Captioning",
        BOOKTITLE = ICPR22,
        YEAR = "2022",
        PAGES = "4087-4094",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT130982"}

@inproceedings{bb134989,
  AUTHOR    = {Cornia, M. and Baraldi, L. and Cucchiara, R.},
  TITLE     = {Show, Control and Tell: A Framework for Generating Controllable and Grounded Captions},
  BOOKTITLE = CVPR19,
  YEAR      = {2019},
  PAGES     = {8299-8308},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT130983}
}

@article{bb134990,
  AUTHOR    = {Wang, L.X. and Qiu, H.Q. and Qiu, B. and Meng, F.M. and Wu, Q.B. and Li, H.L.},
  TITLE     = {TridentCap: Image-Fact-Style Trident Semantic Framework for Stylized Image Captioning},
  JOURNAL   = CirSysVideo,
  VOLUME    = {34},
  YEAR      = {2024},
  NUMBER    = {5},
  MONTH     = {May},
  PAGES     = {3563-3575},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT130984}
}

@article{bb134991,
  AUTHOR    = {Zhang, H.N. and Zeng, P.P. and Gao, L. and Lyu, X.Y. and Song, J.K. and Shen, H.T.},
  TITLE     = {SPT: Spatial Pyramid Transformer for Image Captioning},
  JOURNAL   = CirSysVideo,
  VOLUME    = {34},
  YEAR      = {2024},
  NUMBER    = {6},
  MONTH     = {June},
  PAGES     = {4829-4842},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT130985}
}

@article{bb134992,
  AUTHOR    = {Wang, H.Y. and Song, K. and Jiang, X. and He, Z.Q.},
  TITLE     = {ragBERT: Relationship-aligned and grammar-wise BERT model for image captioning},
  JOURNAL   = IVC,
  VOLUME    = {148},
  YEAR      = {2024},
  PAGES     = {105105},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT130986}
}

@article{bb134993,
  AUTHOR    = {Li, J.Y. and Zhang, L. and Zhang, K. and Hu, B. and Xie, H.T. and Mao, Z.D.},
  TITLE     = {Cascade Semantic Prompt Alignment Network for Image Captioning},
  JOURNAL   = CirSysVideo,
  VOLUME    = {34},
  YEAR      = {2024},
  NUMBER    = {7},
  MONTH     = {July},
  PAGES     = {5266-5281},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT130987}
}

@article{bb134994,
  AUTHOR    = {Zou, Y. and Liao, S.Y. and Wang, Q.F.},
  TITLE     = {Chinese image captioning with fusion encoder and visual keyword search},
  JOURNAL   = IET-IPR,
  VOLUME    = {18},
  YEAR      = {2024},
  NUMBER    = {11},
  PAGES     = {3055-3069},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT130988}
}

@article{bb134995,
  AUTHOR    = {Chen, S.J. and Zhu, H.Y. and Li, M.S. and Chen, X. and Guo, P. and Lei, Y.J. and Yu, G. and Li, T. and Chen, T.},
  TITLE     = {Vote2Cap-DETR++: Decoupling Localization and Describing for End-to-End 3D Dense Captioning},
  JOURNAL   = PAMI,
  VOLUME    = {46},
  YEAR      = {2024},
  NUMBER    = {11},
  MONTH     = {November},
  PAGES     = {7331-7347},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT130989}
}

@inproceedings{bb134996,
  AUTHOR    = {Chen, S.J. and Zhu, H.Y. and Chen, X. and Lei, Y.J. and Yu, G. and Chen, T.},
  TITLE     = {End-to-End 3D Dense Captioning with Vote2Cap-DETR},
  BOOKTITLE = CVPR23,
  YEAR      = {2023},
  PAGES     = {11124-11133},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT130990}
}

@article{bb134997,
  AUTHOR    = {Lv, F.X. and Wang, R. and Jing, L.H. and Dai, P.W.},
  TITLE     = {HIST: Hierarchical and sequential transformer for image captioning},
  JOURNAL   = IET-CV,
  VOLUME    = {18},
  YEAR      = {2024},
  NUMBER    = {7},
  PAGES     = {1043-1056},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT130991}
}

@article{bb134998,
  AUTHOR    = {Yang, X.B. and He, S. and Zhang, J. and Ma, S. and Hou, Z.Q. and Sun, W.},
  TITLE     = {Memory positional encoding for image captioning},
  JOURNAL   = SP:IC,
  VOLUME    = {130},
  YEAR      = {2025},
  PAGES     = {117201},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT130992}
}

@article{bb134999,
  AUTHOR    = {Wang, L. and Chen, H.P. and Liu, Y. and Lyu, Y.D.},
  TITLE     = {Regular Constrained Multimodal Fusion for Image Captioning},
  JOURNAL   = CirSysVideo,
  VOLUME    = {34},
  YEAR      = {2024},
  NUMBER    = {11},
  MONTH     = {November},
  PAGES     = {11900-11913},
  BIBSOURCE = {http://www.visionbib.com/bibliography/match607ic1.html#TT130993}
}
Last update: Feb 17, 2026 at 20:06:16