@article{bb121800,
        AUTHOR = "Helmy, T.",
        TITLE = "A Generic Framework for Semantic Annotation of Images",
        JOURNAL = IJIG,
        VOLUME = "18",
        YEAR = "2018",
        NUMBER = "3",
        MONTH = jul,
        PAGES = "Article 1850013",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ian2.html#TT117913"}

@article{bb121801,
        AUTHOR = "Hu, J. and Lam, K. M. and Lou, P. and Liu, Q. and Deng, W. P.",
        TITLE = "Can a machine have two systems for recognition, like human beings?",
        JOURNAL = JVCIR,
        VOLUME = "56",
        YEAR = "2018",
        PAGES = "275--286",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ian2.html#TT117914"}

@article{bb121802,
        AUTHOR = "Bhagat, P. K. and Choudhary, P.",
        TITLE = "Image annotation: Then and now",
        JOURNAL = IVC,
        VOLUME = "80",
        YEAR = "2018",
        PAGES = "1--23",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ian2.html#TT117915"}

@article{bb121803,
        AUTHOR = "Bazrafkan, S. and Javidnia, H. and Corcoran, P.",
        TITLE = "Latent space mapping for generation of object elements with
corresponding data annotation",
        JOURNAL = PRL,
        VOLUME = "116",
        YEAR = "2018",
        PAGES = "179--186",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ian2.html#TT117916"}

@article{bb121804,
        AUTHOR = "Jiu, M. Y. and Sahbi, H.",
        TITLE = "Deep representation design from deep kernel networks",
        JOURNAL = PR,
        VOLUME = "88",
        YEAR = "2019",
        PAGES = "447--457",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ian2.html#TT117917"}

@article{bb121805,
        AUTHOR = "Foumani, S. N. M. and Nickabadi, A.",
        TITLE = "A probabilistic topic model using deep visual word representation for
simultaneous image classification and annotation",
        JOURNAL = JVCIR,
        VOLUME = "59",
        YEAR = "2019",
        PAGES = "195--203",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ian2.html#TT117918"}

@article{bb121806,
        AUTHOR = "Zhang, J. J. and Wu, Q. and Zhang, J. and Shen, C. H. and Lu, J. F. and Wu, Q. A.",
        TITLE = "Heritage image annotation via collective knowledge",
        JOURNAL = PR,
        VOLUME = "93",
        YEAR = "2019",
        PAGES = "204--214",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ian2.html#TT117919"}

@article{bb121807,
        AUTHOR = "Verma, Y.",
        TITLE = "Diverse image annotation with missing labels",
        JOURNAL = PR,
        VOLUME = "93",
        YEAR = "2019",
        PAGES = "470--484",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ian2.html#TT117920"}

@article{bb121808,
        AUTHOR = "Markatopoulou, F. and Mezaris, V. and Patras, I.",
        TITLE = "Implicit and Explicit Concept Relations in Deep Neural Networks for
Multi-Label Video/Image Annotation",
        JOURNAL = CirSysVideo,
        VOLUME = "29",
        YEAR = "2019",
        NUMBER = "6",
        MONTH = jun,
        PAGES = "1631--1644",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ian2.html#TT117921"}

@article{bb121809,
        AUTHOR = "Laib, L. and Allili, M. S. and Ait Aoudia, S.",
        TITLE = "A probabilistic topic model for event-based image classification and
multi-label annotation",
        JOURNAL = SP:IC,
        VOLUME = "76",
        YEAR = "2019",
        PAGES = "283--294",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ian2.html#TT117922"}

@article{bb121810,
        AUTHOR = "Olaode, A. and Naghdy, G.",
        TITLE = "Review of the application of machine learning to the automatic semantic
annotation of images",
        JOURNAL = IET-IPR,
        VOLUME = "13",
        YEAR = "2019",
        NUMBER = "8",
        MONTH = jun,
        PAGES = "1232--1245",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ian2.html#TT117923"}

@article{bb121811,
        AUTHOR = "Zhang, C. J. and Cheng, J. and Tian, Q.",
        TITLE = "Multiview, Few-Labeled Object Categorization by Predicting Labels
With View Consistency",
        JOURNAL = Cyber,
        VOLUME = "49",
        YEAR = "2019",
        NUMBER = "11",
        MONTH = nov,
        PAGES = "3834--3843",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ian2.html#TT117924"}

@article{bb121812,
        AUTHOR = "Tang, C. and Liu, X. and Wang, P. and Zhang, C. and Li, M. and Wang, L.",
        TITLE = "Adaptive Hypergraph Embedded Semi-Supervised Multi-Label Image
Annotation",
        JOURNAL = MultMed,
        VOLUME = "21",
        YEAR = "2019",
        NUMBER = "11",
        MONTH = nov,
        PAGES = "2837--2849",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ian2.html#TT117925"}

@article{bb121813,
        AUTHOR = "Mundnich, K. and Booth, B. M. and Girault, B. and Narayanan, S.",
        TITLE = "Generating labels for regression of subjective constructs using
triplet embeddings",
        JOURNAL = PRL,
        VOLUME = "128",
        YEAR = "2019",
        PAGES = "385--392",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ian2.html#TT117926"}

@article{bb121814,
        AUTHOR = "Chaudhary, C. and Goyal, P. and Prasad, D. N. and Chen, Y. P.",
        TITLE = "Enhancing the Quality of Image Tagging Using a Visio-Textual
Knowledge Base",
        JOURNAL = MultMed,
        VOLUME = "22",
        YEAR = "2020",
        NUMBER = "4",
        MONTH = apr,
        PAGES = "897--911",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ian2.html#TT117927"}

@article{bb121815,
        AUTHOR = "Khatchatoorian, A. G. and Jamzad, M.",
        TITLE = "Architecture to improve the accuracy of automatic image annotation
systems",
        JOURNAL = IET-CV,
        VOLUME = "14",
        YEAR = "2020",
        NUMBER = "5",
        MONTH = aug,
        PAGES = "214--223",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ian2.html#TT117928"}

@article{bb121816,
        AUTHOR = "Theodosiou, Z. and Tsapatsoulis, N.",
        TITLE = "Image annotation: the effects of content, lexicon and annotation method",
        JOURNAL = MultInfoRetr,
        VOLUME = "9",
        YEAR = "2020",
        NUMBER = "3",
        MONTH = sep,
        PAGES = "191--203",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ian2.html#TT117929"}

@article{bb121817,
        AUTHOR = "Haghighi, F. and Taher, M. R. H. and Zhou, Z. W. and Gotway, M. B. and Liang, J. M.",
        TITLE = "Transferable Visual Words: Exploiting the Semantics of Anatomical
Patterns for Self-Supervised Learning",
        JOURNAL = MedImg,
        VOLUME = "40",
        YEAR = "2021",
        NUMBER = "10",
        MONTH = oct,
        PAGES = "2857--2868",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ian2.html#TT117930"}

@article{bb121818,
        AUTHOR = "Hochberg, D. C. and Greenspan, H. and Giryes, R.",
        TITLE = "A Self Supervised {StyleGAN} for Image Annotation and Classification
With Extremely Limited Labels",
        JOURNAL = MedImg,
        VOLUME = "41",
        YEAR = "2022",
        NUMBER = "12",
        MONTH = dec,
        PAGES = "3509--3519",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ian2.html#TT117931"}

@article{bb121819,
        AUTHOR = "Wang, J. and Xu, W. J. and Wang, Q. Z. and Chan, A. B.",
        TITLE = "On Distinctive Image Captioning via Comparing and Reweighting",
        JOURNAL = PAMI,
        VOLUME = "45",
        YEAR = "2023",
        NUMBER = "2",
        MONTH = feb,
        PAGES = "2088--2103",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ian2.html#TT117932"}

@inproceedings{bb121820,
        AUTHOR = "Lahtinen, T. and Turtiainen, H. and Costin, A.",
        TITLE = "Brima: Low-Overhead Browser-Only Image Annotation Tool (Preprint)",
        BOOKTITLE = ICIP21,
        YEAR = "2021",
        PAGES = "2633--2637",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ian2.html#TT117933"}

@inproceedings{bb121821,
        AUTHOR = "Lotfi, F. and Jamzad, M. and Beigy, H.",
        TITLE = "Automatic Image Annotation using Tag Relations and Graph
Convolutional Networks",
        BOOKTITLE = IPRIA21,
        YEAR = "2021",
        PAGES = "1--6",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ian2.html#TT117934"}

@inproceedings{bb121822,
        AUTHOR = "Chen, X. Y. and Jiang, M. and Zhao, Q.",
        TITLE = "Self-Distillation for Few-Shot Image Captioning",
        BOOKTITLE = WACV21,
        YEAR = "2021",
        PAGES = "545--555",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ian2.html#TT117935"}

@inproceedings{bb121823,
        AUTHOR = "Jiu, M. and Sahbi, H.",
        TITLE = "End-to-End Deep Kernel Map Design for Image Annotation",
        BOOKTITLE = ICIP20,
        YEAR = "2020",
        PAGES = "1546--1550",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ian2.html#TT117936"}

@inproceedings{bb121824,
        AUTHOR = "Hu, H. and Misra, I. and van der Maaten, L.",
        TITLE = "Evaluating Text-to-Image Matching using Binary Image Selection
({BISON})",
        BOOKTITLE = CLVL19,
        YEAR = "2019",
        PAGES = "1887--1890",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ian2.html#TT117937"}

@inproceedings{bb121825,
        AUTHOR = "Tanaka, M. and Itamochi, T. and Narioka, K. and Sato, I. and Ushiku, Y. and Harada, T.",
        TITLE = "Generating Easy-to-Understand Referring Expressions for Target
Identifications",
        BOOKTITLE = ICCV19,
        YEAR = "2019",
        PAGES = "5793--5802",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ian2.html#TT117938"}

@inproceedings{bb121826,
        AUTHOR = "Gupta, T. and Schwing, A. G. and Hoiem, D.",
        TITLE = "{ViCo}: Word Embeddings From Visual Co-Occurrences",
        BOOKTITLE = ICCV19,
        YEAR = "2019",
        PAGES = "7424--7433",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ian2.html#TT117939"}

@inproceedings{bb121827,
        AUTHOR = "Bracha, L. and Chechik, G.",
        TITLE = "Informative Object Annotations: Tell Me Something {I} Don't Know",
        BOOKTITLE = CVPR19,
        YEAR = "2019",
        PAGES = "12499--12507",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ian2.html#TT117940"}

@inproceedings{bb121828,
        AUTHOR = "Rapson, C. J. and Seet, B. and Naeem, M. A. and Lee, J. E. and Al Sarayreh, M. and Klette, R.",
        TITLE = "Reducing the Pain: A Novel Tool for Efficient Ground-Truth Labelling
in Images",
        BOOKTITLE = IVCNZ18,
        YEAR = "2018",
        PAGES = "1--9",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ian2.html#TT117941"}

@inproceedings{bb121829,
        AUTHOR = "Wu, B. Y. and Chen, W. D. and Sun, P. and Liu, W. and Ghanem, B. and Lyu, S. W.",
        TITLE = "Tagging Like Humans: Diverse and Distinct Image Annotation",
        BOOKTITLE = CVPR18,
        YEAR = "2018",
        PAGES = "7967--7975",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ian2.html#TT117942"}

@inproceedings{bb121830,
        AUTHOR = "Wu, X. J. and Zhang, L. and Li, F. Z. and Wang, B. J.",
        TITLE = "A Novel Model for Multi-label Image Annotation",
        BOOKTITLE = ICPR18,
        YEAR = "2018",
        PAGES = "1953--1958",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ian2.html#TT117943"}

@inproceedings{bb121831,
        AUTHOR = "Jiu, M. and Sahbi, H. and Qi, L.",
        TITLE = "Deep Context Networks for Image Annotation",
        BOOKTITLE = ICPR18,
        YEAR = "2018",
        PAGES = "2422--2427",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ian2.html#TT117944"}

@inproceedings{bb121832,
        AUTHOR = "Khatchatoorian, A. G. and Jamzad, M.",
        TITLE = "Post Rectifying Methods to Improve the Accuracy of Image Annotation",
        BOOKTITLE = DICTA17,
        YEAR = "2017",
        PAGES = "1--7",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ian2.html#TT117945"}

@inproceedings{bb121833,
        AUTHOR = "Pellegrin, L. and Escalante, H. J. and {Montes y Gomez}, M. and Villegas, M. and Gonzalez, F. A.",
        TITLE = "A Flexible Framework for the Evaluation of Unsupervised Image
Annotation",
        BOOKTITLE = CIARP17,
        YEAR = "2017",
        PAGES = "508--516",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ian2.html#TT117946"}

@inproceedings{bb121834,
        AUTHOR = "Tripathi, A. and Gupta, A. and Chaudhary, S. and Lall, B.",
        TITLE = "Image Annotation Using Latent Components and Transmedia Association",
        BOOKTITLE = PReMI17,
        YEAR = "2017",
        PAGES = "493--500",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ian2.html#TT117947"}

@inproceedings{bb121835,
        AUTHOR = "Wu, B. Y. and Jia, F. and Liu, W. and Ghanem, B.",
        TITLE = "Diverse Image Annotation",
        BOOKTITLE = CVPR17,
        YEAR = "2017",
        PAGES = "6194--6202",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ian2.html#TT117948"}

@article{bb121836,
        AUTHOR = "Gao, L. L. and Guo, Z. and Zhang, H. W. and Xu, X. and Shen, H. T.",
        TITLE = "Video Captioning With Attention-Based {LSTM} and Semantic Consistency",
        JOURNAL = MultMed,
        VOLUME = "19",
        YEAR = "2017",
        NUMBER = "9",
        MONTH = sep,
        PAGES = "2045--2055",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607lscap4.html#TT117949"}

@article{bb121837,
        AUTHOR = "Bin, Y. and Yang, Y. and Shen, F. and Xie, N. and Shen, H. T. and Li, X.",
        TITLE = "Describing Video With Attention-Based Bidirectional {LSTM}",
        JOURNAL = Cyber,
        VOLUME = "49",
        YEAR = "2019",
        NUMBER = "7",
        MONTH = jul,
        PAGES = "2631--2641",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607lscap4.html#TT117950"}

@article{bb121838,
        AUTHOR = "Fu, K. and Jin, J. Q. and Cui, R. P. and Sha, F. and Zhang, C. S.",
        TITLE = "Aligning Where to See and What to Tell: Image Captioning with
Region-Based Attention and Scene-Specific Contexts",
        JOURNAL = PAMI,
        VOLUME = "39",
        YEAR = "2017",
        NUMBER = "12",
        MONTH = dec,
        PAGES = "2321--2334",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607lscap4.html#TT117951"}

@article{bb121839,
        AUTHOR = "Xiao, C. M. and Yang, Q. and Xu, X. Q. and Zhang, J. W. and Zhou, F. and Zhang, C. S.",
        TITLE = "Where you edit is what you get: Text-guided image editing with
region-based attention",
        JOURNAL = PR,
        VOLUME = "139",
        YEAR = "2023",
        PAGES = "109458",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607lscap4.html#TT117952"}

@article{bb121840,
        AUTHOR = "Nian, F. D. and Li, T. and Wang, Y. and Wu, X. Y. and Ni, B. B. and Xu, C. S.",
        TITLE = "Learning explicit video attributes from mid-level representation for
video captioning",
        JOURNAL = CVIU,
        VOLUME = "163",
        YEAR = "2017",
        NUMBER = "1",
        PAGES = "126--138",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607lscap4.html#TT117953"}

@article{bb121841,
        AUTHOR = "Ye, S. and Han, J. and Liu, N.",
        TITLE = "Attentive Linear Transformation for Image Captioning",
        JOURNAL = IP,
        VOLUME = "27",
        YEAR = "2018",
        NUMBER = "11",
        MONTH = nov,
        PAGES = "5514--5524",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607lscap4.html#TT117954"}

@article{bb121842,
        AUTHOR = "Xian, Y. and Tian, Y.",
        TITLE = "Self-Guiding Multimodal {LSTM}: When We Do Not Have a Perfect Training
Dataset for Image Captioning",
        JOURNAL = IP,
        VOLUME = "28",
        YEAR = "2019",
        NUMBER = "11",
        MONTH = nov,
        PAGES = "5241--5252",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607lscap4.html#TT117955"}

@article{bb121843,
        AUTHOR = "Peng, Y. Q. and Liu, X. and Wang, W. H. and Zhao, X. S. and Wei, M.",
        TITLE = "Image caption model of double {LSTM} with scene factors",
        JOURNAL = IVC,
        VOLUME = "86",
        YEAR = "2019",
        PAGES = "38--44",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607lscap4.html#TT117956"}

@article{bb121844,
        AUTHOR = "Wu, L. and Xu, M. and Wang, J. and Perry, S.",
        TITLE = "Recall What You See Continually Using {GridLSTM} in Image Captioning",
        JOURNAL = MultMed,
        VOLUME = "22",
        YEAR = "2020",
        NUMBER = "3",
        MONTH = mar,
        PAGES = "808--818",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607lscap4.html#TT117957"}

@article{bb121845,
        AUTHOR = "Deng, Z. R. and Jiang, Z. Q. and Lan, R. and Huang, W. M. and Luo, X. N.",
        TITLE = "Image captioning using {DenseNet} network and adaptive attention",
        JOURNAL = SP:IC,
        VOLUME = "85",
        YEAR = "2020",
        PAGES = "115836",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607lscap4.html#TT117958"}

@article{bb121846,
        AUTHOR = "Ji, J. and Xu, C. and Zhang, X. and Wang, B. and Song, X.",
        TITLE = "Spatio-Temporal Memory Attention for Image Captioning",
        JOURNAL = IP,
        VOLUME = "29",
        YEAR = "2020",
        PAGES = "7615--7628",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607lscap4.html#TT117959"}

@article{bb121847,
        AUTHOR = "Che, W. B. and Fan, X. P. and Xiong, R. Q. and Zhao, D. B.",
        TITLE = "Visual Relationship Embedding Network for Image Paragraph Generation",
        JOURNAL = MultMed,
        VOLUME = "22",
        YEAR = "2020",
        NUMBER = "9",
        MONTH = sep,
        PAGES = "2307--2320",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607lscap4.html#TT117960"}

@article{bb121848,
        AUTHOR = "Zhang, J. and Li, K. K. and Wang, Z.",
        TITLE = "Parallel-fusion {LSTM} with synchronous semantic and visual information
for image captioning",
        JOURNAL = JVCIR,
        VOLUME = "75",
        YEAR = "2021",
        PAGES = "103044",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607lscap4.html#TT117961"}

@article{bb121849,
        AUTHOR = "He, S. and Lu, Y. Y. and Chen, S. N.",
        TITLE = "Image Captioning Algorithm Based on Multi-Branch {CNN} and {Bi-LSTM}",
        JOURNAL = IEICE,
        VOLUME = "E104-D",
        YEAR = "2021",
        NUMBER = "7",
        MONTH = jul,
        PAGES = "941--947",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607lscap4.html#TT117962"}

@article{bb121850,
        AUTHOR = "Yuan, J. and Zhu, S. and Huang, S. Y. and Zhang, H. W. and Xiao, Y. Q. and Li, Z. Y. and Wang, M.",
        TITLE = "Discriminative Style Learning for Cross-Domain Image Captioning",
        JOURNAL = IP,
        VOLUME = "31",
        YEAR = "2022",
        PAGES = "1723--1736",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607lscap4.html#TT117963"}

@inproceedings{bb121851,
        AUTHOR = "Zhou, Y. and Zhang, Y. and Hu, Z. Z. and Wang, M.",
        TITLE = "Semi-Autoregressive Transformer for Image Captioning",
        BOOKTITLE = CLVL21,
        YEAR = "2021",
        PAGES = "3132--3136",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607lscap4.html#TT117964"}

@article{bb121852,
        AUTHOR = "Lv, G. and Sun, Y. N. and Nian, F. and Zhu, M. F. and Tang, W. L. and Hu, Z. Z.",
        TITLE = "{COME}: {Clip-OCR} and Master {ObjEct} for text image captioning",
        JOURNAL = IVC,
        VOLUME = "136",
        YEAR = "2023",
        PAGES = "104751",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607lscap4.html#TT117965"}

@inproceedings{bb121853,
        AUTHOR = "Niu, Z. X. and Zhou, M. and Wang, L. and Gao, X. B. and Hua, G.",
        TITLE = "Hierarchical Multimodal {LSTM} for Dense Visual-Semantic Embedding",
        BOOKTITLE = ICCV17,
        YEAR = "2017",
        PAGES = "1899--1907",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607lscap4.html#TT117966"}

@inproceedings{bb121854,
        AUTHOR = "Tan, Y. H. and Chan, C. S.",
        TITLE = "{phi-LSTM}: A Phrase-Based Hierarchical {LSTM} Model for Image Captioning",
        BOOKTITLE = ACCV16,
        YEAR = "2016",
        PAGES = "V: 101--117",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607lscap4.html#TT117967"}

@inproceedings{bb121855,
        AUTHOR = "Wang, M. and Song, L. and Yang, X. K. and Luo, C. F.",
        TITLE = "A parallel-fusion {RNN-LSTM} architecture for image caption generation",
        BOOKTITLE = ICIP16,
        YEAR = "2016",
        PAGES = "4448--4452",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607lscap4.html#TT117968"}

@article{bb121856,
        AUTHOR = "Verma, Y. and Jawahar, C. V.",
        TITLE = "A support vector approach for cross-modal search of images and texts",
        JOURNAL = CVIU,
        VOLUME = "154",
        YEAR = "2017",
        NUMBER = "1",
        PAGES = "48--63",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT117969"}

@inproceedings{bb121857,
        AUTHOR = "Dutta, A. and Verma, Y. and Jawahar, C. V.",
        TITLE = "Recurrent Image Annotation with Explicit Inter-Label Dependencies",
        BOOKTITLE = ECCV20,
        YEAR = "2020",
        PAGES = "XXIX: 191--207",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT117970"}

@article{bb121858,
        AUTHOR = "Xue, J. F. and Eguchi, K.",
        TITLE = "Video Data Modeling Using Sequential Correspondence Hierarchical
{Dirichlet} Processes",
        JOURNAL = IEICE,
        VOLUME = "E100-D",
        YEAR = "2017",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "33--41",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT117971"}

@article{bb121859,
        AUTHOR = "Liu, A. A. and Xu, N. and Wong, Y. K. and Li, J. and Su, Y. T. and Kankanhalli, M.",
        TITLE = "Hierarchical \& multimodal video captioning: Discovering and
transferring multimodal knowledge for vision to language",
        JOURNAL = CVIU,
        VOLUME = "163",
        YEAR = "2017",
        NUMBER = "1",
        PAGES = "113--125",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT117972"}

@article{bb121860,
        AUTHOR = "Guan, J. N. and Wang, E.",
        TITLE = "Repeated review based image captioning for image evidence review",
        JOURNAL = SP:IC,
        VOLUME = "63",
        YEAR = "2018",
        PAGES = "141--148",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT117973"}

@article{bb121861,
        AUTHOR = "Hu, M. and Yang, Y. and Shen, F. and Zhang, L. and Shen, H. T. and Li, X.",
        TITLE = "Robust Web Image Annotation via Exploring Multi-Facet and Structural
Knowledge",
        JOURNAL = IP,
        VOLUME = "26",
        YEAR = "2017",
        NUMBER = "10",
        MONTH = oct,
        PAGES = "4871--4884",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT117974"}

@article{bb121862,
        AUTHOR = "Gil Gonzalez, J. and Alvarez Meza, A. and Orozco Gutierrez, A.",
        TITLE = "Learning from multiple annotators using kernel alignment",
        JOURNAL = PRL,
        VOLUME = "116",
        YEAR = "2018",
        PAGES = "150--156",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT117975"}

@article{bb121863,
        AUTHOR = "Zheng, H. and Wu, J. H. and Liang, R. and Li, Y. and Li, X. Z.",
        TITLE = "Multi-task learning for captioning images with novel words",
        JOURNAL = IET-CV,
        VOLUME = "13",
        YEAR = "2019",
        NUMBER = "3",
        MONTH = apr,
        PAGES = "294--301",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT117976"}

@article{bb121864,
        AUTHOR = "Park, C. C. and Kim, B. and Kim, G.",
        TITLE = "Towards Personalized Image Captioning via Multimodal Memory Networks",
        JOURNAL = PAMI,
        VOLUME = "41",
        YEAR = "2019",
        NUMBER = "4",
        MONTH = apr,
        PAGES = "999--1012",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT117977"}

@inproceedings{bb121865,
        AUTHOR = "Park, C. C. and Kim, B. and Kim, G.",
        TITLE = "Attend to You: Personalized Image Captioning with Context Sequence
Memory Networks",
        BOOKTITLE = CVPR17,
        YEAR = "2017",
        PAGES = "6432--6440",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT117978"}

@article{bb121866,
        AUTHOR = "Yang, M. and Zhao, W. and Xu, W. and Feng, Y. and Zhao, Z. and Chen, X. and Lei, K.",
        TITLE = "Multitask Learning for Cross-Domain Image Captioning",
        JOURNAL = MultMed,
        VOLUME = "21",
        YEAR = "2019",
        NUMBER = "4",
        MONTH = apr,
        PAGES = "1047--1061",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT117979"}

@article{bb121867,
        AUTHOR = "Yu, N. and Hu, X. and Song, B. and Yang, J. and Zhang, J.",
        TITLE = "Topic-Oriented Image Captioning Based on Order-Embedding",
        JOURNAL = IP,
        VOLUME = "28",
        YEAR = "2019",
        NUMBER = "6",
        MONTH = jun,
        PAGES = "2743--2754",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT117980"}

@article{bb121868,
        AUTHOR = "Li, X. and Xu, C. and Wang, X. and Lan, W. and Jia, Z. and Yang, G. and Xu, J.",
        TITLE = "{COCO-CN} for Cross-Lingual Image Tagging, Captioning, and Retrieval",
        JOURNAL = MultMed,
        VOLUME = "21",
        YEAR = "2019",
        NUMBER = "9",
        MONTH = sep,
        PAGES = "2347--2360",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT117981"}

@article{bb121869,
        AUTHOR = "Tian, C. and Tian, M. and Jiang, M. M. and Liu, H. and Deng, D. H.",
        TITLE = "How much do cross-modal related semantics benefit image captioning by
weighting attributes and re-ranking sentences?",
        JOURNAL = PRL,
        VOLUME = "125",
        YEAR = "2019",
        PAGES = "639--645",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT117982"}

@article{bb121870,
        AUTHOR = "Niu, Y. and Lu, Z. and Wen, J. and Xiang, T. and Chang, S.",
        TITLE = "Multi-Modal Multi-Scale Deep Learning for Large-Scale Image
Annotation",
        JOURNAL = IP,
        VOLUME = "28",
        YEAR = "2019",
        NUMBER = "4",
        MONTH = apr,
        PAGES = "1720--1731",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT117983"}

@article{bb121871,
        AUTHOR = "Huang, Y. and Chen, J. and Ouyang, W. and Wan, W. and Xue, Y.",
        TITLE = "Image Captioning With End-to-End Attribute Detection and Subsequent
Attributes Prediction",
        JOURNAL = IP,
        VOLUME = "29",
        YEAR = "2020",
        PAGES = "4013--4026",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT117984"}

@article{bb121872,
        AUTHOR = "Zhao, W. and Wu, X. and Luo, J.",
        TITLE = "Cross-Domain Image Captioning via Cross-Modal Retrieval and Model
Adaptation",
        JOURNAL = IP,
        VOLUME = "30",
        YEAR = "2021",
        PAGES = "1180--1192",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT117985"}

@article{bb121873,
        AUTHOR = "Wang, H. and Du, Y. T. and Zhang, G. X. and Cai, Z. M. and Su, C.",
        TITLE = "Learning Fundamental Visual Concepts Based on Evolved Multi-Edge
Concept Graph",
        JOURNAL = MultMed,
        VOLUME = "23",
        YEAR = "2021",
        PAGES = "4400--4413",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT117986"}

@article{bb121874,
        AUTHOR = "Zhang, J. and Mei, K. and Zheng, Y. and Fan, J.",
        TITLE = "Integrating Part of Speech Guidance for Image Captioning",
        JOURNAL = MultMed,
        VOLUME = "23",
        YEAR = "2021",
        PAGES = "92--104",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT117987"}

@article{bb121875,
        AUTHOR = "Kim, D. J. and Oh, T. H. and Choi, J. and Kweon, I. S.",
        TITLE = "Dense Relational Image Captioning via Multi-Task Triple-Stream
Networks",
        JOURNAL = PAMI,
        VOLUME = "44",
        YEAR = "2022",
        NUMBER = "11",
        MONTH = nov,
        PAGES = "7348--7362",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT117988"}

@inproceedings{bb121876,
        AUTHOR = "Kim, D. J. and Choi, J. and Oh, T. H. and Kweon, I. S.",
        TITLE = "Dense Relational Captioning: Triple-Stream Networks for
Relationship-Based Captioning",
        BOOKTITLE = CVPR19,
        YEAR = "2019",
        PAGES = "6264--6273",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT117989"}

@article{bb121877,
        AUTHOR = "Nguyen, T. S. and Fernando, B.",
        TITLE = "Effective Multimodal Encoding for Image Paragraph Captioning",
        JOURNAL = IP,
        VOLUME = "31",
        YEAR = "2022",
        PAGES = "6381--6395",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT117990"}

@article{bb121878,
        AUTHOR = "Duan, Y. Q. and Wang, Z. and Li, Y. and Wang, J. Y.",
        TITLE = "Cross-domain multi-style merge for image captioning",
        JOURNAL = CVIU,
        VOLUME = "228",
        YEAR = "2023",
        PAGES = "103617",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT117991"}

@article{bb121879,
        AUTHOR = "Wu, X. X. and Li, T.",
        TITLE = "Sentimental Visual Captioning using Multimodal Transformer",
        JOURNAL = IJCV,
        VOLUME = "131",
        YEAR = "2023",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "1073--1090",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT117992"}

@article{bb121880,
        AUTHOR = "Ding, Z. W. and Lan, G. L. and Song, Y. Z. and Yang, Z. W.",
        TITLE = "{SGIR}: Star Graph-Based Interaction for Efficient and Robust
Multimodal Representation",
        JOURNAL = MultMed,
        VOLUME = "26",
        YEAR = "2024",
        PAGES = "4217--4229",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT117993"}

@article{bb121881,
        AUTHOR = "Zhao, W. T. and Wu, X. X.",
        TITLE = "Boosting Entity-Aware Image Captioning With Multi-Modal Knowledge
Graph",
        JOURNAL = MultMed,
        VOLUME = "26",
        YEAR = "2024",
        PAGES = "2659--2670",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT117994"}

@article{bb121882,
        AUTHOR = "Gao, J. L. and Li, J. and Jia, C. M. and Wang, S. S. and Ma, S. W. and Gao, W.",
        TITLE = "Cross Modal Compression With Variable Rate Prompt",
        JOURNAL = MultMed,
        VOLUME = "26",
        YEAR = "2024",
        PAGES = "3444--3456",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT117995"}

@inproceedings{bb121883,
        AUTHOR = "Hu, J. C. and Cavicchioli, R. and Capotondi, A.",
        TITLE = "A Request for Clarity over the End of Sequence Token in the
Self-critical Sequence Training",
        BOOKTITLE = CIAP23,
        YEAR = "2023",
        PAGES = "I: 39--50",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT117996"}

@inproceedings{bb121884,
        AUTHOR = "Hu, W. Z. and Wang, L. and Xu, L.",
        TITLE = "Spatial-Semantic Attention for Grounded Image Captioning",
        BOOKTITLE = ICIP22,
        YEAR = "2022",
        PAGES = "61--65",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT117997"}

@inproceedings{bb121885,
        AUTHOR = "Sharif, N. and Jalwana, M. A. A. K. and Bennamoun, M. and Liu, W. and Shah, S. A. A.",
        TITLE = "Leveraging Linguistically-aware Object Relations and {NASNet} for Image Captioning",
        BOOKTITLE = IVCNZ20,
        YEAR = "2020",
        PAGES = "1--6",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT117998"}

@inproceedings{bb121886,
        AUTHOR = "Kuo, C. W. and Kira, Z.",
        TITLE = "Beyond a Pre-Trained Object Detector: Cross-Modal Textual and Visual Context for Image Captioning",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "17948--17958",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT117999"}

@inproceedings{bb121887,
        AUTHOR = "Zhou, M. Y. and Zhou, L. W. and Wang, S. H. and Cheng, Y. and Li, L. J. and Yu, Z. and Liu, J. J.",
        TITLE = "{UC2}: Universal Cross-lingual Cross-modal Vision-and-Language Pre-training",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "4153--4163",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT118000"}

@inproceedings{bb121888,
        AUTHOR = "Laina, I. and Rupprecht, C. and Navab, N.",
        TITLE = "Towards Unsupervised Image Captioning With Shared Multimodal Embeddings",
        BOOKTITLE = ICCV19,
        YEAR = "2019",
        PAGES = "7413--7423",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT118001"}

@inproceedings{bb121889,
        AUTHOR = "Akbari, H. and Karaman, S. and Bhargava, S. and Chen, B. and Vondrick, C. and Chang, S. F.",
        TITLE = "Multi-Level Multimodal Common Semantic Space for Image-Phrase Grounding",
        BOOKTITLE = CVPR19,
        YEAR = "2019",
        PAGES = "12468--12478",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT118002"}

@inproceedings{bb121890,
        AUTHOR = "Chen, T. H. and Liao, Y. H. and Chuang, C. Y. and Hsu, W. T. and Fu, J. and Sun, M.",
        TITLE = "Show, Adapt and Tell: Adversarial Training of Cross-Domain Image Captioner",
        BOOKTITLE = ICCV17,
        YEAR = "2017",
        PAGES = "521--530",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT118003"}

% NOTE(review): the "II:" prefix in PAGES presumably names the proceedings
% volume (part II); confirm before moving it into a VOLUME field.
@inproceedings{bb121891,
        AUTHOR = "Pini, S. and Cornia, M. and Baraldi, L. and Cucchiara, R.",
        TITLE = "Towards Video Captioning with Naming: A Novel Dataset and a Multi-modal Approach",
        BOOKTITLE = CIAP17,
        YEAR = "2017",
        PAGES = "II:384--395",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT118004"}

% NOTE(review): the BOOKTITLE macro CMU-CS-TR suggests this is a technical
% report rather than a conference paper; the macro is defined outside this
% chunk, so the entry type is left unchanged until its expansion is checked.
@inproceedings{bb121892,
        AUTHOR = "Pan, J. Y. and Yang, H. J. and Faloutsos, C.",
        TITLE = "{MMSS}: Graph-based Multi-modal Story-oriented Video Summarization and Retrieval",
        BOOKTITLE = CMU-CS-TR,
        YEAR = "2004",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT118005"}

@inproceedings{bb121893,
        AUTHOR = "Pan, J. Y. and Yang, H. J. and Faloutsos, C. and Duygulu, P.",
        TITLE = "{GCap}: Graph-based Automatic Image Captioning",
        BOOKTITLE = MMDE04,
        YEAR = "2004",
        PAGES = "146",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT118006"}

% NOTE(review): bb121894 and bb121895 share the same author, title, year and
% BIBSOURCE anchor and differ only in the BOOKTITLE macro; they look like two
% records of one work. Both keys are kept so existing citations still resolve.
@inproceedings{bb121894,
        AUTHOR = "Pan, J. Y.",
        TITLE = "Advanced Tools for Video and Multimedia Mining",
        BOOKTITLE = CMU-CS,
        YEAR = "2006",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT118007"}

% NOTE(review): same author, title, year and BIBSOURCE anchor as bb121894.
% The BOOKTITLE macro Ph.D. suggests a doctoral thesis; the entry type is left
% unchanged because the macro expansion and the school are not visible here.
@inproceedings{bb121895,
        AUTHOR = "Pan, J. Y.",
        TITLE = "Advanced Tools for Video and Multimedia Mining",
        BOOKTITLE = Ph.D.,
        YEAR = "2006",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT118007"}

@article{bb121896,
        AUTHOR = "Yu, J. and Li, J. and Yu, Z. and Huang, Q.",
        TITLE = "Multimodal Transformer With Multi-View Visual Representation for Image Captioning",
        JOURNAL = CirSysVideo,
        VOLUME = "30",
        YEAR = "2020",
        NUMBER = "12",
        MONTH = dec,
        PAGES = "4467--4480",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607tic2.html#TT118008"}

@article{bb121897,
        AUTHOR = "Zhang, Y. and Shi, X. Y. and Mi, S. and Yang, X.",
        TITLE = "Image captioning with transformer and knowledge graph",
        JOURNAL = PRL,
        VOLUME = "143",
        YEAR = "2021",
        PAGES = "43--49",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607tic2.html#TT118009"}

@article{bb121898,
        AUTHOR = "Yan, C. G. and Hao, Y. M. and Li, L. and Yin, J. and Liu, A. and Mao, Z. and Chen, Z. Y. and Gao, X. Y.",
        TITLE = "Task-Adaptive Attention for Image Captioning",
        JOURNAL = CirSysVideo,
        VOLUME = "32",
        YEAR = "2022",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "43--51",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607tic2.html#TT118010"}

% NOTE(review): the source record carried the placeholder page range "xx-yy",
% which a style would print literally. It is omitted here; look up the real
% article number or page range and add it as PAGES.
@article{bb121899,
        AUTHOR = "Ren, Z. H. and Gou, S. P. and Guo, Z. and Mao, S. S. and Li, R. M.",
        TITLE = "A Mask-Guided Transformer Network with Topic Token for Remote Sensing Image Captioning",
        JOURNAL = RS,
        VOLUME = "14",
        YEAR = "2022",
        NUMBER = "12",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607tic2.html#TT118011"}

Last update: Apr 18, 2024 at 11:38:49