@inproceedings{bb115700,
        AUTHOR = "Liu, J. Y. and Wang, L. and Yang, M. H.",
        TITLE = "Referring Expression Generation and Comprehension via Attributes",
        BOOKTITLE = ICCV17,
        YEAR = "2017",
        PAGES = "4866--4874",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT111897"}

@inproceedings{bb115701,
        AUTHOR = "Dai, B. and Fidler, S. and Urtasun, R. and Lin, D.",
        TITLE = "Towards Diverse and Natural Image Descriptions via a Conditional {GAN}",
        BOOKTITLE = ICCV17,
        YEAR = "2017",
        PAGES = "2989--2998",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT111898"}

@inproceedings{bb115702,
        AUTHOR = "Liang, X. and Hu, Z. and Zhang, H. and Gan, C. and Xing, E. P.",
        TITLE = "Recurrent Topic-Transition {GAN} for Visual Paragraph Generation",
        BOOKTITLE = ICCV17,
        YEAR = "2017",
        PAGES = "3382--3391",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT111899"}

@inproceedings{bb115703,
        AUTHOR = "Shetty, R. and Rohrbach, M. and Hendricks, L. A. and Fritz, M. and Schiele, B.",
        TITLE = "Speaking the Same Language: Matching Machine to Human Captions by Adversarial Training",
        BOOKTITLE = ICCV17,
        YEAR = "2017",
        PAGES = "4155--4164",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT111900"}

@inproceedings{bb115704,
        AUTHOR = "Liu, S. and Zhu, Z. and Ye, N. and Guadarrama, S. and Murphy, K.",
        TITLE = "Improved Image Captioning via Policy Gradient optimization of {SPIDEr}",
        BOOKTITLE = ICCV17,
        YEAR = "2017",
        PAGES = "873--881",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT111901"}

@inproceedings{bb115705,
        AUTHOR = "Gu, J. X. and Joty, S. and Cai, J. F. and Wang, G.",
        TITLE = "Unpaired Image Captioning by Language Pivoting",
        BOOKTITLE = ECCV18,
        YEAR = "2018",
        PAGES = "I: 519--535",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT111902"}

@inproceedings{bb115706,
        AUTHOR = "Gu, J. X. and Wang, G. and Cai, J. F. and Chen, T. H.",
        TITLE = "An Empirical Study of Language {CNN} for Image Captioning",
        BOOKTITLE = ICCV17,
        YEAR = "2017",
        PAGES = "1231--1240",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT111903"}

@inproceedings{bb115707,
        AUTHOR = "Pedersoli, M. and Lucas, T. and Schmid, C. and Verbeek, J.",
        TITLE = "Areas of Attention for Image Captioning",
        BOOKTITLE = ICCV17,
        YEAR = "2017",
        PAGES = "1251--1259",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT111904"}

@inproceedings{bb115708,
        AUTHOR = "Zhang, Z. and Wu, J. J. and Li, Q. and Huang, Z. and Traer, J. and McDermott, J. H. and Tenenbaum, J. B. and Freeman, W. T.",
        TITLE = "Generative Modeling of Audible Shapes for Object Perception",
        BOOKTITLE = ICCV17,
        YEAR = "2017",
        PAGES = "1260--1269",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT111905"}

@inproceedings{bb115709,
        AUTHOR = "Liu, Z. J. and Freeman, W. T. and Tenenbaum, J. B. and Wu, J. J.",
        TITLE = "Physical Primitive Decomposition",
        BOOKTITLE = ECCV18,
        YEAR = "2018",
        PAGES = "XII: 3--20",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT111906"}

@inproceedings{bb115710,
        AUTHOR = "Wu, J. J. and Lim, J. and Zhang, H. Y. and Tenenbaum, J. B. and Freeman, W. T.",
        TITLE = "Physics 101: Learning Physical Object Properties from Unlabeled Videos",
        BOOKTITLE = BMVC16,
        YEAR = "2016",
        INTERNAL-NOTE = "Page range unknown: upstream record carried the placeholder xx-yy",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT111907"}

@inproceedings{bb115711,
        AUTHOR = "Tavakoli, H. R. and Shetty, R. and Borji, A. and Laaksonen, J.",
        TITLE = "Paying Attention to Descriptions Generated by Image Captioning Models",
        BOOKTITLE = ICCV17,
        YEAR = "2017",
        PAGES = "2506--2515",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT111908"}

@inproceedings{bb115712,
        AUTHOR = "Tripathi, A. and Gupta, A. and Chaudhary, S. and Lall, B.",
        TITLE = "Image Annotation Using Latent Components and Transmedia Association",
        BOOKTITLE = PReMI17,
        YEAR = "2017",
        PAGES = "493--500",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT111909"}

@inproceedings{bb115713,
        AUTHOR = "Wu, B. Y. and Jia, F. and Liu, W. and Ghanem, B.",
        TITLE = "Diverse Image Annotation",
        BOOKTITLE = CVPR17,
        YEAR = "2017",
        PAGES = "6194--6202",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT111910"}

@inproceedings{bb115714,
        AUTHOR = "Krause, J. and Johnson, J. and Krishna, R. and Fei-Fei, L.",
        TITLE = "A Hierarchical Approach for Generating Descriptive Image Paragraphs",
        BOOKTITLE = CVPR17,
        YEAR = "2017",
        PAGES = "3337--3345",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT111911"}

@inproceedings{bb115715,
        AUTHOR = "Vedantam, R. and Bengio, S. and Murphy, K. and Parikh, D. and Chechik, G.",
        TITLE = "Context-Aware Captions from Context-Agnostic Supervision",
        BOOKTITLE = CVPR17,
        YEAR = "2017",
        PAGES = "1070--1079",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT111912"}

@inproceedings{bb115716,
        AUTHOR = "Gan, Z. and Gan, C. and He, X. and Pu, Y. and Tran, K. and Gao, J. and Carin, L. and Deng, L.",
        TITLE = "Semantic Compositional Networks for Visual Captioning",
        BOOKTITLE = CVPR17,
        YEAR = "2017",
        PAGES = "1141--1150",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT111913"}

@inproceedings{bb115717,
        AUTHOR = "Ren, Z. and Wang, X. and Zhang, N. and Lv, X. and Li, L. J.",
        TITLE = "Deep Reinforcement Learning-Based Image Captioning with Embedding Reward",
        BOOKTITLE = CVPR17,
        YEAR = "2017",
        PAGES = "1151--1159",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT111914"}

@inproceedings{bb115718,
        AUTHOR = "Rennie, S. J. and Marcheret, E. and Mroueh, Y. and Ross, J. and Goel, V.",
        TITLE = "Self-Critical Sequence Training for Image Captioning",
        BOOKTITLE = CVPR17,
        YEAR = "2017",
        PAGES = "1179--1195",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT111915"}

@inproceedings{bb115719,
        AUTHOR = "Yang, L. and Tang, K. and Yang, J. and Li, L. J.",
        TITLE = "Dense Captioning with Joint Inference and Visual Context",
        BOOKTITLE = CVPR17,
        YEAR = "2017",
        PAGES = "1978--1987",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT111916"}

@inproceedings{bb115720,
        AUTHOR = "Lu, J. and Xiong, C. and Parikh, D. and Socher, R.",
        TITLE = "Knowing When to Look: Adaptive Attention via a Visual Sentinel for Image Captioning",
        BOOKTITLE = CVPR17,
        YEAR = "2017",
        PAGES = "3242--3250",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT111917"}

@inproceedings{bb115721,
        AUTHOR = "Yao, T. and Pan, Y. and Li, Y. and Mei, T.",
        TITLE = "Incorporating Copying Mechanism in Image Captioning for Learning Novel Objects",
        BOOKTITLE = CVPR17,
        YEAR = "2017",
        PAGES = "5263--5271",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT111918"}

@inproceedings{bb115722,
        AUTHOR = "Chen, L. and Zhang, H. and Xiao, J. and Nie, L. and Shao, J. and Liu, W. and Chua, T. S.",
        TITLE = "{SCA-CNN}: Spatial and Channel-Wise Attention in Convolutional Networks for Image Captioning",
        BOOKTITLE = CVPR17,
        YEAR = "2017",
        PAGES = "6298--6306",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT111919"}

@inproceedings{bb115723,
        AUTHOR = "Sun, Q. and Lee, S. and Batra, D.",
        TITLE = "Bidirectional Beam Search: Forward-Backward Inference in Neural Sequence Models for Fill-in-the-Blank Image Captioning",
        BOOKTITLE = CVPR17,
        YEAR = "2017",
        PAGES = "7215--7223",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT111920"}

@inproceedings{bb115724,
        AUTHOR = "Wang, Y. and Lin, Z. and Shen, X. and Cohen, S. and Cottrell, G. W.",
        TITLE = "Skeleton Key: Image Captioning by Skeleton-Attribute Decomposition",
        BOOKTITLE = CVPR17,
        YEAR = "2017",
        PAGES = "7378--7387",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT111921"}

@inproceedings{bb115725,
        AUTHOR = "Zanfir, M. and Marinoiu, E. and Sminchisescu, C.",
        TITLE = "Spatio-Temporal Attention Models for Grounded Video Captioning",
        BOOKTITLE = ACCV16,
        YEAR = "2016",
        PAGES = "IV: 104--119",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT111922"}

@inproceedings{bb115726,
        AUTHOR = "Chen, T. H. and Zeng, K. H. and Hsu, W. T. and Sun, M.",
        TITLE = "Video Captioning via Sentence Augmentation and Spatio-Temporal Attention",
        BOOKTITLE = Assist16,
        YEAR = "2016",
        PAGES = "I: 269--286",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT111923"}

@inproceedings{bb115727,
        AUTHOR = "Tan, Y. H. and Chan, C. S.",
        TITLE = "{phi-LSTM}: A Phrase-Based Hierarchical {LSTM} Model for Image Captioning",
        BOOKTITLE = ACCV16,
        YEAR = "2016",
        PAGES = "V: 101--117",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT111924"}

@inproceedings{bb115728,
        AUTHOR = "Weiland, L. and Hulpus, I. and Ponzetto, S. P. and Dietz, L.",
        TITLE = "Using Object Detection, {NLP}, and Knowledge Bases to Understand the Message of Images",
        BOOKTITLE = MMMod17,
        YEAR = "2017",
        PAGES = "II: 405--418",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT111925"}

@inproceedings{bb115729,
        AUTHOR = "Liu, Y. and Guo, Y. M. and Lew, M. S.",
        TITLE = "What Convnets Make for Image Captioning?",
        BOOKTITLE = MMMod17,
        YEAR = "2017",
        PAGES = "I: 416--428",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT111926"}

@inproceedings{bb115730,
        AUTHOR = "Tran, K. and He, X. and Zhang, L. and Sun, J.",
        TITLE = "Rich Image Captioning in the Wild",
        BOOKTITLE = DeepLearn-C16,
        YEAR = "2016",
        PAGES = "434--441",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT111927"}

@inproceedings{bb115731,
        AUTHOR = "Wang, Y. L. and Wang, S. H. and Tang, J. L. and Liu, H. and Li, B. X.",
        TITLE = "{PPP}: Joint Pointwise and Pairwise Image Label Prediction",
        BOOKTITLE = CVPR16,
        YEAR = "2016",
        PAGES = "6005--6013",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT111928"}

@inproceedings{bb115732,
        AUTHOR = "Sadhu, A. and Gupta, T. and Yatskar, M. and Nevatia, R. and Kembhavi, A.",
        TITLE = "Visual Semantic Role Labeling for Video Understanding",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "5585--5596",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT111929"}

@inproceedings{bb115733,
        AUTHOR = "Yatskar, M. and Ordonez, V. and Zettlemoyer, L. and Farhadi, A.",
        TITLE = "Commonly Uncommon: Semantic Sparsity in Situation Recognition",
        BOOKTITLE = CVPR17,
        YEAR = "2017",
        PAGES = "6335--6344",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT111930"}

@inproceedings{bb115734,
        AUTHOR = "Yatskar, M. and Zettlemoyer, L. and Farhadi, A.",
        TITLE = "Situation Recognition: Visual Semantic Role Labeling for Image Understanding",
        BOOKTITLE = CVPR16,
        YEAR = "2016",
        PAGES = "5534--5542",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT111931"}

@inproceedings{bb115735,
        AUTHOR = "Kottur, S. and Vedantam, R. and Moura, J. M. F. and Parikh, D.",
        TITLE = "{VisualWord2Vec} ({Vis-W2V}): Learning Visually Grounded Word Embeddings Using Abstract Scenes",
        BOOKTITLE = CVPR16,
        YEAR = "2016",
        PAGES = "4985--4994",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT111932"}

@inproceedings{bb115736,
        AUTHOR = "Zhu, Y. and Groth, O. and Bernstein, M. and Fei-Fei, L.",
        TITLE = "{Visual7W}: Grounded Question Answering in Images",
        BOOKTITLE = CVPR16,
        YEAR = "2016",
        PAGES = "4995--5004",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT111933"}

@inproceedings{bb115737,
        AUTHOR = "Zhang, P. and Goyal, Y. and Summers-Stay, D. and Batra, D. and Parikh, D.",
        TITLE = "Yin and Yang: Balancing and Answering Binary Visual Questions",
        BOOKTITLE = CVPR16,
        YEAR = "2016",
        PAGES = "5014--5022",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT111934"}

@inproceedings{bb115738,
        AUTHOR = "Park, D. H. and Darrell, T. J. and Rohrbach, A.",
        TITLE = "Robust Change Captioning",
        BOOKTITLE = ICCV19,
        YEAR = "2019",
        PAGES = "4623--4632",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT111935"}

@inproceedings{bb115739,
        AUTHOR = "Venugopalan, S. and Hendricks, L. A. and Rohrbach, M. and Mooney, R. and Darrell, T. J. and Saenko, K.",
        TITLE = "Captioning Images with Diverse Objects",
        BOOKTITLE = CVPR17,
        YEAR = "2017",
        PAGES = "1170--1178",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT111936"}

@inproceedings{bb115740,
        AUTHOR = "Hendricks, L. A. and Venugopalan, S. and Rohrbach, M. and Mooney, R. and Saenko, K. and Darrell, T. J.",
        TITLE = "Deep Compositional Captioning: Describing Novel Object Categories without Paired Training Data",
        BOOKTITLE = CVPR16,
        YEAR = "2016",
        PAGES = "1--10",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT111937"}

@inproceedings{bb115741,
        AUTHOR = "Johnson, J. and Karpathy, A. and Fei-Fei, L.",
        TITLE = "{DenseCap}: Fully Convolutional Localization Networks for Dense Captioning",
        BOOKTITLE = CVPR16,
        YEAR = "2016",
        PAGES = "4565--4574",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT111938"}

@inproceedings{bb115742,
        AUTHOR = "Wang, M. and Song, L. and Yang, X. K. and Luo, C. F.",
        TITLE = "A parallel-fusion {RNN-LSTM} architecture for image caption generation",
        BOOKTITLE = ICIP16,
        YEAR = "2016",
        PAGES = "4448--4452",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT111939"}

@inproceedings{bb115743,
        AUTHOR = "Lin, X. and Parikh, D.",
        TITLE = "Leveraging Visual Question Answering for Image-Caption Ranking",
        BOOKTITLE = ECCV16,
        YEAR = "2016",
        PAGES = "II: 261--277",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT111940"}

@inproceedings{bb115744,
        AUTHOR = "Lin, X. and Parikh, D.",
        TITLE = "Don't just listen, use your imagination: Leveraging visual common sense for non-visual tasks",
        BOOKTITLE = CVPR15,
        YEAR = "2015",
        PAGES = "2984--2993",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT111941"}

@inproceedings{bb115745,
        AUTHOR = "Chen, T. L. and Zhang, Z. P. and You, Q. Z. and Fang, C. and Wang, Z. W. and Jin, H. L. and Luo, J. B.",
        TITLE = "`Factual' or `Emotional': Stylized Image Captioning with Adaptive Learning and Attention",
        BOOKTITLE = ECCV18,
        YEAR = "2018",
        PAGES = "X: 527--543",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT111942"}

@inproceedings{bb115746,
        AUTHOR = "You, Q. Z. and Jin, H. L. and Wang, Z. W. and Fang, C. and Luo, J. B.",
        TITLE = "Image Captioning with Semantic Attention",
        BOOKTITLE = CVPR16,
        YEAR = "2016",
        PAGES = "4651--4659",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT111943"}

@inproceedings{bb115747,
        AUTHOR = "Jia, X. and Gavves, E. and Fernando, B. and Tuytelaars, T.",
        TITLE = "Guiding the Long-Short Term Memory Model for Image Caption Generation",
        BOOKTITLE = ICCV15,
        YEAR = "2015",
        PAGES = "2407--2415",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT111944"}

@inproceedings{bb115748,
        AUTHOR = "Chen, X. L. and Zitnick, C. L.",
        TITLE = "Mind's eye: A recurrent visual representation for image caption generation",
        BOOKTITLE = CVPR15,
        YEAR = "2015",
        PAGES = "2422--2431",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT111945"}

@inproceedings{bb115749,
        AUTHOR = "Vedantam, R. and Zitnick, C. L. and Parikh, D.",
        TITLE = "{CIDEr}: Consensus-based image description evaluation",
        BOOKTITLE = CVPR15,
        YEAR = "2015",
        PAGES = "4566--4575",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT111946"}

@inproceedings{bb115750,
        AUTHOR = "Fang, H. and Gupta, S. and Iandola, F. and Srivastava, R. K. and Deng, L. and Dollar, P. and Gao, J. F. and He, X. D. and Mitchell, M. and Platt, J. C. and Zitnick, C. L. and Zweig, G.",
        TITLE = "From captions to visual concepts and back",
        BOOKTITLE = CVPR15,
        YEAR = "2015",
        PAGES = "1473--1482",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT111947"}

@inproceedings{bb115751,
        AUTHOR = "Ramnath, K. and Baker, S. and Vanderwende, L. and El-Saban, M. and Sinha, S. N. and Kannan, A. and Hassan, N. and Galley, M. and Yang, Y. and Ramanan, D. and Bergamo, A. and Torresani, L.",
        TITLE = "{AutoCaption}: Automatic caption generation for personal photos",
        BOOKTITLE = WACV14,
        YEAR = "2014",
        PAGES = "1050--1057",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT111948"}

@article{bb115752,
        AUTHOR = "Verma, Y. and Jawahar, C. V.",
        TITLE = "A support vector approach for cross-modal search of images and texts",
        JOURNAL = CVIU,
        VOLUME = "154",
        YEAR = "2017",
        NUMBER = "1",
        PAGES = "48--63",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT111949"}

@inproceedings{bb115753,
        AUTHOR = "Dutta, A. and Verma, Y. and Jawahar, C. V.",
        TITLE = "Recurrent Image Annotation with Explicit Inter-Label Dependencies",
        BOOKTITLE = ECCV20,
        YEAR = "2020",
        PAGES = "XXIX: 191--207",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT111950"}

@article{bb115754,
        AUTHOR = "Xue, J. F. and Eguchi, K.",
        TITLE = "Video Data Modeling Using Sequential Correspondence Hierarchical {Dirichlet} Processes",
        JOURNAL = IEICE,
        VOLUME = "E100-D",
        YEAR = "2017",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "33--41",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT111951"}

@article{bb115755,
        AUTHOR = "Liu, A. A. and Xu, N. and Wong, Y. K. and Li, J. and Su, Y. T. and Kankanhalli, M.",
        TITLE = "Hierarchical \& multimodal video captioning: Discovering and transferring multimodal knowledge for vision to language",
        JOURNAL = CVIU,
        VOLUME = "163",
        YEAR = "2017",
        NUMBER = "1",
        PAGES = "113--125",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT111952"}

@article{bb115756,
        AUTHOR = "Guan, J. N. and Wang, E.",
        TITLE = "Repeated review based image captioning for image evidence review",
        JOURNAL = SP:IC,
        VOLUME = "63",
        YEAR = "2018",
        PAGES = "141--148",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT111953"}

@article{bb115757,
        AUTHOR = "Park, C. C. and Kim, B. and Kim, G.",
        TITLE = "Towards Personalized Image Captioning via Multimodal Memory Networks",
        JOURNAL = PAMI,
        VOLUME = "41",
        YEAR = "2019",
        NUMBER = "4",
        MONTH = apr,
        PAGES = "999--1012",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT111954"}

@inproceedings{bb115758,
        AUTHOR = "Park, C. C. and Kim, B. and Kim, G.",
        TITLE = "Attend to You: Personalized Image Captioning with Context Sequence Memory Networks",
        BOOKTITLE = CVPR17,
        YEAR = "2017",
        PAGES = "6432--6440",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT111955"}

@article{bb115759,
        AUTHOR = "Xian, Y. and Tian, Y.",
        TITLE = "Self-Guiding Multimodal {LSTM}: When We Do Not Have a Perfect Training Dataset for Image Captioning",
        JOURNAL = IP,
        VOLUME = "28",
        YEAR = "2019",
        NUMBER = "11",
        MONTH = nov,
        PAGES = "5241--5252",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT111956"}

@article{bb115760,
        AUTHOR = "Yang, M. and Zhao, W. and Xu, W. and Feng, Y. and Zhao, Z. and Chen, X. and Lei, K.",
        TITLE = "Multitask Learning for Cross-Domain Image Captioning",
        JOURNAL = MultMed,
        VOLUME = "21",
        YEAR = "2019",
        NUMBER = "4",
        MONTH = apr,
        PAGES = "1047--1061",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT111957"}

@article{bb115761,
        AUTHOR = "Yu, N. and Hu, X. and Song, B. and Yang, J. and Zhang, J.",
        TITLE = "Topic-Oriented Image Captioning Based on Order-Embedding",
        JOURNAL = IP,
        VOLUME = "28",
        YEAR = "2019",
        NUMBER = "6",
        MONTH = jun,
        PAGES = "2743--2754",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT111958"}

@article{bb115762,
        AUTHOR = "Li, X. and Xu, C. and Wang, X. and Lan, W. and Jia, Z. and Yang, G. and Xu, J.",
        TITLE = "{COCO-CN} for Cross-Lingual Image Tagging, Captioning, and Retrieval",
        JOURNAL = MultMed,
        VOLUME = "21",
        YEAR = "2019",
        NUMBER = "9",
        MONTH = sep,
        PAGES = "2347--2360",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT111959"}

@article{bb115763,
        AUTHOR = "Tian, C. and Tian, M. and Jiang, M. M. and Liu, H. and Deng, D. H.",
        TITLE = "How much do cross-modal related semantics benefit image captioning by weighting attributes and re-ranking sentences?",
        JOURNAL = PRL,
        VOLUME = "125",
        YEAR = "2019",
        PAGES = "639--645",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT111960"}

@article{bb115764,
        AUTHOR = "Niu, Y. and Lu, Z. and Wen, J. and Xiang, T. and Chang, S.",
        TITLE = "Multi-Modal Multi-Scale Deep Learning for Large-Scale Image Annotation",
        JOURNAL = IP,
        VOLUME = "28",
        YEAR = "2019",
        NUMBER = "4",
        MONTH = apr,
        PAGES = "1720--1731",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT111961"}

@article{bb115765,
        AUTHOR = "Huang, Y. and Chen, J. and Ouyang, W. and Wan, W. and Xue, Y.",
        TITLE = "Image Captioning With End-to-End Attribute Detection and Subsequent Attributes Prediction",
        JOURNAL = IP,
        VOLUME = "29",
        YEAR = "2020",
        PAGES = "4013--4026",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT111962"}

@article{bb115766,
        AUTHOR = "Zhao, W. and Wu, X. and Luo, J.",
        TITLE = "Cross-Domain Image Captioning via Cross-Modal Retrieval and Model Adaptation",
        JOURNAL = IP,
        VOLUME = "30",
        YEAR = "2021",
        PAGES = "1180--1192",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT111963"}

@article{bb115767,
        AUTHOR = "Wang, H. and Du, Y. T. and Zhang, G. X. and Cai, Z. M. and Su, C.",
        TITLE = "Learning Fundamental Visual Concepts Based on Evolved Multi-Edge Concept Graph",
        JOURNAL = MultMed,
        VOLUME = "23",
        YEAR = "2021",
        PAGES = "4400--4413",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT111964"}

@inproceedings{bb115768,
        AUTHOR = "Kuo, C. W. and Kira, Z.",
        TITLE = "Beyond a Pre-Trained Object Detector: Cross-Modal Textual and Visual Context for Image Captioning",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "17948--17958",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT111965"}

@inproceedings{bb115769,
        AUTHOR = "Zhou, M. Y. and Zhou, L. W. and Wang, S. H. and Cheng, Y. and Li, L. J. and Yu, Z. and Liu, J. J.",
        TITLE = "{UC2}: Universal Cross-lingual Cross-modal Vision-and-Language Pre-training",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "4153--4163",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT111966"}

@inproceedings{bb115770,
        AUTHOR = "Laina, I. and Rupprecht, C. and Navab, N.",
        TITLE = "Towards Unsupervised Image Captioning With Shared Multimodal Embeddings",
        BOOKTITLE = ICCV19,
        YEAR = "2019",
        PAGES = "7413--7423",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT111967"}

@inproceedings{bb115771,
        AUTHOR = "Akbari, H. and Karaman, S. and Bhargava, S. and Chen, B. and Vondrick, C. and Chang, S. F.",
        TITLE = "Multi-Level Multimodal Common Semantic Space for Image-Phrase Grounding",
        BOOKTITLE = CVPR19,
        YEAR = "2019",
        PAGES = "12468--12478",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT111968"}

@inproceedings{bb115772,
        AUTHOR = "Chen, T. H. and Liao, Y. H. and Chuang, C. Y. and Hsu, W. T. and Fu, J. and Sun, M.",
        TITLE = "Show, Adapt and Tell: Adversarial Training of Cross-Domain Image Captioner",
        BOOKTITLE = ICCV17,
        YEAR = "2017",
        PAGES = "521--530",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT111969"}

@inproceedings{bb115773,
        AUTHOR = "Niu, Z. X. and Zhou, M. and Wang, L. and Gao, X. B. and Hua, G.",
        TITLE = "Hierarchical Multimodal {LSTM} for Dense Visual-Semantic Embedding",
        BOOKTITLE = ICCV17,
        YEAR = "2017",
        PAGES = "1899--1907",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT111970"}

@inproceedings{bb115774,
        AUTHOR = "Pini, S. and Cornia, M. and Baraldi, L. and Cucchiara, R.",
        TITLE = "Towards Video Captioning with Naming: A Novel Dataset and a Multi-modal Approach",
        BOOKTITLE = CIAP17,
        YEAR = "2017",
        PAGES = "II: 384--395",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT111971"}

@inproceedings{bb115775,
        AUTHOR = "Pan, J. Y. and Yang, H. J. and Faloutsos, C.",
        TITLE = "{MMSS}: Graph-based Multi-modal Story-oriented Video Summarization and Retrieval",
        BOOKTITLE = CMU-CS-TR,
        YEAR = "2004",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT111972"}

@inproceedings{bb115776,
        AUTHOR = "Pan, J. Y. and Yang, H. J. and Faloutsos, C. and Duygulu, P.",
        TITLE = "{GCap}: Graph-based Automatic Image Captioning",
        BOOKTITLE = MMDE04,
        YEAR = "2004",
        PAGES = "146",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT111973"}

@inproceedings{bb115777,
        AUTHOR = "Pan, J. Y.",
        TITLE = "Advanced Tools for Video and Multimedia Mining",
        BOOKTITLE = CMU-CS,
        YEAR = "2006",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT111974"}

@phdthesis{bb115778,
        AUTHOR = "Pan, J. Y.",
        TITLE = "Advanced Tools for Video and Multimedia Mining",
        SCHOOL = "Carnegie Mellon University",
        BOOKTITLE = Ph.D.,
        YEAR = "2006",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607mmic3.html#TT111974"}

@article{bb115779,
        AUTHOR = "Yu, J. and Li, J. and Yu, Z. and Huang, Q.",
        TITLE = "Multimodal Transformer With Multi-View Visual Representation for Image Captioning",
        JOURNAL = CirSysVideo,
        VOLUME = "30",
        YEAR = "2020",
        NUMBER = "12",
        MONTH = dec,
        PAGES = "4467--4480",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607tic2.html#TT111975"}

@article{bb115780,
        AUTHOR = "Zhang, Y. and Shi, X. Y. and Mi, S. and Yang, X.",
        TITLE = "Image captioning with transformer and knowledge graph",
        JOURNAL = PRL,
        VOLUME = "143",
        YEAR = "2021",
        PAGES = "43--49",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607tic2.html#TT111976"}

@article{bb115781,
        AUTHOR = "Yan, C. G. and Hao, Y. M. and Li, L. and Yin, J. and Liu, A. and Mao, Z. and Chen, Z. Y. and Gao, X. Y.",
        TITLE = "Task-Adaptive Attention for Image Captioning",
        JOURNAL = CirSysVideo,
        VOLUME = "32",
        YEAR = "2022",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "43--51",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607tic2.html#TT111977"}

@article{bb115782,
        AUTHOR = "Yuan, J. and Zhu, S. and Huang, S. Y. and Zhang, H. W. and Xiao, Y. Q. and Li, Z. Y. and Wang, M.",
        TITLE = "Discriminative Style Learning for Cross-Domain Image Captioning",
        JOURNAL = IP,
        VOLUME = "31",
        YEAR = "2022",
        PAGES = "1723--1736",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607tic2.html#TT111978"}

@inproceedings{bb115783,
        AUTHOR = "Zhou, Y. and Zhang, Y. and Hu, Z. Z. and Wang, M.",
        TITLE = "Semi-Autoregressive Transformer for Image Captioning",
        BOOKTITLE = CLVL21,
        YEAR = "2021",
        PAGES = "3132--3136",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607tic2.html#TT111979"}

@article{bb115784,
        AUTHOR = "Ren, Z. H. and Gou, S. P. and Guo, Z. and Mao, S. S. and Li, R. M.",
        TITLE = "A Mask-Guided Transformer Network with Topic Token for Remote Sensing Image Captioning",
        JOURNAL = RS,
        VOLUME = "14",
        YEAR = "2022",
        NUMBER = "12",
        INTERNAL-NOTE = "Page range unknown: upstream record carried the placeholder xx-yy",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607tic2.html#TT111980"}

@article{bb115785,
        AUTHOR = "Ji, J. Y. and Ma, Y. and Sun, X. S. and Zhou, Y. and Wu, Y. J. and Ji, R. R.",
        TITLE = "Knowing What to Learn: A Metric-Oriented Focal Mechanism for Image Captioning",
        JOURNAL = IP,
        VOLUME = "31",
        YEAR = "2022",
        PAGES = "4321--4335",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607tic2.html#TT111981"}

@article{bb115786,
        AUTHOR = "Li, X. and Zhang, W. K. and Sun, X. and Gao, X.",
        TITLE = "Semantic-meshed and content-guided transformer for image captioning",
        JOURNAL = IET-CV,
        VOLUME = "16",
        YEAR = "2022",
        NUMBER = "5",
        PAGES = "431--444",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607tic2.html#TT111982"}

% MONTH uses the standard three-letter macro so each style renders the month name itself.
@article{bb115787,
        AUTHOR = "Xian, T.T. and Li, Z.X. and Tang, Z.J. and Ma, H.F.",
        TITLE = "Adaptive Path Selection for Dynamic Image Captioning",
        JOURNAL = CirSysVideo,
        VOLUME = "32",
        YEAR = "2022",
        NUMBER = "9",
        MONTH = sep,
        PAGES = "5762--5775",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607tic2.html#TT111983"}

% The acronym is braced so sentence-casing styles keep its capitalization.
@inproceedings{bb115788,
        AUTHOR = "Vo, D.M. and Chen, H. and Sugimoto, A. and Nakayama, H.",
        TITLE = "{NOC-REK}: Novel Object Captioning with Retrieved Vocabulary from External Knowledge",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "17979--17987",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607tic2.html#TT111984"}

% The model name and "3D" are braced so sentence-casing styles keep their capitalization.
@inproceedings{bb115789,
        AUTHOR = "Yuan, Z.H. and Yan, X. and Liao, Y.H. and Guo, Y. and Li, G.B. and Cui, S.G. and Li, Z.",
        TITLE = "{X-Trans2Cap}: Cross-Modal Knowledge Transfer using Transformer for {3D} Dense Captioning",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "8553--8563",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607tic2.html#TT111985"}

@inproceedings{bb115790,
        AUTHOR = "Liu, B. and Wang, D. and Yang, X. and Zhou, Y. and Yao, R. and Shao, Z.W. and Zhao, J.Q.",
        TITLE = "Show, Deconfound and Tell: Image Captioning with Causal Inference",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "18020--18029",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607tic2.html#TT111986"}

@inproceedings{bb115791,
        AUTHOR = "Fang, Z.Y. and Wang, J.F. and Hu, X.W. and Liang, L. and Gan, Z. and Wang, L.J. and Yang, Y.Z. and Liu, Z.C.",
        TITLE = "Injecting Semantic Concepts into End-to-End Image Captioning",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "17988--17998",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607tic2.html#TT111987"}

@inproceedings{bb115792,
        AUTHOR = "Li, Y. and Pan, Y. and Yao, T. and Mei, T.",
        TITLE = "Comprehending and Ordering Semantics for Image Captioning",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "17969--17978",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607tic2.html#TT111988"}

@inproceedings{bb115793,
        AUTHOR = "Hu, X.W. and Gan, Z. and Wang, J.F. and Yang, Z.Y. and Liu, Z.C. and Lu, Y. and Wang, L.J.",
        TITLE = "Scaling Up Vision-Language Pretraining for Image Captioning",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "17959--17968",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607tic2.html#TT111989"}

% The model name is braced so sentence-casing styles keep its capitalization.
@inproceedings{bb115794,
        AUTHOR = "Fei, Z.C. and Yan, X. and Wang, S.H. and Tian, Q.",
        TITLE = "{DeeCap}: Dynamic Early Exiting for Efficient Image Captioning",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "12206--12216",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607tic2.html#TT111990"}

% The model name is braced so sentence-casing styles keep its capitalization.
@inproceedings{bb115795,
        AUTHOR = "Wu, M.R. and Zhang, X.Y. and Sun, X.S. and Zhou, Y. and Chen, C. and Gu, J.X. and Sun, X. and Ji, R.R.",
        TITLE = "{DIFNet}: Boosting Visual Information Flow for Image Captioning",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "17999--18008",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607tic2.html#TT111991"}

@inproceedings{bb115796,
        AUTHOR = "Rio Torto, I. and Cardoso, J.S. and Teixeira, L.F.",
        TITLE = "From Captions to Explanations: A Multimodal Transformer-based Architecture for Natural Language Explanation Generation",
        BOOKTITLE = IbPRIA22,
        YEAR = "2022",
        PAGES = "54--65",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607tic2.html#TT111992"}

@inproceedings{bb115797,
        AUTHOR = "Chen, H.S. and Wang, Y. and Yang, X. and Li, J.",
        TITLE = "Captioning Transformer With Scene Graph Guiding",
        BOOKTITLE = ICIP21,
        YEAR = "2021",
        PAGES = "2538--2542",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607tic2.html#TT111993"}

% The model name is braced so sentence-casing styles keep its capitalization.
@inproceedings{bb115798,
        AUTHOR = "Zhang, P.C. and Li, X.J. and Hu, X.W. and Yang, J.W. and Zhang, L. and Wang, L.J. and Choi, Y.J. and Gao, J.F.",
        TITLE = "{VinVL}: Revisiting Visual Representations in Vision-Language Models",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "5575--5584",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607tic2.html#TT111994"}

% The model name is braced so sentence-casing styles keep its capitalization.
@inproceedings{bb115799,
        AUTHOR = "Zhang, X.Y. and Sun, X.S. and Luo, Y.P. and Ji, J.Y. and Zhou, Y. and Wu, Y.J. and Huang, F.Y. and Ji, R.R.",
        TITLE = "{RSTNet}: Captioning with Adaptive Attention on Visual and Non-Visual Words",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "15460--15469",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607tic2.html#TT111995"}

Last update: Jun 1, 2023 at 10:05:03