@inproceedings{bb135900,
AUTHOR = "Cai, S. J. and Zuo, W. M. and Zhang, L.",
TITLE = "Higher-Order Integration of Hierarchical Convolutional Activations
for Fine-Grained Visual Categorization",
BOOKTITLE = ICCV17,
YEAR = "2017",
PAGES = "511--520",
BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT131895"}
@inproceedings{bb135901,
AUTHOR = "Kong, S. and Fowlkes, C. C.",
TITLE = "Pixel-Wise Attentional Gating for Scene Parsing",
BOOKTITLE = WACV19,
YEAR = "2019",
PAGES = "1024--1033",
BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT131896"}
@inproceedings{bb135902,
AUTHOR = "Kong, S. and Fowlkes, C. C.",
TITLE = "Recurrent Scene Parsing with Perspective Understanding in the Loop",
BOOKTITLE = CVPR18,
YEAR = "2018",
PAGES = "956--965",
BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT131897"}
@inproceedings{bb135903,
AUTHOR = "Zheng, H. and Fu, J. and Mei, T. and Luo, J.",
TITLE = "Learning Multi-attention Convolutional Neural Network for
Fine-Grained Image Recognition",
BOOKTITLE = ICCV17,
YEAR = "2017",
PAGES = "5219--5227",
BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT131898"}
@inproceedings{bb135904,
AUTHOR = "Fu, J. and Zheng, H. and Mei, T.",
TITLE = "Look Closer to See Better: Recurrent Attention Convolutional Neural
Network for Fine-Grained Image Recognition",
BOOKTITLE = CVPR17,
YEAR = "2017",
PAGES = "4476--4484",
BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT131899"}
@inproceedings{bb135905,
AUTHOR = "Ge, Z. Y. and McCool, C. and Sanderson, C. and Wang, P. and Liu, L. Q. and Reid, I. D. and Corke, P.",
TITLE = "Exploiting Temporal Information for {DCNN}-Based Fine-Grained Object
Classification",
BOOKTITLE = DICTA16,
YEAR = "2016",
PAGES = "1--6",
BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT131900"}
@inproceedings{bb135906,
AUTHOR = "Ai, S. S. and Jia, C. Y. and Chen, Z. N.",
TITLE = "Large-Scale Product Classification via Spatial Attention Based {CNN}
Learning and Multi-class Regression",
BOOKTITLE = MMMod17,
YEAR = "2017",
PAGES = "I: 176--188",
BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT131901"}
@inproceedings{bb135907,
AUTHOR = "Diba, A. and Pazandeh, A. M. and Pirsiavash, H. and Van Gool, L. J.",
TITLE = "{DeepCAMP}: Deep Convolutional Action Attribute Mid-Level Patterns",
BOOKTITLE = CVPR16,
YEAR = "2016",
PAGES = "3557--3565",
BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT131902"}
@inproceedings{bb135908,
AUTHOR = "Chevalier, M. and Thome, N. and Cord, M. and Fournier, J. and Henaff, G. and Dusch, E.",
TITLE = "{LR-CNN} for fine-grained classification with varying resolution",
BOOKTITLE = ICIP15,
YEAR = "2015",
PAGES = "3101--3105",
BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT131903"}
@inproceedings{bb135909,
AUTHOR = "Ge, Z. and Bewley, A. and McCool, C. and Corke, P. and Upcroft, B. and Sanderson, C.",
TITLE = "Fine-grained classification via mixture of deep convolutional neural
networks",
BOOKTITLE = WACV16,
YEAR = "2016",
PAGES = "1--6",
BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT131904"}
@inproceedings{bb135910,
AUTHOR = "Ge, Z. and McCool, C. and Sanderson, C. and Corke, P.",
TITLE = "Modelling local deep convolutional neural network features to improve
fine-grained image classification",
BOOKTITLE = ICIP15,
YEAR = "2015",
PAGES = "4112--4116",
BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT131905"}
@article{bb135911,
AUTHOR = "Taylor, S. L. and Dahl, D. A. and Lipshutz, M. and Weir, C. and Norton, L. M. and Nilson, R. W. and Linebarger, M. C.",
TITLE = "Integrating Natural-Language Understanding with
Document Structure-Analysis",
JOURNAL = AIR,
VOLUME = "8",
YEAR = "1994",
NUMBER = "2-3",
PAGES = "255--276",
BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT131906"}
@article{bb135912,
AUTHOR = "McKevitt, P.",
TITLE = "Integration of Natural-Language and Vision Processing: Theory",
JOURNAL = AIR,
VOLUME = "9",
YEAR = "1995",
NUMBER = "4-5",
MONTH = oct,
PAGES = "247--250",
BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT131907"}
@book{bb135913,
  author    = {McKevitt, P.},
  title     = {Integration of Natural-Language and Vision Processing: Theory},
  publisher = {Springer},
  year      = {1995},
  bibsource = {http://www.visionbib.com/bibliography/match605.html#TT131907}
}
@article{bb135914,
AUTHOR = "McKevitt, P.",
TITLE = "Integration of Natural-Language and Vision Processing:
Intelligent Multimedia",
JOURNAL = AIR,
VOLUME = "9",
YEAR = "1995",
NUMBER = "2-3",
MONTH = jun,
PAGES = "77--80",
BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT131908"}
@article{bb135915,
AUTHOR = "McKevitt, P.",
TITLE = "Integration of Natural-Language and Vision Processing:
More Computational Models and Systems",
JOURNAL = AIR,
VOLUME = "8",
YEAR = "1995",
NUMBER = "5-6",
PAGES = "345--348",
BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT131909"}
@article{bb135916,
AUTHOR = "McKevitt, P.",
TITLE = "Integration of Natural-Language and Vision Processing:
Computational Models and Systems",
JOURNAL = AIR,
VOLUME = "8",
YEAR = "1994",
NUMBER = "2-3",
PAGES = "99--104",
BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT131910"}
@book{bb135917,
  author    = {McKevitt, P.},
  title     = {Integration of Natural-Language and Vision Processing: Computational Models and Systems},
  publisher = {Kluwer},
  year      = {1995},
  bibsource = {http://www.visionbib.com/bibliography/match605.html#TT131910}
}
@article{bb135918,
AUTHOR = "McKevitt, P.",
TITLE = "Integration of Natural-Language and Vision Processing:
Grounding Representations",
JOURNAL = AIR,
VOLUME = "10",
YEAR = "1996",
NUMBER = "1-2",
MONTH = apr,
PAGES = "7--13",
BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT131911"}
@book{bb135919,
AUTHOR = "McKevitt, P.",
TITLE = "Integration of Natural Language and Vision Processing",
PUBLISHER = "Kluwer",
YEAR = "1996",
MONTH = sep,
BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT131912"}
@article{bb135920,
AUTHOR = "Siskind, J. M.",
TITLE = "Grounding Language in Perception",
JOURNAL = AIR,
VOLUME = "8",
YEAR = "1995",
NUMBER = "5-6",
PAGES = "371--391",
BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT131913"}
@article{bb135921,
AUTHOR = "Srihari, R. K.",
TITLE = "Automatic-Indexing and Content-Based Retrieval of Captioned Images",
JOURNAL = Computer,
VOLUME = "28",
YEAR = "1995",
NUMBER = "9",
MONTH = sep,
PAGES = "49--56",
BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT131914"}
@article{bb135922,
AUTHOR = "Srihari, R. K.",
TITLE = "Use of Captions and Other Collateral Text in Understanding Photographs",
JOURNAL = AIR,
VOLUME = "8",
YEAR = "1995",
NUMBER = "5-6",
PAGES = "409--430",
BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT131915"}
@article{bb135923,
AUTHOR = "Srihari, R. K.",
TITLE = "Use of Collateral Text in Understanding Photos in Documents",
JOURNAL = SPIE,
VOLUME = "2368",
YEAR = "1994",
PAGES = "186--199",
BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT131916"}
@article{bb135924,
AUTHOR = "Srihari, R. K.",
TITLE = "Computational Models for Integrating Linguistic and Visual Information:
A Survey",
JOURNAL = AIR,
VOLUME = "8",
YEAR = "1995",
NUMBER = "5-6",
PAGES = "349--369",
BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT131917"}
@inproceedings{bb135925,
AUTHOR = "Srihari, R. K.",
TITLE = "Using Linguistic Context for Image Interpretation and Annotation",
BOOKTITLE = "Radius97",
YEAR = "1997",
PAGES = "419--427",
BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT131918"}
@inproceedings{bb135926,
AUTHOR = "Srihari, R. K.",
TITLE = "Linguistic Context In Vision",
BOOKTITLE = Context95,
YEAR = "1995",
BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT131919"}
@inproceedings{bb135927,
AUTHOR = "Srihari, R. K. and Zhang, Z. and Venkatraman, M. and Chopra, R.",
TITLE = "Using Speech Input for Image Interpretation and Annotation",
BOOKTITLE = ARPA96,
YEAR = "1996",
PAGES = "501--510",
BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT131920"}
@inproceedings{bb135928,
AUTHOR = "Burhans, D. T. and Chopra, R. and Srihari, R. K. and Govindaraju, V. and Venkataraman, M.",
TITLE = "Use of Collateral Text in Image Interpretation",
BOOKTITLE = ARPA94,
YEAR = "1994",
PAGES = "II:897--907",
BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT131921"}
@inproceedings{bb135929,
AUTHOR = "Srihari, R. K. and Burhans, D. T.",
TITLE = "Visual Semantics: Extracting Visual Information from
Text Accompanying Pictures",
BOOKTITLE = AAAI-94,
YEAR = "1994",
PAGES = "793--798",
BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT131922"}
@inproceedings{bb135930,
AUTHOR = "Govindaraju, V. and Srihari, S. N. and Sher, D. B.",
TITLE = "Caption-Aided Face Location In Newspaper Photographs",
BOOKTITLE = ICPR92,
YEAR = "1992",
PAGES = "I:474--477",
BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT131923"}
@inproceedings{bb135931,
AUTHOR = "Govindaraju, V. and Srihari, S. N. and Sher, D. B.",
TITLE = "A Computational Model for Face Location Based on Cognitive Principles",
BOOKTITLE = AAAI-92,
YEAR = "1992",
PAGES = "350--355",
BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT131924"}
@article{bb135932,
AUTHOR = "Schank, R. C. and Fano, A.",
TITLE = "Memory and Expectations in Learning, Language, and Visual Understanding",
JOURNAL = AIR,
VOLUME = "9",
YEAR = "1995",
NUMBER = "4-5",
MONTH = oct,
PAGES = "261--271",
BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT131925"}
@article{bb135933,
AUTHOR = "Wilks, Y.",
TITLE = "Language, Vision and Metaphor",
JOURNAL = AIR,
VOLUME = "9",
YEAR = "1995",
NUMBER = "4-5",
MONTH = oct,
PAGES = "273--289",
BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT131926"}
@article{bb135934,
AUTHOR = "Partridge, D.",
TITLE = "Language and Vision: A Single Perceptual Mechanism",
JOURNAL = AIR,
VOLUME = "9",
YEAR = "1995",
NUMBER = "4-5",
MONTH = oct,
PAGES = "291--303",
BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT131927"}
@article{bb135935,
AUTHOR = "Marconi, D.",
TITLE = "Work on the Integration of Language and Vision at the
{University of Torino}",
JOURNAL = AIR,
VOLUME = "10",
YEAR = "1996",
NUMBER = "1-2",
MONTH = apr,
PAGES = "15--20",
BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT131928"}
@article{bb135936,
AUTHOR = "Meini, C. and Paternoster, A.",
TITLE = "Understanding Language Through Vision",
JOURNAL = AIR,
VOLUME = "10",
YEAR = "1996",
NUMBER = "1-2",
MONTH = apr,
PAGES = "37--48",
BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT131929"}
@article{bb135937,
AUTHOR = "McKevitt, P. and Guo, C. M.",
TITLE = "From {Chinese} Rooms to {Irish} Rooms: New Words on Visions for Language",
JOURNAL = AIR,
VOLUME = "10",
YEAR = "1996",
NUMBER = "1-2",
MONTH = apr,
PAGES = "49--63",
BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT131930"}
@article{bb135938,
AUTHOR = "Grumbach, A.",
TITLE = "Grounding Symbols into Perceptions",
JOURNAL = AIR,
VOLUME = "10",
YEAR = "1996",
NUMBER = "1-2",
MONTH = apr,
PAGES = "131--146",
BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT131931"}
@article{bb135939,
AUTHOR = "Socher, G. and Sagerer, G. F. and Perona, P.",
TITLE = "{Bayesian} reasoning on qualitative descriptions from images and speech",
JOURNAL = IVC,
VOLUME = "18",
YEAR = "2000",
NUMBER = "2",
MONTH = jan,
PAGES = "155--172",
BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT131932"}
@article{bb135940,
AUTHOR = "Mukerjee, A. and Gupta, K. and Nautiyal, S. and Singh, M. P. and Mishra, N.",
TITLE = "Conceptual description of visual scenes from linguistic models",
JOURNAL = IVC,
VOLUME = "18",
YEAR = "2000",
NUMBER = "2",
MONTH = jan,
PAGES = "173--187",
BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT131933"}
@article{bb135941,
AUTHOR = "Arens, M. and Gerber, R. and Nagel, H. H.",
TITLE = "Conceptual representations between video signals and natural language
descriptions",
JOURNAL = IVC,
VOLUME = "26",
YEAR = "2008",
NUMBER = "1",
MONTH = jan,
PAGES = "53--66",
BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT131934"}
@inproceedings{bb135942,
AUTHOR = "Gerber, R. and Nagel, H. H.",
TITLE = "(Mis?-) Using {DRT} for Generation of Natural Language Text
from Image Sequences",
BOOKTITLE = ECCV98,
YEAR = "1998",
PAGES = "II: 255",
BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT131935"}
@article{bb135943,
AUTHOR = "Lee, Y. J. and Grauman, K.",
TITLE = "Object-Graphs for Context-Aware Visual Category Discovery",
JOURNAL = PAMI,
VOLUME = "34",
YEAR = "2012",
NUMBER = "2",
MONTH = feb,
PAGES = "346--358",
BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT131936"}
@inproceedings{bb135944,
AUTHOR = "Lee, Y. J. and Grauman, K.",
TITLE = "Object-graphs for context-aware category discovery",
BOOKTITLE = CVPR10,
YEAR = "2010",
PAGES = "1--8",
BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT131937"}
@inproceedings{bb135945,
AUTHOR = "Lee, Y. J. and Grauman, K.",
TITLE = "Learning the easy things first: Self-paced visual category discovery",
BOOKTITLE = CVPR11,
YEAR = "2011",
PAGES = "1721--1728",
BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT131938"}
@article{bb135946,
AUTHOR = "Yu, A. and Grauman, K.",
TITLE = "Densifying Supervision for Fine-Grained Visual Comparisons",
JOURNAL = IJCV,
VOLUME = "128",
YEAR = "2020",
NUMBER = "10-11",
MONTH = nov,
PAGES = "2704--2730",
BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT131939"}
@inproceedings{bb135947,
AUTHOR = "Hessel, J. and Hwang, J. D. and Park, J. S. and Zellers, R. and Bhagavatula, C. and Rohrbach, A. and Saenko, K. and Choi, Y.",
TITLE = "The Abduction of {Sherlock Holmes}:
A Dataset for Visual Abductive Reasoning",
BOOKTITLE = ECCV22,
YEAR = "2022",
PAGES = "XXXVI:558--575",
BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT131940"}
@inproceedings{bb135948,
  author    = {Perona, P.},
  title     = {A taxonomy of visual recognition},
  booktitle = VMV04,
  year      = {2004},
  pages     = {187},
  bibsource = {http://www.visionbib.com/bibliography/match605.html#TT131941}
}
@inproceedings{bb135949,
AUTHOR = "Takahashi, T. and Nakanishi, S. and Kuno, Y. and Shirai, Y.",
TITLE = "Helping Computer Vision by Verbal and Nonverbal Communication",
BOOKTITLE = ICPR98,
YEAR = "1998",
PAGES = "Vol II: 1216--1218",
BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT131942"}
@inproceedings{bb135950,
AUTHOR = "Satoh, S. and Nakamura, Y. and Kanade, T.",
TITLE = "{Name-It}: Naming and Detecting Faces in Video by the Integration
of Image and Natural Language Processing",
BOOKTITLE = IJCAI97,
YEAR = "1997",
PAGES = "1488--1495",
BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT131943"}
@inproceedings{bb135951,
AUTHOR = "Satoh, S. and Kanade, T.",
TITLE = "{Name-It}: Association Of Face And Name In Video",
BOOKTITLE = CVPR97,
YEAR = "1997",
PAGES = "368--373",
BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT131944"}
@inproceedings{bb135952,
AUTHOR = "Socher, G. and Sagerer, G. F. and Kummert, F. and Fuhr, T.",
TITLE = "Talking About {3D} Scenes: Integration of Image and Speech Understanding
in a Hybrid Distributed System",
BOOKTITLE = ICIP96,
YEAR = "1996",
PAGES = "II: 809--812",
BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT131945"}
@article{bb135953,
AUTHOR = "Kuniyoshi, Y. and Inaba, M. and Inoue, H.",
TITLE = "Learning by Watching: Extracting Reusable Task Knowledge from
Visual Observation of Human Performance",
JOURNAL = RA,
VOLUME = "10",
YEAR = "1994",
PAGES = "799--822",
BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT131946"}
@inproceedings{bb135954,
AUTHOR = "Kuniyoshi, Y. and Inoue, H.",
TITLE = "Indexicality and dynamic attention control in qualitative recognition
of assembly actions",
BOOKTITLE = ECCV92,
YEAR = "1992",
PAGES = "874--878",
BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT131947"}
@article{bb135955,
AUTHOR = "Porway, J. and Wang, Q. C. and Zhu, S. C.",
TITLE = "A Hierarchical and Contextual Model for Aerial Image Parsing",
JOURNAL = IJCV,
VOLUME = "88",
YEAR = "2010",
NUMBER = "2",
MONTH = jun,
PAGES = "254--283",
BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT131948"}
@inproceedings{bb135956,
AUTHOR = "Porway, J. and Wang, K. and Yao, B. and Zhu, S. C.",
TITLE = "A hierarchical and contextual model for aerial image understanding",
BOOKTITLE = CVPR08,
YEAR = "2008",
PAGES = "1--8",
BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT131949"}
@inproceedings{bb135957,
AUTHOR = "Si, Z. Z. and Gong, H. F. and Wu, Y. N. and Zhu, S. C.",
TITLE = "Learning mixed templates for object recognition",
BOOKTITLE = CVPR09,
YEAR = "2009",
PAGES = "272--279",
BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT131950"}
@article{bb135958,
AUTHOR = "Tu, Z. W. and Bai, X.",
TITLE = "Auto-Context and Its Application to High-Level Vision Tasks and {3D}
Brain Image Segmentation",
JOURNAL = PAMI,
VOLUME = "32",
YEAR = "2010",
NUMBER = "10",
MONTH = oct,
PAGES = "1744--1757",
BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT131951"}
@inproceedings{bb135959,
AUTHOR = "Tu, Z. W.",
TITLE = "Auto-context and its application to high-level vision tasks",
BOOKTITLE = CVPR08,
YEAR = "2008",
PAGES = "1--8",
BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT131952"}
@inproceedings{bb135960,
AUTHOR = "Jones, J. and Hager, G. D. and Khudanpur, S.",
TITLE = "Toward Computer Vision Systems That Understand Real-World Assembly
Processes",
BOOKTITLE = WACV19,
YEAR = "2019",
PAGES = "426--434",
BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT131953"}
@inproceedings{bb135961,
AUTHOR = "Lampert, C. H.",
TITLE = "Partitioning of image datasets using discriminative context information",
BOOKTITLE = CVPR08,
YEAR = "2008",
PAGES = "1--8",
BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT131954"}
@inproceedings{bb135962,
AUTHOR = "Hansen, C. and Henderson, T. C.",
TITLE = "Towards the Automatic Generation of Recognition Strategies",
BOOKTITLE = ICCV88,
YEAR = "1988",
PAGES = "275--279",
BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT131955"}
@inproceedings{bb135963,
AUTHOR = "Cantoni, V. and Cei, U. and Ferretti, M. and Lombardi, L.",
TITLE = "Towards an Automatic Construction of Object Recognition Strategies",
BOOKTITLE = ICPR88,
YEAR = "1988",
PAGES = "I: 371--374",
BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT131956"}
@inproceedings{bb135964,
AUTHOR = "Cova, G. and Griffini, A. and Lombardi, L.",
TITLE = "Object Recognition Strategy in a Multi-Resolution System",
BOOKTITLE = CIAP89,
YEAR = "1989",
PAGES = "729--733",
BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT131957"}
@article{bb135965,
AUTHOR = "Feng, Y. S. and Lapata, M.",
TITLE = "Automatic Caption Generation for News Images",
JOURNAL = PAMI,
VOLUME = "35",
YEAR = "2013",
NUMBER = "4",
MONTH = apr,
PAGES = "797--812",
BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131958"}
@article{bb135966,
AUTHOR = "Vinyals, O. and Toshev, A. and Bengio, S. and Erhan, D.",
TITLE = "Show and Tell: Lessons Learned from the 2015 {MSCOCO} Image Captioning
Challenge",
JOURNAL = PAMI,
VOLUME = "39",
YEAR = "2017",
NUMBER = "4",
MONTH = apr,
PAGES = "652--663",
BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131959"}
@inproceedings{bb135967,
AUTHOR = "Vinyals, O. and Toshev, A. and Bengio, S. and Erhan, D.",
TITLE = "Show and tell: A neural image caption generator",
BOOKTITLE = CVPR15,
YEAR = "2015",
PAGES = "3156--3164",
BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131960"}
@article{bb135968,
AUTHOR = "Wang, J. Y. and Zhu, X. T. and Gong, S. G.",
TITLE = "Discovering visual concept structure with sparse and incomplete tags",
JOURNAL = AI,
VOLUME = "250",
YEAR = "2017",
NUMBER = "1",
PAGES = "16--36",
BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131961"}
@article{bb135969,
AUTHOR = "Kilickaya, M. and Akkus, B. K. and Cakici, R. and Erdem, A. and Erdem, E. and Ikizler Cinbis, N.",
TITLE = "Data-driven image captioning via salient region discovery",
JOURNAL = IET-CV,
VOLUME = "11",
YEAR = "2017",
NUMBER = "6",
MONTH = sep,
PAGES = "398--406",
BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131962"}
@article{bb135970,
AUTHOR = "He, X. D. and Deng, L.",
TITLE = "Deep Learning for Image-to-Text Generation: A Technical Overview",
JOURNAL = SPMag,
VOLUME = "34",
YEAR = "2017",
NUMBER = "6",
MONTH = nov,
PAGES = "109--116",
BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131963"}
@article{bb135971,
AUTHOR = "Deng, L. and He, X. D.",
TITLE = "Deep Learning for Image-to-Text Generation: A Technical Overview",
JOURNAL = SPMag,
VOLUME = "35",
YEAR = "2018",
NUMBER = "1",
MONTH = jan,
PAGES = "178",
BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131963"}
@article{bb135972,
AUTHOR = "Zhang, M. and Yang, Y. and Zhang, H. and Ji, Y. and Shen, H. T. and Chua, T.",
TITLE = "More is Better: Precise and Detailed Image Captioning Using Online
Positive Recall and Missing Concepts Mining",
JOURNAL = IP,
VOLUME = "28",
YEAR = "2019",
NUMBER = "1",
MONTH = jan,
PAGES = "32--44",
BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131964"}
@article{bb135973,
AUTHOR = "Gella, S. and Keller, F. and Lapata, M.",
TITLE = "Disambiguating Visual Verbs",
JOURNAL = PAMI,
VOLUME = "41",
YEAR = "2019",
NUMBER = "2",
MONTH = feb,
PAGES = "311--322",
BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131965"}
@article{bb135974,
AUTHOR = "He, X. W. and Shi, B. G. and Bai, X. and Xia, G. S. and Zhang, Z. X. and Dong, W. S.",
TITLE = "Image Caption Generation with Part of Speech Guidance",
JOURNAL = PRL,
VOLUME = "119",
YEAR = "2019",
PAGES = "229--237",
BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131966"}
@article{bb135975,
AUTHOR = "Xiao, X. Y. and Wang, L. F. and Ding, K. and Xiang, S. M. and Pan, C. H.",
TITLE = "Dense semantic embedding network for image captioning",
JOURNAL = PR,
VOLUME = "90",
YEAR = "2019",
PAGES = "285--296",
BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131967"}
@article{bb135976,
AUTHOR = "Liu, X. X. and Xu, Q. Y. and Wang, N.",
TITLE = "A survey on deep neural network-based image captioning",
JOURNAL = VC,
VOLUME = "35",
YEAR = "2019",
NUMBER = "3",
MONTH = mar,
PAGES = "445--470",
BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131968"}
@article{bb135977,
AUTHOR = "Hossain, M. Z. and Sohel, F. and Shiratuddin, M. F. and Laga, H.",
TITLE = "A Comprehensive Survey of Deep Learning for Image Captioning",
JOURNAL = Surveys,
VOLUME = "51",
YEAR = "2019",
NUMBER = "6",
MONTH = feb,
PAGES = "Article No 118",
BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131969"}
@article{bb135978,
AUTHOR = "Li, X. and Jiang, S.",
TITLE = "Know More Say Less: Image Captioning Based on Scene Graphs",
JOURNAL = MultMed,
VOLUME = "21",
YEAR = "2019",
NUMBER = "8",
MONTH = aug,
PAGES = "2117--2130",
BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131970"}
@article{bb135979,
AUTHOR = "Sharif, N. and White, L. and Bennamoun, M. and Liu, W. and Shah, S. A. A.",
TITLE = "{LCEval}: Learned Composite Metric for Caption Evaluation",
JOURNAL = IJCV,
VOLUME = "127",
YEAR = "2019",
NUMBER = "10",
MONTH = oct,
PAGES = "1586--1610",
BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131971"}
@article{bb135980,
AUTHOR = "Zhou, L. and Zhang, Y. and Jiang, Y. and Zhang, T. and Fan, W.",
TITLE = "{Re-Caption}: Saliency-Enhanced Image Captioning Through Two-Phase
Learning",
JOURNAL = IP,
VOLUME = "29",
YEAR = "2020",
NUMBER = "1",
PAGES = "694--709",
BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131972"}
@article{bb135981,
AUTHOR = "Xiao, X. and Wang, L. and Ding, K. and Xiang, S. and Pan, C.",
TITLE = "Deep Hierarchical Encoder-Decoder Network for Image Captioning",
JOURNAL = MultMed,
VOLUME = "21",
YEAR = "2019",
NUMBER = "11",
MONTH = nov,
PAGES = "2942--2956",
BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131973"}
@article{bb135982,
AUTHOR = "Jiang, T. and Zhang, Z. and Yang, Y.",
TITLE = "Modeling coverage with semantic embedding for image caption generation",
JOURNAL = VC,
VOLUME = "35",
YEAR = "2019",
NUMBER = "11",
MONTH = nov,
PAGES = "1655--1665",
BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131974"}
@article{bb135983,
AUTHOR = "Chen, X. H. and Zhang, M. X. and Wang, Z. and Zuo, L. and Li, B. and Yang, Y.",
TITLE = "Leveraging unpaired out-of-domain data for image captioning",
JOURNAL = PRL,
VOLUME = "132",
YEAR = "2020",
PAGES = "132--140",
BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131975"}
@article{bb135984,
AUTHOR = "Xu, N. and Zhang, H. and Liu, A. and Nie, W. and Su, Y. and Nie, J. and Zhang, Y.",
TITLE = "Multi-Level Policy and Reward-Based Deep Reinforcement Learning
Framework for Image Captioning",
JOURNAL = MultMed,
VOLUME = "22",
YEAR = "2020",
NUMBER = "5",
MONTH = may,
PAGES = "1372--1383",
BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131976"}
@article{bb135985,
AUTHOR = "Guo, L. and Liu, J. and Lu, S. and Lu, H.",
TITLE = "Show, Tell, and Polish: Ruminant Decoding for Image Captioning",
JOURNAL = MultMed,
VOLUME = "22",
YEAR = "2020",
NUMBER = "8",
MONTH = aug,
PAGES = "2149--2162",
BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131977"}
@article{bb135986,
AUTHOR = "Feng, Q. and Wu, Y. and Fan, H. and Yan, C. and Xu, M. and Yang, Y.",
TITLE = "Cascaded Revision Network for Novel Object Captioning",
JOURNAL = CirSysVideo,
VOLUME = "30",
YEAR = "2020",
NUMBER = "10",
MONTH = oct,
PAGES = "3413--3421",
BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131978"}
@article{bb135987,
AUTHOR = "Shilpa, M. and He, J. and Zhao, Y. J. and Sun, B. and Yu, L. J.",
TITLE = "Feedback evaluations to promote image captioning",
JOURNAL = IET-IPR,
VOLUME = "14",
YEAR = "2020",
NUMBER = "13",
MONTH = nov,
PAGES = "3021--3027",
BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131979"}
@article{bb135988,
AUTHOR = "Liu, H. and Zhang, S. and Lin, K. and Wen, J. and Li, J. and Hu, X.",
TITLE = "Vocabulary-Wide Credit Assignment for Training Image Captioning
Models",
JOURNAL = IP,
VOLUME = "30",
YEAR = "2021",
PAGES = "2450--2460",
BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131980"}
@article{bb135989,
AUTHOR = "Xu, N. and Tian, H. S. and Wang, Y. H. and Nie, W. Z. and Song, D. and Liu, A. A. and Liu, W.",
TITLE = "Coupled-dynamic learning for vision and language:
Exploring Interaction between different tasks",
JOURNAL = PR,
VOLUME = "113",
YEAR = "2021",
PAGES = "107829",
BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131981"}
@article{bb135990,
AUTHOR = "Yang, L. and Wang, H. and Tang, P. and Li, Q.",
TITLE = "{CaptionNet}: A Tailor-made Recurrent Neural Network for Generating
Image Descriptions",
JOURNAL = MultMed,
VOLUME = "23",
YEAR = "2021",
PAGES = "835--845",
BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131982"}
@article{bb135991,
AUTHOR = "Liu, A. A. and Wang, Y. H. and Xu, N. and Liu, S. and Li, X. Y.",
TITLE = "Scene-Graph-Guided message passing network for dense captioning",
JOURNAL = PRL,
VOLUME = "145",
YEAR = "2021",
PAGES = "187--193",
BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131983"}
@article{bb135992,
AUTHOR = "Zhang, L. and Zhang, Y. S. and Zhao, X. and Zou, Z. X.",
TITLE = "Image captioning via proximal policy optimization",
JOURNAL = IVC,
VOLUME = "108",
YEAR = "2021",
PAGES = "104126",
BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131984"}
@article{bb135993,
AUTHOR = "Wu, J. and Chen, T. S. and Wu, H. F. and Yang, Z. and Luo, G. C. and Lin, L.",
TITLE = "Fine-Grained Image Captioning With Global-Local Discriminative
Objective",
JOURNAL = MultMed,
VOLUME = "23",
YEAR = "2021",
PAGES = "2413--2427",
BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131985"}
@article{bb135994,
AUTHOR = "Wu, L. X. and Xu, M. and Sang, L. and Yao, T. and Mei, T.",
TITLE = "Noise Augmented Double-Stream Graph Convolutional Networks for Image
Captioning",
JOURNAL = CirSysVideo,
VOLUME = "31",
YEAR = "2021",
NUMBER = "8",
MONTH = aug,
PAGES = "3118--3127",
BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131986"}
@article{bb135995,
AUTHOR = "Nivedita, M. and Chandrashekar, P. and Mahapatra, S. and Phamila, Y. A. V. and Selvaperumal, S. K.",
TITLE = "Image Captioning for Video Surveillance System using Neural Networks",
JOURNAL = IJIG,
VOLUME = "21",
YEAR = "2021",
NUMBER = "4",
MONTH = oct,
PAGES = "2150044",
BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131987"}
@article{bb135996,
AUTHOR = "Zha, Z. J. and Liu, D. and Zhang, H. W. and Zhang, Y. D. and Wu, F.",
TITLE = "Context-Aware Visual Policy Network for Fine-Grained Image Captioning",
JOURNAL = PAMI,
VOLUME = "44",
YEAR = "2022",
NUMBER = "2",
MONTH = feb,
PAGES = "710--722",
BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131988"}
@article{bb135997,
AUTHOR = "Luo, G. F. and Cheng, L. J. and Jing, C. and Zhao, C. and Song, G. Z.",
TITLE = "A thorough review of models, evaluation metrics, and datasets on
image captioning",
JOURNAL = IET-IPR,
VOLUME = "16",
YEAR = "2022",
NUMBER = "2",
PAGES = "311--332",
BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131989"}
@article{bb135998,
AUTHOR = "Ben, H. X. and Pan, Y. W. and Li, Y. and Yao, T. and Hong, R. C. and Wang, M. and Mei, T.",
TITLE = "Unpaired Image Captioning With Semantic-Constrained Self-Learning",
JOURNAL = MultMed,
VOLUME = "24",
YEAR = "2022",
PAGES = "904--916",
BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131990"}
@article{bb135999,
AUTHOR = "Song, P. P. and Guo, D. and Zhou, J. X. and Xu, M. L. and Wang, M.",
TITLE = "Memorial {GAN} With Joint Semantic Optimization for Unpaired Image
Captioning",
JOURNAL = Cyber,
VOLUME = "53",
YEAR = "2023",
NUMBER = "7",
MONTH = jul,
PAGES = "4388--4399",
BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT131991"}
Last update: Apr 23, 2026 at 15:05:02