@inproceedings{bb127900,
        AUTHOR = "Fu, J. and Zheng, H. and Mei, T.",
        TITLE = "Look Closer to See Better: Recurrent Attention Convolutional Neural
Network for Fine-Grained Image Recognition",
        BOOKTITLE = CVPR17,
        YEAR = "2017",
        PAGES = "4476--4484",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT123957"}

@inproceedings{bb127901,
        AUTHOR = "Ge, Z. Y. and McCool, C. and Sanderson, C. and Wang, P. and Liu, L. Q. and Reid, I. D. and Corke, P.",
        TITLE = "Exploiting Temporal Information for {DCNN}-Based Fine-Grained Object
Classification",
        BOOKTITLE = DICTA16,
        YEAR = "2016",
        PAGES = "1--6",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT123958"}

@inproceedings{bb127902,
        AUTHOR = "Ai, S. S. and Jia, C. Y. and Chen, Z. N.",
        TITLE = "Large-Scale Product Classification via Spatial Attention Based {CNN}
Learning and Multi-class Regression",
        BOOKTITLE = MMMod17,
        YEAR = "2017",
        PAGES = "I: 176--188",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT123959"}

@inproceedings{bb127903,
        AUTHOR = "Diba, A. and Pazandeh, A. M. and Pirsiavash, H. and Van Gool, L. J.",
        TITLE = "{DeepCAMP}: Deep Convolutional Action Attribute Mid-Level Patterns",
        BOOKTITLE = CVPR16,
        YEAR = "2016",
        PAGES = "3557--3565",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT123960"}

@inproceedings{bb127904,
        AUTHOR = "Zhang, H. and Xu, T. and Elhoseiny, M. and Huang, X. L. and Zhang, S. T. and Elgammal, A. E. and Metaxas, D. N.",
        TITLE = "{SPDA-CNN}: Unifying Semantic Part Detection and Abstraction for
Fine-Grained Recognition",
        BOOKTITLE = CVPR16,
        YEAR = "2016",
        PAGES = "1143--1152",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT123961"}

@inproceedings{bb127905,
        AUTHOR = "Chevalier, M. and Thome, N. and Cord, M. and Fournier, J. and Henaff, G. and Dusch, E.",
        TITLE = "{LR-CNN} for fine-grained classification with varying resolution",
        BOOKTITLE = ICIP15,
        YEAR = "2015",
        PAGES = "3101--3105",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT123962"}

@inproceedings{bb127906,
        AUTHOR = "Ge, Z. and Bewley, A. and McCool, C. and Corke, P. and Upcroft, B. and Sanderson, C.",
        TITLE = "Fine-grained classification via mixture of deep convolutional neural
networks",
        BOOKTITLE = WACV16,
        YEAR = "2016",
        PAGES = "1--6",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT123963"}

@inproceedings{bb127907,
        AUTHOR = "Ge, Z. and McCool, C. and Sanderson, C. and Corke, P.",
        TITLE = "Modelling local deep convolutional neural network features to improve
fine-grained image classification",
        BOOKTITLE = ICIP15,
        YEAR = "2015",
        PAGES = "4112--4116",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT123964"}

@inproceedings{bb127908,
        AUTHOR = "Zhang, N. and Donahue, J. and Girshick, R. and Darrell, T. J.",
        TITLE = "Part-Based {R-CNNs} for Fine-Grained Category Detection",
        BOOKTITLE = ECCV14,
        YEAR = "2014",
        PAGES = "I: 834--849",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT123965"}

@article{bb127909,
        AUTHOR = "Taylor, S. L. and Dahl, D. A. and Lipshutz, M. and Weir, C. and Norton, L. M. and Nilson, R. W. and Linebarger, M. C.",
        TITLE = "Integrating Natural-Language Understanding with
Document Structure-Analysis",
        JOURNAL = AIR,
        VOLUME = "8",
        YEAR = "1994",
        NUMBER = "2-3",
        PAGES = "255--276",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123966"}

@article{bb127910,
        AUTHOR = "McKevitt, P.",
        TITLE = "Integration of Natural-Language and Vision Processing: Theory",
        JOURNAL = AIR,
        VOLUME = "9",
        YEAR = "1995",
        NUMBER = "4-5",
        MONTH = oct,
        PAGES = "247--250",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123967"}

@book{bb127911,
  author    = {McKevitt, P.},
  title     = {Integration of Natural-Language and Vision Processing: Theory},
  publisher = {Springer},
  year      = {1995},
  bibsource = {http://www.visionbib.com/bibliography/match605.html#TT123967},
}

@article{bb127912,
        AUTHOR = "McKevitt, P.",
        TITLE = "Integration of Natural-Language and Vision Processing:
Intelligent Multimedia",
        JOURNAL = AIR,
        VOLUME = "9",
        YEAR = "1995",
        NUMBER = "2-3",
        MONTH = jun,
        PAGES = "77--80",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123968"}

@article{bb127913,
        AUTHOR = "McKevitt, P.",
        TITLE = "Integration of Natural-Language and Vision Processing:
More Computational Models and Systems",
        JOURNAL = AIR,
        VOLUME = "8",
        YEAR = "1995",
        NUMBER = "5-6",
        PAGES = "345--348",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123969"}

@article{bb127914,
        AUTHOR = "McKevitt, P.",
        TITLE = "Integration of Natural-Language and Vision Processing:
Computational Models and Systems",
        JOURNAL = AIR,
        VOLUME = "8",
        YEAR = "1994",
        NUMBER = "2-3",
        PAGES = "99--104",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123970"}

@book{bb127915,
  author    = {McKevitt, P.},
  title     = {Integration of Natural-Language and Vision Processing: Computational Models and Systems},
  publisher = {Kluwer},
  year      = {1995},
  bibsource = {http://www.visionbib.com/bibliography/match605.html#TT123970},
}

@article{bb127916,
        AUTHOR = "McKevitt, P.",
        TITLE = "Integration of Natural-Language and Vision Processing:
Grounding Representations",
        JOURNAL = AIR,
        VOLUME = "10",
        YEAR = "1996",
        NUMBER = "1-2",
        MONTH = apr,
        PAGES = "7--13",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123971"}

@book{bb127917,
        AUTHOR = "McKevitt, P.",
        TITLE = "Integration of Natural Language and Vision Processing",
        PUBLISHER = "Kluwer",
        YEAR = "1996",
        MONTH = sep,
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123972"}

@article{bb127918,
        AUTHOR = "Siskind, J. M.",
        TITLE = "Grounding Language in Perception",
        JOURNAL = AIR,
        VOLUME = "8",
        YEAR = "1995",
        NUMBER = "5-6",
        PAGES = "371--391",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123973"}

@article{bb127919,
        AUTHOR = "Srihari, R. K.",
        TITLE = "Automatic-Indexing and Content-Based Retrieval of Captioned Images",
        JOURNAL = Computer,
        VOLUME = "28",
        YEAR = "1995",
        NUMBER = "9",
        MONTH = sep,
        PAGES = "49--56",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123974"}

@article{bb127920,
        AUTHOR = "Srihari, R. K.",
        TITLE = "Use of Captions and Other Collateral Text in Understanding Photographs",
        JOURNAL = AIR,
        VOLUME = "8",
        YEAR = "1995",
        NUMBER = "5-6",
        PAGES = "409--430",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123975"}

@article{bb127921,
        AUTHOR = "Srihari, R. K.",
        TITLE = "Use of Collateral Text in Understanding Photos in Documents",
        JOURNAL = SPIE,
        VOLUME = "2368",
        YEAR = "1994",
        PAGES = "186--199",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123976"}

@article{bb127922,
        AUTHOR = "Srihari, R. K.",
        TITLE = "Computational Models for Integrating Linguistic and Visual Information:
A Survey",
        JOURNAL = AIR,
        VOLUME = "8",
        YEAR = "1995",
        NUMBER = "5-6",
        PAGES = "349--369",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123977"}

@inproceedings{bb127923,
        AUTHOR = "Srihari, R. K.",
        TITLE = "Using Linguistic Context for Image Interpretation and Annotation",
        BOOKTITLE = "Radius97",
        YEAR = "1997",
        PAGES = "419--427",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123978"}

@inproceedings{bb127924,
        AUTHOR = "Srihari, R. K.",
        TITLE = "Linguistic Context In Vision",
        BOOKTITLE = Context95,
        YEAR = "1995",
        INTERNAL-NOTE = "Page numbers unknown in the source record (placeholder xx removed from PAGES)",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123979"}

@inproceedings{bb127925,
        AUTHOR = "Srihari, R. K. and Zhang, Z. and Venkatraman, M. and Chopra, R.",
        TITLE = "Using Speech Input for Image Interpretation and Annotation",
        BOOKTITLE = ARPA96,
        YEAR = "1996",
        PAGES = "501--510",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123980"}

@inproceedings{bb127926,
        AUTHOR = "Burhans, D. T. and Chopra, R. and Srihari, R. K. and Govindaraju, V. and Venkataraman, M.",
        TITLE = "Use of Collateral Text in Image Interpretation",
        BOOKTITLE = ARPA94,
        YEAR = "1994",
        PAGES = "II:897--907",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123981"}

@inproceedings{bb127927,
        AUTHOR = "Srihari, R. K. and Burhans, D. T.",
        TITLE = "Visual Semantics: Extracting Visual Information from
Text Accompanying Pictures",
        BOOKTITLE = AAAI-94,
        YEAR = "1994",
        PAGES = "793--798",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123982"}

@inproceedings{bb127928,
        AUTHOR = "Govindaraju, V. and Srihari, S. N. and Sher, D. B.",
        TITLE = "Caption-Aided Face Location In Newspaper Photographs",
        BOOKTITLE = ICPR92,
        YEAR = "1992",
        PAGES = "I:474--477",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123983"}

@inproceedings{bb127929,
        AUTHOR = "Govindaraju, V. and Srihari, S. N. and Sher, D. B.",
        TITLE = "A Computational Model for Face Location Based on Cognitive Principles",
        BOOKTITLE = AAAI-92,
        YEAR = "1992",
        PAGES = "350--355",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123984"}

@article{bb127930,
        AUTHOR = "Schank, R. C. and Fano, A.",
        TITLE = "Memory and Expectations in Learning, Language, and Visual Understanding",
        JOURNAL = AIR,
        VOLUME = "9",
        YEAR = "1995",
        NUMBER = "4-5",
        MONTH = oct,
        PAGES = "261--271",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123985"}

@article{bb127931,
        AUTHOR = "Wilks, Y.",
        TITLE = "Language, Vision and Metaphor",
        JOURNAL = AIR,
        VOLUME = "9",
        YEAR = "1995",
        NUMBER = "4-5",
        MONTH = oct,
        PAGES = "273--289",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123986"}

@article{bb127932,
        AUTHOR = "Partridge, D.",
        TITLE = "Language and Vision: A Single Perceptual Mechanism",
        JOURNAL = AIR,
        VOLUME = "9",
        YEAR = "1995",
        NUMBER = "4-5",
        MONTH = oct,
        PAGES = "291--303",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123987"}

@article{bb127933,
        AUTHOR = "Marconi, D.",
        TITLE = "Work on the Integration of Language and Vision at the
{University of Torino}",
        JOURNAL = AIR,
        VOLUME = "10",
        YEAR = "1996",
        NUMBER = "1-2",
        MONTH = apr,
        PAGES = "15--20",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123988"}

@article{bb127934,
        AUTHOR = "Meini, C. and Paternoster, A.",
        TITLE = "Understanding Language Through Vision",
        JOURNAL = AIR,
        VOLUME = "10",
        YEAR = "1996",
        NUMBER = "1-2",
        MONTH = apr,
        PAGES = "37--48",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123989"}

@article{bb127935,
        AUTHOR = "McKevitt, P. and Guo, C. M.",
        TITLE = "From {Chinese} Rooms to {Irish} Rooms: New Words on Visions for Language",
        JOURNAL = AIR,
        VOLUME = "10",
        YEAR = "1996",
        NUMBER = "1-2",
        MONTH = apr,
        PAGES = "49--63",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123990"}

@article{bb127936,
        AUTHOR = "Grumbach, A.",
        TITLE = "Grounding Symbols into Perceptions",
        JOURNAL = AIR,
        VOLUME = "10",
        YEAR = "1996",
        NUMBER = "1-2",
        MONTH = apr,
        PAGES = "131--146",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123991"}

@article{bb127937,
        AUTHOR = "Socher, G. and Sagerer, G. F. and Perona, P.",
        TITLE = "{Bayesian} reasoning on qualitative descriptions from images and speech",
        JOURNAL = IVC,
        VOLUME = "18",
        YEAR = "2000",
        NUMBER = "2",
        MONTH = jan,
        PAGES = "155--172",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123992"}

@article{bb127938,
        AUTHOR = "Mukerjee, A. and Gupta, K. and Nautiyal, S. and Singh, M. P. and Mishra, N.",
        TITLE = "Conceptual description of visual scenes from linguistic models",
        JOURNAL = IVC,
        VOLUME = "18",
        YEAR = "2000",
        NUMBER = "2",
        MONTH = jan,
        PAGES = "173--187",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123993"}

@article{bb127939,
        AUTHOR = "Arens, M. and Gerber, R. and Nagel, H. H.",
        TITLE = "Conceptual representations between video signals and natural language
descriptions",
        JOURNAL = IVC,
        VOLUME = "26",
        YEAR = "2008",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "53--66",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123994"}

@inproceedings{bb127940,
        AUTHOR = "Gerber, R. and Nagel, H. H.",
        TITLE = "(Mis?-) Using {DRT} for Generation of Natural Language Text
from Image Sequences",
        BOOKTITLE = ECCV98,
        YEAR = "1998",
        PAGES = "II: 255",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123995"}

@article{bb127941,
        AUTHOR = "Lee, Y. J. and Grauman, K.",
        TITLE = "Object-Graphs for Context-Aware Visual Category Discovery",
        JOURNAL = PAMI,
        VOLUME = "34",
        YEAR = "2012",
        NUMBER = "2",
        MONTH = feb,
        PAGES = "346--358",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123996"}

@inproceedings{bb127942,
        AUTHOR = "Lee, Y. J. and Grauman, K.",
        TITLE = "Object-graphs for context-aware category discovery",
        BOOKTITLE = CVPR10,
        YEAR = "2010",
        PAGES = "1--8",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123997"}

@inproceedings{bb127943,
        AUTHOR = "Lee, Y. J. and Grauman, K.",
        TITLE = "Learning the easy things first: Self-paced visual category discovery",
        BOOKTITLE = CVPR11,
        YEAR = "2011",
        PAGES = "1721--1728",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123998"}

@article{bb127944,
        AUTHOR = "Yu, A. and Grauman, K.",
        TITLE = "Densifying Supervision for Fine-Grained Visual Comparisons",
        JOURNAL = IJCV,
        VOLUME = "128",
        YEAR = "2020",
        NUMBER = "10-11",
        MONTH = nov,
        PAGES = "2704--2730",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123999"}

@inproceedings{bb127945,
        AUTHOR = "Hessel, J. and Hwang, J. D. and Park, J. S. and Zellers, R. and Bhagavatula, C. and Rohrbach, A. and Saenko, K. and Choi, Y.",
        TITLE = "The Abduction of {Sherlock Holmes}:
A Dataset for Visual Abductive Reasoning",
        BOOKTITLE = ECCV22,
        YEAR = "2022",
        PAGES = "XXXVI:558--575",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT124000"}

@inproceedings{bb127946,
  author    = {Perona, P.},
  title     = {A taxonomy of visual recognition},
  booktitle = VMV04,
  year      = {2004},
  pages     = {187},
  bibsource = {http://www.visionbib.com/bibliography/match605.html#TT124001},
}

@inproceedings{bb127947,
        AUTHOR = "Takahashi, T. and Nakanishi, S. and Kuno, Y. and Shirai, Y.",
        TITLE = "Helping Computer Vision by Verbal and Nonverbal Communication",
        BOOKTITLE = ICPR98,
        YEAR = "1998",
        PAGES = "II: 1216--1218",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT124002"}

@inproceedings{bb127948,
        AUTHOR = "Satoh, S. and Nakamura, Y. and Kanade, T.",
        TITLE = "{Name-It}: Naming and Detecting Faces in Video by the Integration
of Image and Natural Language Processing",
        BOOKTITLE = IJCAI97,
        YEAR = "1997",
        PAGES = "1488--1495",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT124003"}

@inproceedings{bb127949,
        AUTHOR = "Satoh, S. and Kanade, T.",
        TITLE = "{Name-It}: Association Of Face And Name In Video",
        BOOKTITLE = CVPR97,
        YEAR = "1997",
        PAGES = "368--373",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT124004"}

@inproceedings{bb127950,
        AUTHOR = "Socher, G. and Sagerer, G. F. and Kummert, F. and Fuhr, T.",
        TITLE = "Talking About {3D} Scenes: Integration of Image and Speech Understanding
in a Hybrid Distributed System",
        BOOKTITLE = ICIP96,
        YEAR = "1996",
        PAGES = "II: 809--812",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT124005"}

@article{bb127951,
        AUTHOR = "Kuniyoshi, Y. and Inaba, M. and Inoue, H.",
        TITLE = "Learning by Watching: Extracting Reusable Task Knowledge from
Visual Observation of Human Performance",
        JOURNAL = RA,
        VOLUME = "10",
        YEAR = "1994",
        PAGES = "799--822",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT124006"}

@inproceedings{bb127952,
        AUTHOR = "Kuniyoshi, Y. and Inoue, H.",
        TITLE = "Indexicality and dynamic attention control in qualitative recognition
of assembly actions",
        BOOKTITLE = ECCV92,
        YEAR = "1992",
        PAGES = "874--878",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT124007"}

@article{bb127953,
        AUTHOR = "Porway, J. and Wang, Q. C. and Zhu, S. C.",
        TITLE = "A Hierarchical and Contextual Model for Aerial Image Parsing",
        JOURNAL = IJCV,
        VOLUME = "88",
        YEAR = "2010",
        NUMBER = "2",
        MONTH = jun,
        INTERNAL-NOTE = "Page range unknown in the source record (placeholder xx-yy removed from PAGES)",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT124008"}

@inproceedings{bb127954,
        AUTHOR = "Porway, J. and Wang, K. and Yao, B. and Zhu, S. C.",
        TITLE = "A hierarchical and contextual model for aerial image understanding",
        BOOKTITLE = CVPR08,
        YEAR = "2008",
        PAGES = "1--8",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT124009"}

@inproceedings{bb127955,
        AUTHOR = "Si, Z. Z. and Gong, H. F. and Wu, Y. N. and Zhu, S. C.",
        TITLE = "Learning mixed templates for object recognition",
        BOOKTITLE = CVPR09,
        YEAR = "2009",
        PAGES = "272--279",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT124010"}

@article{bb127956,
        AUTHOR = "Tu, Z. W. and Bai, X.",
        TITLE = "Auto-Context and Its Application to High-Level Vision Tasks and {3D}
Brain Image Segmentation",
        JOURNAL = PAMI,
        VOLUME = "32",
        YEAR = "2010",
        NUMBER = "10",
        MONTH = oct,
        PAGES = "1744--1757",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT124011"}

@inproceedings{bb127957,
        AUTHOR = "Tu, Z. W.",
        TITLE = "Auto-context and its application to high-level vision tasks",
        BOOKTITLE = CVPR08,
        YEAR = "2008",
        PAGES = "1--8",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT124012"}

@inproceedings{bb127958,
        AUTHOR = "Jones, J. and Hager, G. D. and Khudanpur, S.",
        TITLE = "Toward Computer Vision Systems That Understand Real-World Assembly
Processes",
        BOOKTITLE = WACV19,
        YEAR = "2019",
        PAGES = "426--434",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT124013"}

@inproceedings{bb127959,
        AUTHOR = "Lampert, C. H.",
        TITLE = "Partitioning of image datasets using discriminative context information",
        BOOKTITLE = CVPR08,
        YEAR = "2008",
        PAGES = "1--8",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT124014"}

@inproceedings{bb127960,
        AUTHOR = "Hansen, C. and Henderson, T. C.",
        TITLE = "Towards the Automatic Generation of Recognition Strategies",
        BOOKTITLE = ICCV88,
        YEAR = "1988",
        PAGES = "275--279",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT124015"}

@inproceedings{bb127961,
        AUTHOR = "Cantoni, V. and Cei, U. and Ferretti, M. and Lombardi, L.",
        TITLE = "Towards an Automatic Construction of Object Recognition Strategies",
        BOOKTITLE = ICPR88,
        YEAR = "1988",
        PAGES = "I: 371--374",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT124016"}

@inproceedings{bb127962,
        AUTHOR = "Cova, G. and Griffini, A. and Lombardi, L.",
        TITLE = "Object Recognition Strategy in a Multi-Resolution System",
        BOOKTITLE = CIAP89,
        YEAR = "1989",
        PAGES = "729--733",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT124017"}

@article{bb127963,
        AUTHOR = "Feng, Y. S. and Lapata, M.",
        TITLE = "Automatic Caption Generation for News Images",
        JOURNAL = PAMI,
        VOLUME = "35",
        YEAR = "2013",
        NUMBER = "4",
        MONTH = apr,
        PAGES = "797--812",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124018"}

@article{bb127964,
        AUTHOR = "Vinyals, O. and Toshev, A. and Bengio, S. and Erhan, D.",
        TITLE = "Show and Tell: Lessons Learned from the 2015 {MSCOCO} Image Captioning
Challenge",
        JOURNAL = PAMI,
        VOLUME = "39",
        YEAR = "2017",
        NUMBER = "4",
        MONTH = apr,
        PAGES = "652--663",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124019"}

@inproceedings{bb127965,
        AUTHOR = "Vinyals, O. and Toshev, A. and Bengio, S. and Erhan, D.",
        TITLE = "Show and tell: A neural image caption generator",
        BOOKTITLE = CVPR15,
        YEAR = "2015",
        PAGES = "3156--3164",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124020"}

@article{bb127966,
        AUTHOR = "Wang, J. Y. and Zhu, X. T. and Gong, S. G.",
        TITLE = "Discovering visual concept structure with sparse and incomplete tags",
        JOURNAL = AI,
        VOLUME = "250",
        YEAR = "2017",
        NUMBER = "1",
        PAGES = "16--36",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124021"}

@article{bb127967,
        AUTHOR = "Kilickaya, M. and Akkus, B. K. and Cakici, R. and Erdem, A. and Erdem, E. and Ikizler Cinbis, N.",
        TITLE = "Data-driven image captioning via salient region discovery",
        JOURNAL = IET-CV,
        VOLUME = "11",
        YEAR = "2017",
        NUMBER = "6",
        MONTH = sep,
        PAGES = "398--406",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124022"}

@article{bb127968,
        AUTHOR = "He, X. D. and Deng, L.",
        TITLE = "Deep Learning for Image-to-Text Generation: A Technical Overview",
        JOURNAL = SPMag,
        VOLUME = "34",
        YEAR = "2017",
        NUMBER = "6",
        MONTH = nov,
        PAGES = "109--116",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124023"}

@article{bb127969,
        AUTHOR = "Deng, L. and He, X. D.",
        TITLE = "Deep Learning for Image-to-Text Generation: A Technical Overview",
        JOURNAL = SPMag,
        VOLUME = "35",
        YEAR = "2018",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "178",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124023"}

@article{bb127970,
        AUTHOR = "Li, L. H. and Tang, S. and Zhang, Y. D. and Deng, L. X. and Tian, Q.",
        TITLE = "{GLA}: Global-Local Attention for Image Description",
        JOURNAL = MultMed,
        VOLUME = "20",
        YEAR = "2018",
        NUMBER = "3",
        MONTH = mar,
        PAGES = "726--737",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124024"}

@article{bb127971,
        AUTHOR = "Lu, X. and Wang, B. and Zheng, X. and Li, X.",
        TITLE = "Exploring Models and Data for Remote Sensing Image Caption Generation",
        JOURNAL = GeoRS,
        VOLUME = "56",
        YEAR = "2018",
        NUMBER = "4",
        MONTH = apr,
        PAGES = "2183--2195",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124025"}

@article{bb127972,
        AUTHOR = "Wu, C. L. and Wei, Y. W. and Chu, X. L. and Su, F. and Wang, L. Q.",
        TITLE = "Modeling visual and word-conditional semantic attention for image
captioning",
        JOURNAL = SP:IC,
        VOLUME = "67",
        YEAR = "2018",
        PAGES = "100--107",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124026"}

@article{bb127973,
        AUTHOR = "Zhang, M. and Yang, Y. and Zhang, H. and Ji, Y. and Shen, H. T. and Chua, T.",
        TITLE = "More is Better: Precise and Detailed Image Captioning Using Online
Positive Recall and Missing Concepts Mining",
        JOURNAL = IP,
        VOLUME = "28",
        YEAR = "2019",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "32--44",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124027"}

@article{bb127974,
        AUTHOR = "Gella, S. and Keller, F. and Lapata, M.",
        TITLE = "Disambiguating Visual Verbs",
        JOURNAL = PAMI,
        VOLUME = "41",
        YEAR = "2019",
        NUMBER = "2",
        MONTH = feb,
        PAGES = "311--322",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124028"}

@article{bb127975,
        AUTHOR = "Xu, N. and Liu, A. A. and Liu, J. and Nie, W. Z. and Su, Y. T.",
        TITLE = "Scene graph captioner:
Image captioning based on structural visual representation",
        JOURNAL = JVCIR,
        VOLUME = "58",
        YEAR = "2019",
        PAGES = "477--485",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124029"}

@article{bb127976,
        AUTHOR = "He, X. W. and Shi, B. G. and Bai, X. and Xia, G. S. and Zhang, Z. X. and Dong, W. S.",
        TITLE = "Image Caption Generation with Part of Speech Guidance",
        JOURNAL = PRL,
        VOLUME = "119",
        YEAR = "2019",
        PAGES = "229--237",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124030"}

@article{bb127977,
        AUTHOR = "Xiao, X. Y. and Wang, L. F. and Ding, K. and Xiang, S. M. and Pan, C. H.",
        TITLE = "Dense semantic embedding network for image captioning",
        JOURNAL = PR,
        VOLUME = "90",
        YEAR = "2019",
        PAGES = "285--296",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124031"}

@article{bb127978,
        AUTHOR = "Zhang, X. R. and Wang, X. and Tang, X. and Zhou, H. Y. and Li, C.",
        TITLE = "Description Generation for Remote Sensing Images Using Attribute
Attention Mechanism",
        JOURNAL = RS,
        VOLUME = "11",
        YEAR = "2019",
        NUMBER = "6",
        INTERNAL-NOTE = "Page range unknown in the source record (placeholder xx-yy removed from PAGES)",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124032"}

@article{bb127979,
        AUTHOR = "Ding, S. T. and Qu, S. and Xi, Y. L. and Sangaiah, A. K. and Wan, S. H.",
        TITLE = "Image caption generation with high-level image features",
        JOURNAL = PRL,
        VOLUME = "123",
        YEAR = "2019",
        PAGES = "89--95",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124033"}

@article{bb127980,
        AUTHOR = "Liu, X. X. and Xu, Q. Y. and Wang, N.",
        TITLE = "A survey on deep neural network-based image captioning",
        JOURNAL = VC,
        VOLUME = "35",
        YEAR = "2019",
        NUMBER = "3",
        MONTH = mar,
        PAGES = "445--470",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124034"}

@article{bb127981,
        AUTHOR = "Hossain, M. Z. and Sohel, F. and Shiratuddin, M. F. and Laga, H.",
        TITLE = "A Comprehensive Survey of Deep Learning for Image Captioning",
        JOURNAL = Surveys,
        VOLUME = "51",
        YEAR = "2019",
        NUMBER = "6",
        MONTH = feb,
        PAGES = "Article No 118",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124035"}

@article{bb127982,
        AUTHOR = "Zhang, Z. J. and Wu, Q. and Wang, Y. and Chen, F.",
        TITLE = "High-Quality Image Captioning With Fine-Grained and Semantic-Guided
Visual Attention",
        JOURNAL = MultMed,
        VOLUME = "21",
        YEAR = "2019",
        NUMBER = "7",
        MONTH = jul,
        PAGES = "1681--1693",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124036"}

@inproceedings{bb127983,
        AUTHOR = "Zhang, Z. J. and Wu, Q. and Wang, Y. and Chen, F.",
        TITLE = "Fine-Grained and Semantic-Guided Visual Attention for Image
Captioning",
        BOOKTITLE = WACV18,
        YEAR = "2018",
        PAGES = "1709--1717",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124037"}

@article{bb127984,
        AUTHOR = "Li, X. and Jiang, S.",
        TITLE = "Know More Say Less: Image Captioning Based on Scene Graphs",
        JOURNAL = MultMed,
        VOLUME = "21",
        YEAR = "2019",
        NUMBER = "8",
        MONTH = aug,
        PAGES = "2117--2130",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124038"}

@article{bb127985,
        AUTHOR = "Sharif, N. and White, L. and Bennamoun, M. and Liu, W. and Shah, S. A. A.",
        TITLE = "{LCEval}: Learned Composite Metric for Caption Evaluation",
        JOURNAL = IJCV,
        VOLUME = "127",
        YEAR = "2019",
        NUMBER = "10",
        MONTH = oct,
        PAGES = "1586--1610",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124039"}

@article{bb127986,
        AUTHOR = "Zhang, Z. Y. and Diao, W. H. and Zhang, W. K. and Yan, M. L. and Gao, X. and Sun, X.",
        TITLE = "{LAM}: Remote Sensing Image Captioning with Label-Attention Mechanism",
        JOURNAL = RS,
        VOLUME = "11",
        YEAR = "2019",
        NUMBER = "20",
        INTERNAL-NOTE = "Page range unknown in the source record (placeholder xx-yy removed from PAGES)",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124040"}

@comment{NOTE(review): pages below holds the literal placeholder xx-yy, not a real page range. TODO: confirm the article number with the publisher.}
@article{bb127987,
  author    = {Fu, K. and Li, Y. and Zhang, W.K. and Yu, H.F. and Sun, X.},
  title     = {Boosting Memory with a Persistent Memory Mechanism for Remote Sensing
Image Captioning},
  journal   = RS,
  year      = {2020},
  volume    = {12},
  number    = {11},
  pages     = {xx-yy},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT124041},
}

@article{bb127988,
        AUTHOR = "Tan, J.H. and Chan, C.S. and Chuah, J.H.",
        TITLE = "COMIC: Toward A Compact Image Captioning Model With Attention",
        JOURNAL = MultMed,
        VOLUME = "21",
        YEAR = "2019",
        NUMBER = "10",
        MONTH = oct,
        PAGES = "2686--2696",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124042"}

@article{bb127989,
        AUTHOR = "Zhou, L. and Zhang, Y. and Jiang, Y. and Zhang, T. and Fan, W.",
        TITLE = "Re-Caption: Saliency-Enhanced Image Captioning Through Two-Phase
Learning",
        JOURNAL = IP,
        VOLUME = "29",
        YEAR = "2020",
        NUMBER = "1",
        PAGES = "694--709",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124043"}

@article{bb127990,
  author    = {Yang, L. and Hu, H.F.},
  title     = {Visual Skeleton and Reparative Attention for Part-of-Speech image
captioning system},
  journal   = CVIU,
  year      = {2019},
  volume    = {189},
  pages     = {102819},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT124044},
}

@article{bb127991,
  author    = {Wang, J.B. and Wang, W. and Wang, L. and Wang, Z.Y. and Feng, D.D. and Tan, T.N.},
  title     = {Learning Visual Relationship and Context-Aware Attention for Image
Captioning},
  journal   = PR,
  year      = {2020},
  volume    = {98},
  pages     = {107075},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT124045},
}

@article{bb127992,
        AUTHOR = "Xiao, X. and Wang, L. and Ding, K. and Xiang, S. and Pan, C.",
        TITLE = "Deep Hierarchical Encoder-Decoder Network for Image Captioning",
        JOURNAL = MultMed,
        VOLUME = "21",
        YEAR = "2019",
        NUMBER = "11",
        MONTH = nov,
        PAGES = "2942--2956",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124046"}

@article{bb127993,
        AUTHOR = "Jiang, T. and Zhang, Z. and Yang, Y.",
        TITLE = "Modeling coverage with semantic embedding for image caption generation",
        JOURNAL = VC,
        VOLUME = "35",
        YEAR = "2018",
        NUMBER = "11",
        MONTH = nov,
        PAGES = "1655--1665",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124047"}

@article{bb127994,
        AUTHOR = "Lu, X. and Wang, B. and Zheng, X.",
        TITLE = "Sound Active Attention Framework for Remote Sensing Image Captioning",
        JOURNAL = GeoRS,
        VOLUME = "58",
        YEAR = "2020",
        NUMBER = "3",
        MONTH = mar,
        PAGES = "1985--2000",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124048"}

@comment{NOTE(review): pages below holds the literal placeholder xx-yy, not a real page range. TODO: confirm the article number with the publisher.}
@article{bb127995,
  author    = {Li, Y.Y. and Fang, S.K. and Jiao, L.C. and Liu, R.J. and Shang, R.H.},
  title     = {A Multi-Level Attention Model for Remote Sensing Image Captions},
  journal   = RS,
  year      = {2020},
  volume    = {12},
  number    = {6},
  pages     = {xx-yy},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT124049},
}

@article{bb127996,
        AUTHOR = "Chen, X.H. and Zhang, M.X. and Wang, Z. and Zuo, L. and Li, B. and Yang, Y.",
        TITLE = "Leveraging unpaired out-of-domain data for image captioning",
        JOURNAL = PRL,
        VOLUME = "132",
        YEAR = "2020",
        PAGES = "132--140",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124050"}

@article{bb127997,
        AUTHOR = "Xu, N. and Zhang, H. and Liu, A. and Nie, W. and Su, Y. and Nie, J. and Zhang, Y.",
        TITLE = "Multi-Level Policy and Reward-Based Deep Reinforcement Learning
Framework for Image Captioning",
        JOURNAL = MultMed,
        VOLUME = "22",
        YEAR = "2020",
        NUMBER = "5",
        MONTH = may,
        PAGES = "1372--1383",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124051"}

@article{bb127998,
        AUTHOR = "Guo, L. and Liu, J. and Lu, S. and Lu, H.",
        TITLE = "Show, Tell, and Polish: Ruminant Decoding for Image Captioning",
        JOURNAL = MultMed,
        VOLUME = "22",
        YEAR = "2020",
        NUMBER = "8",
        MONTH = aug,
        PAGES = "2149--2162",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124052"}

@article{bb127999,
        AUTHOR = "Feng, Q. and Wu, Y. and Fan, H. and Yan, C. and Xu, M. and Yang, Y.",
        TITLE = "Cascaded Revision Network for Novel Object Captioning",
        JOURNAL = CirSysVideo,
        VOLUME = "30",
        YEAR = "2020",
        NUMBER = "10",
        MONTH = oct,
        PAGES = "3413--3421",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124053"}

Last update: Jan 20, 2025 at 11:36:25