@inproceedings{bb122600,
        AUTHOR = "Cai, S.J. and Zuo, W.M. and Zhang, L.",
        TITLE = "Higher-Order Integration of Hierarchical Convolutional Activations
for Fine-Grained Visual Categorization",
        BOOKTITLE = ICCV17,
        YEAR = "2017",
        PAGES = "511--520",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT118707"}

@inproceedings{bb122601,
        AUTHOR = "Kong, S. and Fowlkes, C.C.",
        TITLE = "Pixel-Wise Attentional Gating for Scene Parsing",
        BOOKTITLE = WACV19,
        YEAR = "2019",
        PAGES = "1024--1033",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT118708"}

@inproceedings{bb122602,
        AUTHOR = "Kong, S. and Fowlkes, C.C.",
        TITLE = "Recurrent Scene Parsing with Perspective Understanding in the Loop",
        BOOKTITLE = CVPR18,
        YEAR = "2018",
        PAGES = "956--965",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT118709"}

@inproceedings{bb122603,
        AUTHOR = "Zheng, H. and Fu, J. and Mei, T. and Luo, J.",
        TITLE = "Learning Multi-attention Convolutional Neural Network for
Fine-Grained Image Recognition",
        BOOKTITLE = ICCV17,
        YEAR = "2017",
        PAGES = "5219--5227",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT118710"}

@inproceedings{bb122604,
        AUTHOR = "Fu, J. and Zheng, H. and Mei, T.",
        TITLE = "Look Closer to See Better: Recurrent Attention Convolutional Neural
Network for Fine-Grained Image Recognition",
        BOOKTITLE = CVPR17,
        YEAR = "2017",
        PAGES = "4476--4484",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT118711"}

@inproceedings{bb122605,
        AUTHOR = "Ge, Z.Y. and McCool, C. and Sanderson, C. and Wang, P. and Liu, L.Q. and Reid, I.D. and Corke, P.",
        TITLE = "Exploiting Temporal Information for DCNN-Based Fine-Grained Object
Classification",
        BOOKTITLE = DICTA16,
        YEAR = "2016",
        PAGES = "1--6",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT118712"}

@inproceedings{bb122606,
        AUTHOR = "Ai, S.S. and Jia, C.Y. and Chen, Z.N.",
        TITLE = "Large-Scale Product Classification via Spatial Attention Based CNN
Learning and Multi-class Regression",
        BOOKTITLE = MMMod17,
        YEAR = "2017",
        PAGES = "I: 176--188",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT118713"}

@inproceedings{bb122607,
        AUTHOR = "Diba, A. and Pazandeh, A.M. and Pirsiavash, H. and Van Gool, L.J.",
        TITLE = "DeepCAMP: Deep Convolutional Action Attribute Mid-Level Patterns",
        BOOKTITLE = CVPR16,
        YEAR = "2016",
        PAGES = "3557--3565",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT118714"}

@inproceedings{bb122608,
        AUTHOR = "Zhang, H. and Xu, T. and Elhoseiny, M. and Huang, X.L. and Zhang, S.T. and Elgammal, A.E. and Metaxas, D.N.",
        TITLE = "SPDA-CNN: Unifying Semantic Part Detection and Abstraction for
Fine-Grained Recognition",
        BOOKTITLE = CVPR16,
        YEAR = "2016",
        PAGES = "1143--1152",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT118715"}

@inproceedings{bb122609,
        AUTHOR = "Chevalier, M. and Thome, N. and Cord, M. and Fournier, J. and Henaff, G. and Dusch, E.",
        TITLE = "LR-CNN for fine-grained classification with varying resolution",
        BOOKTITLE = ICIP15,
        YEAR = "2015",
        PAGES = "3101--3105",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT118716"}

@inproceedings{bb122610,
        AUTHOR = "Ge, Z. and Bewley, A. and McCool, C. and Corke, P. and Upcroft, B. and Sanderson, C.",
        TITLE = "Fine-grained classification via mixture of deep convolutional neural
networks",
        BOOKTITLE = WACV16,
        YEAR = "2016",
        PAGES = "1--6",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT118717"}

@inproceedings{bb122611,
        AUTHOR = "Ge, Z. and McCool, C. and Sanderson, C. and Corke, P.",
        TITLE = "Modelling local deep convolutional neural network features to improve
fine-grained image classification",
        BOOKTITLE = ICIP15,
        YEAR = "2015",
        PAGES = "4112--4116",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT118718"}

@inproceedings{bb122612,
        AUTHOR = "Zhang, N. and Donahue, J. and Girshick, R. and Darrell, T.J.",
        TITLE = "Part-Based R-CNNs for Fine-Grained Category Detection",
        BOOKTITLE = ECCV14,
        YEAR = "2014",
        PAGES = "I: 834--849",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT118719"}

@article{bb122613,
        AUTHOR = "Taylor, S.L. and Dahl, D.A. and Lipshutz, M. and Weir, C. and Norton, L.M. and Nilson, R.W. and Linebarger, M.C.",
        TITLE = "Integrating Natural-Language Understanding with
Document Structure-Analysis",
        JOURNAL = AIR,
        VOLUME = "8",
        YEAR = "1994",
        NUMBER = "2-3",
        PAGES = "255--276",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118720"}

@article{bb122614,
        AUTHOR = "McKevitt, P.",
        TITLE = "Integration of Natural-Language and Vision Processing: Theory",
        JOURNAL = AIR,
        VOLUME = "9",
        YEAR = "1995",
        NUMBER = "4-5",
        MONTH = oct,
        PAGES = "247--250",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118721"}

@book{bb122615,
  author    = {McKevitt, P.},
  title     = {Integration of Natural-Language and Vision Processing: Theory},
  publisher = {Springer},
  year      = {1995},
  bibsource = {http://www.visionbib.com/bibliography/match605.html#TT118721}
}

@article{bb122616,
        AUTHOR = "McKevitt, P.",
        TITLE = "Integration of Natural-Language and Vision Processing:
Intelligent Multimedia",
        JOURNAL = AIR,
        VOLUME = "9",
        YEAR = "1995",
        NUMBER = "2-3",
        MONTH = jun,
        PAGES = "77--80",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118722"}

@article{bb122617,
        AUTHOR = "McKevitt, P.",
        TITLE = "Integration of Natural-Language and Vision Processing:
More Computational Models and Systems",
        JOURNAL = AIR,
        VOLUME = "8",
        YEAR = "1995",
        NUMBER = "5-6",
        PAGES = "345--348",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118723"}

@article{bb122618,
        AUTHOR = "McKevitt, P.",
        TITLE = "Integration of Natural-Language and Vision Processing:
Computational Models and Systems",
        JOURNAL = AIR,
        VOLUME = "8",
        YEAR = "1994",
        NUMBER = "2-3",
        PAGES = "99--104",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118724"}

@book{bb122619,
  author    = {McKevitt, P.},
  title     = {Integration of Natural-Language and Vision Processing:
Computational Models and Systems},
  publisher = {Kluwer},
  year      = {1995},
  bibsource = {http://www.visionbib.com/bibliography/match605.html#TT118724}
}

@article{bb122620,
        AUTHOR = "McKevitt, P.",
        TITLE = "Integration of Natural-Language and Vision Processing:
Grounding Representations",
        JOURNAL = AIR,
        VOLUME = "10",
        YEAR = "1996",
        NUMBER = "1-2",
        MONTH = apr,
        PAGES = "7--13",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118725"}

@book{bb122621,
        AUTHOR = "McKevitt, P.",
        TITLE = "Integration of Natural Language and Vision Processing",
        PUBLISHER = "Kluwer",
        YEAR = "1996",
        MONTH = sep,
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118726"}

@article{bb122622,
        AUTHOR = "Siskind, J.M.",
        TITLE = "Grounding Language in Perception",
        JOURNAL = AIR,
        VOLUME = "8",
        YEAR = "1995",
        NUMBER = "5-6",
        PAGES = "371--391",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118727"}

@article{bb122623,
        AUTHOR = "Srihari, R.K.",
        TITLE = "Automatic-Indexing and Content-Based Retrieval of Captioned Images",
        JOURNAL = Computer,
        VOLUME = "28",
        YEAR = "1995",
        NUMBER = "9",
        MONTH = sep,
        PAGES = "49--56",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118728"}

@article{bb122624,
        AUTHOR = "Srihari, R.K.",
        TITLE = "Use of Captions and Other Collateral Text in Understanding Photographs",
        JOURNAL = AIR,
        VOLUME = "8",
        YEAR = "1995",
        NUMBER = "5-6",
        PAGES = "409--430",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118729"}

@article{bb122625,
        AUTHOR = "Srihari, R.K.",
        TITLE = "Use of Collateral Text in Understanding Photos in Documents",
        JOURNAL = SPIE,
        VOLUME = "2368",
        YEAR = "1994",
        PAGES = "186--199",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118730"}

@article{bb122626,
        AUTHOR = "Srihari, R.K.",
        TITLE = "Computational Models for Integrating Linguistic and Visual Information:
A Survey",
        JOURNAL = AIR,
        VOLUME = "8",
        YEAR = "1995",
        NUMBER = "5-6",
        PAGES = "349--369",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118731"}

@inproceedings{bb122627,
        AUTHOR = "Srihari, R.K.",
        TITLE = "Using Linguistic Context for Image Interpretation and Annotation",
        BOOKTITLE = "Radius97",
        YEAR = "1997",
        PAGES = "419--427",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118732"}

% NOTE(review): pages = {xx} looks like a placeholder rather than a real page number -- TODO confirm and fill in.
@inproceedings{bb122628,
  author    = {Srihari, R.K.},
  title     = {Linguistic Context In Vision},
  booktitle = Context95,
  year      = {1995},
  pages     = {xx},
  bibsource = {http://www.visionbib.com/bibliography/match605.html#TT118733}
}

@inproceedings{bb122629,
        AUTHOR = "Srihari, R.K. and Zhang, Z. and Venkatraman, M. and Chopra, R.",
        TITLE = "Using Speech Input for Image Interpretation and Annotation",
        BOOKTITLE = ARPA96,
        YEAR = "1996",
        PAGES = "501--510",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118734"}

@inproceedings{bb122630,
        AUTHOR = "Burhans, D.T. and Chopra, R. and Srihari, R.K. and Govindaraju, V. and Venkataraman, M.",
        TITLE = "Use of Collateral Text in Image Interpretation",
        BOOKTITLE = ARPA94,
        YEAR = "1994",
        PAGES = "II:897--907",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118735"}

@inproceedings{bb122631,
        AUTHOR = "Srihari, R.K. and Burhans, D.T.",
        TITLE = "Visual Semantics: Extracting Visual Information from
Text Accompanying Pictures",
        BOOKTITLE = AAAI-94,
        YEAR = "1994",
        PAGES = "793--798",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118736"}

@inproceedings{bb122632,
        AUTHOR = "Govindaraju, V. and Srihari, S.N. and Sher, D.B.",
        TITLE = "Caption-Aided Face Location In Newspaper Photographs",
        BOOKTITLE = ICPR92,
        YEAR = "1992",
        PAGES = "I:474--477",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118737"}

@inproceedings{bb122633,
        AUTHOR = "Govindaraju, V. and Srihari, S.N. and Sher, D.B.",
        TITLE = "A Computational Model for Face Location Based on Cognitive Principles",
        BOOKTITLE = AAAI-92,
        YEAR = "1992",
        PAGES = "350--355",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118738"}

@article{bb122634,
        AUTHOR = "Schank, R.C. and Fano, A.",
        TITLE = "Memory and Expectations in Learning, Language, and Visual Understanding",
        JOURNAL = AIR,
        VOLUME = "9",
        YEAR = "1995",
        NUMBER = "4-5",
        MONTH = oct,
        PAGES = "261--271",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118739"}

@article{bb122635,
        AUTHOR = "Wilks, Y.",
        TITLE = "Language, Vision and Metaphor",
        JOURNAL = AIR,
        VOLUME = "9",
        YEAR = "1995",
        NUMBER = "4-5",
        MONTH = oct,
        PAGES = "273--289",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118740"}

@article{bb122636,
        AUTHOR = "Partridge, D.",
        TITLE = "Language and Vision: A Single Perceptual Mechanism",
        JOURNAL = AIR,
        VOLUME = "9",
        YEAR = "1995",
        NUMBER = "4-5",
        MONTH = oct,
        PAGES = "291--303",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118741"}

@article{bb122637,
        AUTHOR = "Marconi, D.",
        TITLE = "Work on the Integration of Language and Vision at the
University of Torino",
        JOURNAL = AIR,
        VOLUME = "10",
        YEAR = "1996",
        NUMBER = "1-2",
        MONTH = apr,
        PAGES = "15--20",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118742"}

@article{bb122638,
        AUTHOR = "Meini, C. and Paternoster, A.",
        TITLE = "Understanding Language Through Vision",
        JOURNAL = AIR,
        VOLUME = "10",
        YEAR = "1996",
        NUMBER = "1-2",
        MONTH = apr,
        PAGES = "37--48",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118743"}

@article{bb122639,
        AUTHOR = "McKevitt, P. and Guo, C.M.",
        TITLE = "From Chinese Rooms to Irish Rooms: New Words on Visions for Language",
        JOURNAL = AIR,
        VOLUME = "10",
        YEAR = "1996",
        NUMBER = "1-2",
        MONTH = apr,
        PAGES = "49--63",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118744"}

@article{bb122640,
        AUTHOR = "Grumbach, A.",
        TITLE = "Grounding Symbols into Perceptions",
        JOURNAL = AIR,
        VOLUME = "10",
        YEAR = "1996",
        NUMBER = "1-2",
        MONTH = apr,
        PAGES = "131--146",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118745"}

@article{bb122641,
        AUTHOR = "Socher, G. and Sagerer, G.F. and Perona, P.",
        TITLE = "Bayesian reasoning on qualitative descriptions from images and speech",
        JOURNAL = IVC,
        VOLUME = "18",
        YEAR = "2000",
        NUMBER = "2",
        MONTH = jan,
        PAGES = "155--172",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118746"}

@article{bb122642,
        AUTHOR = "Mukerjee, A. and Gupta, K. and Nautiyal, S. and Singh, M.P. and Mishra, N.",
        TITLE = "Conceptual description of visual scenes from linguistic models",
        JOURNAL = IVC,
        VOLUME = "18",
        YEAR = "2000",
        NUMBER = "2",
        MONTH = jan,
        PAGES = "173--187",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118747"}

@article{bb122643,
        AUTHOR = "Arens, M. and Gerber, R. and Nagel, H.H.",
        TITLE = "Conceptual representations between video signals and natural language
descriptions",
        JOURNAL = IVC,
        VOLUME = "26",
        YEAR = "2008",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "53--66",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118748"}

@inproceedings{bb122644,
  author    = {Gerber, R. and Nagel, H.H.},
  title     = {(Mis?-) Using DRT for Generation of Natural Language Text
from Image Sequences},
  booktitle = ECCV98,
  year      = {1998},
  pages     = {II: 255},
  bibsource = {http://www.visionbib.com/bibliography/match605.html#TT118749}
}

@article{bb122645,
        AUTHOR = "Lee, Y.J. and Grauman, K.",
        TITLE = "Object-Graphs for Context-Aware Visual Category Discovery",
        JOURNAL = PAMI,
        VOLUME = "34",
        YEAR = "2012",
        NUMBER = "2",
        MONTH = feb,
        PAGES = "346--358",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118750"}

@inproceedings{bb122646,
        AUTHOR = "Lee, Y.J. and Grauman, K.",
        TITLE = "Object-graphs for context-aware category discovery",
        BOOKTITLE = CVPR10,
        YEAR = "2010",
        PAGES = "1--8",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118751"}

@inproceedings{bb122647,
        AUTHOR = "Lee, Y.J. and Grauman, K.",
        TITLE = "Learning the easy things first: Self-paced visual category discovery",
        BOOKTITLE = CVPR11,
        YEAR = "2011",
        PAGES = "1721--1728",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118752"}

@article{bb122648,
        AUTHOR = "Yu, A. and Grauman, K.",
        TITLE = "Densifying Supervision for Fine-Grained Visual Comparisons",
        JOURNAL = IJCV,
        VOLUME = "128",
        YEAR = "2020",
        NUMBER = "10-11",
        MONTH = nov,
        PAGES = "2704--2730",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118753"}

@inproceedings{bb122649,
        AUTHOR = "Hessel, J. and Hwang, J.D. and Park, J.S. and Zellers, R. and Bhagavatula, C. and Rohrbach, A. and Saenko, K. and Choi, Y.",
        TITLE = "The Abduction of Sherlock Holmes:
A Dataset for Visual Abductive Reasoning",
        BOOKTITLE = ECCV22,
        YEAR = "2022",
        PAGES = "XXXVI:558--575",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118754"}

@inproceedings{bb122650,
  author    = {Perona, P.},
  title     = {A taxonomy of visual recognition},
  booktitle = VMV04,
  year      = {2004},
  pages     = {187},
  bibsource = {http://www.visionbib.com/bibliography/match605.html#TT118755}
}

@inproceedings{bb122651,
        AUTHOR = "Takahashi, T. and Nakanishi, S. and Kuno, Y. and Shirai, Y.",
        TITLE = "Helping Computer Vision by Verbal and Nonverbal Communication",
        BOOKTITLE = ICPR98,
        YEAR = "1998",
        PAGES = "II: 1216--1218",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118756"}

@inproceedings{bb122652,
        AUTHOR = "Satoh, S. and Nakamura, Y. and Kanade, T.",
        TITLE = "Name-It: Naming and Detecting Faces in Video by the Integration
of Image and Natural Language Processing",
        BOOKTITLE = IJCAI97,
        YEAR = "1997",
        PAGES = "1488--1495",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118757"}

@inproceedings{bb122653,
        AUTHOR = "Satoh, S. and Kanade, T.",
        TITLE = "Name-It: Association Of Face And Name In Video",
        BOOKTITLE = CVPR97,
        YEAR = "1997",
        PAGES = "368--373",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118758"}

@inproceedings{bb122654,
        AUTHOR = "Socher, G. and Sagerer, G.F. and Kummert, F. and Fuhr, T.",
        TITLE = "Talking About 3D Scenes: Integration of Image and Speech Understanding
in a Hybrid Distributed System",
        BOOKTITLE = ICIP96,
        YEAR = "1996",
        PAGES = "II: 809--812",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118759"}

@article{bb122655,
        AUTHOR = "Kuniyoshi, Y. and Inaba, M. and Inoue, H.",
        TITLE = "Learning by Watching: Extracting Reusable Task Knowledge from
Visual Observation of Human Performance",
        JOURNAL = RA,
        VOLUME = "10",
        YEAR = "1994",
        PAGES = "799--822",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT118760"}

@inproceedings{bb122656,
        AUTHOR = "Kuniyoshi, Y. and Inoue, H.",
        TITLE = "Indexicality and dynamic attention control in qualitative recognition
of assembly actions",
        BOOKTITLE = ECCV92,
        YEAR = "1992",
        PAGES = "874--878",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT118761"}

% NOTE(review): PAGES = "xx-yy" looks like a placeholder, not a real page range -- TODO confirm and fill in.
@article{bb122657,
        AUTHOR = "Porway, J. and Wang, Q.C. and Zhu, S.C.",
        TITLE = "A Hierarchical and Contextual Model for Aerial Image Parsing",
        JOURNAL = IJCV,
        VOLUME = "88",
        YEAR = "2010",
        NUMBER = "2",
        MONTH = jun,
        PAGES = "xx-yy",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT118762"}

@inproceedings{bb122658,
        AUTHOR = "Porway, J. and Wang, K. and Yao, B. and Zhu, S.C.",
        TITLE = "A hierarchical and contextual model for aerial image understanding",
        BOOKTITLE = CVPR08,
        YEAR = "2008",
        PAGES = "1--8",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT118763"}

@inproceedings{bb122659,
        AUTHOR = "Si, Z.Z. and Gong, H.F. and Wu, Y.N. and Zhu, S.C.",
        TITLE = "Learning mixed templates for object recognition",
        BOOKTITLE = CVPR09,
        YEAR = "2009",
        PAGES = "272--279",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT118764"}

@article{bb122660,
        AUTHOR = "Tu, Z.W. and Bai, X.",
        TITLE = "Auto-Context and Its Application to High-Level Vision Tasks and 3D
Brain Image Segmentation",
        JOURNAL = PAMI,
        VOLUME = "32",
        YEAR = "2010",
        NUMBER = "10",
        MONTH = oct,
        PAGES = "1744--1757",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT118765"}

@inproceedings{bb122661,
        AUTHOR = "Tu, Z.W.",
        TITLE = "Auto-context and its application to high-level vision tasks",
        BOOKTITLE = CVPR08,
        YEAR = "2008",
        PAGES = "1--8",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT118766"}

@inproceedings{bb122662,
        AUTHOR = "Jones, J. and Hager, G.D. and Khudanpur, S.",
        TITLE = "Toward Computer Vision Systems That Understand Real-World Assembly
Processes",
        BOOKTITLE = WACV19,
        YEAR = "2019",
        PAGES = "426--434",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT118767"}

@inproceedings{bb122663,
        AUTHOR = "Lampert, C.H.",
        TITLE = "Partitioning of image datasets using discriminative context information",
        BOOKTITLE = CVPR08,
        YEAR = "2008",
        PAGES = "1--8",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT118768"}

@inproceedings{bb122664,
        AUTHOR = "Hansen, C. and Henderson, T.C.",
        TITLE = "Towards the Automatic Generation of Recognition Strategies",
        BOOKTITLE = ICCV88,
        YEAR = "1988",
        PAGES = "275--279",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT118769"}

@inproceedings{bb122665,
        AUTHOR = "Cantoni, V. and Cei, U. and Ferretti, M. and Lombardi, L.",
        TITLE = "Towards an Automatic Construction of Object Recognition Strategies",
        BOOKTITLE = ICPR88,
        YEAR = "1988",
        PAGES = "I: 371--374",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT118770"}

@inproceedings{bb122666,
        AUTHOR = "Cova, G. and Griffini, A. and Lombardi, L.",
        TITLE = "Object Recognition Strategy in a Multi-Resolution System",
        BOOKTITLE = CIAP89,
        YEAR = "1989",
        PAGES = "729--733",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT118771"}

@article{bb122667,
        AUTHOR = "Feng, Y.S. and Lapata, M.",
        TITLE = "Automatic Caption Generation for News Images",
        JOURNAL = PAMI,
        VOLUME = "35",
        YEAR = "2013",
        NUMBER = "4",
        MONTH = apr,
        PAGES = "797--812",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118772"}

@article{bb122668,
        AUTHOR = "Vinyals, O. and Toshev, A. and Bengio, S. and Erhan, D.",
        TITLE = "Show and Tell: Lessons Learned from the 2015 MSCOCO Image Captioning
Challenge",
        JOURNAL = PAMI,
        VOLUME = "39",
        YEAR = "2017",
        NUMBER = "4",
        MONTH = apr,
        PAGES = "652--663",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118773"}

@inproceedings{bb122669,
        AUTHOR = "Vinyals, O. and Toshev, A. and Bengio, S. and Erhan, D.",
        TITLE = "Show and tell: A neural image caption generator",
        BOOKTITLE = CVPR15,
        YEAR = "2015",
        PAGES = "3156--3164",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118774"}

@article{bb122670,
        AUTHOR = "Wang, J.Y. and Zhu, X.T. and Gong, S.G.",
        TITLE = "Discovering visual concept structure with sparse and incomplete tags",
        JOURNAL = AI,
        VOLUME = "250",
        YEAR = "2017",
        NUMBER = "1",
        PAGES = "16--36",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118775"}

@article{bb122671,
        AUTHOR = "Kilickaya, M. and Akkus, B.K. and Cakici, R. and Erdem, A. and Erdem, E. and Ikizler Cinbis, N.",
        TITLE = "Data-driven image captioning via salient region discovery",
        JOURNAL = IET-CV,
        VOLUME = "11",
        YEAR = "2017",
        NUMBER = "6",
        MONTH = sep,
        PAGES = "398--406",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118776"}

@article{bb122672,
        AUTHOR = "He, X.D. and Deng, L.",
        TITLE = "Deep Learning for Image-to-Text Generation: A Technical Overview",
        JOURNAL = SPMag,
        VOLUME = "34",
        YEAR = "2017",
        NUMBER = "6",
        MONTH = nov,
        PAGES = "109--116",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118777"}

@article{bb122673,
        AUTHOR = "Deng, L. and He, X.D.",
        TITLE = "Deep Learning for Image-to-Text Generation: A Technical Overview",
        JOURNAL = SPMag,
        VOLUME = "35",
        YEAR = "2018",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "178",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118777"}

@article{bb122674,
        AUTHOR = "Li, L.H. and Tang, S. and Zhang, Y.D. and Deng, L.X. and Tian, Q.",
        TITLE = "GLA: Global-Local Attention for Image Description",
        JOURNAL = MultMed,
        VOLUME = "20",
        YEAR = "2018",
        NUMBER = "3",
        MONTH = mar,
        PAGES = "726--737",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118778"}

@article{bb122675,
        AUTHOR = "Lu, X. and Wang, B. and Zheng, X. and Li, X.",
        TITLE = "Exploring Models and Data for Remote Sensing Image Caption Generation",
        JOURNAL = GeoRS,
        VOLUME = "56",
        YEAR = "2018",
        NUMBER = "4",
        MONTH = apr,
        PAGES = "2183--2195",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118779"}

@article{bb122676,
        AUTHOR = "Wu, C.L. and Wei, Y.W. and Chu, X.L. and Su, F. and Wang, L.Q.",
        TITLE = "Modeling visual and word-conditional semantic attention for image
captioning",
        JOURNAL = SP:IC,
        VOLUME = "67",
        YEAR = "2018",
        PAGES = "100--107",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118780"}

@article{bb122677,
        AUTHOR = "Zhang, M. and Yang, Y. and Zhang, H. and Ji, Y. and Shen, H.T. and Chua, T.",
        TITLE = "More is Better: Precise and Detailed Image Captioning Using Online
Positive Recall and Missing Concepts Mining",
        JOURNAL = IP,
        VOLUME = "28",
        YEAR = "2019",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "32--44",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118781"}

@article{bb122678,
        AUTHOR = "Gella, S. and Keller, F. and Lapata, M.",
        TITLE = "Disambiguating Visual Verbs",
        JOURNAL = PAMI,
        VOLUME = "41",
        YEAR = "2019",
        NUMBER = "2",
        MONTH = feb,
        PAGES = "311--322",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118782"}

@article{bb122679,
        AUTHOR = "Xu, N. and Liu, A.A. and Liu, J. and Nie, W.Z. and Su, Y.T.",
        TITLE = "Scene graph captioner:
Image captioning based on structural visual representation",
        JOURNAL = JVCIR,
        VOLUME = "58",
        YEAR = "2019",
        PAGES = "477--485",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118783"}

@article{bb122680,
        AUTHOR = "He, X.W. and Shi, B.G. and Bai, X. and Xia, G.S. and Zhang, Z.X. and Dong, W.S.",
        TITLE = "Image Caption Generation with Part of Speech Guidance",
        JOURNAL = PRL,
        VOLUME = "119",
        YEAR = "2019",
        PAGES = "229--237",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118784"}

@article{bb122681,
        AUTHOR = "Xiao, X.Y. and Wang, L.F. and Ding, K. and Xiang, S.M. and Pan, C.",
        TITLE = "Dense semantic embedding network for image captioning",
        JOURNAL = PR,
        VOLUME = "90",
        YEAR = "2019",
        PAGES = "285--296",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118785"}

% NOTE(review): pages = {xx-yy} looks like a placeholder, not a real page range -- TODO confirm and fill in.
@article{bb122682,
  author    = {Zhang, X.R. and Wang, X. and Tang, X. and Zhou, H.Y. and Li, C.},
  title     = {Description Generation for Remote Sensing Images Using Attribute
Attention Mechanism},
  journal   = RS,
  volume    = {11},
  number    = {6},
  year      = {2019},
  pages     = {xx-yy},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT118786}
}

@article{bb122683,
        AUTHOR = "Ding, S.T. and Qu, S. and Xi, Y.L. and Sangaiah, A.K. and Wan, S.H.",
        TITLE = "Image caption generation with high-level image features",
        JOURNAL = PRL,
        VOLUME = "123",
        YEAR = "2019",
        PAGES = "89--95",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118787"}

@article{bb122684,
        AUTHOR = "Liu, X.X. and Xu, Q.Y. and Wang, N.",
        TITLE = "A survey on deep neural network-based image captioning",
        JOURNAL = VC,
        VOLUME = "35",
        YEAR = "2019",
        NUMBER = "3",
        MONTH = mar,
        PAGES = "445--470",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118788"}

@article{bb122685,
        AUTHOR = "Hossain, M.Z. and Sohel, F. and Shiratuddin, M.F. and Laga, H.",
        TITLE = "A Comprehensive Survey of Deep Learning for Image Captioning",
        JOURNAL = Surveys,
        VOLUME = "51",
        YEAR = "2019",
        NUMBER = "6",
        MONTH = feb,
        PAGES = "Article No 118",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118789"}

@article{bb122686,
        AUTHOR = "Zhang, Z.J. and Wu, Q. and Wang, Y. and Chen, F.",
        TITLE = "High-Quality Image Captioning With Fine-Grained and Semantic-Guided
Visual Attention",
        JOURNAL = MultMed,
        VOLUME = "21",
        YEAR = "2019",
        NUMBER = "7",
        MONTH = jul,
        PAGES = "1681--1693",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118790"}

@inproceedings{bb122687,
        AUTHOR = "Zhang, Z.J. and Wu, Q. and Wang, Y. and Chen, F.",
        TITLE = "Fine-Grained and Semantic-Guided Visual Attention for Image
Captioning",
        BOOKTITLE = WACV18,
        YEAR = "2018",
        PAGES = "1709--1717",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118791"}

@article{bb122688,
  author    = {Li, X. and Jiang, S.},
  title     = {Know More Say Less: Image Captioning Based on Scene Graphs},
  journal   = MultMed,
  volume    = {21},
  number    = {8},
  pages     = {2117-2130},
  month     = {August},
  year      = {2019},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT118792}
}

% bb122689: braces around the metric name keep its mixed capitalisation
% under styles that convert titles to sentence case.
@article{bb122689,
        AUTHOR = "Sharif, N. and White, L. and Bennamoun, M. and Liu, W. and Shah, S.A.A.",
        TITLE = "{LCEval}: Learned Composite Metric for Caption Evaluation",
        JOURNAL = IJCV,
        VOLUME = "127",
        YEAR = "2019",
        NUMBER = "10",
        MONTH = "October",
        PAGES = "1586-1610",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118793"}

% bb122690: the acronym is brace-protected against sentence-casing styles.
% PAGES is omitted because the source held only the placeholder "xx-yy";
% add the real article number once it has been verified.
@article{bb122690,
        AUTHOR = "Zhang, Z.Y. and Diao, W.H. and Zhang, W.K. and Yan, M.L. and Gao, X. and Sun, X.",
        TITLE = "{LAM}: Remote Sensing Image Captioning with Label-Attention Mechanism",
        JOURNAL = RS,
        VOLUME = "11",
        YEAR = "2019",
        NUMBER = "20",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118794"}

% bb122691: PAGES is omitted because the source held only the placeholder
% "xx-yy"; add the real article number once it has been verified.
@article{bb122691,
        AUTHOR = "Fu, K. and Li, Y. and Zhang, W.K. and Yu, H.F. and Sun, X.",
        TITLE = "Boosting Memory with a Persistent Memory Mechanism for Remote Sensing
Image Captioning",
        JOURNAL = RS,
        VOLUME = "12",
        YEAR = "2020",
        NUMBER = "11",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118795"}

% bb122692: braces around the acronym keep it upper-case under styles
% that convert titles to sentence case.
@article{bb122692,
        AUTHOR = "Tan, J.H. and Chan, C.S. and Chuah, J.H.",
        TITLE = "{COMIC}: Toward A Compact Image Captioning Model With Attention",
        JOURNAL = MultMed,
        VOLUME = "21",
        YEAR = "2019",
        NUMBER = "10",
        MONTH = "October",
        PAGES = "2686-2696",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118796"}

% bb122693: the coined name is brace-protected so sentence-casing styles
% do not reduce it to "Re-caption".
@article{bb122693,
        AUTHOR = "Zhou, L. and Zhang, Y. and Jiang, Y. and Zhang, T. and Fan, W.",
        TITLE = "{Re-Caption}: Saliency-Enhanced Image Captioning Through Two-Phase
Learning",
        JOURNAL = IP,
        VOLUME = "29",
        YEAR = "2020",
        NUMBER = "1",
        PAGES = "694-709",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118797"}

@article{bb122694,
  author    = {Yang, L. and Hu, H.F.},
  title     = {Visual Skeleton and Reparative Attention for Part-of-Speech image
captioning system},
  journal   = CVIU,
  volume    = {189},
  pages     = {102819},
  year      = {2019},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT118798}
}

@article{bb122695,
  author    = {Wang, J.B. and Wang, W. and Wang, L. and Wang, Z.Y. and Feng, D.D. and Tan, T.N.},
  title     = {Learning Visual Relationship and Context-Aware Attention for Image
Captioning},
  journal   = PR,
  volume    = {98},
  pages     = {107075},
  year      = {2020},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT118799}
}

@article{bb122696,
  author    = {Xiao, X. and Wang, L. and Ding, K. and Xiang, S. and Pan, C.},
  title     = {Deep Hierarchical Encoder-Decoder Network for Image Captioning},
  journal   = MultMed,
  volume    = {21},
  number    = {11},
  pages     = {2942-2956},
  month     = {November},
  year      = {2019},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT118800}
}

% bb122697: YEAR set to 2019 to agree with VOLUME 35 / NUMBER 11 / November;
% the other VC volume-35 entry in this file (bb122684) is dated 2019.
% NOTE(review): 2018 was possibly the online-first date -- confirm.
@article{bb122697,
        AUTHOR = "Jiang, T. and Zhang, Z. and Yang, Y.",
        TITLE = "Modeling coverage with semantic embedding for image caption generation",
        JOURNAL = VC,
        VOLUME = "35",
        YEAR = "2019",
        NUMBER = "11",
        MONTH = "November",
        PAGES = "1655-1665",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118801"}

@article{bb122698,
  author    = {Lu, X. and Wang, B. and Zheng, X.},
  title     = {Sound Active Attention Framework for Remote Sensing Image Captioning},
  journal   = GeoRS,
  volume    = {58},
  number    = {3},
  pages     = {1985-2000},
  month     = {March},
  year      = {2020},
  bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT118802}
}

% bb122699: PAGES is omitted because the source held only the placeholder
% "xx-yy"; add the real article number once it has been verified.
@article{bb122699,
        AUTHOR = "Li, Y.Y. and Fang, S.K. and Jiao, L.C. and Liu, R.J. and Shang, R.H.",
        TITLE = "A Multi-Level Attention Model for Remote Sensing Image Captions",
        JOURNAL = RS,
        VOLUME = "12",
        YEAR = "2020",
        NUMBER = "6",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118803"}

Last update: Jul 18, 2024 at 20:50:34