@inproceedings{bb136000,
        AUTHOR = "Xu, Z. R. and Yu, F. X. and Liu, C. X. and Wu, Z. and Wang, H. C. and Chen, X.",
        TITLE = "{FalCon}: Fine-grained Feature Map Sparsity Computing with Decomposed
Convolutions for Inference Optimization",
        BOOKTITLE = WACV22,
        YEAR = "2022",
        PAGES = "3634--3644",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT131995"}

@inproceedings{bb136001,
        AUTHOR = "Mahmoudi, M. A. and Chetouani, A. and Boufera, F. and Tabia, H.",
        TITLE = "{Taylor} Series Kernelized Layer for Fine-Grained Recognition",
        BOOKTITLE = ICIP21,
        YEAR = "2021",
        PAGES = "1914--1918",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT131996"}

@inproceedings{bb136002,
        AUTHOR = "Cheng, J. C. and Vasconcelos, N. M.",
        TITLE = "Learning Deep Classifiers Consistent with Fine-Grained Novelty
Detection",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "1664--1673",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT131997"}

@inproceedings{bb136003,
        AUTHOR = "Ji, R. and Wen, L. and Zhang, L. and Du, D. and Wu, Y. and Zhao, C. and Liu, X. and Huang, F.",
        TITLE = "Attention Convolutional Binary Neural Tree for Fine-Grained Visual
Categorization",
        BOOKTITLE = CVPR20,
        YEAR = "2020",
        PAGES = "10465--10474",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT131998"}

@inproceedings{bb136004,
        AUTHOR = "Taherkhani, F. and Kazemi, H. and Dabouei, A. and Dawson, J. and Nasrabadi, N.",
        TITLE = "A Weakly Supervised Fine Label Classifier Enhanced by Coarse
Supervision",
        BOOKTITLE = ICCV19,
        YEAR = "2019",
        PAGES = "6458--6467",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT131999"}

@inproceedings{bb136005,
        AUTHOR = "Yang, H. and Wu, H. and Chen, H.",
        TITLE = "Detecting {11K} Classes: Large Scale Object Detection Without
Fine-Grained Bounding Boxes",
        BOOKTITLE = ICCV19,
        YEAR = "2019",
        PAGES = "9804--9812",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT132000"}

@inproceedings{bb136006,
        AUTHOR = "Wagner, J. and Kohler, J. M. and Gindele, T. and Hetzel, L. and Wiedemer, J. T. and Behnke, S.",
        TITLE = "Interpretable and Fine-Grained Visual Explanations for Convolutional
Neural Networks",
        BOOKTITLE = CVPR19,
        YEAR = "2019",
        PAGES = "9089--9099",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT132001"}

@inproceedings{bb136007,
        AUTHOR = "Feng, Z. and Fu, K. and Zhao, Q.",
        TITLE = "Learning to Focus and Discriminate for Fine-Grained Classification",
        BOOKTITLE = ICIP19,
        YEAR = "2019",
        PAGES = "415--419",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT132002"}

@inproceedings{bb136008,
        AUTHOR = "Zhong, W. and Jiang, L. and Zhang, T. and Ji, J. and Xiong, H.",
        TITLE = "A Multi-part Convolutional Attention Network for Fine-Grained Image
Recognition",
        BOOKTITLE = ICPR18,
        YEAR = "2018",
        PAGES = "1857--1862",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT132003"}

@inproceedings{bb136009,
        AUTHOR = "Simonelli, A. and de Natale, F. G. B. and Messelodi, S. and Bulo, S. R.",
        TITLE = "Increasingly Specialized Ensemble of Convolutional Neural Networks
for Fine-Grained Recognition",
        BOOKTITLE = ICIP18,
        YEAR = "2018",
        PAGES = "594--598",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT132004"}

@inproceedings{bb136010,
        AUTHOR = "Wang, Y. and Morariu, V. I. and Davis, L. S.",
        TITLE = "Learning a Discriminative Filter Bank Within a {CNN} for Fine-Grained
Recognition",
        BOOKTITLE = CVPR18,
        YEAR = "2018",
        PAGES = "4148--4157",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT132005"}

@inproceedings{bb136011,
        AUTHOR = "Cai, S. J. and Zuo, W. M. and Zhang, L.",
        TITLE = "Higher-Order Integration of Hierarchical Convolutional Activations
for Fine-Grained Visual Categorization",
        BOOKTITLE = ICCV17,
        YEAR = "2017",
        PAGES = "511--520",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT132006"}

@inproceedings{bb136012,
        AUTHOR = "Kong, S. and Fowlkes, C. C.",
        TITLE = "Pixel-Wise Attentional Gating for Scene Parsing",
        BOOKTITLE = WACV19,
        YEAR = "2019",
        PAGES = "1024--1033",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT132007"}

@inproceedings{bb136013,
        AUTHOR = "Kong, S. and Fowlkes, C. C.",
        TITLE = "Recurrent Scene Parsing with Perspective Understanding in the Loop",
        BOOKTITLE = CVPR18,
        YEAR = "2018",
        PAGES = "956--965",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT132008"}

@inproceedings{bb136014,
        AUTHOR = "Zheng, H. and Fu, J. and Mei, T. and Luo, J.",
        TITLE = "Learning Multi-attention Convolutional Neural Network for
Fine-Grained Image Recognition",
        BOOKTITLE = ICCV17,
        YEAR = "2017",
        PAGES = "5219--5227",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT132009"}

@inproceedings{bb136015,
        AUTHOR = "Fu, J. and Zheng, H. and Mei, T.",
        TITLE = "Look Closer to See Better: Recurrent Attention Convolutional Neural
Network for Fine-Grained Image Recognition",
        BOOKTITLE = CVPR17,
        YEAR = "2017",
        PAGES = "4476--4484",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT132010"}

@inproceedings{bb136016,
        AUTHOR = "Ge, Z. Y. and McCool, C. and Sanderson, C. and Wang, P. and Liu, L. Q. and Reid, I. D. and Corke, P.",
        TITLE = "Exploiting Temporal Information for {DCNN}-Based Fine-Grained Object
Classification",
        BOOKTITLE = DICTA16,
        YEAR = "2016",
        PAGES = "1--6",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT132011"}

@inproceedings{bb136017,
        AUTHOR = "Ai, S. S. and Jia, C. Y. and Chen, Z. N.",
        TITLE = "Large-Scale Product Classification via Spatial Attention Based {CNN}
Learning and Multi-class Regression",
        BOOKTITLE = MMMod17,
        VOLUME = "I",
        YEAR = "2017",
        PAGES = "176--188",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT132012"}

@inproceedings{bb136018,
        AUTHOR = "Diba, A. and Pazandeh, A. M. and Pirsiavash, H. and Van Gool, L. J.",
        TITLE = "{DeepCAMP}: Deep Convolutional Action Attribute Mid-Level Patterns",
        BOOKTITLE = CVPR16,
        YEAR = "2016",
        PAGES = "3557--3565",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT132013"}

@inproceedings{bb136019,
        AUTHOR = "Chevalier, M. and Thome, N. and Cord, M. and Fournier, J. and Henaff, G. and Dusch, E.",
        TITLE = "{LR-CNN} for fine-grained classification with varying resolution",
        BOOKTITLE = ICIP15,
        YEAR = "2015",
        PAGES = "3101--3105",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT132014"}

@inproceedings{bb136020,
        AUTHOR = "Ge, Z. and Bewley, A. and McCool, C. and Corke, P. and Upcroft, B. and Sanderson, C.",
        TITLE = "Fine-grained classification via mixture of deep convolutional neural
networks",
        BOOKTITLE = WACV16,
        YEAR = "2016",
        PAGES = "1--6",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT132015"}

@inproceedings{bb136021,
        AUTHOR = "Ge, Z. and McCool, C. and Sanderson, C. and Corke, P.",
        TITLE = "Modelling local deep convolutional neural network features to improve
fine-grained image classification",
        BOOKTITLE = ICIP15,
        YEAR = "2015",
        PAGES = "4112--4116",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT132016"}

@article{bb136022,
        AUTHOR = "Taylor, S. L. and Dahl, D. A. and Lipshutz, M. and Weir, C. and Norton, L. M. and Nilson, R. W. and Linebarger, M. C.",
        TITLE = "Integrating Natural-Language Understanding with
Document Structure-Analysis",
        JOURNAL = AIR,
        VOLUME = "8",
        YEAR = "1994",
        NUMBER = "2-3",
        PAGES = "255--276",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT132017"}

@article{bb136023,
        AUTHOR = "McKevitt, P.",
        TITLE = "Integration of Natural-Language and Vision Processing: Theory",
        JOURNAL = AIR,
        VOLUME = "9",
        YEAR = "1995",
        NUMBER = "4-5",
        MONTH = oct,
        PAGES = "247--250",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT132018"}

@book{bb136024,
  author    = {McKevitt, P.},
  title     = {Integration of Natural-Language and Vision Processing: Theory},
  publisher = {Springer},
  year      = {1995},
  bibsource = {http://www.visionbib.com/bibliography/match605.html#TT132018}
}

@article{bb136025,
        AUTHOR = "McKevitt, P.",
        TITLE = "Integration of Natural-Language and Vision Processing:
Intelligent Multimedia",
        JOURNAL = AIR,
        VOLUME = "9",
        YEAR = "1995",
        NUMBER = "2-3",
        MONTH = jun,
        PAGES = "77--80",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT132019"}

@article{bb136026,
        AUTHOR = "McKevitt, P.",
        TITLE = "Integration of Natural-Language and Vision Processing:
More Computational Models and Systems",
        JOURNAL = AIR,
        VOLUME = "8",
        YEAR = "1995",
        NUMBER = "5-6",
        PAGES = "345--348",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT132020"}

@article{bb136027,
        AUTHOR = "McKevitt, P.",
        TITLE = "Integration of Natural-Language and Vision Processing:
Computational Models and Systems",
        JOURNAL = AIR,
        VOLUME = "8",
        YEAR = "1994",
        NUMBER = "2-3",
        PAGES = "99--104",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT132021"}

@book{bb136028,
  author    = {McKevitt, P.},
  title     = {Integration of Natural-Language and Vision Processing:
               Computational Models and Systems},
  publisher = {Kluwer},
  year      = {1995},
  bibsource = {http://www.visionbib.com/bibliography/match605.html#TT132021}
}

@article{bb136029,
        AUTHOR = "McKevitt, P.",
        TITLE = "Integration of Natural-Language and Vision Processing:
Grounding Representations",
        JOURNAL = AIR,
        VOLUME = "10",
        YEAR = "1996",
        NUMBER = "1-2",
        MONTH = apr,
        PAGES = "7--13",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT132022"}

@book{bb136030,
        AUTHOR = "McKevitt, P.",
        TITLE = "Integration of Natural Language and Vision Processing",
        PUBLISHER = "Kluwer",
        YEAR = "1996",
        MONTH = sep,
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT132023"}

@article{bb136031,
        AUTHOR = "Siskind, J. M.",
        TITLE = "Grounding Language in Perception",
        JOURNAL = AIR,
        VOLUME = "8",
        YEAR = "1995",
        NUMBER = "5-6",
        PAGES = "371--391",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT132024"}

@article{bb136032,
        AUTHOR = "Srihari, R. K.",
        TITLE = "Automatic-Indexing and Content-Based Retrieval of Captioned Images",
        JOURNAL = Computer,
        VOLUME = "28",
        YEAR = "1995",
        NUMBER = "9",
        MONTH = sep,
        PAGES = "49--56",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT132025"}

@article{bb136033,
        AUTHOR = "Srihari, R. K.",
        TITLE = "Use of Captions and Other Collateral Text in Understanding Photographs",
        JOURNAL = AIR,
        VOLUME = "8",
        YEAR = "1995",
        NUMBER = "5-6",
        PAGES = "409--430",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT132026"}

@article{bb136034,
        AUTHOR = "Srihari, R. K.",
        TITLE = "Use of Collateral Text in Understanding Photos in Documents",
        JOURNAL = SPIE,
        VOLUME = "2368",
        YEAR = "1994",
        PAGES = "186--199",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT132027"}

@article{bb136035,
        AUTHOR = "Srihari, R. K.",
        TITLE = "Computational Models for Integrating Linguistic and Visual Information:
A Survey",
        JOURNAL = AIR,
        VOLUME = "8",
        YEAR = "1995",
        NUMBER = "5-6",
        PAGES = "349--369",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT132028"}

@inproceedings{bb136036,
        AUTHOR = "Srihari, R. K.",
        TITLE = "Using Linguistic Context for Image Interpretation and Annotation",
        BOOKTITLE = "Radius97",
        YEAR = "1997",
        PAGES = "419--427",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT132029"}

% NOTE(review): PAGES "xx" in bb136037 looks like a placeholder -- confirm the real page range.
@inproceedings{bb136037,
        AUTHOR = "Srihari, R. K.",
        TITLE = "Linguistic Context In Vision",
        BOOKTITLE = Context95,
        YEAR = "1995",
        PAGES = "xx",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT132030"}

@inproceedings{bb136038,
        AUTHOR = "Srihari, R. K. and Zhang, Z. and Venkatraman, M. and Chopra, R.",
        TITLE = "Using Speech Input for Image Interpretation and Annotation",
        BOOKTITLE = ARPA96,
        YEAR = "1996",
        PAGES = "501--510",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT132031"}

@inproceedings{bb136039,
        AUTHOR = "Burhans, D. T. and Chopra, R. and Srihari, R. K. and Govindaraju, V. and Venkataraman, M.",
        TITLE = "Use of Collateral Text in Image Interpretation",
        BOOKTITLE = ARPA94,
        VOLUME = "II",
        YEAR = "1994",
        PAGES = "897--907",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT132032"}

@inproceedings{bb136040,
        AUTHOR = "Srihari, R. K. and Burhans, D. T.",
        TITLE = "Visual Semantics: Extracting Visual Information from
Text Accompanying Pictures",
        BOOKTITLE = AAAI-94,
        YEAR = "1994",
        PAGES = "793--798",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT132033"}

@inproceedings{bb136041,
        AUTHOR = "Govindaraju, V. and Srihari, S. N. and Sher, D. B.",
        TITLE = "Caption-Aided Face Location In Newspaper Photographs",
        BOOKTITLE = ICPR92,
        VOLUME = "I",
        YEAR = "1992",
        PAGES = "474--477",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT132034"}

@inproceedings{bb136042,
        AUTHOR = "Govindaraju, V. and Srihari, S. N. and Sher, D. B.",
        TITLE = "A Computational Model for Face Location Based on Cognitive Principles",
        BOOKTITLE = AAAI-92,
        YEAR = "1992",
        PAGES = "350--355",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT132035"}

@article{bb136043,
        AUTHOR = "Schank, R. C. and Fano, A.",
        TITLE = "Memory and Expectations in Learning, Language, and Visual Understanding",
        JOURNAL = AIR,
        VOLUME = "9",
        YEAR = "1995",
        NUMBER = "4-5",
        MONTH = oct,
        PAGES = "261--271",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT132036"}

@article{bb136044,
        AUTHOR = "Wilks, Y.",
        TITLE = "Language, Vision and Metaphor",
        JOURNAL = AIR,
        VOLUME = "9",
        YEAR = "1995",
        NUMBER = "4-5",
        MONTH = oct,
        PAGES = "273--289",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT132037"}

@article{bb136045,
        AUTHOR = "Partridge, D.",
        TITLE = "Language and Vision: A Single Perceptual Mechanism",
        JOURNAL = AIR,
        VOLUME = "9",
        YEAR = "1995",
        NUMBER = "4-5",
        MONTH = oct,
        PAGES = "291--303",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT132038"}

@article{bb136046,
        AUTHOR = "Marconi, D.",
        TITLE = "Work on the Integration of Language and Vision at the
{University} of {Torino}",
        JOURNAL = AIR,
        VOLUME = "10",
        YEAR = "1996",
        NUMBER = "1-2",
        MONTH = apr,
        PAGES = "15--20",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT132039"}

@article{bb136047,
        AUTHOR = "Meini, C. and Paternoster, A.",
        TITLE = "Understanding Language Through Vision",
        JOURNAL = AIR,
        VOLUME = "10",
        YEAR = "1996",
        NUMBER = "1-2",
        MONTH = apr,
        PAGES = "37--48",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT132040"}

@article{bb136048,
        AUTHOR = "McKevitt, P. and Guo, C. M.",
        TITLE = "From {Chinese} Rooms to {Irish} Rooms: New Words on Visions for Language",
        JOURNAL = AIR,
        VOLUME = "10",
        YEAR = "1996",
        NUMBER = "1-2",
        MONTH = apr,
        PAGES = "49--63",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT132041"}

@article{bb136049,
        AUTHOR = "Grumbach, A.",
        TITLE = "Grounding Symbols into Perceptions",
        JOURNAL = AIR,
        VOLUME = "10",
        YEAR = "1996",
        NUMBER = "1-2",
        MONTH = apr,
        PAGES = "131--146",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT132042"}

@article{bb136050,
        AUTHOR = "Socher, G. and Sagerer, G. F. and Perona, P.",
        TITLE = "{Bayesian} reasoning on qualitative descriptions from images and speech",
        JOURNAL = IVC,
        VOLUME = "18",
        YEAR = "2000",
        NUMBER = "2",
        MONTH = jan,
        PAGES = "155--172",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT132043"}

@article{bb136051,
        AUTHOR = "Mukerjee, A. and Gupta, K. and Nautiyal, S. and Singh, M. P. and Mishra, N.",
        TITLE = "Conceptual description of visual scenes from linguistic models",
        JOURNAL = IVC,
        VOLUME = "18",
        YEAR = "2000",
        NUMBER = "2",
        MONTH = jan,
        PAGES = "173--187",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT132044"}

@article{bb136052,
        AUTHOR = "Arens, M. and Gerber, R. and Nagel, H. H.",
        TITLE = "Conceptual representations between video signals and natural language
descriptions",
        JOURNAL = IVC,
        VOLUME = "26",
        YEAR = "2008",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "53--66",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT132045"}

@inproceedings{bb136053,
        AUTHOR = "Gerber, R. and Nagel, H. H.",
        TITLE = "(Mis?-) Using {DRT} for Generation of Natural Language Text
from Image Sequences",
        BOOKTITLE = ECCV98,
        VOLUME = "II",
        YEAR = "1998",
        PAGES = "255",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT132046"}

@article{bb136054,
        AUTHOR = "Lee, Y. J. and Grauman, K.",
        TITLE = "Object-Graphs for Context-Aware Visual Category Discovery",
        JOURNAL = PAMI,
        VOLUME = "34",
        YEAR = "2012",
        NUMBER = "2",
        MONTH = feb,
        PAGES = "346--358",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT132047"}

@inproceedings{bb136055,
        AUTHOR = "Lee, Y. J. and Grauman, K.",
        TITLE = "Object-graphs for context-aware category discovery",
        BOOKTITLE = CVPR10,
        YEAR = "2010",
        PAGES = "1--8",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT132048"}

@inproceedings{bb136056,
        AUTHOR = "Lee, Y. J. and Grauman, K.",
        TITLE = "Learning the easy things first: Self-paced visual category discovery",
        BOOKTITLE = CVPR11,
        YEAR = "2011",
        PAGES = "1721--1728",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT132049"}

@article{bb136057,
        AUTHOR = "Yu, A. and Grauman, K.",
        TITLE = "Densifying Supervision for Fine-Grained Visual Comparisons",
        JOURNAL = IJCV,
        VOLUME = "128",
        YEAR = "2020",
        NUMBER = "10-11",
        MONTH = nov,
        PAGES = "2704--2730",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT132050"}

@inproceedings{bb136058,
        AUTHOR = "Hessel, J. and Hwang, J. D. and Park, J. S. and Zellers, R. and Bhagavatula, C. and Rohrbach, A. and Saenko, K. and Choi, Y.",
        TITLE = "The Abduction of {Sherlock} {Holmes}:
A Dataset for Visual Abductive Reasoning",
        BOOKTITLE = ECCV22,
        VOLUME = "XXXVI",
        YEAR = "2022",
        PAGES = "558--575",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT132051"}

@inproceedings{bb136059,
  author    = {Perona, P.},
  title     = {A taxonomy of visual recognition},
  booktitle = VMV04,
  year      = {2004},
  pages     = {187},
  bibsource = {http://www.visionbib.com/bibliography/match605.html#TT132052}
}

@inproceedings{bb136060,
        AUTHOR = "Takahashi, T. and Nakanishi, S. and Kuno, Y. and Shirai, Y.",
        TITLE = "Helping Computer Vision by Verbal and Nonverbal Communication",
        BOOKTITLE = ICPR98,
        VOLUME = "II",
        YEAR = "1998",
        PAGES = "1216--1218",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT132053"}

@inproceedings{bb136061,
        AUTHOR = "Satoh, S. and Nakamura, Y. and Kanade, T.",
        TITLE = "{Name-It}: Naming and Detecting Faces in Video by the Integration
of Image and Natural Language Processing",
        BOOKTITLE = IJCAI97,
        YEAR = "1997",
        PAGES = "1488--1495",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT132054"}

@inproceedings{bb136062,
        AUTHOR = "Satoh, S. and Kanade, T.",
        TITLE = "{Name-It}: Association Of Face And Name In Video",
        BOOKTITLE = CVPR97,
        YEAR = "1997",
        PAGES = "368--373",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT132055"}

@inproceedings{bb136063,
        AUTHOR = "Socher, G. and Sagerer, G. F. and Kummert, F. and Fuhr, T.",
        TITLE = "Talking About {3D} Scenes: Integration of Image and Speech Understanding
in a Hybrid Distributed System",
        BOOKTITLE = ICIP96,
        VOLUME = "II",
        YEAR = "1996",
        PAGES = "809--812",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT132056"}

@article{bb136064,
        AUTHOR = "Kuniyoshi, Y. and Inaba, M. and Inoue, H.",
        TITLE = "Learning by Watching: Extracting Reusable Task Knowledge from
Visual Observation of Human Performance",
        JOURNAL = RA,
        VOLUME = "10",
        YEAR = "1994",
        PAGES = "799--822",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT132057"}

@inproceedings{bb136065,
        AUTHOR = "Kuniyoshi, Y. and Inoue, H.",
        TITLE = "Indexicality and dynamic attention control in qualitative recognition
of assembly actions",
        BOOKTITLE = ECCV92,
        YEAR = "1992",
        PAGES = "874--878",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT132058"}

% NOTE(review): PAGES "xx-yy" in bb136066 looks like a placeholder -- confirm the real page range.
@article{bb136066,
        AUTHOR = "Porway, J. and Wang, Q. C. and Zhu, S. C.",
        TITLE = "A Hierarchical and Contextual Model for Aerial Image Parsing",
        JOURNAL = IJCV,
        VOLUME = "88",
        YEAR = "2010",
        NUMBER = "2",
        MONTH = jun,
        PAGES = "xx-yy",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT132059"}

@inproceedings{bb136067,
        AUTHOR = "Porway, J. and Wang, K. and Yao, B. and Zhu, S. C.",
        TITLE = "A hierarchical and contextual model for aerial image understanding",
        BOOKTITLE = CVPR08,
        YEAR = "2008",
        PAGES = "1--8",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT132060"}

@inproceedings{bb136068,
        AUTHOR = "Si, Z. Z. and Gong, H. F. and Wu, Y. N. and Zhu, S. C.",
        TITLE = "Learning mixed templates for object recognition",
        BOOKTITLE = CVPR09,
        YEAR = "2009",
        PAGES = "272--279",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT132061"}

@article{bb136069,
        AUTHOR = "Tu, Z. W. and Bai, X.",
        TITLE = "Auto-Context and Its Application to High-Level Vision Tasks and {3D}
Brain Image Segmentation",
        JOURNAL = PAMI,
        VOLUME = "32",
        YEAR = "2010",
        NUMBER = "10",
        MONTH = oct,
        PAGES = "1744--1757",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT132062"}

@inproceedings{bb136070,
        AUTHOR = "Tu, Z. W.",
        TITLE = "Auto-context and its application to high-level vision tasks",
        BOOKTITLE = CVPR08,
        YEAR = "2008",
        PAGES = "1--8",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT132063"}

@inproceedings{bb136071,
        AUTHOR = "Jones, J. and Hager, G. D. and Khudanpur, S.",
        TITLE = "Toward Computer Vision Systems That Understand Real-World Assembly
Processes",
        BOOKTITLE = WACV19,
        YEAR = "2019",
        PAGES = "426--434",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT132064"}

@inproceedings{bb136072,
        AUTHOR = "Lampert, C. H.",
        TITLE = "Partitioning of image datasets using discriminative context information",
        BOOKTITLE = CVPR08,
        YEAR = "2008",
        PAGES = "1--8",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT132065"}

@inproceedings{bb136073,
        AUTHOR = "Hansen, C. and Henderson, T. C.",
        TITLE = "Towards the Automatic Generation of Recognition Strategies",
        BOOKTITLE = ICCV88,
        YEAR = "1988",
        PAGES = "275--279",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT132066"}

@inproceedings{bb136074,
        AUTHOR = "Cantoni, V. and Cei, U. and Ferretti, M. and Lombardi, L.",
        TITLE = "Towards an Automatic Construction of Object Recognition Strategies",
        BOOKTITLE = ICPR88,
        VOLUME = "I",
        YEAR = "1988",
        PAGES = "371--374",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT132067"}

@inproceedings{bb136075,
        AUTHOR = "Cova, G. and Griffini, A. and Lombardi, L.",
        TITLE = "Object Recognition Strategy in a Multi-Resolution System",
        BOOKTITLE = CIAP89,
        YEAR = "1989",
        PAGES = "729--733",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT132068"}

@article{bb136076,
        AUTHOR = "Feng, Y. S. and Lapata, M.",
        TITLE = "Automatic Caption Generation for News Images",
        JOURNAL = PAMI,
        VOLUME = "35",
        YEAR = "2013",
        NUMBER = "4",
        MONTH = apr,
        PAGES = "797--812",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT132069"}

@article{bb136077,
        AUTHOR = "Vinyals, O. and Toshev, A. and Bengio, S. and Erhan, D.",
        TITLE = "Show and Tell: Lessons Learned from the 2015 {MSCOCO} Image Captioning
Challenge",
        JOURNAL = PAMI,
        VOLUME = "39",
        YEAR = "2017",
        NUMBER = "4",
        MONTH = apr,
        PAGES = "652--663",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT132070"}

@inproceedings{bb136078,
        AUTHOR = "Vinyals, O. and Toshev, A. and Bengio, S. and Erhan, D.",
        TITLE = "Show and tell: A neural image caption generator",
        BOOKTITLE = CVPR15,
        YEAR = "2015",
        PAGES = "3156--3164",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT132071"}

@article{bb136079,
        AUTHOR = "Wang, J. Y. and Zhu, X. T. and Gong, S. G.",
        TITLE = "Discovering visual concept structure with sparse and incomplete tags",
        JOURNAL = AI,
        VOLUME = "250",
        YEAR = "2017",
        NUMBER = "1",
        PAGES = "16--36",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT132072"}

@article{bb136080,
        AUTHOR = "Kilickaya, M. and Akkus, B. K. and Cakici, R. and Erdem, A. and Erdem, E. and Ikizler Cinbis, N.",
        TITLE = "Data-driven image captioning via salient region discovery",
        JOURNAL = IET-CV,
        VOLUME = "11",
        YEAR = "2017",
        NUMBER = "6",
        MONTH = sep,
        PAGES = "398--406",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT132073"}

@article{bb136081,
        AUTHOR = "He, X. D. and Deng, L.",
        TITLE = "Deep Learning for Image-to-Text Generation: A Technical Overview",
        JOURNAL = SPMag,
        VOLUME = "34",
        YEAR = "2017",
        NUMBER = "6",
        MONTH = nov,
        PAGES = "109--116",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT132074"}

@article{bb136082,
        AUTHOR = "Deng, L. and He, X. D.",
        TITLE = "Deep Learning for Image-to-Text Generation: A Technical Overview",
        JOURNAL = SPMag,
        VOLUME = "35",
        YEAR = "2018",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "178",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT132074"}

@article{bb136083,
        AUTHOR = "Zhang, M. and Yang, Y. and Zhang, H. and Ji, Y. and Shen, H. T. and Chua, T.",
        TITLE = "More is Better: Precise and Detailed Image Captioning Using Online
Positive Recall and Missing Concepts Mining",
        JOURNAL = IP,
        VOLUME = "28",
        YEAR = "2019",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "32--44",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT132075"}

@article{bb136084,
        AUTHOR = "Gella, S. and Keller, F. and Lapata, M.",
        TITLE = "Disambiguating Visual Verbs",
        JOURNAL = PAMI,
        VOLUME = "41",
        YEAR = "2019",
        NUMBER = "2",
        MONTH = feb,
        PAGES = "311--322",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT132076"}

@article{bb136085,
        AUTHOR = "He, X. W. and Shi, B. G. and Bai, X. and Xia, G. S. and Zhang, Z. X. and Dong, W. S.",
        TITLE = "Image Caption Generation with Part of Speech Guidance",
        JOURNAL = PRL,
        VOLUME = "119",
        YEAR = "2019",
        PAGES = "229--237",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT132077"}

@article{bb136086,
        AUTHOR = "Xiao, X. Y. and Wang, L. F. and Ding, K. and Xiang, S. M. and Pan, C. H.",
        TITLE = "Dense semantic embedding network for image captioning",
        JOURNAL = PR,
        VOLUME = "90",
        YEAR = "2019",
        PAGES = "285--296",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT132078"}

@article{bb136087,
        AUTHOR = "Liu, X. X. and Xu, Q. Y. and Wang, N.",
        TITLE = "A survey on deep neural network-based image captioning",
        JOURNAL = VC,
        VOLUME = "35",
        YEAR = "2019",
        NUMBER = "3",
        MONTH = mar,
        PAGES = "445--470",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT132079"}

@article{bb136088,
        author    = {Hossain, M.Z. and Sohel, F. and Shiratuddin, M.F. and Laga, H.},
        title     = {A Comprehensive Survey of Deep Learning for Image Captioning},
        journal   = Surveys,
        year      = {2019},
        volume    = {51},
        number    = {6},
        month     = {February},
        pages     = {Article No 118},
        bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT132080}
}

@article{bb136089,
        author    = {Li, X. and Jiang, S.},
        title     = {Know More Say Less: Image Captioning Based on Scene Graphs},
        journal   = MultMed,
        year      = {2019},
        volume    = {21},
        number    = {8},
        month     = {August},
        pages     = {2117-2130},
        bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT132081}
}

@article{bb136090,
        author    = {Sharif, N. and White, L. and Bennamoun, M. and Liu, W. and Shah, S.A.A.},
        title     = {LCEval: Learned Composite Metric for Caption Evaluation},
        journal   = IJCV,
        year      = {2019},
        volume    = {127},
        number    = {10},
        month     = {October},
        pages     = {1586-1610},
        bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT132082}
}

@article{bb136091,
        author    = {Zhou, L. and Zhang, Y. and Jiang, Y. and Zhang, T. and Fan, W.},
        title     = {Re-Caption: Saliency-Enhanced Image Captioning Through Two-Phase Learning},
        journal   = IP,
        year      = {2020},
        volume    = {29},
        number    = {1},
        pages     = {694-709},
        bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT132083}
}

@article{bb136092,
        author    = {Xiao, X. and Wang, L. and Ding, K. and Xiang, S. and Pan, C.},
        title     = {Deep Hierarchical Encoder-Decoder Network for Image Captioning},
        journal   = MultMed,
        year      = {2019},
        volume    = {21},
        number    = {11},
        month     = {November},
        pages     = {2942-2956},
        bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT132084}
}

@comment{bb136093: YEAR corrected from 2018 to 2019. Volume 35 of this journal
is dated 2019 by the volume 35, number 3 (March) entry in this file, so
volume 35, number 11 (November) is the 2019 issue. The earlier value was
presumably the online-first date; confirm against the publisher record.}
@article{bb136093,
        AUTHOR = "Jiang, T. and Zhang, Z. and Yang, Y.",
        TITLE = "Modeling coverage with semantic embedding for image caption generation",
        JOURNAL = VC,
        VOLUME = "35",
        YEAR = "2019",
        NUMBER = "11",
        MONTH = "November",
        PAGES = "1655-1665",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT132085"}

@article{bb136094,
        author    = {Chen, X.H. and Zhang, M.X. and Wang, Z. and Zuo, L. and Li, B. and Yang, Y.},
        title     = {Leveraging unpaired out-of-domain data for image captioning},
        journal   = PRL,
        year      = {2020},
        volume    = {132},
        pages     = {132-140},
        bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT132086}
}

@article{bb136095,
        author    = {Xu, N. and Zhang, H. and Liu, A. and Nie, W. and Su, Y. and Nie, J. and Zhang, Y.},
        title     = {Multi-Level Policy and Reward-Based Deep Reinforcement Learning Framework for Image Captioning},
        journal   = MultMed,
        year      = {2020},
        volume    = {22},
        number    = {5},
        month     = {May},
        pages     = {1372-1383},
        bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT132087}
}

@article{bb136096,
        author    = {Guo, L. and Liu, J. and Lu, S. and Lu, H.},
        title     = {Show, Tell, and Polish: Ruminant Decoding for Image Captioning},
        journal   = MultMed,
        year      = {2020},
        volume    = {22},
        number    = {8},
        month     = {August},
        pages     = {2149-2162},
        bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT132088}
}

@article{bb136097,
        author    = {Feng, Q. and Wu, Y. and Fan, H. and Yan, C. and Xu, M. and Yang, Y.},
        title     = {Cascaded Revision Network for Novel Object Captioning},
        journal   = CirSysVideo,
        year      = {2020},
        volume    = {30},
        number    = {10},
        month     = {October},
        pages     = {3413-3421},
        bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT132089}
}

@article{bb136098,
        author    = {Shilpa, M. and He, J. and Zhao, Y.J. and Sun, B. and Yu, L.J.},
        title     = {Feedback evaluations to promote image captioning},
        journal   = IET-IPR,
        year      = {2020},
        volume    = {14},
        number    = {13},
        month     = {November},
        pages     = {3021-3027},
        bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT132090}
}

@article{bb136099,
        author    = {Liu, H. and Zhang, S. and Lin, K. and Wen, J. and Li, J. and Hu, X.},
        title     = {Vocabulary-Wide Credit Assignment for Training Image Captioning Models},
        journal   = IP,
        year      = {2021},
        volume    = {30},
        pages     = {2450-2460},
        bibsource = {http://www.visionbib.com/bibliography/match607ic1.html#TT132091}
}

Last update: May 3, 2026 at 17:51:13