% Section of a visionbib.com-derived bibliography (keys bb122600 to bb122699).
% Venue and journal names (ICCV17, AIR, PAMI, ...) are unquoted macros; they are
% presumably declared by string definitions elsewhere in the file -- TODO confirm.
% Conventions in this section: page ranges use "--", months use the standard
% three-letter macros, the proceedings volume lives in VOLUME (not in PAGES),
% and acronyms / proper nouns in titles are brace-protected against recasing.
% BIBSOURCE and INTERNAL-NOTE are annotation fields ignored by standard styles.

% ---- Entries whose BIBSOURCE page is match605cnnfg2.html ----

@inproceedings{bb122600,
  AUTHOR = "Cai, S.J. and Zuo, W.M. and Zhang, L.",
  TITLE = "Higher-Order Integration of Hierarchical Convolutional Activations for Fine-Grained Visual Categorization",
  BOOKTITLE = ICCV17,
  YEAR = "2017",
  PAGES = "511--520",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT118707"
}

@inproceedings{bb122601,
  AUTHOR = "Kong, S. and Fowlkes, C.C.",
  TITLE = "Pixel-Wise Attentional Gating for Scene Parsing",
  BOOKTITLE = WACV19,
  YEAR = "2019",
  PAGES = "1024--1033",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT118708"
}

@inproceedings{bb122602,
  AUTHOR = "Kong, S. and Fowlkes, C.C.",
  TITLE = "Recurrent Scene Parsing with Perspective Understanding in the Loop",
  BOOKTITLE = CVPR18,
  YEAR = "2018",
  PAGES = "956--965",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT118709"
}

@inproceedings{bb122603,
  AUTHOR = "Zheng, H. and Fu, J. and Mei, T. and Luo, J.",
  TITLE = "Learning Multi-attention Convolutional Neural Network for Fine-Grained Image Recognition",
  BOOKTITLE = ICCV17,
  YEAR = "2017",
  PAGES = "5219--5227",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT118710"
}

@inproceedings{bb122604,
  AUTHOR = "Fu, J. and Zheng, H. and Mei, T.",
  TITLE = "Look Closer to See Better: Recurrent Attention Convolutional Neural Network for Fine-Grained Image Recognition",
  BOOKTITLE = CVPR17,
  YEAR = "2017",
  PAGES = "4476--4484",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT118711"
}

@inproceedings{bb122605,
  AUTHOR = "Ge, Z.Y. and McCool, C. and Sanderson, C. and Wang, P. and Liu, L.Q. and Reid, I.D. and Corke, P.",
  TITLE = "Exploiting Temporal Information for {DCNN}-Based Fine-Grained Object Classification",
  BOOKTITLE = DICTA16,
  YEAR = "2016",
  PAGES = "1--6",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT118712"
}

@inproceedings{bb122606,
  AUTHOR = "Ai, S.S. and Jia, C.Y. and Chen, Z.N.",
  TITLE = "Large-Scale Product Classification via Spatial Attention Based {CNN} Learning and Multi-class Regression",
  BOOKTITLE = MMMod17,
  YEAR = "2017",
  VOLUME = "I",
  PAGES = "176--188",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT118713"
}

@inproceedings{bb122607,
  AUTHOR = "Diba, A. and Pazandeh, A.M. and Pirsiavash, H. and Van Gool, L.J.",
  TITLE = "{DeepCAMP}: Deep Convolutional Action Attribute Mid-Level Patterns",
  BOOKTITLE = CVPR16,
  YEAR = "2016",
  PAGES = "3557--3565",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT118714"
}

@inproceedings{bb122608,
  AUTHOR = "Zhang, H. and Xu, T. and Elhoseiny, M. and Huang, X.L. and Zhang, S.T. and Elgammal, A.E. and Metaxas, D.N.",
  TITLE = "{SPDA-CNN}: Unifying Semantic Part Detection and Abstraction for Fine-Grained Recognition",
  BOOKTITLE = CVPR16,
  YEAR = "2016",
  PAGES = "1143--1152",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT118715"
}

@inproceedings{bb122609,
  AUTHOR = "Chevalier, M. and Thome, N. and Cord, M. and Fournier, J. and Henaff, G. and Dusch, E.",
  TITLE = "{LR-CNN} for fine-grained classification with varying resolution",
  BOOKTITLE = ICIP15,
  YEAR = "2015",
  PAGES = "3101--3105",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT118716"
}

@inproceedings{bb122610,
  AUTHOR = "Ge, Z. and Bewley, A. and McCool, C. and Corke, P. and Upcroft, B. and Sanderson, C.",
  TITLE = "Fine-grained classification via mixture of deep convolutional neural networks",
  BOOKTITLE = WACV16,
  YEAR = "2016",
  PAGES = "1--6",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT118717"
}

@inproceedings{bb122611,
  AUTHOR = "Ge, Z. and McCool, C. and Sanderson, C. and Corke, P.",
  TITLE = "Modelling local deep convolutional neural network features to improve fine-grained image classification",
  BOOKTITLE = ICIP15,
  YEAR = "2015",
  PAGES = "4112--4116",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT118718"
}

@inproceedings{bb122612,
  AUTHOR = "Zhang, N. and Donahue, J. and Girshick, R. and Darrell, T.J.",
  TITLE = "Part-Based {R-CNNs} for Fine-Grained Category Detection",
  BOOKTITLE = ECCV14,
  YEAR = "2014",
  VOLUME = "I",
  PAGES = "834--849",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT118719"
}

% ---- Entries whose BIBSOURCE page is match605.html ----

@article{bb122613,
  AUTHOR = "Taylor, S.L. and Dahl, D.A. and Lipshutz, M. and Weir, C. and Norton, L.M. and Nilson, R.W. and Linebarger, M.C.",
  TITLE = "Integrating Natural-Language Understanding with Document Structure-Analysis",
  JOURNAL = AIR,
  VOLUME = "8",
  YEAR = "1994",
  NUMBER = "2-3",
  PAGES = "255--276",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118720"
}

@article{bb122614,
  AUTHOR = "McKevitt, P.",
  TITLE = "Integration of Natural-Language and Vision Processing: Theory",
  JOURNAL = AIR,
  VOLUME = "9",
  YEAR = "1995",
  NUMBER = "4-5",
  MONTH = oct,
  PAGES = "247--250",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118721"
}

@book{bb122615,
  AUTHOR = "McKevitt, P.",
  TITLE = "Integration of Natural-Language and Vision Processing: Theory",
  PUBLISHER = "Springer",
  YEAR = "1995",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118721"
}

@article{bb122616,
  AUTHOR = "McKevitt, P.",
  TITLE = "Integration of Natural-Language and Vision Processing: Intelligent Multimedia",
  JOURNAL = AIR,
  VOLUME = "9",
  YEAR = "1995",
  NUMBER = "2-3",
  MONTH = jun,
  PAGES = "77--80",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118722"
}

@article{bb122617,
  AUTHOR = "McKevitt, P.",
  TITLE = "Integration of Natural-Language and Vision Processing: More Computational Models and Systems",
  JOURNAL = AIR,
  VOLUME = "8",
  YEAR = "1995",
  NUMBER = "5-6",
  PAGES = "345--348",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118723"
}

@article{bb122618,
  AUTHOR = "McKevitt, P.",
  TITLE = "Integration of Natural-Language and Vision Processing: Computational Models and Systems",
  JOURNAL = AIR,
  VOLUME = "8",
  YEAR = "1994",
  NUMBER = "2-3",
  PAGES = "99--104",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118724"
}

@book{bb122619,
  AUTHOR = "McKevitt, P.",
  TITLE = "Integration of Natural-Language and Vision Processing: Computational Models and Systems",
  PUBLISHER = "Kluwer",
  YEAR = "1995",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118724"
}

@article{bb122620,
  AUTHOR = "McKevitt, P.",
  TITLE = "Integration of Natural-Language and Vision Processing: Grounding Representations",
  JOURNAL = AIR,
  VOLUME = "10",
  YEAR = "1996",
  NUMBER = "1-2",
  MONTH = apr,
  PAGES = "7--13",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118725"
}

@book{bb122621,
  AUTHOR = "McKevitt, P.",
  TITLE = "Integration of Natural Language and Vision Processing",
  PUBLISHER = "Kluwer",
  YEAR = "1996",
  MONTH = sep,
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118726"
}

@article{bb122622,
  AUTHOR = "Siskind, J.M.",
  TITLE = "Grounding Language in Perception",
  JOURNAL = AIR,
  VOLUME = "8",
  YEAR = "1995",
  NUMBER = "5-6",
  PAGES = "371--391",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118727"
}

@article{bb122623,
  AUTHOR = "Srihari, R.K.",
  TITLE = "Automatic-Indexing and Content-Based Retrieval of Captioned Images",
  JOURNAL = Computer,
  VOLUME = "28",
  YEAR = "1995",
  NUMBER = "9",
  MONTH = sep,
  PAGES = "49--56",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118728"
}

@article{bb122624,
  AUTHOR = "Srihari, R.K.",
  TITLE = "Use of Captions and Other Collateral Text in Understanding Photographs",
  JOURNAL = AIR,
  VOLUME = "8",
  YEAR = "1995",
  NUMBER = "5-6",
  PAGES = "409--430",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118729"
}

@article{bb122625,
  AUTHOR = "Srihari, R.K.",
  TITLE = "Use of Collateral Text in Understanding Photos in Documents",
  JOURNAL = SPIE,
  VOLUME = "2368",
  YEAR = "1994",
  PAGES = "186--199",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118730"
}

@article{bb122626,
  AUTHOR = "Srihari, R.K.",
  TITLE = "Computational Models for Integrating Linguistic and Visual Information: A Survey",
  JOURNAL = AIR,
  VOLUME = "8",
  YEAR = "1995",
  NUMBER = "5-6",
  PAGES = "349--369",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118731"
}

@inproceedings{bb122627,
  AUTHOR = "Srihari, R.K.",
  TITLE = "Using Linguistic Context for Image Interpretation and Annotation",
  BOOKTITLE = "Radius97",
  YEAR = "1997",
  PAGES = "419--427",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118732"
}

@inproceedings{bb122628,
  AUTHOR = "Srihari, R.K.",
  TITLE = "Linguistic Context In Vision",
  BOOKTITLE = Context95,
  YEAR = "1995",
  INTERNAL-NOTE = "Pages unknown: upstream placeholder xx removed",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118733"
}

@inproceedings{bb122629,
  AUTHOR = "Srihari, R.K. and Zhang, Z. and Venkatraman, M. and Chopra, R.",
  TITLE = "Using Speech Input for Image Interpretation and Annotation",
  BOOKTITLE = ARPA96,
  YEAR = "1996",
  PAGES = "501--510",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118734"
}

@inproceedings{bb122630,
  AUTHOR = "Burhans, D.T. and Chopra, R. and Srihari, R.K. and Govindaraju, V. and Venkataraman, M.",
  TITLE = "Use of Collateral Text in Image Interpretation",
  BOOKTITLE = ARPA94,
  YEAR = "1994",
  VOLUME = "II",
  PAGES = "897--907",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118735"
}

@inproceedings{bb122631,
  AUTHOR = "Srihari, R.K. and Burhans, D.T.",
  TITLE = "Visual Semantics: Extracting Visual Information from Text Accompanying Pictures",
  BOOKTITLE = AAAI-94,
  YEAR = "1994",
  PAGES = "793--798",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118736"
}

@inproceedings{bb122632,
  AUTHOR = "Govindaraju, V. and Srihari, S.N. and Sher, D.B.",
  TITLE = "Caption-Aided Face Location In Newspaper Photographs",
  BOOKTITLE = ICPR92,
  YEAR = "1992",
  VOLUME = "I",
  PAGES = "474--477",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118737"
}

@inproceedings{bb122633,
  AUTHOR = "Govindaraju, V. and Srihari, S.N. and Sher, D.B.",
  TITLE = "A Computational Model for Face Location Based on Cognitive Principles",
  BOOKTITLE = AAAI-92,
  YEAR = "1992",
  PAGES = "350--355",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118738"
}

@article{bb122634,
  AUTHOR = "Schank, R.C. and Fano, A.",
  TITLE = "Memory and Expectations in Learning, Language, and Visual Understanding",
  JOURNAL = AIR,
  VOLUME = "9",
  YEAR = "1995",
  NUMBER = "4-5",
  MONTH = oct,
  PAGES = "261--271",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118739"
}

@article{bb122635,
  AUTHOR = "Wilks, Y.",
  TITLE = "Language, Vision and Metaphor",
  JOURNAL = AIR,
  VOLUME = "9",
  YEAR = "1995",
  NUMBER = "4-5",
  MONTH = oct,
  PAGES = "273--289",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118740"
}

@article{bb122636,
  AUTHOR = "Partridge, D.",
  TITLE = "Language and Vision: A Single Perceptual Mechanism",
  JOURNAL = AIR,
  VOLUME = "9",
  YEAR = "1995",
  NUMBER = "4-5",
  MONTH = oct,
  PAGES = "291--303",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118741"
}

@article{bb122637,
  AUTHOR = "Marconi, D.",
  TITLE = "Work on the Integration of Language and Vision at the {University of Torino}",
  JOURNAL = AIR,
  VOLUME = "10",
  YEAR = "1996",
  NUMBER = "1-2",
  MONTH = apr,
  PAGES = "15--20",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118742"
}

@article{bb122638,
  AUTHOR = "Meini, C. and Paternoster, A.",
  TITLE = "Understanding Language Through Vision",
  JOURNAL = AIR,
  VOLUME = "10",
  YEAR = "1996",
  NUMBER = "1-2",
  MONTH = apr,
  PAGES = "37--48",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118743"
}

@article{bb122639,
  AUTHOR = "McKevitt, P. and Guo, C.M.",
  TITLE = "From {Chinese} Rooms to {Irish} Rooms: New Words on Visions for Language",
  JOURNAL = AIR,
  VOLUME = "10",
  YEAR = "1996",
  NUMBER = "1-2",
  MONTH = apr,
  PAGES = "49--63",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118744"
}

@article{bb122640,
  AUTHOR = "Grumbach, A.",
  TITLE = "Grounding Symbols into Perceptions",
  JOURNAL = AIR,
  VOLUME = "10",
  YEAR = "1996",
  NUMBER = "1-2",
  MONTH = apr,
  PAGES = "131--146",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118745"
}

@article{bb122641,
  AUTHOR = "Socher, G. and Sagerer, G.F. and Perona, P.",
  TITLE = "{Bayesian} reasoning on qualitative descriptions from images and speech",
  JOURNAL = IVC,
  VOLUME = "18",
  YEAR = "2000",
  NUMBER = "2",
  MONTH = jan,
  PAGES = "155--172",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118746"
}

@article{bb122642,
  AUTHOR = "Mukerjee, A. and Gupta, K. and Nautiyal, S. and Singh, M.P. and Mishra, N.",
  TITLE = "Conceptual description of visual scenes from linguistic models",
  JOURNAL = IVC,
  VOLUME = "18",
  YEAR = "2000",
  NUMBER = "2",
  MONTH = jan,
  PAGES = "173--187",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118747"
}

@article{bb122643,
  AUTHOR = "Arens, M. and Gerber, R. and Nagel, H.H.",
  TITLE = "Conceptual representations between video signals and natural language descriptions",
  JOURNAL = IVC,
  VOLUME = "26",
  YEAR = "2008",
  NUMBER = "1",
  MONTH = jan,
  PAGES = "53--66",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118748"
}

@inproceedings{bb122644,
  AUTHOR = "Gerber, R. and Nagel, H.H.",
  TITLE = "(Mis?-) Using {DRT} for Generation of Natural Language Text from Image Sequences",
  BOOKTITLE = ECCV98,
  YEAR = "1998",
  VOLUME = "II",
  PAGES = "255",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118749"
}

@article{bb122645,
  AUTHOR = "Lee, Y.J. and Grauman, K.",
  TITLE = "Object-Graphs for Context-Aware Visual Category Discovery",
  JOURNAL = PAMI,
  VOLUME = "34",
  YEAR = "2012",
  NUMBER = "2",
  MONTH = feb,
  PAGES = "346--358",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118750"
}

@inproceedings{bb122646,
  AUTHOR = "Lee, Y.J. and Grauman, K.",
  TITLE = "Object-graphs for context-aware category discovery",
  BOOKTITLE = CVPR10,
  YEAR = "2010",
  PAGES = "1--8",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118751"
}

@inproceedings{bb122647,
  AUTHOR = "Lee, Y.J. and Grauman, K.",
  TITLE = "Learning the easy things first: Self-paced visual category discovery",
  BOOKTITLE = CVPR11,
  YEAR = "2011",
  PAGES = "1721--1728",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118752"
}

@article{bb122648,
  AUTHOR = "Yu, A. and Grauman, K.",
  TITLE = "Densifying Supervision for Fine-Grained Visual Comparisons",
  JOURNAL = IJCV,
  VOLUME = "128",
  YEAR = "2020",
  NUMBER = "10-11",
  MONTH = nov,
  PAGES = "2704--2730",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118753"
}

@inproceedings{bb122649,
  AUTHOR = "Hessel, J. and Hwang, J.D. and Park, J.S. and Zellers, R. and Bhagavatula, C. and Rohrbach, A. and Saenko, K. and Choi, Y.",
  TITLE = "The Abduction of {Sherlock Holmes}: A Dataset for Visual Abductive Reasoning",
  BOOKTITLE = ECCV22,
  YEAR = "2022",
  VOLUME = "XXXVI",
  PAGES = "558--575",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118754"
}

@inproceedings{bb122650,
  AUTHOR = "Perona, P.",
  TITLE = "A taxonomy of visual recognition",
  BOOKTITLE = VMV04,
  YEAR = "2004",
  PAGES = "187",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118755"
}

@inproceedings{bb122651,
  AUTHOR = "Takahashi, T. and Nakanishi, S. and Kuno, Y. and Shirai, Y.",
  TITLE = "Helping Computer Vision by Verbal and Nonverbal Communication",
  BOOKTITLE = ICPR98,
  YEAR = "1998",
  VOLUME = "II",
  PAGES = "1216--1218",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118756"
}

@inproceedings{bb122652,
  AUTHOR = "Satoh, S. and Nakamura, Y. and Kanade, T.",
  TITLE = "{Name-It}: Naming and Detecting Faces in Video by the Integration of Image and Natural Language Processing",
  BOOKTITLE = IJCAI97,
  YEAR = "1997",
  PAGES = "1488--1495",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118757"
}

@inproceedings{bb122653,
  AUTHOR = "Satoh, S. and Kanade, T.",
  TITLE = "{Name-It}: Association Of Face And Name In Video",
  BOOKTITLE = CVPR97,
  YEAR = "1997",
  PAGES = "368--373",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118758"
}

@inproceedings{bb122654,
  AUTHOR = "Socher, G. and Sagerer, G.F. and Kummert, F. and Fuhr, T.",
  TITLE = "Talking About {3D} Scenes: Integration of Image and Speech Understanding in a Hybrid Distributed System",
  BOOKTITLE = ICIP96,
  YEAR = "1996",
  VOLUME = "II",
  PAGES = "809--812",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT118759"
}

% ---- Entries whose BIBSOURCE page is match606.html ----

@article{bb122655,
  AUTHOR = "Kuniyoshi, Y. and Inaba, M. and Inoue, H.",
  TITLE = "Learning by Watching: Extracting Reusable Task Knowledge from Visual Observation of Human Performance",
  JOURNAL = RA,
  VOLUME = "10",
  YEAR = "1994",
  PAGES = "799--822",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT118760"
}

@inproceedings{bb122656,
  AUTHOR = "Kuniyoshi, Y. and Inoue, H.",
  TITLE = "Indexicality and dynamic attention control in qualitative recognition of assembly actions",
  BOOKTITLE = ECCV92,
  YEAR = "1992",
  PAGES = "874--878",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT118761"
}

@article{bb122657,
  AUTHOR = "Porway, J. and Wang, Q.C. and Zhu, S.C.",
  TITLE = "A Hierarchical and Contextual Model for Aerial Image Parsing",
  JOURNAL = IJCV,
  VOLUME = "88",
  YEAR = "2010",
  NUMBER = "2",
  MONTH = jun,
  INTERNAL-NOTE = "Pages unknown: upstream placeholder xx-yy removed",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT118762"
}

@inproceedings{bb122658,
  AUTHOR = "Porway, J. and Wang, K. and Yao, B. and Zhu, S.C.",
  TITLE = "A hierarchical and contextual model for aerial image understanding",
  BOOKTITLE = CVPR08,
  YEAR = "2008",
  PAGES = "1--8",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT118763"
}

@inproceedings{bb122659,
  AUTHOR = "Si, Z.Z. and Gong, H.F. and Wu, Y.N. and Zhu, S.C.",
  TITLE = "Learning mixed templates for object recognition",
  BOOKTITLE = CVPR09,
  YEAR = "2009",
  PAGES = "272--279",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT118764"
}

@article{bb122660,
  AUTHOR = "Tu, Z.W. and Bai, X.",
  TITLE = "Auto-Context and Its Application to High-Level Vision Tasks and {3D} Brain Image Segmentation",
  JOURNAL = PAMI,
  VOLUME = "32",
  YEAR = "2010",
  NUMBER = "10",
  MONTH = oct,
  PAGES = "1744--1757",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT118765"
}

@inproceedings{bb122661,
  AUTHOR = "Tu, Z.W.",
  TITLE = "Auto-context and its application to high-level vision tasks",
  BOOKTITLE = CVPR08,
  YEAR = "2008",
  PAGES = "1--8",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT118766"
}

@inproceedings{bb122662,
  AUTHOR = "Jones, J. and Hager, G.D. and Khudanpur, S.",
  TITLE = "Toward Computer Vision Systems That Understand Real-World Assembly Processes",
  BOOKTITLE = WACV19,
  YEAR = "2019",
  PAGES = "426--434",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT118767"
}

@inproceedings{bb122663,
  AUTHOR = "Lampert, C.H.",
  TITLE = "Partitioning of image datasets using discriminative context information",
  BOOKTITLE = CVPR08,
  YEAR = "2008",
  PAGES = "1--8",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT118768"
}

@inproceedings{bb122664,
  AUTHOR = "Hansen, C. and Henderson, T.C.",
  TITLE = "Towards the Automatic Generation of Recognition Strategies",
  BOOKTITLE = ICCV88,
  YEAR = "1988",
  PAGES = "275--279",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT118769"
}

@inproceedings{bb122665,
  AUTHOR = "Cantoni, V. and Cei, U. and Ferretti, M. and Lombardi, L.",
  TITLE = "Towards an Automatic Construction of Object Recognition Strategies",
  BOOKTITLE = ICPR88,
  YEAR = "1988",
  VOLUME = "I",
  PAGES = "371--374",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT118770"
}

@inproceedings{bb122666,
  AUTHOR = "Cova, G. and Griffini, A. and Lombardi, L.",
  TITLE = "Object Recognition Strategy in a Multi-Resolution System",
  BOOKTITLE = CIAP89,
  YEAR = "1989",
  PAGES = "729--733",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT118771"
}

% ---- Entries whose BIBSOURCE page is match607ic1.html ----

@article{bb122667,
  AUTHOR = "Feng, Y.S. and Lapata, M.",
  TITLE = "Automatic Caption Generation for News Images",
  JOURNAL = PAMI,
  VOLUME = "35",
  YEAR = "2013",
  NUMBER = "4",
  MONTH = apr,
  PAGES = "797--812",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118772"
}

@article{bb122668,
  AUTHOR = "Vinyals, O. and Toshev, A. and Bengio, S. and Erhan, D.",
  TITLE = "Show and Tell: Lessons Learned from the 2015 {MSCOCO} Image Captioning Challenge",
  JOURNAL = PAMI,
  VOLUME = "39",
  YEAR = "2017",
  NUMBER = "4",
  MONTH = apr,
  PAGES = "652--663",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118773"
}

@inproceedings{bb122669,
  AUTHOR = "Vinyals, O. and Toshev, A. and Bengio, S. and Erhan, D.",
  TITLE = "Show and tell: A neural image caption generator",
  BOOKTITLE = CVPR15,
  YEAR = "2015",
  PAGES = "3156--3164",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118774"
}

@article{bb122670,
  AUTHOR = "Wang, J.Y. and Zhu, X.T. and Gong, S.G.",
  TITLE = "Discovering visual concept structure with sparse and incomplete tags",
  JOURNAL = AI,
  VOLUME = "250",
  YEAR = "2017",
  NUMBER = "1",
  PAGES = "16--36",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118775"
}

@article{bb122671,
  AUTHOR = "Kilickaya, M. and Akkus, B.K. and Cakici, R. and Erdem, A. and Erdem, E. and Ikizler Cinbis, N.",
  TITLE = "Data-driven image captioning via salient region discovery",
  JOURNAL = IET-CV,
  VOLUME = "11",
  YEAR = "2017",
  NUMBER = "6",
  MONTH = sep,
  PAGES = "398--406",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118776"
}

@article{bb122672,
  AUTHOR = "He, X.D. and Deng, L.",
  TITLE = "Deep Learning for Image-to-Text Generation: A Technical Overview",
  JOURNAL = SPMag,
  VOLUME = "34",
  YEAR = "2017",
  NUMBER = "6",
  MONTH = nov,
  PAGES = "109--116",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118777"
}

@article{bb122673,
  AUTHOR = "Deng, L. and He, X.D.",
  TITLE = "Deep Learning for Image-to-Text Generation: A Technical Overview",
  JOURNAL = SPMag,
  VOLUME = "35",
  YEAR = "2018",
  NUMBER = "1",
  MONTH = jan,
  PAGES = "178",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118777"
}

@article{bb122674,
  AUTHOR = "Li, L.H. and Tang, S. and Zhang, Y.D. and Deng, L.X. and Tian, Q.",
  TITLE = "{GLA}: Global-Local Attention for Image Description",
  JOURNAL = MultMed,
  VOLUME = "20",
  YEAR = "2018",
  NUMBER = "3",
  MONTH = mar,
  PAGES = "726--737",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118778"
}

@article{bb122675,
  AUTHOR = "Lu, X. and Wang, B. and Zheng, X. and Li, X.",
  TITLE = "Exploring Models and Data for Remote Sensing Image Caption Generation",
  JOURNAL = GeoRS,
  VOLUME = "56",
  YEAR = "2018",
  NUMBER = "4",
  MONTH = apr,
  PAGES = "2183--2195",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118779"
}

@article{bb122676,
  AUTHOR = "Wu, C.L. and Wei, Y.W. and Chu, X.L. and Su, F. and Wang, L.Q.",
  TITLE = "Modeling visual and word-conditional semantic attention for image captioning",
  JOURNAL = SP:IC,
  VOLUME = "67",
  YEAR = "2018",
  PAGES = "100--107",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118780"
}

@article{bb122677,
  AUTHOR = "Zhang, M. and Yang, Y. and Zhang, H. and Ji, Y. and Shen, H.T. and Chua, T.",
  TITLE = "More is Better: Precise and Detailed Image Captioning Using Online Positive Recall and Missing Concepts Mining",
  JOURNAL = IP,
  VOLUME = "28",
  YEAR = "2019",
  NUMBER = "1",
  MONTH = jan,
  PAGES = "32--44",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118781"
}

@article{bb122678,
  AUTHOR = "Gella, S. and Keller, F. and Lapata, M.",
  TITLE = "Disambiguating Visual Verbs",
  JOURNAL = PAMI,
  VOLUME = "41",
  YEAR = "2019",
  NUMBER = "2",
  MONTH = feb,
  PAGES = "311--322",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118782"
}

@article{bb122679,
  AUTHOR = "Xu, N. and Liu, A.A. and Liu, J. and Nie, W.Z. and Su, Y.T.",
  TITLE = "Scene graph captioner: Image captioning based on structural visual representation",
  JOURNAL = JVCIR,
  VOLUME = "58",
  YEAR = "2019",
  PAGES = "477--485",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118783"
}

@article{bb122680,
  AUTHOR = "He, X.W. and Shi, B.G. and Bai, X. and Xia, G.S. and Zhang, Z.X. and Dong, W.S.",
  TITLE = "Image Caption Generation with Part of Speech Guidance",
  JOURNAL = PRL,
  VOLUME = "119",
  YEAR = "2019",
  PAGES = "229--237",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118784"
}

@article{bb122681,
  AUTHOR = "Xiao, X.Y. and Wang, L.F. and Ding, K. and Xiang, S.M. and Pan, C.",
  TITLE = "Dense semantic embedding network for image captioning",
  JOURNAL = PR,
  VOLUME = "90",
  YEAR = "2019",
  PAGES = "285--296",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118785"
}

@article{bb122682,
  AUTHOR = "Zhang, X.R. and Wang, X. and Tang, X. and Zhou, H.Y. and Li, C.",
  TITLE = "Description Generation for Remote Sensing Images Using Attribute Attention Mechanism",
  JOURNAL = RS,
  VOLUME = "11",
  YEAR = "2019",
  NUMBER = "6",
  INTERNAL-NOTE = "Pages unknown: upstream placeholder xx-yy removed",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118786"
}

@article{bb122683,
  AUTHOR = "Ding, S.T. and Qu, S. and Xi, Y.L. and Sangaiah, A.K. and Wan, S.H.",
  TITLE = "Image caption generation with high-level image features",
  JOURNAL = PRL,
  VOLUME = "123",
  YEAR = "2019",
  PAGES = "89--95",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118787"
}

@article{bb122684,
  AUTHOR = "Liu, X.X. and Xu, Q.Y. and Wang, N.",
  TITLE = "A survey on deep neural network-based image captioning",
  JOURNAL = VC,
  VOLUME = "35",
  YEAR = "2019",
  NUMBER = "3",
  MONTH = mar,
  PAGES = "445--470",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118788"
}

@article{bb122685,
  AUTHOR = "Hossain, M.Z. and Sohel, F. and Shiratuddin, M.F. and Laga, H.",
  TITLE = "A Comprehensive Survey of Deep Learning for Image Captioning",
  JOURNAL = Surveys,
  VOLUME = "51",
  YEAR = "2019",
  NUMBER = "6",
  MONTH = feb,
  PAGES = "Article No 118",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118789"
}

@article{bb122686,
  AUTHOR = "Zhang, Z.J. and Wu, Q. and Wang, Y. and Chen, F.",
  TITLE = "High-Quality Image Captioning With Fine-Grained and Semantic-Guided Visual Attention",
  JOURNAL = MultMed,
  VOLUME = "21",
  YEAR = "2019",
  NUMBER = "7",
  MONTH = jul,
  PAGES = "1681--1693",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118790"
}

@inproceedings{bb122687,
  AUTHOR = "Zhang, Z.J. and Wu, Q. and Wang, Y. and Chen, F.",
  TITLE = "Fine-Grained and Semantic-Guided Visual Attention for Image Captioning",
  BOOKTITLE = WACV18,
  YEAR = "2018",
  PAGES = "1709--1717",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118791"
}

@article{bb122688,
  AUTHOR = "Li, X. and Jiang, S.",
  TITLE = "Know More Say Less: Image Captioning Based on Scene Graphs",
  JOURNAL = MultMed,
  VOLUME = "21",
  YEAR = "2019",
  NUMBER = "8",
  MONTH = aug,
  PAGES = "2117--2130",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118792"
}

@article{bb122689,
  AUTHOR = "Sharif, N. and White, L. and Bennamoun, M. and Liu, W. and Shah, S.A.A.",
  TITLE = "{LCEval}: Learned Composite Metric for Caption Evaluation",
  JOURNAL = IJCV,
  VOLUME = "127",
  YEAR = "2019",
  NUMBER = "10",
  MONTH = oct,
  PAGES = "1586--1610",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118793"
}

@article{bb122690,
  AUTHOR = "Zhang, Z.Y. and Diao, W.H. and Zhang, W.K. and Yan, M.L. and Gao, X. and Sun, X.",
  TITLE = "{LAM}: Remote Sensing Image Captioning with Label-Attention Mechanism",
  JOURNAL = RS,
  VOLUME = "11",
  YEAR = "2019",
  NUMBER = "20",
  INTERNAL-NOTE = "Pages unknown: upstream placeholder xx-yy removed",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118794"
}

@article{bb122691,
  AUTHOR = "Fu, K. and Li, Y. and Zhang, W.K. and Yu, H.F. and Sun, X.",
  TITLE = "Boosting Memory with a Persistent Memory Mechanism for Remote Sensing Image Captioning",
  JOURNAL = RS,
  VOLUME = "12",
  YEAR = "2020",
  NUMBER = "11",
  INTERNAL-NOTE = "Pages unknown: upstream placeholder xx-yy removed",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118795"
}

@article{bb122692,
  AUTHOR = "Tan, J.H. and Chan, C.S. and Chuah, J.H.",
  TITLE = "{COMIC}: Toward A Compact Image Captioning Model With Attention",
  JOURNAL = MultMed,
  VOLUME = "21",
  YEAR = "2019",
  NUMBER = "10",
  MONTH = oct,
  PAGES = "2686--2696",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118796"
}

@article{bb122693,
  AUTHOR = "Zhou, L. and Zhang, Y. and Jiang, Y. and Zhang, T. and Fan, W.",
  TITLE = "Re-Caption: Saliency-Enhanced Image Captioning Through Two-Phase Learning",
  JOURNAL = IP,
  VOLUME = "29",
  YEAR = "2020",
  NUMBER = "1",
  PAGES = "694--709",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118797"
}

@article{bb122694,
  AUTHOR = "Yang, L. and Hu, H.F.",
  TITLE = "Visual Skeleton and Reparative Attention for Part-of-Speech image captioning system",
  JOURNAL = CVIU,
  VOLUME = "189",
  YEAR = "2019",
  PAGES = "102819",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118798"
}

@article{bb122695,
  AUTHOR = "Wang, J.B. and Wang, W. and Wang, L. and Wang, Z.Y. and Feng, D.D. and Tan, T.N.",
  TITLE = "Learning Visual Relationship and Context-Aware Attention for Image Captioning",
  JOURNAL = PR,
  VOLUME = "98",
  YEAR = "2020",
  PAGES = "107075",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118799"
}

@article{bb122696,
  AUTHOR = "Xiao, X. and Wang, L. and Ding, K. and Xiang, S. and Pan, C.",
  TITLE = "Deep Hierarchical Encoder-Decoder Network for Image Captioning",
  JOURNAL = MultMed,
  VOLUME = "21",
  YEAR = "2019",
  NUMBER = "11",
  MONTH = nov,
  PAGES = "2942--2956",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118800"
}

@article{bb122697,
  AUTHOR = "Jiang, T. and Zhang, Z. and Yang, Y.",
  TITLE = "Modeling coverage with semantic embedding for image caption generation",
  JOURNAL = VC,
  VOLUME = "35",
  YEAR = "2019",
  NUMBER = "11",
  MONTH = nov,
  PAGES = "1655--1665",
  INTERNAL-NOTE = "YEAR corrected from upstream 2018: volume 35 is dated 2019 (cf. bb122684, same volume, number 3, March 2019)",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118801"
}

@article{bb122698,
  AUTHOR = "Lu, X. and Wang, B. and Zheng, X.",
  TITLE = "Sound Active Attention Framework for Remote Sensing Image Captioning",
  JOURNAL = GeoRS,
  VOLUME = "58",
  YEAR = "2020",
  NUMBER = "3",
  MONTH = mar,
  PAGES = "1985--2000",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118802"
}

@article{bb122699,
  AUTHOR = "Li, Y.Y. and Fang, S.K. and Jiao, L.C. and Liu, R.J. and Shang, R.H.",
  TITLE = "A Multi-Level Attention Model for Remote Sensing Image Captions",
  JOURNAL = RS,
  VOLUME = "12",
  YEAR = "2020",
  NUMBER = "6",
  INTERNAL-NOTE = "Pages unknown: upstream placeholder xx-yy removed",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT118803"
}