% Bibliography entries bb127900 through bb127999; each BIBSOURCE field records
% the visionbib.com page and anchor the entry was taken from.
% Unquoted BOOKTITLE/JOURNAL values (CVPR17, AIR, PAMI, SP:IC, ...) are string
% macros that are not defined in this section -- presumably supplied by a
% companion string-definition file; confirm before using this file on its own.
% NOTE(review): bb127923 uses the quoted literal "Radius97" rather than a macro;
% verify whether that is intentional.
% PAGES values "xx" and "xx-yy" look like placeholders for unknown page numbers
% and are kept verbatim; a leading roman numeral in PAGES ("I: 176--188") is the
% proceedings volume as given by the source.

@inproceedings{bb127900,
  AUTHOR    = "Fu, J. and Zheng, H. and Mei, T.",
  TITLE     = "Look Closer to See Better: Recurrent Attention Convolutional Neural Network for Fine-Grained Image Recognition",
  BOOKTITLE = CVPR17,
  YEAR      = "2017",
  PAGES     = "4476--4484",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT123957"
}

@inproceedings{bb127901,
  AUTHOR    = "Ge, Z.Y. and McCool, C. and Sanderson, C. and Wang, P. and Liu, L.Q. and Reid, I.D. and Corke, P.",
  TITLE     = "Exploiting Temporal Information for DCNN-Based Fine-Grained Object Classification",
  BOOKTITLE = DICTA16,
  YEAR      = "2016",
  PAGES     = "1--6",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT123958"
}

@inproceedings{bb127902,
  AUTHOR    = "Ai, S.S. and Jia, C.Y. and Chen, Z.N.",
  TITLE     = "Large-Scale Product Classification via Spatial Attention Based CNN Learning and Multi-class Regression",
  BOOKTITLE = MMMod17,
  YEAR      = "2017",
  PAGES     = "I: 176--188",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT123959"
}

@inproceedings{bb127903,
  AUTHOR    = "Diba, A. and Pazandeh, A.M. and Pirsiavash, H. and Van Gool, L.J.",
  TITLE     = "DeepCAMP: Deep Convolutional Action Attribute Mid-Level Patterns",
  BOOKTITLE = CVPR16,
  YEAR      = "2016",
  PAGES     = "3557--3565",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT123960"
}

@inproceedings{bb127904,
  AUTHOR    = "Zhang, H. and Xu, T. and Elhoseiny, M. and Huang, X.L. and Zhang, S.T. and Elgammal, A.E. and Metaxas, D.N.",
  TITLE     = "SPDA-CNN: Unifying Semantic Part Detection and Abstraction for Fine-Grained Recognition",
  BOOKTITLE = CVPR16,
  YEAR      = "2016",
  PAGES     = "1143--1152",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT123961"
}

@inproceedings{bb127905,
  AUTHOR    = "Chevalier, M. and Thome, N. and Cord, M. and Fournier, J. and Henaff, G. and Dusch, E.",
  TITLE     = "LR-CNN for fine-grained classification with varying resolution",
  BOOKTITLE = ICIP15,
  YEAR      = "2015",
  PAGES     = "3101--3105",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT123962"
}

@inproceedings{bb127906,
  AUTHOR    = "Ge, Z. and Bewley, A. and McCool, C. and Corke, P. and Upcroft, B. and Sanderson, C.",
  TITLE     = "Fine-grained classification via mixture of deep convolutional neural networks",
  BOOKTITLE = WACV16,
  YEAR      = "2016",
  PAGES     = "1--6",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT123963"
}

@inproceedings{bb127907,
  AUTHOR    = "Ge, Z. and McCool, C. and Sanderson, C. and Corke, P.",
  TITLE     = "Modelling local deep convolutional neural network features to improve fine-grained image classification",
  BOOKTITLE = ICIP15,
  YEAR      = "2015",
  PAGES     = "4112--4116",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT123964"
}

@inproceedings{bb127908,
  AUTHOR    = "Zhang, N. and Donahue, J. and Girshick, R. and Darrell, T.J.",
  TITLE     = "Part-Based R-CNNs for Fine-Grained Category Detection",
  BOOKTITLE = ECCV14,
  YEAR      = "2014",
  PAGES     = "I: 834--849",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605cnnfg2.html#TT123965"
}

@article{bb127909,
  AUTHOR    = "Taylor, S.L. and Dahl, D.A. and Lipshutz, M. and Weir, C. and Norton, L.M. and Nilson, R.W. and Linebarger, M.C.",
  TITLE     = "Integrating Natural-Language Understanding with Document Structure-Analysis",
  JOURNAL   = AIR,
  VOLUME    = "8",
  YEAR      = "1994",
  NUMBER    = "2-3",
  PAGES     = "255--276",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123966"
}

@article{bb127910,
  AUTHOR    = "McKevitt, P.",
  TITLE     = "Integration of Natural-Language and Vision Processing: Theory",
  JOURNAL   = AIR,
  VOLUME    = "9",
  YEAR      = "1995",
  NUMBER    = "4-5",
  MONTH     = "October",
  PAGES     = "247--250",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123967"
}

@book{bb127911,
  AUTHOR    = "McKevitt, P.",
  TITLE     = "Integration of Natural-Language and Vision Processing: Theory",
  PUBLISHER = "Springer",
  YEAR      = "1995",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123967"
}

@article{bb127912,
  AUTHOR    = "McKevitt, P.",
  TITLE     = "Integration of Natural-Language and Vision Processing: Intelligent Multimedia",
  JOURNAL   = AIR,
  VOLUME    = "9",
  YEAR      = "1995",
  NUMBER    = "2-3",
  MONTH     = "June",
  PAGES     = "77--80",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123968"
}

@article{bb127913,
  AUTHOR    = "McKevitt, P.",
  TITLE     = "Integration of Natural-Language and Vision Processing: More Computational Models and Systems",
  JOURNAL   = AIR,
  VOLUME    = "8",
  YEAR      = "1995",
  NUMBER    = "5-6",
  PAGES     = "345--348",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123969"
}

@article{bb127914,
  AUTHOR    = "McKevitt, P.",
  TITLE     = "Integration of Natural-Language and Vision Processing: Computational Models and Systems",
  JOURNAL   = AIR,
  VOLUME    = "8",
  YEAR      = "1994",
  NUMBER    = "2-3",
  PAGES     = "99--104",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123970"
}

@book{bb127915,
  AUTHOR    = "McKevitt, P.",
  TITLE     = "Integration of Natural-Language and Vision Processing: Computational Models and Systems",
  PUBLISHER = "Kluwer",
  YEAR      = "1995",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123970"
}

@article{bb127916,
  AUTHOR    = "McKevitt, P.",
  TITLE     = "Integration of Natural-Language and Vision Processing: Grounding Representations",
  JOURNAL   = AIR,
  VOLUME    = "10",
  YEAR      = "1996",
  NUMBER    = "1-2",
  MONTH     = "April",
  PAGES     = "7--13",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123971"
}

@book{bb127917,
  AUTHOR    = "McKevitt, P.",
  TITLE     = "Integration of Natural Language and Vision Processing",
  PUBLISHER = "Kluwer",
  YEAR      = "1996",
  MONTH     = "September",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123972"
}

@article{bb127918,
  AUTHOR    = "Siskind, J.M.",
  TITLE     = "Grounding Language in Perception",
  JOURNAL   = AIR,
  VOLUME    = "8",
  YEAR      = "1995",
  NUMBER    = "5-6",
  PAGES     = "371--391",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123973"
}

@article{bb127919,
  AUTHOR    = "Srihari, R.K.",
  TITLE     = "Automatic-Indexing and Content-Based Retrieval of Captioned Images",
  JOURNAL   = Computer,
  VOLUME    = "28",
  YEAR      = "1995",
  NUMBER    = "9",
  MONTH     = "September",
  PAGES     = "49--56",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123974"
}

@article{bb127920,
  AUTHOR    = "Srihari, R.K.",
  TITLE     = "Use of Captions and Other Collateral Text in Understanding Photographs",
  JOURNAL   = AIR,
  VOLUME    = "8",
  YEAR      = "1995",
  NUMBER    = "5-6",
  PAGES     = "409--430",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123975"
}

@article{bb127921,
  AUTHOR    = "Srihari, R.K.",
  TITLE     = "Use of Collateral Text in Understanding Photos in Documents",
  JOURNAL   = SPIE,
  VOLUME    = "2368",
  YEAR      = "1994",
  PAGES     = "186--199",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123976"
}

@article{bb127922,
  AUTHOR    = "Srihari, R.K.",
  TITLE     = "Computational Models for Integrating Linguistic and Visual Information: A Survey",
  JOURNAL   = AIR,
  VOLUME    = "8",
  YEAR      = "1995",
  NUMBER    = "5-6",
  PAGES     = "349--369",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123977"
}

@inproceedings{bb127923,
  AUTHOR    = "Srihari, R.K.",
  TITLE     = "Using Linguistic Context for Image Interpretation and Annotation",
  BOOKTITLE = "Radius97",
  YEAR      = "1997",
  PAGES     = "419--427",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123978"
}

@inproceedings{bb127924,
  AUTHOR    = "Srihari, R.K.",
  TITLE     = "Linguistic Context In Vision",
  BOOKTITLE = Context95,
  YEAR      = "1995",
  PAGES     = "xx",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123979"
}

@inproceedings{bb127925,
  AUTHOR    = "Srihari, R.K. and Zhang, Z. and Venkatraman, M. and Chopra, R.",
  TITLE     = "Using Speech Input for Image Interpretation and Annotation",
  BOOKTITLE = ARPA96,
  YEAR      = "1996",
  PAGES     = "501--510",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123980"
}

@inproceedings{bb127926,
  AUTHOR    = "Burhans, D.T. and Chopra, R. and Srihari, R.K. and Govindaraju, V. and Venkataraman, M.",
  TITLE     = "Use of Collateral Text in Image Interpretation",
  BOOKTITLE = ARPA94,
  YEAR      = "1994",
  PAGES     = "II:897--907",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123981"
}

@inproceedings{bb127927,
  AUTHOR    = "Srihari, R.K. and Burhans, D.T.",
  TITLE     = "Visual Semantics: Extracting Visual Information from Text Accompanying Pictures",
  BOOKTITLE = AAAI-94,
  YEAR      = "1994",
  PAGES     = "793--798",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123982"
}

@inproceedings{bb127928,
  AUTHOR    = "Govindaraju, V. and Srihari, S.N. and Sher, D.B.",
  TITLE     = "Caption-Aided Face Location In Newspaper Photographs",
  BOOKTITLE = ICPR92,
  YEAR      = "1992",
  PAGES     = "I:474--477",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123983"
}

@inproceedings{bb127929,
  AUTHOR    = "Govindaraju, V. and Srihari, S.N. and Sher, D.B.",
  TITLE     = "A Computational Model for Face Location Based on Cognitive Principles",
  BOOKTITLE = AAAI-92,
  YEAR      = "1992",
  PAGES     = "350--355",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123984"
}

@article{bb127930,
  AUTHOR    = "Schank, R.C. and Fano, A.",
  TITLE     = "Memory and Expectations in Learning, Language, and Visual Understanding",
  JOURNAL   = AIR,
  VOLUME    = "9",
  YEAR      = "1995",
  NUMBER    = "4-5",
  MONTH     = "October",
  PAGES     = "261--271",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123985"
}

@article{bb127931,
  AUTHOR    = "Wilks, Y.",
  TITLE     = "Language, Vision and Metaphor",
  JOURNAL   = AIR,
  VOLUME    = "9",
  YEAR      = "1995",
  NUMBER    = "4-5",
  MONTH     = "October",
  PAGES     = "273--289",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123986"
}

@article{bb127932,
  AUTHOR    = "Partridge, D.",
  TITLE     = "Language and Vision: A Single Perceptual Mechanism",
  JOURNAL   = AIR,
  VOLUME    = "9",
  YEAR      = "1995",
  NUMBER    = "4-5",
  MONTH     = "October",
  PAGES     = "291--303",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123987"
}

@article{bb127933,
  AUTHOR    = "Marconi, D.",
  TITLE     = "Work on the Integration of Language and Vision at the University of Torino",
  JOURNAL   = AIR,
  VOLUME    = "10",
  YEAR      = "1996",
  NUMBER    = "1-2",
  MONTH     = "April",
  PAGES     = "15--20",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123988"
}

@article{bb127934,
  AUTHOR    = "Meini, C. and Paternoster, A.",
  TITLE     = "Understanding Language Through Vision",
  JOURNAL   = AIR,
  VOLUME    = "10",
  YEAR      = "1996",
  NUMBER    = "1-2",
  MONTH     = "April",
  PAGES     = "37--48",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123989"
}

@article{bb127935,
  AUTHOR    = "McKevitt, P. and Guo, C.M.",
  TITLE     = "From Chinese Rooms to Irish Rooms: New Words on Visions for Language",
  JOURNAL   = AIR,
  VOLUME    = "10",
  YEAR      = "1996",
  NUMBER    = "1-2",
  MONTH     = "April",
  PAGES     = "49--63",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123990"
}

@article{bb127936,
  AUTHOR    = "Grumbach, A.",
  TITLE     = "Grounding Symbols into Perceptions",
  JOURNAL   = AIR,
  VOLUME    = "10",
  YEAR      = "1996",
  NUMBER    = "1-2",
  MONTH     = "April",
  PAGES     = "131--146",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123991"
}

@article{bb127937,
  AUTHOR    = "Socher, G. and Sagerer, G.F. and Perona, P.",
  TITLE     = "Bayesian reasoning on qualitative descriptions from images and speech",
  JOURNAL   = IVC,
  VOLUME    = "18",
  YEAR      = "2000",
  NUMBER    = "2",
  MONTH     = "January",
  PAGES     = "155--172",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123992"
}

@article{bb127938,
  AUTHOR    = "Mukerjee, A. and Gupta, K. and Nautiyal, S. and Singh, M.P. and Mishra, N.",
  TITLE     = "Conceptual description of visual scenes from linguistic models",
  JOURNAL   = IVC,
  VOLUME    = "18",
  YEAR      = "2000",
  NUMBER    = "2",
  MONTH     = "January",
  PAGES     = "173--187",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123993"
}

@article{bb127939,
  AUTHOR    = "Arens, M. and Gerber, R. and Nagel, H.H.",
  TITLE     = "Conceptual representations between video signals and natural language descriptions",
  JOURNAL   = IVC,
  VOLUME    = "26",
  YEAR      = "2008",
  NUMBER    = "1",
  MONTH     = "January",
  PAGES     = "53--66",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123994"
}

@inproceedings{bb127940,
  AUTHOR    = "Gerber, R. and Nagel, H.H.",
  TITLE     = "(Mis?-) Using DRT for Generation of Natural Language Text from Image Sequences",
  BOOKTITLE = ECCV98,
  YEAR      = "1998",
  PAGES     = "II: 255",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123995"
}

@article{bb127941,
  AUTHOR    = "Lee, Y.J. and Grauman, K.",
  TITLE     = "Object-Graphs for Context-Aware Visual Category Discovery",
  JOURNAL   = PAMI,
  VOLUME    = "34",
  YEAR      = "2012",
  NUMBER    = "2",
  MONTH     = "February",
  PAGES     = "346--358",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123996"
}

@inproceedings{bb127942,
  AUTHOR    = "Lee, Y.J. and Grauman, K.",
  TITLE     = "Object-graphs for context-aware category discovery",
  BOOKTITLE = CVPR10,
  YEAR      = "2010",
  PAGES     = "1--8",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123997"
}

@inproceedings{bb127943,
  AUTHOR    = "Lee, Y.J. and Grauman, K.",
  TITLE     = "Learning the easy things first: Self-paced visual category discovery",
  BOOKTITLE = CVPR11,
  YEAR      = "2011",
  PAGES     = "1721--1728",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123998"
}

@article{bb127944,
  AUTHOR    = "Yu, A. and Grauman, K.",
  TITLE     = "Densifying Supervision for Fine-Grained Visual Comparisons",
  JOURNAL   = IJCV,
  VOLUME    = "128",
  YEAR      = "2020",
  NUMBER    = "10-11",
  MONTH     = "November",
  PAGES     = "2704--2730",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT123999"
}

@inproceedings{bb127945,
  AUTHOR    = "Hessel, J. and Hwang, J.D. and Park, J.S. and Zellers, R. and Bhagavatula, C. and Rohrbach, A. and Saenko, K. and Choi, Y.",
  TITLE     = "The Abduction of Sherlock Holmes: A Dataset for Visual Abductive Reasoning",
  BOOKTITLE = ECCV22,
  YEAR      = "2022",
  PAGES     = "XXXVI:558--575",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT124000"
}

@inproceedings{bb127946,
  AUTHOR    = "Perona, P.",
  TITLE     = "A taxonomy of visual recognition",
  BOOKTITLE = VMV04,
  YEAR      = "2004",
  PAGES     = "187",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT124001"
}

@inproceedings{bb127947,
  AUTHOR    = "Takahashi, T. and Nakanishi, S. and Kuno, Y. and Shirai, Y.",
  TITLE     = "Helping Computer Vision by Verbal and Nonverbal Communication",
  BOOKTITLE = ICPR98,
  YEAR      = "1998",
  PAGES     = "II: 1216--1218",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT124002"
}

@inproceedings{bb127948,
  AUTHOR    = "Satoh, S. and Nakamura, Y. and Kanade, T.",
  TITLE     = "Name-It: Naming and Detecting Faces in Video by the Integration of Image and Natural Language Processing",
  BOOKTITLE = IJCAI97,
  YEAR      = "1997",
  PAGES     = "1488--1495",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT124003"
}

@inproceedings{bb127949,
  AUTHOR    = "Satoh, S. and Kanade, T.",
  TITLE     = "Name-It: Association Of Face And Name In Video",
  BOOKTITLE = CVPR97,
  YEAR      = "1997",
  PAGES     = "368--373",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT124004"
}

@inproceedings{bb127950,
  AUTHOR    = "Socher, G. and Sagerer, G.F. and Kummert, F. and Fuhr, T.",
  TITLE     = "Talking About 3D Scenes: Integration of Image and Speech Understanding in a Hybrid Distributed System",
  BOOKTITLE = ICIP96,
  YEAR      = "1996",
  PAGES     = "II: 809--812",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match605.html#TT124005"
}

@article{bb127951,
  AUTHOR    = "Kuniyoshi, Y. and Inaba, M. and Inoue, H.",
  TITLE     = "Learning by Watching: Extracting Reusable Task Knowledge from Visual Observation of Human Performance",
  JOURNAL   = RA,
  VOLUME    = "10",
  YEAR      = "1994",
  PAGES     = "799--822",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT124006"
}

@inproceedings{bb127952,
  AUTHOR    = "Kuniyoshi, Y. and Inoue, H.",
  TITLE     = "Indexicality and dynamic attention control in qualitative recognition of assembly actions",
  BOOKTITLE = ECCV92,
  YEAR      = "1992",
  PAGES     = "874--878",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT124007"
}

@article{bb127953,
  AUTHOR    = "Porway, J. and Wang, Q.C. and Zhu, S.C.",
  TITLE     = "A Hierarchical and Contextual Model for Aerial Image Parsing",
  JOURNAL   = IJCV,
  VOLUME    = "88",
  YEAR      = "2010",
  NUMBER    = "2",
  MONTH     = "June",
  PAGES     = "xx-yy",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT124008"
}

@inproceedings{bb127954,
  AUTHOR    = "Porway, J. and Wang, K. and Yao, B. and Zhu, S.C.",
  TITLE     = "A hierarchical and contextual model for aerial image understanding",
  BOOKTITLE = CVPR08,
  YEAR      = "2008",
  PAGES     = "1--8",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT124009"
}

@inproceedings{bb127955,
  AUTHOR    = "Si, Z.Z. and Gong, H.F. and Wu, Y.N. and Zhu, S.C.",
  TITLE     = "Learning mixed templates for object recognition",
  BOOKTITLE = CVPR09,
  YEAR      = "2009",
  PAGES     = "272--279",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT124010"
}

@article{bb127956,
  AUTHOR    = "Tu, Z.W. and Bai, X.",
  TITLE     = "Auto-Context and Its Application to High-Level Vision Tasks and 3D Brain Image Segmentation",
  JOURNAL   = PAMI,
  VOLUME    = "32",
  YEAR      = "2010",
  NUMBER    = "10",
  MONTH     = "October",
  PAGES     = "1744--1757",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT124011"
}

@inproceedings{bb127957,
  AUTHOR    = "Tu, Z.W.",
  TITLE     = "Auto-context and its application to high-level vision tasks",
  BOOKTITLE = CVPR08,
  YEAR      = "2008",
  PAGES     = "1--8",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT124012"
}

@inproceedings{bb127958,
  AUTHOR    = "Jones, J. and Hager, G.D. and Khudanpur, S.",
  TITLE     = "Toward Computer Vision Systems That Understand Real-World Assembly Processes",
  BOOKTITLE = WACV19,
  YEAR      = "2019",
  PAGES     = "426--434",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT124013"
}

@inproceedings{bb127959,
  AUTHOR    = "Lampert, C.H.",
  TITLE     = "Partitioning of image datasets using discriminative context information",
  BOOKTITLE = CVPR08,
  YEAR      = "2008",
  PAGES     = "1--8",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT124014"
}

@inproceedings{bb127960,
  AUTHOR    = "Hansen, C. and Henderson, T.C.",
  TITLE     = "Towards the Automatic Generation of Recognition Strategies",
  BOOKTITLE = ICCV88,
  YEAR      = "1988",
  PAGES     = "275--279",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT124015"
}

@inproceedings{bb127961,
  AUTHOR    = "Cantoni, V. and Cei, U. and Ferretti, M. and Lombardi, L.",
  TITLE     = "Towards an Automatic Construction of Object Recognition Strategies",
  BOOKTITLE = ICPR88,
  YEAR      = "1988",
  PAGES     = "I: 371--374",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT124016"
}

@inproceedings{bb127962,
  AUTHOR    = "Cova, G. and Griffini, A. and Lombardi, L.",
  TITLE     = "Object Recognition Strategy in a Multi-Resolution System",
  BOOKTITLE = CIAP89,
  YEAR      = "1989",
  PAGES     = "729--733",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match606.html#TT124017"
}

@article{bb127963,
  AUTHOR    = "Feng, Y.S. and Lapata, M.",
  TITLE     = "Automatic Caption Generation for News Images",
  JOURNAL   = PAMI,
  VOLUME    = "35",
  YEAR      = "2013",
  NUMBER    = "4",
  MONTH     = "April",
  PAGES     = "797--812",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124018"
}

@article{bb127964,
  AUTHOR    = "Vinyals, O. and Toshev, A. and Bengio, S. and Erhan, D.",
  TITLE     = "Show and Tell: Lessons Learned from the 2015 MSCOCO Image Captioning Challenge",
  JOURNAL   = PAMI,
  VOLUME    = "39",
  YEAR      = "2017",
  NUMBER    = "4",
  MONTH     = "April",
  PAGES     = "652--663",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124019"
}

@inproceedings{bb127965,
  AUTHOR    = "Vinyals, O. and Toshev, A. and Bengio, S. and Erhan, D.",
  TITLE     = "Show and tell: A neural image caption generator",
  BOOKTITLE = CVPR15,
  YEAR      = "2015",
  PAGES     = "3156--3164",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124020"
}

@article{bb127966,
  AUTHOR    = "Wang, J.Y. and Zhu, X.T. and Gong, S.G.",
  TITLE     = "Discovering visual concept structure with sparse and incomplete tags",
  JOURNAL   = AI,
  VOLUME    = "250",
  YEAR      = "2017",
  NUMBER    = "1",
  PAGES     = "16--36",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124021"
}

@article{bb127967,
  AUTHOR    = "Kilickaya, M. and Akkus, B.K. and Cakici, R. and Erdem, A. and Erdem, E. and Ikizler Cinbis, N.",
  TITLE     = "Data-driven image captioning via salient region discovery",
  JOURNAL   = IET-CV,
  VOLUME    = "11",
  YEAR      = "2017",
  NUMBER    = "6",
  MONTH     = "September",
  PAGES     = "398--406",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124022"
}

@article{bb127968,
  AUTHOR    = "He, X.D. and Deng, L.",
  TITLE     = "Deep Learning for Image-to-Text Generation: A Technical Overview",
  JOURNAL   = SPMag,
  VOLUME    = "34",
  YEAR      = "2017",
  NUMBER    = "6",
  MONTH     = "November",
  PAGES     = "109--116",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124023"
}

@article{bb127969,
  AUTHOR    = "Deng, L. and He, X.D.",
  TITLE     = "Deep Learning for Image-to-Text Generation: A Technical Overview",
  JOURNAL   = SPMag,
  VOLUME    = "35",
  YEAR      = "2018",
  NUMBER    = "1",
  MONTH     = "January",
  PAGES     = "178",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124023"
}

@article{bb127970,
  AUTHOR    = "Li, L.H. and Tang, S. and Zhang, Y.D. and Deng, L.X. and Tian, Q.",
  TITLE     = "GLA: Global-Local Attention for Image Description",
  JOURNAL   = MultMed,
  VOLUME    = "20",
  YEAR      = "2018",
  NUMBER    = "3",
  MONTH     = "March",
  PAGES     = "726--737",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124024"
}

@article{bb127971,
  AUTHOR    = "Lu, X. and Wang, B. and Zheng, X. and Li, X.",
  TITLE     = "Exploring Models and Data for Remote Sensing Image Caption Generation",
  JOURNAL   = GeoRS,
  VOLUME    = "56",
  YEAR      = "2018",
  NUMBER    = "4",
  MONTH     = "April",
  PAGES     = "2183--2195",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124025"
}

@article{bb127972,
  AUTHOR    = "Wu, C.L. and Wei, Y.W. and Chu, X.L. and Su, F. and Wang, L.Q.",
  TITLE     = "Modeling visual and word-conditional semantic attention for image captioning",
  JOURNAL   = SP:IC,
  VOLUME    = "67",
  YEAR      = "2018",
  PAGES     = "100--107",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124026"
}

@article{bb127973,
  AUTHOR    = "Zhang, M. and Yang, Y. and Zhang, H. and Ji, Y. and Shen, H.T. and Chua, T.",
  TITLE     = "More is Better: Precise and Detailed Image Captioning Using Online Positive Recall and Missing Concepts Mining",
  JOURNAL   = IP,
  VOLUME    = "28",
  YEAR      = "2019",
  NUMBER    = "1",
  MONTH     = "January",
  PAGES     = "32--44",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124027"
}

@article{bb127974,
  AUTHOR    = "Gella, S. and Keller, F. and Lapata, M.",
  TITLE     = "Disambiguating Visual Verbs",
  JOURNAL   = PAMI,
  VOLUME    = "41",
  YEAR      = "2019",
  NUMBER    = "2",
  MONTH     = "February",
  PAGES     = "311--322",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124028"
}

@article{bb127975,
  AUTHOR    = "Xu, N. and Liu, A.A. and Liu, J. and Nie, W.Z. and Su, Y.T.",
  TITLE     = "Scene graph captioner: Image captioning based on structural visual representation",
  JOURNAL   = JVCIR,
  VOLUME    = "58",
  YEAR      = "2019",
  PAGES     = "477--485",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124029"
}

@article{bb127976,
  AUTHOR    = "He, X.W. and Shi, B.G. and Bai, X. and Xia, G.S. and Zhang, Z.X. and Dong, W.S.",
  TITLE     = "Image Caption Generation with Part of Speech Guidance",
  JOURNAL   = PRL,
  VOLUME    = "119",
  YEAR      = "2019",
  PAGES     = "229--237",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124030"
}

@article{bb127977,
  AUTHOR    = "Xiao, X.Y. and Wang, L.F. and Ding, K. and Xiang, S.M. and Pan, C.H.",
  TITLE     = "Dense semantic embedding network for image captioning",
  JOURNAL   = PR,
  VOLUME    = "90",
  YEAR      = "2019",
  PAGES     = "285--296",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124031"
}

@article{bb127978,
  AUTHOR    = "Zhang, X.R. and Wang, X. and Tang, X. and Zhou, H.Y. and Li, C.",
  TITLE     = "Description Generation for Remote Sensing Images Using Attribute Attention Mechanism",
  JOURNAL   = RS,
  VOLUME    = "11",
  YEAR      = "2019",
  NUMBER    = "6",
  PAGES     = "xx-yy",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124032"
}

@article{bb127979,
  AUTHOR    = "Ding, S.T. and Qu, S. and Xi, Y.L. and Sangaiah, A.K. and Wan, S.H.",
  TITLE     = "Image caption generation with high-level image features",
  JOURNAL   = PRL,
  VOLUME    = "123",
  YEAR      = "2019",
  PAGES     = "89--95",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124033"
}

@article{bb127980,
  AUTHOR    = "Liu, X.X. and Xu, Q.Y. and Wang, N.",
  TITLE     = "A survey on deep neural network-based image captioning",
  JOURNAL   = VC,
  VOLUME    = "35",
  YEAR      = "2019",
  NUMBER    = "3",
  MONTH     = "March",
  PAGES     = "445--470",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124034"
}

@article{bb127981,
  AUTHOR    = "Hossain, M.Z. and Sohel, F. and Shiratuddin, M.F. and Laga, H.",
  TITLE     = "A Comprehensive Survey of Deep Learning for Image Captioning",
  JOURNAL   = Surveys,
  VOLUME    = "51",
  YEAR      = "2019",
  NUMBER    = "6",
  MONTH     = "February",
  PAGES     = "Article No 118",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124035"
}

@article{bb127982,
  AUTHOR    = "Zhang, Z.J. and Wu, Q. and Wang, Y. and Chen, F.",
  TITLE     = "High-Quality Image Captioning With Fine-Grained and Semantic-Guided Visual Attention",
  JOURNAL   = MultMed,
  VOLUME    = "21",
  YEAR      = "2019",
  NUMBER    = "7",
  MONTH     = "July",
  PAGES     = "1681--1693",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124036"
}

@inproceedings{bb127983,
  AUTHOR    = "Zhang, Z.J. and Wu, Q. and Wang, Y. and Chen, F.",
  TITLE     = "Fine-Grained and Semantic-Guided Visual Attention for Image Captioning",
  BOOKTITLE = WACV18,
  YEAR      = "2018",
  PAGES     = "1709--1717",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124037"
}

@article{bb127984,
  AUTHOR    = "Li, X. and Jiang, S.",
  TITLE     = "Know More Say Less: Image Captioning Based on Scene Graphs",
  JOURNAL   = MultMed,
  VOLUME    = "21",
  YEAR      = "2019",
  NUMBER    = "8",
  MONTH     = "August",
  PAGES     = "2117--2130",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124038"
}

@article{bb127985,
  AUTHOR    = "Sharif, N. and White, L. and Bennamoun, M. and Liu, W. and Shah, S.A.A.",
  TITLE     = "LCEval: Learned Composite Metric for Caption Evaluation",
  JOURNAL   = IJCV,
  VOLUME    = "127",
  YEAR      = "2019",
  NUMBER    = "10",
  MONTH     = "October",
  PAGES     = "1586--1610",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124039"
}

@article{bb127986,
  AUTHOR    = "Zhang, Z.Y. and Diao, W.H. and Zhang, W.K. and Yan, M.L. and Gao, X. and Sun, X.",
  TITLE     = "LAM: Remote Sensing Image Captioning with Label-Attention Mechanism",
  JOURNAL   = RS,
  VOLUME    = "11",
  YEAR      = "2019",
  NUMBER    = "20",
  PAGES     = "xx-yy",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124040"
}

@article{bb127987,
  AUTHOR    = "Fu, K. and Li, Y. and Zhang, W.K. and Yu, H.F. and Sun, X.",
  TITLE     = "Boosting Memory with a Persistent Memory Mechanism for Remote Sensing Image Captioning",
  JOURNAL   = RS,
  VOLUME    = "12",
  YEAR      = "2020",
  NUMBER    = "11",
  PAGES     = "xx-yy",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124041"
}

@article{bb127988,
  AUTHOR    = "Tan, J.H. and Chan, C.S. and Chuah, J.H.",
  TITLE     = "COMIC: Toward A Compact Image Captioning Model With Attention",
  JOURNAL   = MultMed,
  VOLUME    = "21",
  YEAR      = "2019",
  NUMBER    = "10",
  MONTH     = "October",
  PAGES     = "2686--2696",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124042"
}

@article{bb127989,
  AUTHOR    = "Zhou, L. and Zhang, Y. and Jiang, Y. and Zhang, T. and Fan, W.",
  TITLE     = "Re-Caption: Saliency-Enhanced Image Captioning Through Two-Phase Learning",
  JOURNAL   = IP,
  VOLUME    = "29",
  YEAR      = "2020",
  NUMBER    = "1",
  PAGES     = "694--709",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124043"
}

@article{bb127990,
  AUTHOR    = "Yang, L. and Hu, H.F.",
  TITLE     = "Visual Skeleton and Reparative Attention for Part-of-Speech image captioning system",
  JOURNAL   = CVIU,
  VOLUME    = "189",
  YEAR      = "2019",
  PAGES     = "102819",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124044"
}

@article{bb127991,
  AUTHOR    = "Wang, J.B. and Wang, W. and Wang, L. and Wang, Z.Y. and Feng, D.D. and Tan, T.N.",
  TITLE     = "Learning Visual Relationship and Context-Aware Attention for Image Captioning",
  JOURNAL   = PR,
  VOLUME    = "98",
  YEAR      = "2020",
  PAGES     = "107075",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124045"
}

@article{bb127992,
  AUTHOR    = "Xiao, X. and Wang, L. and Ding, K. and Xiang, S. and Pan, C.",
  TITLE     = "Deep Hierarchical Encoder-Decoder Network for Image Captioning",
  JOURNAL   = MultMed,
  VOLUME    = "21",
  YEAR      = "2019",
  NUMBER    = "11",
  MONTH     = "November",
  PAGES     = "2942--2956",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124046"
}

@article{bb127993,
  AUTHOR    = "Jiang, T. and Zhang, Z. and Yang, Y.",
  TITLE     = "Modeling coverage with semantic embedding for image caption generation",
  JOURNAL   = VC,
  VOLUME    = "35",
  YEAR      = "2019",
  NUMBER    = "11",
  MONTH     = "November",
  PAGES     = "1655--1665",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124047"
}

@article{bb127994,
  AUTHOR    = "Lu, X. and Wang, B. and Zheng, X.",
  TITLE     = "Sound Active Attention Framework for Remote Sensing Image Captioning",
  JOURNAL   = GeoRS,
  VOLUME    = "58",
  YEAR      = "2020",
  NUMBER    = "3",
  MONTH     = "March",
  PAGES     = "1985--2000",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124048"
}

@article{bb127995,
  AUTHOR    = "Li, Y.Y. and Fang, S.K. and Jiao, L.C. and Liu, R.J. and Shang, R.H.",
  TITLE     = "A Multi-Level Attention Model for Remote Sensing Image Captions",
  JOURNAL   = RS,
  VOLUME    = "12",
  YEAR      = "2020",
  NUMBER    = "6",
  PAGES     = "xx-yy",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124049"
}

@article{bb127996,
  AUTHOR    = "Chen, X.H. and Zhang, M.X. and Wang, Z. and Zuo, L. and Li, B. and Yang, Y.",
  TITLE     = "Leveraging unpaired out-of-domain data for image captioning",
  JOURNAL   = PRL,
  VOLUME    = "132",
  YEAR      = "2020",
  PAGES     = "132--140",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124050"
}

@article{bb127997,
  AUTHOR    = "Xu, N. and Zhang, H. and Liu, A. and Nie, W. and Su, Y. and Nie, J. and Zhang, Y.",
  TITLE     = "Multi-Level Policy and Reward-Based Deep Reinforcement Learning Framework for Image Captioning",
  JOURNAL   = MultMed,
  VOLUME    = "22",
  YEAR      = "2020",
  NUMBER    = "5",
  MONTH     = "May",
  PAGES     = "1372--1383",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124051"
}

@article{bb127998,
  AUTHOR    = "Guo, L. and Liu, J. and Lu, S. and Lu, H.",
  TITLE     = "Show, Tell, and Polish: Ruminant Decoding for Image Captioning",
  JOURNAL   = MultMed,
  VOLUME    = "22",
  YEAR      = "2020",
  NUMBER    = "8",
  MONTH     = "August",
  PAGES     = "2149--2162",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124052"
}

@article{bb127999,
  AUTHOR    = "Feng, Q. and Wu, Y. and Fan, H. and Yan, C. and Xu, M. and Yang, Y.",
  TITLE     = "Cascaded Revision Network for Novel Object Captioning",
  JOURNAL   = CirSysVideo,
  VOLUME    = "30",
  YEAR      = "2020",
  NUMBER    = "10",
  MONTH     = "October",
  PAGES     = "3413--3421",
  BIBSOURCE = "http://www.visionbib.com/bibliography/match607ic1.html#TT124053"
}