Keith Price Bibliography Bibtex Entry (URL http://dx.doi.org/10.1109/WACV.2018.00190 TYPE CONFERENCE PAGES 1709-1717 MONTH NIL JOURNAL WACV18 ANCHOR 136700 YEAR 2018 BIBSOURCE http://www.visionbib.com/bibliography/match607attic3.html#TT132691 VOLUME NIL AUTHOR Zhang, Z.J. and Wu, Q. and Wang, Y. and Chen, F. TITLE Fine-Grained and Semantic-Guided Visual Attention for Image Captioning)


@inproceedings{bb136700,
        AUTHOR = "Zhang, Z.J. and Wu, Q. and Wang, Y. and Chen, F.",
        TITLE = "Fine-Grained and Semantic-Guided Visual Attention for Image
Captioning",
        BOOKTITLE = WACV18,
        YEAR = "2018",
        PAGES = "1709--1717",
        DOI = "10.1109/WACV.2018.00190",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607attic3.html#TT132691"}

@article{bb136701,
        AUTHOR = "Tan, J.H. and Chan, C.S. and Chuah, J.H.",
        TITLE = "{COMIC}: Toward A Compact Image Captioning Model With Attention",
        JOURNAL = MultMed,
        VOLUME = "21",
        YEAR = "2019",
        NUMBER = "10",
        MONTH = oct,
        PAGES = "2686--2696",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607attic3.html#TT132692"}

@article{bb136702,
  author    = {Yang, L. and Hu, H.F.},
  title     = {Visual Skeleton and Reparative Attention for Part-of-Speech image
captioning system},
  journal   = CVIU,
  year      = {2019},
  volume    = {189},
  pages     = {102819},
  bibsource = {http://www.visionbib.com/bibliography/match607attic3.html#TT132693},
}

@article{bb136703,
  author    = {Wang, J.B. and Wang, W. and Wang, L. and Wang, Z.Y. and Feng, D.D. and Tan, T.N.},
  title     = {Learning Visual Relationship and Context-Aware Attention for Image
Captioning},
  journal   = PR,
  year      = {2020},
  volume    = {98},
  pages     = {107075},
  bibsource = {http://www.visionbib.com/bibliography/match607attic3.html#TT132694},
}

@article{bb136704,
  author    = {Wei, H.Y. and Li, Z.X. and Zhang, C.L. and Ma, H.F.},
  title     = {The synergy of double attention: Combine sentence-level and
word-level attention for image captioning},
  journal   = CVIU,
  year      = {2020},
  volume    = {201},
  pages     = {103068},
  bibsource = {http://www.visionbib.com/bibliography/match607attic3.html#TT132695},
}

@article{bb136705,
  author    = {Ji, J.Z. and Du, Z.R. and Zhang, X.D.},
  title     = {Divergent-convergent attention for image captioning},
  journal   = PR,
  year      = {2021},
  volume    = {115},
  pages     = {107928},
  bibsource = {http://www.visionbib.com/bibliography/match607attic3.html#TT132696},
}

@article{bb136706,
  author    = {Wei, Y.W. and Wu, C.L. and Jia, Z.Y. and Hu, X.F. and Guo, S. and Shi, H.T.},
  title     = {Past is important: Improved image captioning by looking back in time},
  journal   = SP:IC,
  year      = {2021},
  volume    = {94},
  pages     = {116183},
  bibsource = {http://www.visionbib.com/bibliography/match607attic3.html#TT132697},
}

@article{bb136707,
  author    = {Zhang, Z.J. and Wu, Q. and Wang, Y. and Chen, F.},
  title     = {Exploring region relationships implicitly:
Image captioning with visual relationship attention},
  journal   = IVC,
  year      = {2021},
  volume    = {109},
  pages     = {104146},
  bibsource = {http://www.visionbib.com/bibliography/match607attic3.html#TT132698},
}

@article{bb136708,
        AUTHOR = "Zhang, Z.J. and Wu, Q. and Wang, Y. and Chen, F.",
        TITLE = "Exploring Pairwise Relationships Adaptively From Linguistic Context
in Image Captioning",
        JOURNAL = MultMed,
        VOLUME = "24",
        YEAR = "2022",
        PAGES = "3101--3113",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607attic3.html#TT132699"}

@article{bb136709,
  author    = {Zhong, X. and Nie, G.Z. and Huang, W.X. and Liu, W.X. and Ma, B. and Lin, C.W.},
  title     = {Attention-guided image captioning with adaptive global and local
feature fusion},
  journal   = JVCIR,
  year      = {2021},
  volume    = {78},
  pages     = {103138},
  bibsource = {http://www.visionbib.com/bibliography/match607attic3.html#TT132700},
}

@article{bb136710,
  author    = {Wan, B.Y. and Jiang, W.H. and Fang, Y.M. and Zhu, M.W. and Li, Q. and Liu, Y.},
  title     = {Revisiting image captioning via maximum discrepancy competition},
  journal   = PR,
  year      = {2022},
  volume    = {122},
  pages     = {108358},
  bibsource = {http://www.visionbib.com/bibliography/match607attic3.html#TT132701},
}

@article{bb136711,
        AUTHOR = "Chen, T.Y. and Li, Z.X. and Wu, J.L. and Ma, H.F. and Su, B.P.",
        TITLE = "Improving image captioning with Pyramid Attention and {SC-GAN}",
        JOURNAL = IVC,
        VOLUME = "117",
        YEAR = "2022",
        PAGES = "104340",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607attic3.html#TT132702"}

@article{bb136712,
        AUTHOR = "Zhou, Y.J. and Long, J.F. and Xu, S.P. and Shang, L.",
        TITLE = "Attribute-driven image captioning via soft-switch pointer",
        JOURNAL = PRL,
        VOLUME = "152",
        YEAR = "2021",
        PAGES = "34--41",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607attic3.html#TT132703"}

@article{bb136713,
        AUTHOR = "Wang, Q.Z. and Wan, J. and Chan, A.B.",
        TITLE = "On Diversity in Image Captioning: Metrics and Methods",
        JOURNAL = PAMI,
        VOLUME = "44",
        YEAR = "2022",
        NUMBER = "2",
        MONTH = feb,
        PAGES = "1035--1049",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607attic3.html#TT132704"}

@article{bb136714,
        AUTHOR = "Wang, J.N. and Xu, W.J. and Wang, Q.Z. and Chan, A.B.",
        TITLE = "On Distinctive Image Captioning via Comparing and Reweighting",
        JOURNAL = PAMI,
        VOLUME = "45",
        YEAR = "2023",
        NUMBER = "2",
        MONTH = feb,
        PAGES = "2088--2103",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607attic3.html#TT132705"}

@article{bb136715,
        AUTHOR = "Wang, J.N. and Xu, W.J. and Wang, Q.Z. and Chan, A.B.",
        TITLE = "Group-Based Distinctive Image Captioning with Memory Difference
Encoding and Attention",
        JOURNAL = IJCV,
        VOLUME = "133",
        YEAR = "2025",
        NUMBER = "4",
        MONTH = apr,
        PAGES = "1435--1455",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607attic3.html#TT132706"}

@inproceedings{bb136716,
        AUTHOR = "Wang, J.N. and Xu, W.J. and Wang, Q.Z. and Chan, A.B.",
        TITLE = "Compare and Reweight:
Distinctive Image Captioning Using Similar Images Sets",
        BOOKTITLE = ECCV20,
        YEAR = "2020",
        PAGES = "I:370--386",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607attic3.html#TT132707"}

@article{bb136717,
        AUTHOR = "Liu, M.F. and Hu, H.J. and Li, L.J. and Yu, Y. and Guan, W.L.",
        TITLE = "Chinese Image Caption Generation via Visual Attention and Topic
Modeling",
        JOURNAL = Cyber,
        VOLUME = "52",
        YEAR = "2022",
        NUMBER = "2",
        MONTH = feb,
        PAGES = "1247--1257",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607attic3.html#TT132708"}

@article{bb136718,
        AUTHOR = "Li, X. and Zhang, W.K. and Sun, X. and Gao, X.",
        TITLE = "Without detection: Two-step clustering features with local-global
attention for image captioning",
        JOURNAL = IET-CV,
        VOLUME = "16",
        YEAR = "2022",
        NUMBER = "3",
        PAGES = "280--294",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607attic3.html#TT132709"}

% NOTE(review): NUMBER = "2022" merely repeated the year and was removed; the other
% MultMed entries for volumes 24 and 25 in this file carry no issue number either.
% Verify against the publisher record before re-adding one.
@article{bb136719,
        AUTHOR = "Yu, L.T. and Zhang, J. and Wu, Q.",
        TITLE = "Dual Attention on Pyramid Feature Maps for Image Captioning",
        JOURNAL = MultMed,
        VOLUME = "24",
        YEAR = "2022",
        PAGES = "1775--1786",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607attic3.html#TT132710"}

@article{bb136720,
        AUTHOR = "Shao, X.J. and Xiang, Z.L. and Li, Y.X. and Zhang, M.J.",
        TITLE = "Variational joint self-attention for image captioning",
        JOURNAL = IET-IPR,
        VOLUME = "16",
        YEAR = "2022",
        NUMBER = "8",
        PAGES = "2075--2086",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607attic3.html#TT132711"}

@article{bb136721,
  author    = {Ma, Y.W. and Ji, J.Y. and Sun, X.S. and Zhou, Y. and Ji, R.R.},
  title     = {Towards local visual modeling for image captioning},
  journal   = PR,
  year      = {2023},
  volume    = {138},
  pages     = {109420},
  bibsource = {http://www.visionbib.com/bibliography/match607attic3.html#TT132712},
}

@article{bb136722,
        AUTHOR = "Barati, A. and Farsi, H. and Mohamadzadeh, S.",
        TITLE = "Integration of the latent variable knowledge into deep image
captioning with {Bayesian} modeling",
        JOURNAL = IET-IPR,
        VOLUME = "17",
        YEAR = "2023",
        NUMBER = "7",
        PAGES = "2256--2271",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607attic3.html#TT132713"}

@article{bb136723,
        AUTHOR = "Ji, J.Y. and Huang, X.Y. and Sun, X.S. and Zhou, Y. and Luo, G. and Cao, L.J. and Liu, J.Z. and Shao, L. and Ji, R.R.",
        TITLE = "Multi-Branch Distance-Sensitive Self-Attention Network for Image
Captioning",
        JOURNAL = MultMed,
        VOLUME = "25",
        YEAR = "2023",
        PAGES = "3962--3974",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607attic3.html#TT132714"}

@article{bb136724,
  author    = {Cornia, M. and Baraldi, L. and Tal, A. and Cucchiara, R.},
  title     = {Fully-attentive iterative networks for region-based controllable
image and video captioning},
  journal   = CVIU,
  year      = {2023},
  volume    = {237},
  pages     = {103857},
  bibsource = {http://www.visionbib.com/bibliography/match607attic3.html#TT132715},
}

@article{bb136725,
  author    = {Song, L.F. and Li, F. and Wang, Y. and Liu, Y. and Wang, Y.H. and Xiang, S.M.},
  title     = {Image captioning: Semantic selection unit with stacked residual
attention},
  journal   = IVC,
  year      = {2024},
  volume    = {144},
  pages     = {104965},
  bibsource = {http://www.visionbib.com/bibliography/match607attic3.html#TT132716},
}

@article{bb136726,
        AUTHOR = "Du, R. and Zhang, W.K. and Li, S. and Chen, J.L. and Guo, Z.",
        TITLE = "Spatial guided image captioning: Guiding attention with object's
spatial interaction",
        JOURNAL = IET-IPR,
        VOLUME = "18",
        YEAR = "2024",
        NUMBER = "12",
        PAGES = "3368--3380",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607attic3.html#TT132717"}

@article{bb136727,
        AUTHOR = "Zhang, X.D. and Jia, A. and Ji, J.Z. and Qu, L.Q. and Ye, Q.X.",
        TITLE = "Intra- and Inter-Head Orthogonal Attention for Image Captioning",
        JOURNAL = IP,
        VOLUME = "34",
        YEAR = "2025",
        PAGES = "594--607",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607attic3.html#TT132718"}

@article{bb136728,
        AUTHOR = "Song, L.F. and Wang, Y. and Shi, L. and Yu, J.Z. and Li, F. and Xiang, S.M.",
        TITLE = "Transformer with token attention and attribute prediction for image
captioning",
        JOURNAL = PRL,
        VOLUME = "188",
        YEAR = "2025",
        PAGES = "74--80",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607attic3.html#TT132719"}

@article{bb136729,
  author    = {Parseh, M.J. and Ghadiri, S.},
  title     = {Graph-based image captioning with semantic and spatial features},
  journal   = SP:IC,
  year      = {2025},
  volume    = {133},
  pages     = {117273},
  bibsource = {http://www.visionbib.com/bibliography/match607attic3.html#TT132720},
}

@inproceedings{bb136730,
        AUTHOR = "Sui, J.H. and Yu, H.M. and Liang, X.Y. and Ping, P.",
        TITLE = "Image Caption Method Based on Graph Attention Network with Global
Context",
        BOOKTITLE = ICIVC22,
        YEAR = "2022",
        PAGES = "480--487",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607attic3.html#TT132721"}

@inproceedings{bb136731,
        AUTHOR = "Popattia, M. and Rafi, M. and Qureshi, R. and Nawaz, S.",
        TITLE = "Guiding Attention using Partial-Order Relationships for Image
Captioning",
        BOOKTITLE = MULA22,
        YEAR = "2022",
        PAGES = "4670--4679",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607attic3.html#TT132722"}

@inproceedings{bb136732,
        AUTHOR = "Deb, T. and Sadmanee, A. and Bhaumik, K.K. and Ali, A.A. and Amin, M.A. and Rahman, A.K.M.M.",
        TITLE = "Variational Stacked Local Attention Networks for Diverse Video
Captioning",
        BOOKTITLE = WACV22,
        YEAR = "2022",
        PAGES = "2493--2502",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607attic3.html#TT132723"}

@inproceedings{bb136733,
        AUTHOR = "Li, Z. and Tran, Q. and Mai, L. and Lin, Z. and Yuille, A.L.",
        TITLE = "Context-Aware Group Captioning via Self-Attention and Contrastive
Features",
        BOOKTITLE = CVPR20,
        YEAR = "2020",
        PAGES = "3437--3447",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607attic3.html#TT132724"}

@inproceedings{bb136734,
        AUTHOR = "Guo, L. and Liu, J. and Zhu, X. and Yao, P. and Lu, S. and Lu, H.",
        TITLE = "Normalized and Geometry-Aware Self-Attention Network for Image
Captioning",
        BOOKTITLE = CVPR20,
        YEAR = "2020",
        PAGES = "10324--10333",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607attic3.html#TT132725"}

@inproceedings{bb136735,
        AUTHOR = "Pan, Y. and Yao, T. and Li, Y. and Mei, T.",
        TITLE = "{X-Linear} Attention Networks for Image Captioning",
        BOOKTITLE = CVPR20,
        YEAR = "2020",
        PAGES = "10968--10977",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607attic3.html#TT132726"}

@inproceedings{bb136736,
        AUTHOR = "Park, G. and Han, C. and Kim, D. and Yoon, W.J.",
        TITLE = "{MHSAN}: Multi-Head Self-Attention Network for Visual Semantic
Embedding",
        BOOKTITLE = WACV20,
        YEAR = "2020",
        PAGES = "1507--1515",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607attic3.html#TT132727"}

@inproceedings{bb136737,
        AUTHOR = "He, S. and Tavakoli, H.R. and Borji, A. and Pugeault, N.",
        TITLE = "Human Attention in Image Captioning: Dataset and Analysis",
        BOOKTITLE = ICCV19,
        YEAR = "2019",
        PAGES = "8528--8537",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607attic3.html#TT132728"}

@inproceedings{bb136738,
        AUTHOR = "Huang, L. and Wang, W. and Chen, J. and Wei, X.",
        TITLE = "Attention on Attention for Image Captioning",
        BOOKTITLE = ICCV19,
        YEAR = "2019",
        PAGES = "4633--4642",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607attic3.html#TT132729"}

@inproceedings{bb136739,
        AUTHOR = "Wei, H.Y. and Li, Z.X. and Zhang, C.L.",
        TITLE = "Image Captioning Based on Visual and Semantic Attention",
        BOOKTITLE = MMMod20,
        YEAR = "2020",
        PAGES = "I:151--162",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607attic3.html#TT132730"}

@inproceedings{bb136740,
        AUTHOR = "Fukui, H. and Hirakawa, T. and Yamashita, T. and Fujiyoshi, H.",
        TITLE = "Attention Branch Network: Learning of Attention Mechanism for Visual
Explanation",
        BOOKTITLE = CVPR19,
        YEAR = "2019",
        PAGES = "10697--10706",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607attic3.html#TT132731"}

@inproceedings{bb136741,
        AUTHOR = "Huang, Y. and Li, C. and Li, T. and Wan, W. and Chen, J.",
        TITLE = "Image Captioning with Attribute Refinement",
        BOOKTITLE = ICIP19,
        YEAR = "2019",
        PAGES = "1820--1824",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607attic3.html#TT132732"}

@inproceedings{bb136742,
        AUTHOR = "Shi, J. and Li, Y. and Wang, S.",
        TITLE = "Cascade Attention: Multiple Feature Based Learning for Image
Captioning",
        BOOKTITLE = ICIP19,
        YEAR = "2019",
        PAGES = "1970--1974",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607attic3.html#TT132733"}

@inproceedings{bb136743,
        AUTHOR = "Xiao, H. and Shi, J.",
        TITLE = "A Novel Attribute Selection Mechanism for Video Captioning",
        BOOKTITLE = ICIP19,
        YEAR = "2019",
        PAGES = "619--623",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607attic3.html#TT132734"}

@inproceedings{bb136744,
        AUTHOR = "Wang, Q.Z. and Chan, A.B.",
        TITLE = "Gated Hierarchical Attention for Image Captioning",
        BOOKTITLE = ACCV18,
        YEAR = "2018",
        PAGES = "IV:21--37",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607attic3.html#TT132735"}

@inproceedings{bb136745,
        AUTHOR = "Wang, W.X. and Chen, Z.H. and Hu, H.F.",
        TITLE = "Multivariate Attention Network for Image Captioning",
        BOOKTITLE = ACCV18,
        YEAR = "2018",
        PAGES = "VI:587--602",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607attic3.html#TT132736"}

@inproceedings{bb136746,
        AUTHOR = "Ghanimifard, M. and Dobnik, S.",
        TITLE = "Knowing When to Look for What and Where: Evaluating Generation of
Spatial Descriptions with Adaptive Attention",
        BOOKTITLE = VL18,
        YEAR = "2018",
        PAGES = "IV:153--161",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607attic3.html#TT132737"}

% NOTE(review): the end page "20248" looks malformed (five digits after a four-digit
% start page). The digits are left unchanged pending a check against the publisher record.
@inproceedings{bb136747,
        AUTHOR = "Khademi, M. and Schulte, O.",
        TITLE = "Image Caption Generation with Hierarchical Contextual Visual Spatial
Attention",
        BOOKTITLE = Cognitive18,
        YEAR = "2018",
        PAGES = "2024--20248",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607attic3.html#TT132738"}

@inproceedings{bb136748,
        AUTHOR = "Wang, F. and Gong, X. and Huang, L.",
        TITLE = "Time-Dependent Pre-attention Model for Image Captioning",
        BOOKTITLE = ICPR18,
        YEAR = "2018",
        PAGES = "3297--3302",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607attic3.html#TT132739"}

@inproceedings{bb136749,
        AUTHOR = "Chen, S. and Zhao, Q.",
        TITLE = "Boosted Attention: Leveraging Human Attention for Image Captioning",
        BOOKTITLE = ECCV18,
        YEAR = "2018",
        PAGES = "XI: 72--88",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607attic3.html#TT132740"}

@inproceedings{bb136750,
        AUTHOR = "Fang, F. and Wang, H. and Tang, P.",
        TITLE = "Image Captioning with Word Level Attention",
        BOOKTITLE = ICIP18,
        YEAR = "2018",
        PAGES = "1278--1282",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607attic3.html#TT132741"}

@inproceedings{bb136751,
        AUTHOR = "Zhu, Z. and Xue, Z. and Yuan, Z.",
        TITLE = "Topic-Guided Attention for Image Captioning",
        BOOKTITLE = ICIP18,
        YEAR = "2018",
        PAGES = "2615--2619",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607attic3.html#TT132742"}

@inproceedings{bb136752,
        AUTHOR = "Pedersoli, M. and Lucas, T. and Schmid, C. and Verbeek, J.",
        TITLE = "Areas of Attention for Image Captioning",
        BOOKTITLE = ICCV17,
        YEAR = "2017",
        PAGES = "1251--1259",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607attic3.html#TT132743"}

% First author's surname corrected from "Tavakoliy"; the same author appears as
% "Tavakoli, H.R." in entry bb136737.
@inproceedings{bb136753,
        AUTHOR = "Tavakoli, H.R. and Shetty, R. and Borji, A. and Laaksonen, J.",
        TITLE = "Paying Attention to Descriptions Generated by Image Captioning Models",
        BOOKTITLE = ICCV17,
        YEAR = "2017",
        PAGES = "2506--2515",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607attic3.html#TT132744"}

@inproceedings{bb136754,
        AUTHOR = "Lu, J. and Xiong, C. and Parikh, D. and Socher, R.",
        TITLE = "Knowing When to Look: Adaptive Attention via a Visual Sentinel for
Image Captioning",
        BOOKTITLE = CVPR17,
        YEAR = "2017",
        PAGES = "3242--3250",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607attic3.html#TT132745"}

@inproceedings{bb136755,
        AUTHOR = "Chen, L. and Zhang, H. and Xiao, J. and Nie, L. and Shao, J. and Liu, W. and Chua, T.S.",
        TITLE = "{SCA-CNN}: Spatial and Channel-Wise Attention in Convolutional Networks
for Image Captioning",
        BOOKTITLE = CVPR17,
        YEAR = "2017",
        PAGES = "6298--6306",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607attic3.html#TT132746"}

@inproceedings{bb136756,
        AUTHOR = "Zanfir, M. and Marinoiu, E. and Sminchisescu, C.",
        TITLE = "Spatio-Temporal Attention Models for Grounded Video Captioning",
        BOOKTITLE = ACCV16,
        YEAR = "2016",
        PAGES = "IV: 104--119",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607attic3.html#TT132747"}

@inproceedings{bb136757,
        AUTHOR = "Chen, T.H. and Zeng, K.H. and Hsu, W.T. and Sun, M.",
        TITLE = "Video Captioning via Sentence Augmentation and Spatio-Temporal
Attention",
        BOOKTITLE = Assist16,
        YEAR = "2016",
        PAGES = "I: 269--286",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607attic3.html#TT132748"}

@inproceedings{bb136758,
        AUTHOR = "Chen, T.L. and Zhang, Z.P. and You, Q.Z. and Fang, C. and Wang, Z.W. and Jin, H.L. and Luo, J.B.",
        TITLE = "'Factual' or 'Emotional':
Stylized Image Captioning with Adaptive Learning and Attention",
        BOOKTITLE = ECCV18,
        YEAR = "2018",
        PAGES = "X: 527--543",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607attic3.html#TT132749"}

@inproceedings{bb136759,
        AUTHOR = "You, Q.Z. and Jin, H.L. and Wang, Z.W. and Fang, C. and Luo, J.B.",
        TITLE = "Image Captioning with Semantic Attention",
        BOOKTITLE = CVPR16,
        YEAR = "2016",
        PAGES = "4651--4659",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607attic3.html#TT132750"}

@article{bb136760,
        AUTHOR = "Lu, X. and Wang, B. and Zheng, X. and Li, X.",
        TITLE = "Exploring Models and Data for Remote Sensing Image Caption Generation",
        JOURNAL = GeoRS,
        VOLUME = "56",
        YEAR = "2018",
        NUMBER = "4",
        MONTH = apr,
        PAGES = "2183--2195",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607rsic2.html#TT132751"}

% NOTE(review): the source carried the placeholder PAGES = "xx-yy", which would print
% literally in a bibliography; it was dropped. Add the article number once confirmed.
@article{bb136761,
        AUTHOR = "Zhang, X.R. and Wang, X. and Tang, X. and Zhou, H.Y. and Li, C.",
        TITLE = "Description Generation for Remote Sensing Images Using Attribute
Attention Mechanism",
        JOURNAL = RS,
        VOLUME = "11",
        YEAR = "2019",
        NUMBER = "6",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607rsic2.html#TT132752"}

% NOTE(review): the source carried the placeholder PAGES = "xx-yy", which would print
% literally in a bibliography; it was dropped. Add the article number once confirmed.
@article{bb136762,
        AUTHOR = "Zhang, Z.Y. and Diao, W.H. and Zhang, W.K. and Yan, M.L. and Gao, X. and Sun, X.",
        TITLE = "{LAM}: Remote Sensing Image Captioning with Label-Attention Mechanism",
        JOURNAL = RS,
        VOLUME = "11",
        YEAR = "2019",
        NUMBER = "20",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607rsic2.html#TT132753"}

% NOTE(review): the source carried the placeholder PAGES = "xx-yy", which would print
% literally in a bibliography; it was dropped. Add the article number once confirmed.
@article{bb136763,
        AUTHOR = "Fu, K. and Li, Y. and Zhang, W.K. and Yu, H.F. and Sun, X.",
        TITLE = "Boosting Memory with a Persistent Memory Mechanism for Remote Sensing
Image Captioning",
        JOURNAL = RS,
        VOLUME = "12",
        YEAR = "2020",
        NUMBER = "11",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607rsic2.html#TT132754"}

@article{bb136764,
        AUTHOR = "Lu, X. and Wang, B. and Zheng, X.",
        TITLE = "Sound Active Attention Framework for Remote Sensing Image Captioning",
        JOURNAL = GeoRS,
        VOLUME = "58",
        YEAR = "2020",
        NUMBER = "3",
        MONTH = mar,
        PAGES = "1985--2000",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607rsic2.html#TT132755"}

% NOTE(review): the source carried the placeholder PAGES = "xx-yy", which would print
% literally in a bibliography; it was dropped. Add the article number once confirmed.
@article{bb136765,
        AUTHOR = "Li, Y.Y. and Fang, S.K. and Jiao, L.C. and Liu, R.J. and Shang, R.H.",
        TITLE = "A Multi-Level Attention Model for Remote Sensing Image Captions",
        JOURNAL = RS,
        VOLUME = "12",
        YEAR = "2020",
        NUMBER = "6",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607rsic2.html#TT132756"}

@article{bb136766,
        AUTHOR = "Li, X.L. and Zhang, X.T. and Huang, W. and Wang, Q.",
        TITLE = "Truncation Cross Entropy Loss for Remote Sensing Image Captioning",
        JOURNAL = GeoRS,
        VOLUME = "59",
        YEAR = "2021",
        NUMBER = "6",
        MONTH = jun,
        PAGES = "5246--5257",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607rsic2.html#TT132757"}

@article{bb136767,
        AUTHOR = "Sumbul, G. and Nayak, S. and Demir, B.",
        TITLE = "{SD-RSIC}: Summarization-Driven Deep Remote Sensing Image Captioning",
        JOURNAL = GeoRS,
        VOLUME = "59",
        YEAR = "2021",
        NUMBER = "8",
        MONTH = aug,
        PAGES = "6922--6934",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607rsic2.html#TT132758"}

@article{bb136768,
        AUTHOR = "Wang, Q. and Huang, W. and Zhang, X.T. and Li, X.L.",
        TITLE = "Word-Sentence Framework for Remote Sensing Image Captioning",
        JOURNAL = GeoRS,
        VOLUME = "59",
        YEAR = "2021",
        NUMBER = "12",
        MONTH = dec,
        PAGES = "10532--10543",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607rsic2.html#TT132759"}

@article{bb136769,
        AUTHOR = "Yang, Q.Q. and Ni, Z.H. and Ren, P.",
        TITLE = "Meta captioning:
A meta learning based remote sensing image captioning framework",
        JOURNAL = PandRS,
        VOLUME = "186",
        YEAR = "2022",
        PAGES = "190--200",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607rsic2.html#TT132760"}

@article{bb136770,
        AUTHOR = "Liu, Z.Y. and Dong, A.M. and Yu, J.G. and Han, Y.B. and Zhou, Y. and Zhao, K.",
        TITLE = "Scene classification for remote sensing images with self-attention
augmented {CNN}",
        JOURNAL = IET-IPR,
        VOLUME = "16",
        YEAR = "2022",
        NUMBER = "11",
        PAGES = "3085--3096",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607rsic2.html#TT132761"}

% NOTE(review): the source carried the placeholder PAGES = "xx-yy", which would print
% literally in a bibliography; it was dropped. Add the article number once confirmed.
@article{bb136771,
        AUTHOR = "Zhou, H.N. and Du, X.P. and Xia, L. and Li, S.",
        TITLE = "Self-Learning for Few-Shot Remote Sensing Image Captioning",
        JOURNAL = RS,
        VOLUME = "14",
        YEAR = "2022",
        NUMBER = "18",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607rsic2.html#TT132762"}

@article{bb136772,
        AUTHOR = "Wang, Q. and Huang, W. and Zhang, X.T. and Li, X.L.",
        TITLE = "{GLCM}: Global-Local Captioning Model for Remote Sensing Image
Captioning",
        JOURNAL = Cyber,
        VOLUME = "53",
        YEAR = "2023",
        NUMBER = "11",
        MONTH = nov,
        PAGES = "6910--6922",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607rsic2.html#TT132763"}

@article{bb136773,
        AUTHOR = "Wang, Q. and Zhou, Q. and Yang, T. and Gao, J.Y. and Ni, W.P. and Wu, J.Z.",
        TITLE = "A benchmark for multi-lingual vision-language learning in remote
sensing image captioning",
        JOURNAL = PR,
        VOLUME = "178",
        YEAR = "2026",
        PAGES = "113399",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607rsic2.html#TT132764"}

@article{bb136774,
        AUTHOR = "Yang, T. and Zhou, Q. and Wang, Q.",
        TITLE = "{DIA}: Deriving linguistic information from auxiliary languages for
remote sensing image captioning",
        JOURNAL = PR,
        VOLUME = "171",
        YEAR = "2026",
        PAGES = "112209",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607rsic2.html#TT132765"}

@article{bb136775,
        AUTHOR = "Cheng, Q. and Xu, Y.Q. and Huang, Z.Y.",
        TITLE = "{VCC-DiffNet}: Visual Conditional Control Diffusion Network for Remote
Sensing Image Captioning",
        JOURNAL = RS,
        VOLUME = "16",
        YEAR = "2024",
        NUMBER = "16",
        PAGES = "2961",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607rsic2.html#TT132766"}

@article{bb136776,
  author    = {Li, Y.P. and Zhang, X.R. and Zhang, T.Y. and Wang, G.C. and Wang, X.L. and Li, S.},
  title     = {A Patch-Level Region-Aware Module with a Multi-Label Framework for
Remote Sensing Image Captioning},
  journal   = RS,
  year      = {2024},
  volume    = {16},
  number    = {21},
  pages     = {3987},
  bibsource = {http://www.visionbib.com/bibliography/match607rsic2.html#TT132767},
}

@article{bb136777,
  author    = {Zhang, K. and Li, P. and Wang, J.Q.},
  title     = {A Review of Deep Learning-Based Remote Sensing Image Caption:
Methods, Models, Comparisons and Future Directions},
  journal   = RS,
  year      = {2024},
  volume    = {16},
  number    = {21},
  pages     = {4113},
  bibsource = {http://www.visionbib.com/bibliography/match607rsic2.html#TT132768},
}

@article{bb136778,
        AUTHOR = "Leng, G. and Xiong, Y.J. and Qiu, C.P. and Guo, C.Z.",
        TITLE = "Discrete diffusion models with Refined Language-Image Pre-trained
representations for remote sensing image captioning",
        JOURNAL = PRL,
        VOLUME = "186",
        YEAR = "2024",
        PAGES = "164--169",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607rsic2.html#TT132769"}

@article{bb136779,
  author    = {Li, Y.P. and Zhang, X.R. and Wang, G.C. and Zhang, T.Y.},
  title     = {Exploring Difference Semantic Prior Guidance for Remote Sensing Image
Change Captioning},
  journal   = RS,
  year      = {2026},
  volume    = {18},
  number    = {2},
  pages     = {232},
  bibsource = {http://www.visionbib.com/bibliography/match607rsic2.html#TT132770},
}

@article{bb136780,
        AUTHOR = "Wang, S.A. and Ye, X.T. and Gu, Y. and Wang, J.H. and Meng, Y. and Tian, J.X. and Hou, B. and Jiao, L.C.",
        TITLE = "Multi-Label Semantic Feature Fusion for Remote Sensing Image
Captioning",
        JOURNAL = PandRS,
        VOLUME = "184",
        YEAR = "2022",
        PAGES = "1--18",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607rsic2.html#TT132771"}

@article{bb136781,
        AUTHOR = "Han, X. and Wu, Z.J. and Li, Y.P. and Zhang, X.R. and Wang, G.C. and Hou, B.",
        TITLE = "{CSSA}: A Cross-Modal Spatial-Semantic Alignment Framework for Remote
Sensing Image Captioning",
        JOURNAL = RS,
        VOLUME = "18",
        YEAR = "2026",
        NUMBER = "3",
        PAGES = "522",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607rsic2.html#TT132772"}

% NOTE(review): the source carried the placeholder PAGES = "xx-yy", which would print
% literally in a bibliography; it was dropped. Add the article number once confirmed.
@article{bb136782,
        AUTHOR = "Zhang, X.R. and Li, Y.P. and Wang, X. and Liu, F.X. and Wu, Z.J. and Cheng, X. and Jiao, L.C.",
        TITLE = "Multi-Source Interactive Stair Attention for Remote Sensing Image
Captioning",
        JOURNAL = RS,
        VOLUME = "15",
        YEAR = "2023",
        NUMBER = "3",
        BIBSOURCE = "http://www.visionbib.com/bibliography/match607rsic2.html#TT132773"}

% Journal article on remote sensing image captioning (per its title).
% PR is a bare string-macro reference; its definition lies outside this chunk.
@article{bb136783,
  author    = {Li, Y.P. and Zhang, X.R. and Cheng, X. and Tang, X. and Jiao, L.C.},
  title     = {Learning Consensus-Aware Semantic Knowledge for Remote Sensing Image Captioning},
  journal   = PR,
  volume    = {145},
  year      = {2024},
  pages     = {109893},
  bibsource = {http://www.visionbib.com/bibliography/match607rsic2.html#TT132774},
}

% Journal article on remote sensing image captioning for unseen scenes (per its title).
% RS is a bare string-macro reference; its definition lies outside this chunk.
@article{bb136784,
  author    = {Guo, Z. and Liu, H.M. and Ren, Z. and Jiao, L.C. and Gou, S.P. and Li, R.M.},
  title     = {Attribute-Based Learning for Remote Sensing Image Captioning in Unseen Scenes},
  journal   = RS,
  volume    = {17},
  number    = {7},
  year      = {2025},
  pages     = {1237},
  bibsource = {http://www.visionbib.com/bibliography/match607rsic2.html#TT132775},
}

% Journal article on remote sensing image captioning (per its title).
% The mixed-case name DINOv3 is brace-protected so that styles which
% sentence-case titles do not render it as "dinov3".
% RS is a bare string-macro reference; its definition lies outside this chunk.
@article{bb136785,
  author    = {Mehmood, M. and Shahzad, A. and Hussain, F. and Caceres Najarro, L.A. and Usman, M.},
  title     = {Remote Sensing Image Captioning via Self-Supervised {DINOv3} and Transformer Fusion},
  journal   = RS,
  volume    = {18},
  number    = {6},
  year      = {2026},
  pages     = {846},
  bibsource = {http://www.visionbib.com/bibliography/match607rsic2.html#TT132776},
}

% Journal article on remote sensing image captioning (per its title).
% IP is a bare string-macro reference; its definition lies outside this chunk.
@article{bb136786,
  author    = {Zhang, C. and Ren, Z. and Hou, B. and Ning, J.W. and Wang, K. and Li, W.B. and Jiao, L.C.},
  title     = {Scale-Aware Prompting With Optimal Transport for Remote Sensing Image Captioning},
  journal   = IP,
  volume    = {35},
  year      = {2026},
  pages     = {4816-4831},
  bibsource = {http://www.visionbib.com/bibliography/match607rsic2.html#TT132777},
}

% Conference paper on remote sensing image captioning (per its title).
% The model name Hire-MLP is brace-protected so that styles which
% sentence-case titles do not render it as "hire-mlp".
% CVIDL23 is a bare string-macro reference; its definition lies outside this chunk.
@inproceedings{bb136787,
  author    = {Wei, Y.C. and Li, L. and Geng, S.L.},
  title     = {Remote Sensing Image Captioning Using {Hire-MLP}},
  booktitle = CVIDL23,
  year      = {2023},
  pages     = {109-112},
  bibsource = {http://www.visionbib.com/bibliography/match607rsic2.html#TT132778},
}

% Conference paper on remote sensing image captioning (per its title).
% ICPR21 is a bare string-macro reference; its definition lies outside this chunk.
@inproceedings{bb136788,
  author    = {Chavhan, R. and Banerjee, B. and Zhu, X.X. and Chaudhuri, S.},
  title     = {A Novel Actor Dual-Critic Model for Remote Sensing Image Captioning},
  booktitle = ICPR21,
  year      = {2021},
  pages     = {4918-4925},
  bibsource = {http://www.visionbib.com/bibliography/match607rsic2.html#TT132779},
}

% Journal article; shares its title and bibsource anchor with entry bb136790.
% IEICE is a bare string-macro reference; its definition lies outside this chunk.
@article{bb136789,
  author    = {Nakayama, H. and Harada, T. and Kuniyoshi, Y.},
  title     = {Dense Sampling Low-Level Statistics of Local Features},
  journal   = IEICE,
  volume    = {E93-D},
  number    = {7},
  year      = {2010},
  month     = {July},
  pages     = {1727-1736},
  bibsource = {http://www.visionbib.com/bibliography/match607ian2.html#TT132780},
}

% Conference paper; shares its title and bibsource anchor with entry bb136789.
% NOTE(review): the author list here is in the reverse order of bb136789 -
% presumably an export artifact; confirm the published author order.
% CIVR09 is a bare string-macro reference; its definition lies outside this chunk.
@inproceedings{bb136790,
  author    = {Kuniyoshi, Y. and Harada, T. and Nakayama, H.},
  title     = {Dense Sampling Low-Level Statistics of Local Features},
  booktitle = CIVR09,
  year      = {2009},
  pages     = {Article No 17},
  bibsource = {http://www.visionbib.com/bibliography/match607ian2.html#TT132780},
}

% Conference paper on scene categorization (per its title).
% The proper adjective Gaussian is brace-protected so that styles which
% sentence-case titles do not render it as "gaussian".
% CVPR10 is a bare string-macro reference; its definition lies outside this chunk.
@inproceedings{bb136791,
  author    = {Nakayama, H. and Harada, T. and Kuniyoshi, Y.},
  title     = {Global {Gaussian} approach for scene categorization using information geometry},
  booktitle = CVPR10,
  year      = {2010},
  pages     = {2336-2343},
  bibsource = {http://www.visionbib.com/bibliography/match607ian2.html#TT132781},
}

% Conference paper on real-time description and retrieval (per its title).
% The leading name "AI Goggles" is brace-protected so that styles which
% sentence-case titles do not render it as "Ai goggles".
% CRV09 is a bare string-macro reference; its definition lies outside this chunk.
@inproceedings{bb136792,
  author    = {Nakayama, H. and Harada, T. and Kuniyoshi, Y.},
  title     = {{AI Goggles}: Real-time Description and Retrieval in the Real World with Online Learning},
  booktitle = CRV09,
  year      = {2009},
  pages     = {184-191},
  bibsource = {http://www.visionbib.com/bibliography/match607ian2.html#TT132782},
}

% Conference paper on caption generation from images (per its title).
% ICCV15 is a bare string-macro reference; its definition lies outside this chunk.
@inproceedings{bb136793,
  author    = {Ushiku, Y. and Yamaguchi, M. and Mukuta, Y. and Harada, T.},
  title     = {Common Subspace for Model and Similarity: Phrase Learning for Caption Generation from Images},
  booktitle = ICCV15,
  year      = {2015},
  pages     = {2668-2676},
  bibsource = {http://www.visionbib.com/bibliography/match607ian2.html#TT132783},
}

% Conference paper on local image descriptors (per its title).
% The pages value carries an "IV:" prefix - presumably the proceedings
% part/volume; kept verbatim, verify before splitting it into its own field.
% ECCV10 is a bare string-macro reference; its definition lies outside this chunk.
@inproceedings{bb136794,
  author    = {Harada, T. and Nakayama, H. and Kuniyoshi, Y.},
  title     = {Improving Local Descriptors by Embedding Global and Local Spatial Information},
  booktitle = ECCV10,
  year      = {2010},
  pages     = {IV: 736-749},
  bibsource = {http://www.visionbib.com/bibliography/match607ian2.html#TT132784},
}

% Conference paper on image auto-annotation (per its title).
% NOTE(review): pages is the literal placeholder "xx-yy", not real
% pagination - TODO fill in from the proceedings record.
% BMVC10 is a bare string-macro reference; its definition lies outside this chunk.
@inproceedings{bb136795,
  author    = {Nakayama, H. and Harada, T. and Kuniyoshi, Y.},
  title     = {Evaluation of dimensionality reduction methods for image auto-annotation},
  booktitle = BMVC10,
  year      = {2010},
  pages     = {xx-yy},
  bibsource = {http://www.visionbib.com/bibliography/match607ian2.html#TT132785},
}

% Conference paper on image tagging / annotation (per its title).
% ICPR16 is a bare string-macro reference; its definition lies outside this chunk.
@inproceedings{bb136796,
  author    = {Jin, J. and Nakayama, H.},
  title     = {Annotation order matters: Recurrent Image Annotator for arbitrary length image tagging},
  booktitle = ICPR16,
  year      = {2016},
  pages     = {2452-2457},
  bibsource = {http://www.visionbib.com/bibliography/match607ian2.html#TT132786},
}

% Journal article on generating image descriptions (per its title).
% IP is a bare string-macro reference; its definition lies outside this chunk.
@article{bb136797,
  author    = {Tariq, A. and Foroosh, H.},
  title     = {A Context-Driven Extractive Framework for Generating Realistic Image Descriptions},
  journal   = IP,
  volume    = {26},
  number    = {2},
  year      = {2017},
  month     = {February},
  pages     = {619-632},
  bibsource = {http://www.visionbib.com/bibliography/match607ian2.html#TT132787},
}

% Journal article: survey of automatic image annotation (per its title).
% PR is a bare string-macro reference; its definition lies outside this chunk.
@article{bb136798,
  author    = {Cheng, Q. and Zhang, Q. and Fu, P. and Tu, C.H. and Li, S.},
  title     = {A survey and analysis on automatic image annotation},
  journal   = PR,
  volume    = {79},
  year      = {2018},
  pages     = {242-259},
  bibsource = {http://www.visionbib.com/bibliography/match607ian2.html#TT132788},
}

% Journal article on multi-label image annotation (per its title).
% The term VA-Files is brace-protected so that styles which sentence-case
% titles do not render it as "va-files".
% SIViP is a bare string-macro reference; its definition lies outside this chunk.
@article{bb136799,
  author    = {Ben Rejeb, I. and Ouni, S. and Barhoumi, W. and Zagrouba, E.},
  title     = {Fuzzy {VA-Files} for multi-label image annotation based on visual content of regions},
  journal   = SIViP,
  volume    = {12},
  number    = {5},
  year      = {2018},
  month     = {July},
  pages     = {877-884},
  bibsource = {http://www.visionbib.com/bibliography/match607ian2.html#TT132789},
}
Last update: Jun 4, 2026 at 16:38:45