@inproceedings{bb241700,
  author    = {Tripathi, A. and Mishra, A. and Chakraborty, A.},
  title     = {Grounding Scene Graphs on Natural Images via Visio-Lingual Message Passing},
  booktitle = WACV23,
  year      = {2023},
  pages     = {4380--4389},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236616},
}

@inproceedings{bb241701,
  author    = {Byun, J. and Hwang, T. and Fu, J. L. and Moon, T.},
  title     = {{GRIT-VLP}: Grouped Mini-batch Sampling for Efficient Vision and Language Pre-training},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {XIX:395--412},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236617},
}

@inproceedings{bb241702,
  author    = {Yan, S. P. and Hong, L. Q. and Xu, H. and Han, J. H. and Tuytelaars, T. and Li, Z. G. and He, X. M.},
  title     = {Generative Negative Text Replay for Continual Vision-Language Pretraining},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {XXXVI:22--38},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236618},
}

@inproceedings{bb241703,
  author    = {Zhang, Y. F. and Jiang, M. and Zhao, Q.},
  title     = {New Datasets and Models for Contextual Reasoning in Visual Dialog},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {XXXVI:434--451},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236619},
}

@inproceedings{bb241704,
  author    = {Pham, H. A. and Le, T. M. and Le, V. and Phuong, T. M. and Tran, T.},
  title     = {Video Dialog as Conversation About Objects Living in Space-Time},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {XXIX:710--726},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236620},
}

@inproceedings{bb241705,
  author    = {Zhang, Z. F. and Jiang, T. L. and Liu, C. P. and Ji, Y.},
  title     = {Coupling Attention and Convolution for Heuristic Network in Visual Dialog},
  booktitle = ICIP22,
  year      = {2022},
  pages     = {2896--2900},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236621},
}

@inproceedings{bb241706,
  author    = {Zhang, H. Y. and Li, Y. M. and Zhang, Z. F.},
  title     = {Video-Grounded Dialogues with Joint Video and Image Training},
  booktitle = ICIP22,
  year      = {2022},
  pages     = {3903--3907},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236622},
}

@inproceedings{bb241707,
  author    = {Zhang, S. Y. and Jiang, X. Z. and Yang, Z. Q. and Wan, T. and Qin, Z. C.},
  title     = {Reasoning with Multi-Structure Commonsense Knowledge in Visual Dialog},
  booktitle = MULA22,
  year      = {2022},
  pages     = {4599--4608},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236623},
}

@inproceedings{bb241708,
  author    = {Zhu, Y. and Weng, Y. and Zhu, F. D. and Liang, X. D. and Ye, Q. X. and Lu, Y. T. and Jiao, J. B.},
  title     = {Self-Motivated Communication Agent for Real-World Vision-Dialog Navigation},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {1574--1583},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236624},
}

@inproceedings{bb241709,
  author    = {Engin, D. and Schnitzler, F. and Duong, N. Q. K. and Avrithis, Y.},
  title     = {On the hidden treasure of dialog in video question answering},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {2044--2053},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236625},
}

@inproceedings{bb241710,
  author    = {Matsumori, S. and Shingyouchi, K. and Abe, Y. and Fukuchi, Y. and Sugiura, K. and Imai, M.},
  title     = {Unified Questioner Transformer for Descriptive Question Generation in Goal-Oriented Visual Dialogue},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {1878--1887},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236626},
}

@inproceedings{bb241711,
  author    = {Tu, T. and Ping, Q. and Thattai, G. and Tur, G. and Natarajan, P.},
  title     = {Learning Better Visual Dialog Agents with Pretrained Visual-Linguistic Representation},
  booktitle = CVPR21,
  year      = {2021},
  pages     = {5618--5627},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236627},
}

@inproceedings{bb241712,
  author    = {Jiang, T. L. and Ji, Y. and Liu, C. P.},
  title     = {Integrating Historical States and Co-attention Mechanism for Visual Dialog},
  booktitle = ICPR21,
  year      = {2021},
  pages     = {2041--2048},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236628},
}

@inproceedings{bb241713,
  author    = {Nguyen, V. Q. and Suganuma, M. and Okatani, T.},
  title     = {Efficient Attention Mechanism for Visual Dialog that Can Handle All the Interactions Between Multiple Inputs},
  booktitle = ECCV20,
  year      = {2020},
  pages     = {XXIV:223--240},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236629},
}

@inproceedings{bb241714,
  author    = {Murahari, V. and Batra, D. and Parikh, D. and Das, A.},
  title     = {Large-scale Pretraining for Visual Dialog: A Simple State-of-the-art Baseline},
  booktitle = ECCV20,
  year      = {2020},
  pages     = {XVIII:336--352},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236630},
}

@inproceedings{bb241715,
  author    = {Zhu, Y. and Wu, Y. and Yang, Y. and Yan, Y.},
  title     = {Describing Unseen Videos via Multi-Modal Cooperative Dialog Agents},
  booktitle = ECCV20,
  year      = {2020},
  pages     = {XXIII:153--169},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236631},
}

@inproceedings{bb241716,
  author    = {Qi, J. and Niu, Y. and Huang, J. and Zhang, H.},
  title     = {Two Causal Principles for Improving Visual Dialog},
  booktitle = CVPR20,
  year      = {2020},
  pages     = {10857--10866},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236632},
}

@inproceedings{bb241717,
  author    = {Abbasnejad, E. and Teney, D. and Parvaneh, A. and Shi, J. and van den Hengel, A. J.},
  title     = {Counterfactual Vision and Language Learning},
  booktitle = CVPR20,
  year      = {2020},
  pages     = {10041--10051},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236633},
}

@inproceedings{bb241718,
  author    = {Zhu, Y. and Zhu, F. and Zhan, Z. and Lin, B. and Jiao, J. and Chang, X. and Liang, X.},
  title     = {Vision-Dialog Navigation by Exploring Cross-Modal Memory},
  booktitle = CVPR20,
  year      = {2020},
  pages     = {10727--10736},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236634},
}

@inproceedings{bb241719,
  author    = {Yang, T. and Zha, Z. and Zhang, H.},
  title     = {Making History Matter: History-Advantage Sequence Training for Visual Dialog},
  booktitle = ICCV19,
  year      = {2019},
  pages     = {2561--2569},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236635},
}

@inproceedings{bb241720,
  author    = {Guo, D. and Xu, C. and Tao, D. C.},
  title     = {Image-Question-Answer Synergistic Network for Visual Dialog},
  booktitle = CVPR19,
  year      = {2019},
  pages     = {10426--10435},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236636},
}

@inproceedings{bb241721,
  author    = {Zheng, Z. L. and Wang, W. G. and Qi, S. Y. and Zhu, S. C.},
  title     = {Reasoning Visual Dialogs With Structural and Partial Observations},
  booktitle = CVPR19,
  year      = {2019},
  pages     = {6662--6671},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236637},
}

@inproceedings{bb241722,
  author    = {Bani, G. and Belli, D. and Dagan, G. and Geenen, A. and Skliar, A. and Venkatesh, A. and Baumgartner, T. and Bruni, E. and Fernandez, R.},
  title     = {Adding Object Detection Skills to Visual Dialogue Agents},
  booktitle = VL18,
  year      = {2018},
  pages     = {IV:180--187},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236638},
}

@inproceedings{bb241723,
  author    = {Yang, M. and Yang, N. S. R. and Zhang, K. and Tao, J.},
  title     = {Self-Talk: Responses to Users' Opinions and Challenges in Human Computer Dialog},
  booktitle = ICPR18,
  year      = {2018},
  pages     = {2839--2844},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236639},
}

@inproceedings{bb241724,
  author    = {Jain, U. and Schwing, A. and Lazebnik, S.},
  title     = {Two Can Play This Game: Visual Dialog with Discriminative Question Generation and Answering},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {5754--5763},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236640},
}

@inproceedings{bb241725,
  author    = {Dokania, P. K. and Torr, P. H. S. and Siddharth, N. and Massiceti, D.},
  title     = {{FLIPDIAL}: A Generative Model for Two-Way Visual Dialogue},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {6097--6105},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236641},
}

@inproceedings{bb241726,
  author    = {Wu, Q. and Wang, P. and Shen, C. and Reid, I. D. and van den Hengel, A. J.},
  title     = {Are You Talking to Me? Reasoned Visual Dialog Generation Through Adversarial Learning},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {6106--6115},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236642},
}

@inproceedings{bb241727,
  author    = {Kottur, S. and Moura, J. M. F. and Parikh, D. and Batra, D. and Rohrbach, M.},
  title     = {Visual Coreference Resolution in Visual Dialog Using Neural Module Networks},
  booktitle = ECCV18,
  year      = {2018},
  pages     = {XV: 160--178},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236643},
}

@inproceedings{bb241728,
  author    = {Strub, F. and Seurin, M. and Perez, E. and de Vries, H. and Mary, J. and Preux, P. and Courville, A. and Pietquin, O.},
  title     = {Visual Reasoning with Multi-hop Feature Modulation},
  booktitle = ECCV18,
  year      = {2018},
  pages     = {VI: 808--831},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236644},
}

@inproceedings{bb241729,
  author    = {Das, A. and Kottur, S. and Moura, J. M. F. and Lee, S. and Batra, D.},
  title     = {Learning Cooperative Visual Dialog Agents with Deep Reinforcement Learning},
  booktitle = ICCV17,
  year      = {2017},
  pages     = {2970--2979},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236645},
}

@inproceedings{bb241730,
  author    = {de Vries, H. and Strub, F. and Chandar, S. and Pietquin, O. and Larochelle, H. and Courville, A.},
  title     = {{GuessWhat?!} Visual Object Discovery through Multi-modal Dialogue},
  booktitle = CVPR17,
  year      = {2017},
  pages     = {4466--4475},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236646},
}

@inproceedings{bb241731,
  author    = {Nam, H. and Ha, J. W. and Kim, J.},
  title     = {Dual Attention Networks for Multimodal Reasoning and Matching},
  booktitle = CVPR17,
  year      = {2017},
  pages     = {2156--2164},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236647},
}

@inproceedings{bb241732,
  author    = {Johnson, J. and Hariharan, B. and van der Maaten, L. and Hoffman, J. and Fei-Fei, L. and Zitnick, C. L. and Girshick, R.},
  title     = {Inferring and Executing Programs for Visual Reasoning},
  booktitle = ICCV17,
  year      = {2017},
  pages     = {3008--3017},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236648},
}

@inproceedings{bb241733,
  author    = {Johnson, J. and Hariharan, B. and van der Maaten, L. and Fei-Fei, L. and Zitnick, C. L. and Girshick, R.},
  title     = {{CLEVR}: A Diagnostic Dataset for Compositional Language and Elementary Visual Reasoning},
  booktitle = CVPR17,
  year      = {2017},
  pages     = {1988--1997},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236649},
}

@inproceedings{bb241734,
  author    = {Das, A. and Kottur, S. and Gupta, K. and Singh, A. and Yadav, D. and Moura, J. M. F. and Parikh, D. and Batra, D.},
  title     = {Visual Dialog},
  booktitle = CVPR17,
  year      = {2017},
  pages     = {1080--1089},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vdi3.html#TT236650},
}

@article{bb241735,
  author    = {Tamaazousti, Y. and Le Borgne, H. and Popescu, A. and Gadeski, E. and Ginsca, A. and Hudelot, C.},
  title     = {Vision-language integration using constrained local semantic features},
  journal   = CVIU,
  volume    = {163},
  year      = {2017},
  number    = {1},
  pages     = {41--57},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236651},
}

@article{bb241736,
  author    = {Zhu, Y. Q. and Li, X. Y. and Zheng, M. and Yang, J. H. and Wang, Z. H. and Guo, X. Q. and Chai, Z. F. and Yuan, Y. C. and Jiang, S. Q.},
  title     = {Focus and Align: Learning Tube Tokens for Video-Language Pre-Training},
  journal   = MultMed,
  volume    = {25},
  year      = {2023},
  pages     = {8036--8050},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236652},
}

@article{bb241737,
  author    = {Wu, W. H. and Sun, Z. and Song, Y. X. and Wang, J. D. and Ouyang, W. L.},
  title     = {Transferring Vision-Language Models for Visual Recognition: A Classifier Perspective},
  journal   = IJCV,
  volume    = {132},
  year      = {2024},
  number    = {2},
  month     = feb,
  pages     = {392--409},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236653},
}

@article{bb241738,
  author    = {Ming, Y. F. and Li, Y. X.},
  title     = {How Does Fine-Tuning Impact Out-of-Distribution Detection for Vision-Language Models?},
  journal   = IJCV,
  volume    = {132},
  year      = {2024},
  number    = {2},
  month     = feb,
  pages     = {596--609},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236654},
}

@article{bb241739,
  author    = {Zhao, C. R. and Wang, Y. and Jiang, X. Y. and Shen, Y. F. and Song, K. and Li, D. S. and Miao, D. Q.},
  title     = {Learning Domain Invariant Prompt for Vision-Language Models},
  journal   = IP,
  volume    = {33},
  year      = {2024},
  pages     = {1348--1360},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236655},
}

@article{bb241740,
  author    = {Yang, X. F. and Liu, F. and Lin, G. S.},
  title     = {Neural Logic Vision Language Explainer},
  journal   = MultMed,
  volume    = {26},
  year      = {2024},
  pages     = {3331--3340},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236656},
}

@article{bb241741,
  author    = {Wang, Y. D. and Yu, Z. O. and Wang, J. D. and Heng, Q. and Chen, H. and Ye, W. and Xie, R. and Xie, X. and Zhang, S. K.},
  title     = {Exploring Vision-Language Models for Imbalanced Learning},
  journal   = IJCV,
  volume    = {132},
  year      = {2024},
  number    = {1},
  month     = jan,
  pages     = {224--237},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236657},
}

@article{bb241742,
  author    = {Zeng, Y. and Zhang, X. and Li, H. and Wang, J. W. and Zhang, J. P. and Zhou, W.},
  title     = {{X2-VLM}: All-in-One Pre-Trained Model for Vision-Language Tasks},
  journal   = PAMI,
  volume    = {46},
  year      = {2024},
  number    = {5},
  month     = may,
  pages     = {3156--3168},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236658},
}

@article{bb241743,
  author    = {Kong, D. and Kong, K. and Kang, S. J.},
  title     = {Image clustering using generated text centroids},
  journal   = SP:IC,
  volume    = {125},
  year      = {2024},
  pages     = {117128},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236659},
}

@article{bb241744,
  author    = {Chen, X. Y. and Yang, J. H. and Chen, S. and Wang, L. and Jiang, M. and Zhao, Q.},
  title     = {Every Problem, Every Step, All in Focus: Learning to Solve Vision-Language Problems With Integrated Attention},
  journal   = PAMI,
  volume    = {46},
  year      = {2024},
  number    = {7},
  month     = jul,
  pages     = {4720--4735},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236660},
}

@article{bb241745,
  author    = {Menon, S. and Chandratreya, I. P. and Vondrick, C.},
  title     = {Task Bias in Contrastive Vision-Language Models},
  journal   = IJCV,
  volume    = {132},
  year      = {2024},
  number    = {6},
  month     = jun,
  pages     = {2026--2040},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236661},
}

@article{bb241746,
  author    = {Zhang, J. Y. and Huang, J. X. and Jin, S. and Lu, S. J.},
  title     = {Vision-Language Models for Vision Tasks: A Survey},
  journal   = PAMI,
  volume    = {46},
  year      = {2024},
  number    = {8},
  month     = aug,
  pages     = {5625--5644},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236662},
}

@article{bb241747,
  author    = {Dong, M. P. and Li, F. and Li, Z. B. and Liu, X.},
  title     = {Cluster prototype earth mover's distance adapters and alignment-guided prompt learning for vision-language models},
  journal   = PR,
  volume    = {156},
  year      = {2024},
  pages     = {110861},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236663},
}

@article{bb241748,
  author    = {Liu, Y. and Pan, Y. and Yin, J.},
  title     = {Enhancing Multi-Label Deep Hashing for Image and Audio With Joint Internal Global Loss Constraints and Large Vision-Language Model},
  journal   = SPLetters,
  volume    = {31},
  year      = {2024},
  pages     = {2550--2554},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236664},
}

@article{bb241749,
  author    = {Zhan, C. L. and Zhang, Y. F. and Lin, Y. and Wang, G. A. and Wang, H. W.},
  title     = {{UniDCP}: Unifying Multiple Medical Vision-Language Tasks via Dynamic Cross-Modal Learnable Prompts},
  journal   = MultMed,
  volume    = {26},
  year      = {2024},
  pages     = {9736--9748},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236665},
}

@article{bb241750,
  author    = {Su, K. and Zhang, X. X. and Zhang, S. Y. and Zhu, J. and Zhang, B.},
  title     = {To Boost Zero-Shot Generalization for Embodied Reasoning With Vision-Language Pre-Training},
  journal   = IP,
  volume    = {33},
  year      = {2024},
  pages     = {5370--5381},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236666},
}

@article{bb241751,
  author    = {Xuan, S. Y. and Yang, M. and Zhang, S. L.},
  title     = {Adapting Vision-Language Models via Learning to Inject Knowledge},
  journal   = IP,
  volume    = {33},
  year      = {2024},
  pages     = {5798--5809},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236667},
}

@article{bb241752,
  author    = {Zhou, W. and Zhou, Z. H.},
  title     = {Unsupervised Domain Adaption Harnessing Vision-Language Pre-Training},
  journal   = CirSysVideo,
  volume    = {34},
  year      = {2024},
  number    = {9},
  month     = sep,
  pages     = {8201--8214},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236668},
}

@article{bb241753,
  author    = {Guo, M. H. and Zhang, Y. and Mu, T. J. and Huang, S. X. and Hu, S. M.},
  title     = {Tuning Vision-Language Models With Multiple Prototypes Clustering},
  journal   = PAMI,
  volume    = {46},
  year      = {2024},
  number    = {12},
  month     = dec,
  pages     = {11186--11199},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236669},
}

@article{bb241754,
  author    = {Sun, B. and Wu, Z. C. and Zhang, H. and He, J.},
  title     = {{VTPL}: Visual and text prompt learning for visual-language models},
  journal   = JVCIR,
  volume    = {104},
  year      = {2024},
  pages     = {104280},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236670},
}

@article{bb241755,
  author    = {Liu, L. C. and Wang, N. N. and Liu, D. and Yang, X. and Gao, X. B. and Liu, T. L.},
  title     = {Towards Specific Domain Prompt Learning via Improved Text Label Optimization},
  journal   = MultMed,
  volume    = {26},
  year      = {2024},
  pages     = {10805--10815},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236671},
}

@article{bb241756,
  author    = {Liu, X. and Wu, J. and Yang, W. F. and Zhou, X. and Zhang, T. Z.},
  title     = {Multi-Modal Attribute Prompting for Vision-Language Models},
  journal   = CirSysVideo,
  volume    = {34},
  year      = {2024},
  number    = {11},
  month     = nov,
  pages     = {11579--11591},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236672},
}

@article{bb241757,
  author    = {Jiang, H. J. and Zhang, J. K. and Huang, R. and Ge, C. J. and Ni, Z. and Song, S. and Huang, G.},
  title     = {Cross-modal adapter for vision-language retrieval},
  journal   = PR,
  volume    = {159},
  year      = {2025},
  pages     = {111144},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236673},
}

@article{bb241758,
  author    = {Yellinek, N. and Karlinsky, L. and Giryes, R.},
  title     = {{3VL}: Using Trees to Improve Vision-Language Models' Interpretability},
  journal   = IP,
  volume    = {34},
  year      = {2025},
  pages     = {495--509},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236674},
}

@article{bb241759,
  author    = {Yang, L. F. and Li, X. and Wang, Y. Z. and Wang, X. L. and Yang, J.},
  title     = {Fine-Grained Visual Text Prompting},
  journal   = PAMI,
  volume    = {47},
  year      = {2025},
  number    = {3},
  month     = mar,
  pages     = {1594--1609},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236675},
}

@article{bb241760,
  author    = {Wang, F. and Han, Z. Y. and Liu, X. and Yin, Y. L. and Gao, X.},
  title     = {{CTPT}: Continual Test-time Prompt Tuning for vision-language models},
  journal   = PR,
  volume    = {161},
  year      = {2025},
  pages     = {111300},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236676},
}

@article{bb241761,
  author    = {Liang, N. and Liu, Y.},
  title     = {{DPO}: Discrete Prompt Optimization for Vision-Language Models},
  journal   = SPLetters,
  volume    = {32},
  year      = {2025},
  pages     = {671--675},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236677},
}

@article{bb241762,
  author    = {Ondeng, O. and Ouma, H. and Akuon, P.},
  title     = {Enriching visual feature representations for vision-language tasks using spectral transforms},
  journal   = IVC,
  volume    = {154},
  year      = {2025},
  pages     = {105390},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236678},
}

@article{bb241763,
  author    = {Xu, C. and Zhu, Y. H. and Shen, H. C. and Chen, B. H. and Liao, Y. X. and Chen, X. X. and Wang, L. M.},
  title     = {Progressive Visual Prompt Learning with Contrastive Feature Re-formation},
  journal   = IJCV,
  volume    = {133},
  year      = {2025},
  number    = {2},
  month     = feb,
  pages     = {511--526},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236679},
}

% NOTE(review): "Leaning" in the title may be a typo for "Learning"; it is kept
% verbatim here -- confirm against the published title before changing it.
@article{bb241764,
  author    = {Long, S. and Zhao, Z. and Yuan, J. K. and Tan, Z. C. and Liu, J. J. and Feng, J. Y. and Wang, S. S. and Wang, J. D.},
  title     = {Mutual Prompt Leaning for Vision Language Models},
  journal   = IJCV,
  volume    = {133},
  year      = {2025},
  number    = {3},
  month     = mar,
  pages     = {1258--1276},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236680},
}

@article{bb241765,
  author    = {Yin, J. H. and Zhang, X. Y. and Wu, L. and Wang, X. J.},
  title     = {Context-aware prompt learning for test-time vision recognition with frozen vision-language model},
  journal   = PR,
  volume    = {162},
  year      = {2025},
  pages     = {111359},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236681},
}

@article{bb241766,
  author    = {Chen, Y. and Zhang, S. and Sun, Y. and Yang, J. and Liang, W. J. and Wang, H. R.},
  title     = {Artificial-Spiking Hierarchical Networks for Vision-Language Representation Learning},
  journal   = CirSysVideo,
  volume    = {35},
  year      = {2025},
  number    = {3},
  month     = mar,
  pages     = {2768--2781},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236682},
}

@article{bb241767,
  author    = {Li, B. Z. and Wang, S. R. and Wang, S. Q. and Ye, Y.},
  title     = {High Efficiency Image Compression for Large Visual-Language Models},
  journal   = CirSysVideo,
  volume    = {35},
  year      = {2025},
  number    = {3},
  month     = mar,
  pages     = {2870--2880},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236683},
}

@article{bb241768,
  author    = {Liu, L. C. and Wang, N. N. and Zhou, D. W. and Liu, D. C. and Yang, X. and Gao, X. B. and Liu, T. L.},
  title     = {Generalizable Prompt Learning via Gradient Constrained Sharpness-Aware Minimization},
  journal   = MultMed,
  volume    = {27},
  year      = {2025},
  pages     = {1100--1113},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236684},
}

@article{bb241769,
  author    = {Lu, Z. and Bai, J. and Li, X. and Xiao, Z. and Wang, X. C.},
  title     = {Task-to-Instance Prompt Learning for Vision-Language Models at Test Time},
  journal   = IP,
  volume    = {34},
  year      = {2025},
  pages     = {1908--1920},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236685},
}

@article{bb241770,
  author    = {Fang, Z. Q. and Yuan, Z. H. and Li, Z. Y. and Chen, J. Y. and Kuang, K. and Yao, Y. F. and Wu, F.},
  title     = {Cross-Modality Image Interpretation via Concept Decomposition Vector of Visual-Language Models},
  journal   = CirSysVideo,
  volume    = {35},
  year      = {2025},
  number    = {4},
  month     = apr,
  pages     = {3024--3038},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236686},
}

@article{bb241771,
  author    = {Ramzi, E. and Audebert, N. and Rambour, C. and Araujo, A. and Bitot, X. and Thome, N.},
  title     = {Optimization of Rank Losses for Image Retrieval},
  journal   = PAMI,
  volume    = {47},
  year      = {2025},
  number    = {6},
  month     = jun,
  pages     = {4317--4329},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236687},
}

@inproceedings{bb241772,
  author    = {Lafon, M. and Ramzi, E. and Rambour, C. and Audebert, N. and Thome, N.},
  title     = {Gallop: Learning Global and Local Prompts for Vision-language Models},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {LXI: 264--282},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236688},
}

@article{bb241773,
  author    = {Liu, K. C. and Wang, C. Q. and Han, X. D. and Liu, Y. J. and Chen, B. Q.},
  title     = {Generalized Robot Vision-Language Model via Linguistic Foreground-Aware Contrast},
  journal   = IJCV,
  volume    = {133},
  year      = {2025},
  number    = {6},
  month     = jun,
  pages     = {3481--3518},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236689},
}

% NOTE(review): single-page item in the issue after bb241773, carrying the same
% title and BIBSOURCE anchor but with the author list in reverse order --
% presumably a correction/erratum notice. Confirm the author order against the
% publisher record and consider marking it with a note field.
@article{bb241774,
  author    = {Chen, B. Q. and Liu, Y. J. and Han, X. D. and Wang, C. Q. and Liu, K. C.},
  title     = {Generalized Robot Vision-Language Model via Linguistic Foreground-Aware Contrast},
  journal   = IJCV,
  volume    = {133},
  year      = {2025},
  number    = {7},
  month     = jul,
  pages     = {4971},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236689},
}

@article{bb241775,
  author    = {Yang, L. X. and Zhang, R. Y. and Chen, Q. and Xie, X. H.},
  title     = {Learning with Enriched Inductive Biases for Vision-Language Models},
  journal   = IJCV,
  volume    = {133},
  year      = {2025},
  number    = {6},
  month     = jun,
  pages     = {3746--3761},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236690},
}

@article{bb241776,
  author    = {Yao, H. T. and Zhang, R. and Lyu, H. H. and Zhang, Y. D. and Xu, C. S.},
  title     = {Bi-Modality Individual-Aware Prompt Tuning for Visual-Language Model},
  journal   = PAMI,
  volume    = {47},
  year      = {2025},
  number    = {8},
  month     = aug,
  pages     = {6352--6368},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236691},
}

@inproceedings{bb241777,
  author    = {Yao, H. T. and Zhang, R. and Xu, C. S.},
  title     = {{TCP}: Textual-Based Class-Aware Prompt Tuning for Visual-Language Model},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {23438--23448},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236692},
}

@article{bb241778,
  author    = {Hao, Z. W. and Guo, J. Y. and Shen, L. and Luo, Y. and Hu, H. and Wen, Y. G.},
  title     = {{ADEM-VL}: Adaptive and Embedded Fusion for Efficient Vision-Language Tuning},
  journal   = IJCV,
  volume    = {133},
  year      = {2025},
  number    = {8},
  month     = aug,
  pages     = {5527--5543},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236693},
}

@article{bb241779,
  author    = {Zeng, R. F. and Yang, Z. P. and Yu, R. Y. and Zhang, Y. G.},
  title     = {Supplementary Prompt Learning for Vision-Language Models},
  journal   = IJCV,
  volume    = {133},
  year      = {2025},
  number    = {8},
  month     = aug,
  pages     = {5822--5839},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236694},
}

@article{bb241780,
  author    = {Liu, K. C. and Liu, Y. J. and Chen, B. Q.},
  title     = {General {3D} Vision-Language Model With Fast Rendering and Pre-Training Vision-Language Alignment},
  journal   = PAMI,
  volume    = {47},
  year      = {2025},
  number    = {9},
  month     = sep,
  pages     = {7352--7368},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236695},
}

@article{bb241781,
  author    = {Gao, Y.S. and Zhu, Z.X. and Wang, S.S.},
  title     = {Mixture of coarse and fine-grained prompt tuning for vision-language model},
  journal   = PR,
  volume    = {170},
  pages     = {112074},
  year      = {2026},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236696}
}

@article{bb241782,
  author    = {Hao, F.S. and Liu, L. and Wu, F.X. and Zhang, Q.S. and Cheng, J.},
  title     = {Textual Embeddings are Good Class-Aware Visual Prompts for Adapting Vision-Language Models},
  journal   = SPLetters,
  volume    = {32},
  pages     = {2992-2996},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236697}
}

@article{bb241783,
  author    = {Liu, J. and Lu, Z.Q. and Luo, H. and Lu, Z.M. and Zheng, Y.M.},
  title     = {Progressive Multi-Prompt Learning for Vision-Language Models},
  journal   = CirSysVideo,
  volume    = {35},
  number    = {10},
  pages     = {9562-9574},
  month     = {October},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236698}
}

% The mixed-case model name CM-MaskSD is brace-protected in the title so that
% bibliography styles which apply sentence casing do not render it as "Cm-masksd".
@article{bb241784,
        AUTHOR = "Wang, W.X. and He, X.J. and Zhang, Y. and Guo, L.T. and Shen, J.C. and Li, J.Y. and Liu, J.",
        TITLE = "{CM-MaskSD}: Cross-Modality Masked Self-Distillation for Referring
Image Segmentation",
        JOURNAL = MultMed,
        VOLUME = "26",
        YEAR = "2024",
        PAGES = "6906-6916",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236699"}

@article{bb241785,
  author    = {Zhang, E. and Zhu, B. and Chen, Y.Y. and Miao, Q.H. and Tang, M. and Wang, J.Q.},
  title     = {Optimization of Prompt Learning via Multi-Knowledge Representation for Vision-Language Models},
  journal   = MultMed,
  volume    = {27},
  pages     = {7557-7569},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236700}
}

@article{bb241786,
  author    = {Park, K.Y. and An, S. and Lee, Y.J. and Kim, D.H.},
  title     = {Learning Compositionality from Multifaceted Synthetic Data for Language-based Object Detection},
  journal   = IJCV,
  volume    = {133},
  number    = {11},
  pages     = {7873-7896},
  month     = {November},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236701}
}

% PAGES uses the file's "VOLUME:first-last" form for multi-volume proceedings
% (e.g. "XIX:395-412"); the stray space after the colon has been removed.
@inproceedings{bb241787,
        AUTHOR = "Park, K.Y. and Saito, K. and Kim, D.H.",
        TITLE = "Weak-to-strong Compositional Learning from Generative Models for
Language-based Object Detection",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "XXIII:1-19",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236702"}

@article{bb241788,
  author    = {Sarto, S. and Moratelli, N. and Cornia, M. and Baraldi, L. and Cucchiara, R.},
  title     = {Positive-Augmented Contrastive Learning for Vision-and-Language Evaluation and Training},
  journal   = IJCV,
  volume    = {133},
  number    = {11},
  pages     = {7647-7671},
  month     = {November},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236703}
}

@inproceedings{bb241789,
  author    = {Stefanini, M. and Cornia, M. and Baraldi, L. and Cucchiara, R.},
  title     = {A Novel Attention-based Aggregation Function to Combine Vision and Language},
  booktitle = ICPR21,
  pages     = {1212-1219},
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236704}
}

@article{bb241790,
  author    = {Liu, L.C. and Wang, N.N. and Chen, C. and Liu, D. and Yang, X. and Gao, X.B. and Liu, T.L.},
  title     = {Frequency-Based Comprehensive Prompt Learning for Vision-Language Models},
  journal   = PAMI,
  volume    = {47},
  number    = {12},
  pages     = {11974-11989},
  month     = {December},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236705}
}

@article{bb241791,
  author    = {Yang, X. and Zhong, X.Y. and Wang, N.N.},
  title     = {Distribution-Aware Prompt Learning for Vision-Language Models With Dynamic Boundary Prototype},
  journal   = IP,
  volume    = {35},
  pages     = {3537-3549},
  year      = {2026},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236706}
}

@article{bb241792,
  author    = {Li, J.C. and Gao, M. and Tang, S.L. and Wei, L.H. and Xiao, J. and Wu, F. and Hong, R.C. and Wang, M. and Tian, Q.},
  title     = {Structure-Induced Gradient Regulation for Generalizable Vision-Language Models},
  journal   = PAMI,
  volume    = {48},
  number    = {1},
  pages     = {219-235},
  month     = {January},
  year      = {2026},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236707}
}

@inproceedings{bb241793,
  author    = {Li, J.C. and Gao, M. and Wei, L.H. and Tang, S.L. and Zhang, W.Q. and Li, M.Z. and Ji, W. and Tian, Q. and Chua, T.S. and Zhuang, Y.T.},
  title     = {Gradient-Regulated Meta-Prompt Learning for Generalizable Vision-Language Models},
  booktitle = ICCV23,
  pages     = {2551-2562},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236708}
}

% The benchmark name GenderBias-VL is brace-protected in the title so that
% bibliography styles which apply sentence casing do not render it as "Genderbias-vl".
@article{bb241794,
        AUTHOR = "Xiao, Y.S. and Liu, X.L. and Cheng, Q.J. and Yin, Z.F. and Liang, S.Y. and Li, J.P. and Shao, J. and Liu, A.S. and Tao, D.C.",
        TITLE = "{GenderBias-VL}: Benchmarking Gender Bias in Vision Language Models via
Counterfactual Probing",
        JOURNAL = IJCV,
        VOLUME = "133",
        YEAR = "2025",
        NUMBER = "12",
        MONTH = "December",
        PAGES = "8332-8355",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236709"}

@article{bb241795,
  author    = {Chen, T.Y. and Ai, J.L.},
  title     = {Hierarchical Prompt Engineering for Remote Sensing Scene Understanding with Large Vision-Language Models},
  journal   = RS,
  volume    = {17},
  number    = {22},
  pages     = {3727},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236710}
}

% The acronyms VLMs and MLLMs are brace-protected in the title so that
% bibliography styles which apply sentence casing do not render them as
% "vlms" and "mllms".
@article{bb241796,
        AUTHOR = "Xu, X. and Qin, L. and Che, W. and Kan, M.Y.",
        TITLE = "Manager: Aggregating Insights From Unimodal Experts in Two-Tower {VLMs}
and {MLLMs}",
        JOURNAL = CirSysVideo,
        VOLUME = "35",
        YEAR = "2025",
        NUMBER = "12",
        MONTH = "December",
        PAGES = "12278-12291",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236711"}

@article{bb241797,
  author    = {Kim, G. and Kim, S. and Lee, S.},
  title     = {Decoupling augmentation bias in prompt learning for vision-language models},
  journal   = PR,
  volume    = {172},
  pages     = {112630},
  year      = {2026},
  bibsource = {http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236712}
}

% The acronym AAPL is brace-protected in the title so that bibliography styles
% which apply sentence casing do not render it as "Aapl".
@inproceedings{bb241798,
        AUTHOR = "Kim, G. and Kim, S. and Lee, S.",
        TITLE = "{AAPL}: Adding Attributes to Prompt Learning for Vision-Language Models",
        BOOKTITLE = Prompting24,
        YEAR = "2024",
        PAGES = "1572-1582",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236713"}

% The model name MMRL++ is brace-protected in the title so that bibliography
% styles which apply sentence casing do not render it as "Mmrl++".
@article{bb241799,
        AUTHOR = "Guo, Y.C. and Gu, X.D.",
        TITLE = "{MMRL++}: Parameter-Efficient and Interaction-Aware Representation
Learning for Vision-Language Models",
        JOURNAL = IJCV,
        VOLUME = "134",
        YEAR = "2026",
        NUMBER = "1",
        MONTH = "January",
        PAGES = "11",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236714"}

Last update: Apr 23, 2026 at 15:05:02