Keith Price Bibliography Bibtex Entry (ANCHOR 241400 URL http://dx.doi.org/10.1109/ICCV51070.2023.01427 TYPE CONFERENCE PAGES 15522-15532 YEAR 2023 MONTH NIL BIBSOURCE http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236316 VOLUME NIL JOURNAL ICCV23 AUTHOR Zhang, H. and Liu, D. and Lv, Z. and Su, B. and Tao, D.C. TITLE Exploring Temporal Concurrency for Video-Language Representation Learning)


@inproceedings{bb241400,
        AUTHOR = "Zhang, H. and Liu, D. and Lv, Z. and Su, B. and Tao, D.C.",
        TITLE = "Exploring Temporal Concurrency for Video-Language Representation
Learning",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "15522--15532",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236316"}

@inproceedings{bb241401,
        AUTHOR = "Shukor, M. and Dancette, C. and Cord, M.",
        TITLE = "{eP-ALM}: Efficient Perceptual Augmentation of Language Models",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "21999--22012",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236317"}

@inproceedings{bb241402,
        AUTHOR = "Schulter, S. and Kumar, B.G.V. and Suh, Y.M. and Dafnis, K.M. and Zhang, Z.X. and Zhao, S.Y. and Metaxas, D.N.",
        TITLE = "{OmniLabel}: A Challenging Benchmark for Language-Based Object
Detection",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "11919--11928",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236318"}

@inproceedings{bb241403,
        AUTHOR = "Chen, Z.L. and Huang, X. and Guan, Q.L. and Lin, L. and Luo, W.Q.",
        TITLE = "A Retrospect to Multi-prompt Learning across Vision and Language",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "22133--22144",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236319"}

@inproceedings{bb241404,
        AUTHOR = "Derakhshani, M.M. and Sanchez, E. and Bulat, A. and da Costa, V.G.T. and Snoek, C.G.M. and Tzimiropoulos, G. and Martinez, B.",
        TITLE = "Bayesian Prompt Learning for Image-Language Model Generalization",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "15191--15200",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236320"}

@inproceedings{bb241405,
        AUTHOR = "Lin, W. and Mirza, M.J. and Doveh, S. and Feris, R. and Giryes, R. and Hochreiter, S. and Karlinsky, L.",
        TITLE = "Comparison Visual Instruction Tuning",
        BOOKTITLE = Reasoning25,
        YEAR = "2025",
        PAGES = "2964--2974",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236321"}

@inproceedings{bb241406,
        AUTHOR = "Cascante Bonilla, P. and Shehada, K. and Smith, J.S. and Doveh, S. and Kim, D.H. and Panda, R. and Varol, G. and Oliva, A. and Ordonez, V. and Feris, R.S. and Karlinsky, L.",
        TITLE = "Going Beyond Nouns With Vision \& Language Models Using Synthetic
Data",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "20098--20108",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236322"}

@inproceedings{bb241407,
        AUTHOR = "Upadhyay, U. and Karthik, S. and Mancini, M. and Akata, Z.",
        TITLE = "{ProbVLM}: Probabilistic Adapter for Frozen Vison-Language Models",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "1899--1910",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236323"}

@inproceedings{bb241408,
        AUTHOR = "Bitton Guetta, N. and Bitton, Y. and Hessel, J. and Schmidt, L. and Elovici, Y. and Stanovsky, G. and Schwartz, R.",
        TITLE = "Breaking Common Sense: {WHOOPS!} {A} Vision-and-Language Benchmark of
Synthetic and Compositional Images",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "2616--2627",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236324"}

@inproceedings{bb241409,
        AUTHOR = "Hu, Z.Y. and Li, Y.Y. and Lyu, M.R. and Wang, L.W.",
        TITLE = "{VL-PET}: Vision-and-Language Parameter-Efficient Tuning via
Granularity Control",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "2998--3008",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236325"}

@inproceedings{bb241410,
        AUTHOR = "Slyman, E. and Kahng, M. and Lee, S.",
        TITLE = "{VLSlice}: Interactive Vision-and-Language Slice Discovery",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "15245--15255",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236326"}

@inproceedings{bb241411,
        AUTHOR = "Najibi, M. and Ji, J.W. and Zhou, Y. and Qi, C.R. and Yan, X.C. and Ettinger, S. and Anguelov, D.",
        TITLE = "Unsupervised {3D} Perception with {2D} Vision-Language Distillation for
Autonomous Driving",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "8568--8578",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236327"}

@inproceedings{bb241412,
        AUTHOR = "Xu, H. and Xie, S. and Huang, P.Y. and Yu, L.C. and Howes, R. and Ghosh, G. and Zettlemoyer, L. and Feichtenhofer, C.",
        TITLE = "{CiT}: Curation in Training for Effective Vision-Language Data",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "15134--15143",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236328"}

@inproceedings{bb241413,
        AUTHOR = "Trager, M. and Perera, P. and Zancato, L. and Achille, A. and Bhatia, P. and Soatto, S.",
        TITLE = "Linear Spaces of Meanings: Compositional Structures in
Vision-Language Models",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "15349--15358",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236329"}

@inproceedings{bb241414,
        AUTHOR = "Chen, Y.S. and Song, Y.Z. and Yeo, C.Y. and Liu, B. and Fu, J.L. and Shuai, H.H.",
        TITLE = "{SINC}: Self-Supervised In-Context Learning for Vision-Language Tasks",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "15384--15396",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236330"}

@inproceedings{bb241415,
        AUTHOR = "Wu, C.E. and Tian, Y. and Yu, H.C. and Wang, H. and Morgado, P. and Hu, Y.H. and Yang, L.J.",
        TITLE = "Why Is Prompt Tuning for Vision-Language Models Robust to Noisy
Labels?",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "15442--15451",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236331"}

% NOTE(review): third author corrected from "Matinez, B." to "Martinez, B.",
% matching the spelling used for the same co-author group in entry bb241404.
% Confirm against the publisher record before regenerating this file.
@inproceedings{bb241416,
        AUTHOR = "Ouali, Y. and Bulat, A. and Martinez, B. and Tzimiropoulos, G.",
        TITLE = "Black Box Few-Shot Adaptation for Vision-Language models",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "15488--15500",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236332"}

@inproceedings{bb241417,
        AUTHOR = "Kan, B. and Wang, T. and Lu, W.P. and Zhen, X.T. and Guan, W. and Zheng, F.",
        TITLE = "Knowledge-Aware Prompt Tuning for Generalizable Vision-Language
Models",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "15624--15634",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236333"}

@inproceedings{bb241418,
        AUTHOR = "Zhai, J.T. and Zhang, Q. and Wu, T. and Chen, X.Y. and Liu, J.J. and Cheng, M.M.",
        TITLE = "{SLAN}: Self-Locator Aided Network for Vision-Language Understanding",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "21892--21901",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236334"}

@inproceedings{bb241419,
        AUTHOR = "Long, S. and Zhao, Z. and Yuan, J. and Tan, Z.C. and Liu, J.J. and Zhou, L.P. and Wang, S.S. and Wang, J.D.",
        TITLE = "Task-Oriented Multi-Modal Mutual Learning for Vision-Language Models",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "21902--21912",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236335"}

@inproceedings{bb241420,
        AUTHOR = "Cho, E. and Kim, J. and Kim, H.W.J.",
        TITLE = "Distribution-Aware Prompt Tuning for Vision-Language Models",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "21947--21956",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236336"}

@inproceedings{bb241421,
        AUTHOR = "Varma, M. and Delbrouck, J.B. and Hooper, S. and Chaudhari, A. and Langlotz, C.",
        TITLE = "{ViLLA}: Fine-Grained Vision-Language Representation Learning from
Real-World Data",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "22168--22178",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236337"}

@inproceedings{bb241422,
        AUTHOR = "Zhu, H.G. and Wei, Y.C. and Liang, X.D. and Zhang, C.J. and Zhao, Y.",
        TITLE = "{CTP}: Towards Vision-Language Continual Pretraining via Compatible
Momentum Contrast and Topology Preservation",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "22200--22210",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236338"}

@inproceedings{bb241423,
        AUTHOR = "Hall, M. and Gustafson, L. and Adcock, A. and Misra, I. and Ross, C.",
        TITLE = "Vision-Language Models Performing Zero-Shot Tasks Exhibit Disparities
Between Gender Groups",
        BOOKTITLE = CLVL23,
        YEAR = "2023",
        PAGES = "2770--2777",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236339"}

@inproceedings{bb241424,
        AUTHOR = "Agnolucci, L. and Baldrati, A. and Todino, F. and Becattini, F. and Bertini, M. and del Bimbo, A.",
        TITLE = "{ECO}: Ensembling Context Optimization for Vision-Language Models",
        BOOKTITLE = CLVL23,
        YEAR = "2023",
        PAGES = "2803--2807",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236340"}

@inproceedings{bb241425,
        AUTHOR = "Palit, V. and Pandey, R. and Arora, A. and Liang, P.P.",
        TITLE = "Towards Vision-Language Mechanistic Interpretability: A Causal
Tracing Tool for {BLIP}",
        BOOKTITLE = CLVL23,
        YEAR = "2023",
        PAGES = "2848--2853",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236341"}

@inproceedings{bb241426,
        AUTHOR = "Sammani, F. and Deligiannis, N.",
        TITLE = "{Uni-NLX}: Unifying Textual Explanations for Vision and Vision-Language
Tasks",
        BOOKTITLE = VLAR23,
        YEAR = "2023",
        PAGES = "4636--4641",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236342"}

@inproceedings{bb241427,
        AUTHOR = "Lee, D.J. and Song, S. and Suh, J. and Choi, J. and Lee, S. and Kim, H.W.J.",
        TITLE = "Read-only Prompt Optimization for Vision-Language Few-shot Learning",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "1401--1411",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236343"}

@inproceedings{bb241428,
        AUTHOR = "Li, X. and Fang, Y.H. and Liu, M.H. and Ling, Z. and Tu, Z.W. and Su, H.",
        TITLE = "Distilling Large Vision-Language Model with Out-of-Distribution
Generalizability",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "2492--2503",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236344"}

@inproceedings{bb241429,
        AUTHOR = "Bi, J.Y. and Cheng, D. and Yao, P. and Pang, B. and Zhan, Y.F. and Yang, C.G. and Wang, Y.J. and Sun, H. and Deng, W.W. and Zhang, Q.",
        TITLE = "{VL-Match}: Enhancing Vision-Language Pretraining with Token-Level and
Instance-Level Matching",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "2584--2593",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236345"}

@inproceedings{bb241430,
        AUTHOR = "Udandarao, V. and Gupta, A. and Albanie, S.",
        TITLE = "{SuS-X}: Training-Free Name-Only Transfer of Vision-Language Models",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "2725--2736",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236346"}

@inproceedings{bb241431,
        AUTHOR = "Jiang, C.Y. and Xu, H.Y. and Ye, W. and Ye, Q.H. and Li, C.L. and Yan, M. and Bi, B. and Zhang, S.K. and Huang, F. and Huang, S.F.",
        TITLE = "{BUS}: Efficient and Effective Vision-language Pre-training with
Bottom-Up Patch Summarization",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "2888--2898",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236347"}

@inproceedings{bb241432,
        AUTHOR = "Shi, C. and Yang, S.",
        TITLE = "{LoGoPrompt}: Synthetic Text Images Can Be Good Visual Prompts for
Vision-Language Models",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "2920--2929",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236348"}

@inproceedings{bb241433,
        AUTHOR = "Wang, A.J.P. and Lin, K.Q.H. and Zhang, D.J.H. and Lei, S.W.X. and Shou, M.Z.",
        TITLE = "Too Large; Data Reduction for Vision-Language Pre-Training",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "3124--3134",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236349"}

@inproceedings{bb241434,
        AUTHOR = "Wang, W.H. and Yang, Z. and Xu, B. and Li, J.Z. and Sun, Y.K.",
        TITLE = "{ViLTA}: Enhancing Vision-Language Pre-training through Textual
Augmentation",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "3135--3146",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236350"}

@inproceedings{bb241435,
        AUTHOR = "Boecking, B. and Usuyama, N. and Bannur, S. and Castro, D.C. and Schwaighofer, A. and Hyland, S. and Wetscherek, M. and Naumann, T. and Nori, A. and Alvarez Valle, J. and Poon, H. and Oktay, O.",
        TITLE = "Making the Most of Text Semantics to Improve Biomedical Vision-Language
Processing",
        BOOKTITLE = ECCV22,
        YEAR = "2022",
        PAGES = "XXXVI:1--21",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236351"}

@inproceedings{bb241436,
        AUTHOR = "Cui, Q. and Zhou, B. and Guo, Y. and Yin, W.D. and Wu, H. and Yoshie, O. and Chen, Y.",
        TITLE = "Contrastive Vision-Language Pre-training with Limited Resources",
        BOOKTITLE = ECCV22,
        YEAR = "2022",
        PAGES = "XXXVI:236--253",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236352"}

@inproceedings{bb241437,
        AUTHOR = "Hu, X.W. and Gan, Z. and Wang, J.F. and Yang, Z.Y. and Liu, Z.C. and Lu, Y. and Wang, L.J.",
        TITLE = "Scaling Up Vision-Language Pretraining for Image Captioning",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "17959--17968",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236353"}

@inproceedings{bb241438,
        AUTHOR = "Zhang, P.C. and Li, X.J. and Hu, X.W. and Yang, J.W. and Zhang, L. and Wang, L.J. and Choi, Y.J. and Gao, J.F.",
        TITLE = "{VinVL}: Revisiting Visual Representations in Vision-Language Models",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "5575--5584",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236354"}

@inproceedings{bb241439,
        AUTHOR = "Li, Z.W. and Stengel Eskin, E. and Zhang, Y.X. and Xie, C. and Tran, Q. and van Durme, B. and Yuille, A.L.",
        TITLE = "Calibrating Concepts and Operations:
Towards Symbolic Reasoning on Real Images",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "14890--14899",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236355"}

@inproceedings{bb241440,
        AUTHOR = "Yang, X. and Zhang, H.W. and Qi, G.J. and Cai, J.F.",
        TITLE = "Causal Attention for Vision-Language Tasks",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "9842--9852",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236356"}

@inproceedings{bb241441,
        AUTHOR = "Zheng, W.B. and Yan, L. and Gou, C. and Wang, F.Y.",
        TITLE = "Webly Supervised Knowledge Embedding Model for Visual Reasoning",
        BOOKTITLE = CVPR20,
        YEAR = "2020",
        PAGES = "12442--12451",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236357"}

@inproceedings{bb241442,
        AUTHOR = "Nguyen, D.K. and Okatani, T.",
        TITLE = "Multi-Task Learning of Hierarchical Vision-Language Representation",
        BOOKTITLE = CVPR19,
        YEAR = "2019",
        PAGES = "10484--10493",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236358"}

@inproceedings{bb241443,
        AUTHOR = "Gupta, T. and Shih, K.J. and Singh, S. and Hoiem, D.",
        TITLE = "Aligned Image-Word Representations Improve Inductive Transfer Across
Vision-Language Tasks",
        BOOKTITLE = ICCV17,
        YEAR = "2017",
        PAGES = "4223--4232",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT236359"}

@article{bb241444,
        AUTHOR = "Liang, J.W. and Liang, S.Y. and Liu, A.S. and Cao, X.C.",
        TITLE = "{VL-Trojan}: Multimodal Instruction Backdoor Attacks against
Autoregressive Visual Language Models",
        JOURNAL = IJCV,
        VOLUME = "133",
        YEAR = "2025",
        NUMBER = "7",
        MONTH = jul,
        PAGES = "3994--4013",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803atkvlm4.html#TT236360"}

@article{bb241445,
        AUTHOR = "Fu, T.C. and Zhang, J.H. and Li, F.X. and Wei, P. and Zeng, X.L. and Zhou, W.",
        TITLE = "Multimodal alignment augmentation transferable attack on
vision-language pre-training models",
        JOURNAL = PRL,
        VOLUME = "191",
        YEAR = "2025",
        PAGES = "131--137",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803atkvlm4.html#TT236361"}

@article{bb241446,
        AUTHOR = "Jia, X.J. and Gao, S.S. and Guo, Q. and Qin, S. and Ma, K. and Huang, Y.H. and Liu, Y. and Tsang, I.W. and Cao, X.C.",
        TITLE = "Semantic-Aligned Adversarial Evolution Triangle for
High-Transferability Vision-Language Attack",
        JOURNAL = PAMI,
        VOLUME = "47",
        YEAR = "2025",
        NUMBER = "10",
        MONTH = oct,
        PAGES = "8489--8505",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803atkvlm4.html#TT236362"}

@article{bb241447,
        AUTHOR = "Qian, Y.G. and Kong, Y.X. and Bao, Q.Q. and Gu, Z.Q. and Wang, B. and Ji, S. and Zhang, J.P. and Lei, Z.",
        TITLE = "Individual and Common Attack: Enhancing Transferability in {VLP} Models
Through Modal Feature Exploitation",
        JOURNAL = IP,
        VOLUME = "35",
        YEAR = "2026",
        PAGES = "1082--1095",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803atkvlm4.html#TT236363"}

@article{bb241448,
        AUTHOR = "Kuurila Zhang, H. and Chen, H.Y. and Zhao, G.Y.",
        TITLE = "Evaluating the Adversarial Robustness of Vision-Language Models for
Facial Expression Recognition",
        JOURNAL = IEEE_Int_Sys,
        VOLUME = "41",
        YEAR = "2026",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "105--112",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803atkvlm4.html#TT236364"}

@article{bb241449,
        AUTHOR = "Liu, C.H. and Wang, Y. and Cao, H.Y. and Liu, B. and Jiang, D.Q.",
        TITLE = "Evaluating the Adversarial Robustness of Vision-Language Models via
Internal Feature Perturbations",
        JOURNAL = CirSysVideo,
        VOLUME = "36",
        YEAR = "2026",
        NUMBER = "3",
        MONTH = mar,
        PAGES = "3938--3950",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803atkvlm4.html#TT236365"}

@article{bb241450,
        AUTHOR = "Lu, Z. and Xu, N. and Tian, H. and Wang, L.J. and Liu, A.A.",
        TITLE = "Medical {VLP} Model Is Vulnerable: Toward Multimodal Adversarial Attack
on Large Medical Vision-Language Models",
        JOURNAL = CirSysVideo,
        VOLUME = "36",
        YEAR = "2026",
        NUMBER = "2",
        MONTH = feb,
        PAGES = "2478--2491",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803atkvlm4.html#TT236366"}

@article{bb241451,
        AUTHOR = "Wang, B. and Qian, S.S. and Xu, C.S.",
        TITLE = "Invisible Backdoor Attack With {Siamese} Tuning on Pre-Trained
Vision-Language Models",
        JOURNAL = MultMed,
        VOLUME = "28",
        YEAR = "2026",
        PAGES = "1663--1676",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803atkvlm4.html#TT236367"}

@article{bb241452,
  author    = {Liu, D.Z. and Liu, W.Q. and Cai, X.W. and Zhou, P. and Guan, R.W. and Qu, X.Y. and Du, B.},
  title     = {Generating transferable attacks across large vision-language models using adversarial deformation learning},
  journal   = PR,
  volume    = {176},
  year      = {2026},
  pages     = {113194},
  bibsource = {http://www.visionbib.com/bibliography/applicat803atkvlm4.html#TT236368}
}

@inproceedings{bb241453,
        AUTHOR = "Cao, Y. and Xing, Y. and Zhang, J. and Lin, D. and Zhang, T.W. and Tsang, I. and Liu, Y. and Guo, Q.",
        TITLE = "{SceneTAP}: Scene-Coherent Typographic Adversarial Planner against
Vision-Language Models in Real-World Environments",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "25050--25059",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803atkvlm4.html#TT236369"}

@inproceedings{bb241454,
        AUTHOR = "Xie, P. and Bie, Y. and Mao, J. and Song, Y.Q. and Wang, Y. and Chen, H. and Chen, K.",
        TITLE = "Chain of Attack: On the Robustness of Vision-Language Models Against
Transfer-Based Adversarial Attacks",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "14679--14689",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803atkvlm4.html#TT236370"}

@inproceedings{bb241455,
        AUTHOR = "Zhang, J.M. and Ye, J. and Ma, X. and Li, Y. and Yang, Y.F. and Chen, Y.H. and Sang, J. and Yeung, D.Y.",
        TITLE = "Anyattack: Towards Large-scale Self-supervised Adversarial Attacks on
Vision-language Models",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "19900--19909",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803atkvlm4.html#TT236371"}

@inproceedings{bb241456,
        AUTHOR = "Liang, S.Y. and Liang, J.W. and Pang, T.Y. and Du, C. and Liu, A. and Zhu, M.L. and Cao, X.C. and Tao, D.C.",
        TITLE = "Revisiting Backdoor Attacks against Large Vision-Language Models from
Domain Shift",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "9477--9486",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803atkvlm4.html#TT236372"}

@inproceedings{bb241457,
        AUTHOR = "Fime, A.A. and Hossain, M.Z. and Zaman, S. and Shahid, A.R. and Imteaj, A.",
        TITLE = "Towards Trustworthy Autonomous Vehicles with Vision-Language Models
under Targeted and Untargeted Adversarial Attacks",
        BOOKTITLE = "FaDE-TCV25",
        YEAR = "2025",
        PAGES = "619--628",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803atkvlm4.html#TT236373"}

@inproceedings{bb241458,
        AUTHOR = "Chen, L. and Chen, Y.L. and Luo, Y. and Dou, H. and Zhong, X.Y.",
        TITLE = "Attention-Guided Hierarchical Defense for Multimodal Attacks in
Vision-Language Models",
        BOOKTITLE = TrustworthyOpen25,
        YEAR = "2025",
        PAGES = "1598--1608",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803atkvlm4.html#TT236374"}

@inproceedings{bb241459,
        AUTHOR = "Xing, S. and Zhao, Z.Y. and Sebe, N.",
        TITLE = "{CLIP} is Strong Enough to Fight Back: Test-time Counterattacks towards
Zero-shot Adversarial Robustness of {CLIP}",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "15172--15182",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803atkvlm4.html#TT236375"}

@inproceedings{bb241460,
        AUTHOR = "Ishmam, A.M. and Thomas, C.",
        TITLE = "Semantic Shield: Defending Vision-Language Models Against Backdooring
and Poisoning via Fine-Grained Knowledge Alignment",
        BOOKTITLE = CVPR24,
        YEAR = "2024",
        PAGES = "24820--24830",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803atkvlm4.html#TT236376"}

@inproceedings{bb241461,
        AUTHOR = "Wang, Y. and Liu, X.G. and Li, Y. and Chen, M. and Xiao, C.W.",
        TITLE = "Adashield: Safeguarding Multimodal Large Language Models from
Structure-based Attack via Adaptive Shield Prompting",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "XX: 77--94",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803atkvlm4.html#TT236377"}

@inproceedings{bb241462,
        AUTHOR = "Gao, S. and Jia, X.J. and Ren, X.H. and Tsang, I. and Guo, Q.",
        TITLE = "Boosting Transferability in Vision-language Attacks via Diversification
Along the Intersection Region of Adversarial Trajectory",
        BOOKTITLE = ECCV24,
        YEAR = "2024",
        PAGES = "LVII: 442--460",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803atkvlm4.html#TT236378"}

@inproceedings{bb241463,
        AUTHOR = "Bai, J. and Gao, K. and Min, S.B. and Xia, S.T. and Li, Z.F. and Liu, W.",
        TITLE = "{BadCLIP}: Trigger-Aware Prompt Learning for Backdoor Attacks on {CLIP}",
        BOOKTITLE = CVPR24,
        YEAR = "2024",
        PAGES = "24239--24250",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803atkvlm4.html#TT236379"}

@inproceedings{bb241464,
        AUTHOR = "Liang, S.Y. and Zhu, M.L. and Liu, A. and Wu, B.Y. and Cao, X.C. and Chang, E.C.",
        TITLE = "{BadCLIP}: Dual-Embedding Guided Backdoor Attack on Multimodal
Contrastive Learning",
        BOOKTITLE = CVPR24,
        YEAR = "2024",
        PAGES = "24645--24654",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803atkvlm4.html#TT236380"}

@inproceedings{bb241465,
        AUTHOR = "Lu, D. and Wang, Z.Q. and Wang, T. and Guan, W. and Gao, H.C. and Zheng, F.",
        TITLE = "Set-level Guidance Attack: Boosting Adversarial Transferability of
Vision-Language Pre-training Models",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "102--111",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803atkvlm4.html#TT236381"}

@article{bb241466,
        AUTHOR = "Zhao, Z. and Wang, S. and Gu, J. and Zhu, Y. and Mei, L. and Zhuang, Z.X. and Cui, Z.M. and Wang, Q. and Shen, D.G.",
        TITLE = "{ChatCAD+}: Toward a Universal and Reliable Interactive {CAD} Using {LLMs}",
        JOURNAL = MedImg,
        VOLUME = "43",
        YEAR = "2024",
        NUMBER = "11",
        MONTH = nov,
        PAGES = "3755--3766",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT236382"}

@article{bb241467,
        AUTHOR = "Luo, H.N. and Zeng, Y.J. and Yang, L. and Chen, K. and Shen, Z.X. and Lv, F.M.",
        TITLE = "{VLAI}: Exploration and Exploitation based on Visual-Language Aligned
Information for Robotic Object Goal Navigation",
        JOURNAL = IVC,
        VOLUME = "151",
        YEAR = "2024",
        PAGES = "105259",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT236383"}

@article{bb241468,
        AUTHOR = "Mansourian, A. and Oucheikh, R.",
        TITLE = "{ChatGeoAI}: Enabling Geospatial Analysis for Public through Natural
Language, with Large Language Models",
        JOURNAL = IJGI,
        VOLUME = "13",
        YEAR = "2024",
        NUMBER = "10",
        PAGES = "348",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT236384"}

@article{bb241469,
  author    = {Li, D. and Zhao, Y. and Wang, Z.F. and Jung, C. and Zhang, Z.},
  title     = {Large Language Model-Driven Structured Output: A Comprehensive Benchmark and Spatial Data Generation Framework},
  journal   = IJGI,
  volume    = {13},
  year      = {2024},
  number    = {11},
  pages     = {405},
  bibsource = {http://www.visionbib.com/bibliography/applicat803llm4.html#TT236385}
}

@article{bb241470,
        AUTHOR = "Li, Y.X. and Hu, B.T. and Chen, X.Y. and Ma, L. and Xu, Y. and Zhang, M.",
        TITLE = "{LMEye}: An Interactive Perception Network for Large Language Models",
        JOURNAL = MultMed,
        VOLUME = "26",
        YEAR = "2024",
        PAGES = "10952--10964",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT236386"}

@article{bb241471,
        AUTHOR = "Shao, R. and Zhang, Z.Y. and Tao, C. and Zhang, Y.S. and Peng, C.L. and Li, H.F.",
        TITLE = "Homogeneous tokenizer matters: Homogeneous visual tokenizer for
remote sensing image understanding",
        JOURNAL = PandRS,
        VOLUME = "218",
        YEAR = "2024",
        PAGES = "294--310",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT236387"}

@article{bb241472,
        AUTHOR = "Wang, Z.H. and Luo, T. and Liu, C. and Liu, W.C. and Goh, R.S.M. and Wong, W.F.",
        TITLE = "Enabling Energy-Efficient Deployment of Large Language Models on
Memristor Crossbar: A Synergy of Large and Small",
        JOURNAL = PAMI,
        VOLUME = "47",
        YEAR = "2025",
        NUMBER = "2",
        MONTH = feb,
        PAGES = "916--933",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT236388"}

@article{bb241473,
        AUTHOR = "Zhan, Y. and Xiong, Z. and Yuan, Y.",
        TITLE = "{SkyEyeGPT}: Unifying remote sensing vision-language tasks via
instruction tuning with large language model",
        JOURNAL = PandRS,
        VOLUME = "221",
        YEAR = "2025",
        PAGES = "64--77",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT236389"}

@article{bb241474,
        AUTHOR = "Zhu, Y. and Wen, Z.Y. and Li, X. and Shi, X.F. and Wu, X. and Dong, H. and Chen, J.M.",
        TITLE = "{ChatNav}: Leveraging {LLM} to Zero-Shot Semantic Reasoning in Object
Navigation",
        JOURNAL = CirSysVideo,
        VOLUME = "35",
        YEAR = "2025",
        NUMBER = "3",
        MONTH = mar,
        PAGES = "2369--2381",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT236390"}

@article{bb241475,
  author    = {Marasco, E. and Bourlai, T.},
  title     = {Enhancing trust in Large Language Models for streamlined decision-making in military operations},
  journal   = IVC,
  volume    = {158},
  year      = {2025},
  pages     = {105489},
  bibsource = {http://www.visionbib.com/bibliography/applicat803llm4.html#TT236391}
}

@article{bb241476,
        AUTHOR = "Qiao, D. and Ao, X. and Liu, Y. and Chen, X.T. and Song, F.Y. and Qin, Z. and Jin, W.Q.",
        TITLE = "{Tri-AFLLM}: Resource-Efficient Adaptive Asynchronous Accelerated
Federated {LLMs}",
        JOURNAL = CirSysVideo,
        VOLUME = "35",
        YEAR = "2025",
        NUMBER = "5",
        MONTH = may,
        PAGES = "4198--4211",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT236392"}

@article{bb241477,
        AUTHOR = "Zhang, Y. X. and Liu, C. B. and Liu, Y. Z. and Gao, Y. F. and Lu, Z. Y. and Xie, H. T. and Zhang, Y. D.",
        TITLE = "Leveraging Concise Concepts With Probabilistic Modeling for
Interpretable Visual Recognition",
        JOURNAL = MultMed,
        VOLUME = "27",
        YEAR = "2025",
        PAGES = "3117--3131",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT236393"}

@article{bb241478,
        AUTHOR = "Chen, L. F. and Hu, P. and Pan, Z. L. and Liu, Q. and Zhang, S. H. and Liu, Z.",
        TITLE = "Large Language Models Can Achieve Explainable and Training-Free
One-Shot {HRRP} {ATR}",
        JOURNAL = SPLetters,
        VOLUME = "32",
        YEAR = "2025",
        PAGES = "3395--3399",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT236394"}

@article{bb241479,
        AUTHOR = "Yang, S. Y. and Yu, W. J. and Yang, W. J. and Liu, X. W. and Tan, H. B. and Lan, L. and Xiao, N.",
        TITLE = "{WildVideo}: Benchmarking {LMMs} for Understanding Video-Language
Interaction",
        JOURNAL = PAMI,
        VOLUME = "47",
        YEAR = "2025",
        NUMBER = "10",
        MONTH = oct,
        PAGES = "9330--9344",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT236395"}

@article{bb241480,
        AUTHOR = "Chen, G. and Jiao, H. Y. and Hou, S. Y. and Liu, Z. Q. and Xie, L. and Wu, S. and Wu, H. Y. and Guan, X. F. and Gui, Z. P.",
        TITLE = "{GeoJSEval}: An Automated Evaluation Framework for Large Language
Models on {JavaScript}-Based Geospatial Computation and Visualization
Code Generation",
        JOURNAL = IJGI,
        VOLUME = "14",
        YEAR = "2025",
        NUMBER = "10",
        PAGES = "382",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT236396"}

@article{bb241481,
        AUTHOR = "Zhang, H. J. and Dong, L. and Liu, Y. and Huang, Y. F. and Wang, Y. L. and Wang, L. M. and Qiao, Y.",
        TITLE = "{LvBench}: A Benchmark for Long-form Video Understanding with Versatile
Multi-modal Question Answering",
        JOURNAL = IJCV,
        VOLUME = "133",
        YEAR = "2025",
        NUMBER = "11",
        MONTH = nov,
        PAGES = "7726--7747",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT236397"}

@inproceedings{bb241482,
        AUTHOR = "Li, K. C. and Wang, Y. and He, Y. and Li, Y. Z. and Wang, Y. and Liu, Y. and Wang, Z. and Xu, J. and Chen, G. and Lou, P. and Wang, L. M. and Qiao, Y.",
        TITLE = "{MVBench}: A Comprehensive Multi-modal Video Understanding Benchmark",
        BOOKTITLE = CVPR24,
        YEAR = "2024",
        PAGES = "22195--22206",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT236398"}

@article{bb241483,
        AUTHOR = "Wang, X. R. and Zhang, H. and Li, B. and Liang, K. M. and Sun, H. and He, Z. J. and Ma, Z. Y. and Guo, J.",
        TITLE = "Detailed Object Description With Controllable Dimensions",
        JOURNAL = MultMed,
        VOLUME = "27",
        YEAR = "2025",
        PAGES = "8474--8485",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT236399"}

@article{bb241484,
        AUTHOR = "Peng, Z. R. and Liu, X. M. and Yang, G. and Liu, J. and Peng, X. P. and Long, Y.",
        TITLE = "The uncertainty advantage: Enhancing large language models'
reliability through chain of uncertainty reasoning",
        JOURNAL = PRL,
        VOLUME = "200",
        YEAR = "2026",
        PAGES = "30--36",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT236400"}

@article{bb241485,
        AUTHOR = "Li, Y. X. and Liu, Z. Y. and Hu, B. and Wang, W. and Ding, Y. X. and Cao, X. C. and Zhang, M.",
        TITLE = "Vision Enhancing {LLMs}: Empowering Multimodal Knowledge Storage and
Sharing in {LLMs}",
        JOURNAL = IP,
        VOLUME = "35",
        YEAR = "2026",
        PAGES = "858--871",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT236401"}

@article{bb241486,
        AUTHOR = "Shi, Y. Q. and Yang, R. and Yin, C. S. and Lu, Y. W. and Huang, B. and Tao, Y. and Zhong, Y. H.",
        TITLE = "Two-Stage Fine-Tuning of Large Vision-Language Models with
Hierarchical Prompting for Few-Shot Object Detection in Remote
Sensing Images",
        JOURNAL = RS,
        VOLUME = "18",
        YEAR = "2026",
        NUMBER = "2",
        PAGES = "266",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT236402"}

@article{bb241487,
        AUTHOR = "Li, Q. Y. and Ma, S. and Luo, J. W. and Yu, Y. and Zhou, Y. and Wang, F. X. and Lu, X. D. and Wang, X. X. and He, X. and Chen, Y. S. and Yang, X.",
        TITLE = "Co-Training Vision-Language Models for Remote Sensing Multi-Task
Learning",
        JOURNAL = RS,
        VOLUME = "18",
        YEAR = "2026",
        NUMBER = "2",
        PAGES = "222",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT236403"}

@article{bb241488,
        AUTHOR = "Zhang, D. and Li, Z. Z. and Zhang, M. L. and Zhang, J. X. and Liu, Z. Y. and Yao, Y. X. and Xu, H. T. and Zheng, J. H. and Chen, X. and Zhang, Y. Y. and Yin, F. and Dong, J. H. and Guo, Z. J. and Song, L. and Liu, C. L.",
        TITLE = "From System 1 to System 2: A Survey of Reasoning Large Language
Models",
        JOURNAL = PAMI,
        VOLUME = "48",
        YEAR = "2026",
        NUMBER = "3",
        MONTH = mar,
        PAGES = "3335--3354",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT236404"}

@article{bb241489,
        AUTHOR = "Musacchio, E. and Siciliani, L. and Basile, P. and Semeraro, G.",
        TITLE = "Extending Large Language Models to multimodality for {non-English}
languages",
        JOURNAL = CVIU,
        VOLUME = "264",
        YEAR = "2026",
        PAGES = "104618",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT236405"}

@article{bb241490,
        AUTHOR = "Wang, X. and Pan, Z. and Chen, H. and Zhu, W. W.",
        TITLE = "{DiViCo}: Disentangled Visual Token Compression for Efficient Large
Vision-Language Model",
        JOURNAL = CirSysVideo,
        VOLUME = "36",
        YEAR = "2026",
        NUMBER = "2",
        MONTH = feb,
        PAGES = "1392--1405",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT236406"}

@article{bb241491,
        AUTHOR = "Liu, Z. Y. and Sun, Z. Y. and Zang, Y. H. and Li, W. and Zhang, P. and Dong, X. Y. and Xiong, Y. J. and Lin, D. and Wang, J. Q.",
        TITLE = "{RAR}: Retrieving and Ranking Augmented {MLLMs} for Visual Recognition",
        JOURNAL = IP,
        VOLUME = "35",
        YEAR = "2026",
        PAGES = "388--401",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT236407"}

@inproceedings{bb241492,
        AUTHOR = "Liu, Z. X. and Jiang, G. K. and Khajavi, S.",
        TITLE = "{LLaVA-SCo}: Teach Vision Language Models to Self-Correct",
        BOOKTITLE = WiCV25,
        YEAR = "2025",
        PAGES = "3406--3415",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT236408"}

@inproceedings{bb241493,
        AUTHOR = "Khattak, M. U. and Naeem, M. F. and Hassan, J. and Naseer, M. and Tombari, F. and Khan, F. S. and Khan, S.",
        TITLE = "How Good is my {Video-LMM}? Complex Video Reasoning and Robustness
Evaluation Suite for {Video-LMMs}",
        BOOKTITLE = WhatNext25,
        YEAR = "2025",
        PAGES = "3642--3651",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT236409"}

@inproceedings{bb241494,
        AUTHOR = "Pham, C. and Phan, H. and Doermann, D. and Tian, Y. J.",
        TITLE = "{PLVM}: A Tuning-Free Approach for Personalized Large Vision-Language
Model",
        BOOKTITLE = WhatNext25,
        YEAR = "2025",
        PAGES = "3632--3641",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT236410"}

@inproceedings{bb241495,
        AUTHOR = "Xie, H. C. and Ma, R. and Zhu, J. G. and Mai, Z. and Abd Almageed, W. and Abraham, Z.",
        TITLE = "Efficiently Mitigating Video Content Misalignment on Large Vision
Model with Time-Series Data Alignment",
        BOOKTITLE = LargeVM25,
        YEAR = "2025",
        PAGES = "3292--3298",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT236411"}

@inproceedings{bb241496,
        AUTHOR = "Li, Y. F. and Bao, W. T. and Ye, B. T. and Tan, Z. and Chen, T. L. and Liu, H. and Kong, Y.",
        TITLE = "Window Token Concatenation for Efficient Visual Large Language Models",
        BOOKTITLE = LargeVM25,
        YEAR = "2025",
        PAGES = "3178--3188",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT236412"}

@inproceedings{bb241497,
        AUTHOR = "Huang, Y. Q. and Qi, H. and Chen, Z. and Zhang, H. P. and Yu, H. C. and Zhao, Z.",
        TITLE = "Autonomous Multimodal Reasoning via Implicit Chain-of-Vision",
        BOOKTITLE = Reasoning25,
        YEAR = "2025",
        PAGES = "2954--2963",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT236413"}

@inproceedings{bb241498,
        AUTHOR = "Mushtaq, E. and Fabian, Z. and Bakman, Y. F. and Ramakrishna, A. and Soltanolkotabi, M. and Avestimehr, S.",
        TITLE = "{HARMONY}: Hidden Activation Representations and Model Output-Aware
Uncertainty Estimation for Vision-Language Models",
        BOOKTITLE = TrustworthyOpen25,
        YEAR = "2025",
        PAGES = "1654--1659",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT236414"}

@inproceedings{bb241499,
        AUTHOR = "Wang, Z. and Lo, F. P. W. and Chen, Q. and Zhang, Y. Q. and Lin, C. and Chen, X. and Yu, Z. H. and Thompson, A. J. and Yeatman, E. M. and Lo, B. P. L.",
        TITLE = "An {LLM}-Enabled Multi-Agent Autonomous Mechatronics Design Framework",
        BOOKTITLE = MultiEmbodied25,
        YEAR = "2025",
        PAGES = "4205--4215",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803llm4.html#TT236415"}
Last update: Mar 28, 2026 at 17:09:41