@inproceedings{bb240500,
AUTHOR = "Chen, H. N. and Ni, Y. and Huang, W. J. and Liu, Y. and Jeong, S. and Wen, F. and Bastian, N. D. and Latapie, H. and Imani, M.",
TITLE = "{VLTP}: Vision-Language Guided Token Pruning for Task-Oriented
Segmentation",
BOOKTITLE = WACV25,
YEAR = "2025",
PAGES = "9353--9363",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235417"}
@inproceedings{bb240501,
AUTHOR = "Ali, E. and Silva, S. and Khan, M. H.",
TITLE = "{DPA}: Dual Prototypes Alignment for Unsupervised Adaptation of
Vision-Language Models",
BOOKTITLE = WACV25,
YEAR = "2025",
PAGES = "6083--6093",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235418"}
@inproceedings{bb240502,
AUTHOR = "Zhang, C. and Stepputtis, S. and Sycara, K. and Xie, Y. Q.",
TITLE = "Enhancing Vision-Language Few-Shot Adaptation with Negative Learning",
BOOKTITLE = WACV25,
YEAR = "2025",
PAGES = "5905--5915",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235419"}
@inproceedings{bb240503,
AUTHOR = "Yamada, M. and Dharamshi, N. and Kohli, A. and Kasu, P. and Khan, A. and Ghulyani, M.",
TITLE = "Unleashing Potentials of Vision-Language Models for Zero-Shot {HOI}
Detection",
BOOKTITLE = WACV25,
YEAR = "2025",
PAGES = "5751--5760",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235420"}
@inproceedings{bb240504,
AUTHOR = "Imam, R. and Gani, H. and Huzaifa, M. and Nandakumar, K.",
TITLE = "Test-Time Low Rank Adaptation via Confidence Maximization for
Zero-Shot Generalization of Vision-Language Models",
BOOKTITLE = WACV25,
YEAR = "2025",
PAGES = "5449--5459",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235421"}
@inproceedings{bb240505,
AUTHOR = "Ghoddoosian, R. and Agarwal, N. and Dwivedi, I. and Dariush, B.",
TITLE = "{ACE}: Action Concept Enhancement of Video-Language Models in
Procedural Videos",
BOOKTITLE = WACV25,
YEAR = "2025",
PAGES = "9521--9531",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235422"}
@inproceedings{bb240506,
AUTHOR = "Onoe, Y. and Rane, S. and Berger, Z. and Bitton, Y. and Cho, J. and Garg, R. and Ku, A. and Parekh, Z. and Pont Tuset, J. and Tanzer, G. and Wang, S. and Baldridge, J.",
TITLE = "{DOCCI}: Descriptions of Connected and Contrasting Images",
BOOKTITLE = ECCV24,
YEAR = "2024",
PAGES = "LX: 291--309",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235423"}
@inproceedings{bb240507,
AUTHOR = "Li, T. and Ma, M. M. and Peng, X.",
TITLE = "{DEAL}: Disentangle and Localize Concept-level Explanations for {VLMs}",
BOOKTITLE = ECCV24,
YEAR = "2024",
PAGES = "XXXIX: 383--401",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235424"}
@inproceedings{bb240508,
AUTHOR = "Li, S. C. and Li, L. and Liu, Y. and Ren, S. H. and Liu, Y. X. and Gao, R. D. and Sun, X. and Hou, L.",
TITLE = "{VITATECS}: A Diagnostic Dataset for Temporal Concept Understanding of
Video-Language Models",
BOOKTITLE = ECCV24,
YEAR = "2024",
PAGES = "LXX: 331--348",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235425"}
@inproceedings{bb240509,
AUTHOR = "Yang, Y. T. and Chen, M. H. and Qiu, Q. and Wu, J. H. and Wang, W. X. and Lin, B. B. and Guan, Z. Y. and He, X. F.",
TITLE = "{Adapt2Reward}: Adapting Video-Language Models to Generalizable Robotic
Rewards via Failure Prompts",
BOOKTITLE = ECCV24,
YEAR = "2024",
PAGES = "LVII: 163--180",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235426"}
@inproceedings{bb240510,
AUTHOR = "Rahmanzadehgervi, P. and Bolton, L. and Taesiri, M. R. and Nguyen, A. T.",
TITLE = "Vision Language Models are blind",
BOOKTITLE = ACCV24,
YEAR = "2024",
PAGES = "V: 293--309",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235427"}
@inproceedings{bb240511,
AUTHOR = "Chytas, S. P. and Kim, H. W. J. and Singh, V.",
TITLE = "Understanding Multi-compositional Learning in Vision and Language
Models via Category Theory",
BOOKTITLE = ECCV24,
YEAR = "2024",
PAGES = "XLVIII: 324--341",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235428"}
@inproceedings{bb240512,
AUTHOR = "Song, Y. Z. and Chen, Y. S. and Lin, T. L. and Liu, B. and Fu, J. L. and Shuai, H. H.",
TITLE = "Capture Concept Through Comparison: Vision-and-Language Representation
Learning with Intrinsic Information Mining",
BOOKTITLE = ACCV24,
YEAR = "2024",
PAGES = "III: 220--238",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235429"}
@inproceedings{bb240513,
AUTHOR = "Adhikari, R. and Thapaliya, S. and Dhakal, M. and Khanal, B.",
TITLE = "{TuneVLSeg}: Prompt Tuning Benchmark for Vision-Language Segmentation
Models",
BOOKTITLE = ACCV24,
YEAR = "2024",
PAGES = "III: 44--62",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235430"}
@inproceedings{bb240514,
AUTHOR = "He, H. C. and Liu, W. B. and Xing, W. W.",
TITLE = "{BiEfficient}: Bidirectionally Prompting Vision-Language Models for
Parameter-efficient Video Recognition",
BOOKTITLE = ACCV24,
YEAR = "2024",
PAGES = "III: 257--274",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235431"}
@inproceedings{bb240515,
AUTHOR = "Yang, J. K. and Dong, Y. H. and Liu, S. and Li, B. and Wang, Z. Y. and Tan, H. R. and Jiang, C. C. and Kang, J. and Zhang, Y. H. and Zhou, K. Y. and Liu, Z. W.",
TITLE = "Octopus: Embodied Vision-Language Programmer from Environmental
Feedback",
BOOKTITLE = ECCV24,
YEAR = "2024",
PAGES = "I: 20--38",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235432"}
@inproceedings{bb240516,
AUTHOR = "Kar, O. F. and Tonioni, A. and Poklukar, P. and Kulshrestha, A. and Zamir, A. and Tombari, F.",
TITLE = "{BRAVE}: Broadening the Visual Encoding of Vision-Language Models",
BOOKTITLE = ECCV24,
YEAR = "2024",
PAGES = "XVI: 113--132",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235433"}
@inproceedings{bb240517,
AUTHOR = "Kamath, A. and Hsieh, C. Y. and Chang, K. W. and Krishna, R.",
TITLE = "The Hard Positive Truth About Vision-Language Compositionality",
BOOKTITLE = ECCV24,
YEAR = "2024",
PAGES = "XIV: 37--54",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235434"}
@inproceedings{bb240518,
AUTHOR = "Jia, B. X. and Chen, Y. X. and Yu, H. Y. and Wang, Y. and Niu, X. S. and Liu, T. Y. and Li, Q. and Huang, S. Y.",
TITLE = "{SceneVerse}: Scaling {3D} Vision-Language Learning for Grounded Scene
Understanding",
BOOKTITLE = ECCV24,
YEAR = "2024",
PAGES = "IX: 289--310",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235435"}
@inproceedings{bb240519,
AUTHOR = "Zhang, Y. F. and Jiang, M. and Zhao, Q.",
TITLE = "Learning Chain of Counterfactual Thought for Bias-robust
Vision-Language Reasoning",
BOOKTITLE = ECCV24,
YEAR = "2024",
PAGES = "VIII: 334--351",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235436"}
@inproceedings{bb240520,
AUTHOR = "Li, J. and Chen, D. and Cai, T. and Chen, P. H. and Hong, Y. and Chen, Z. F. and Shen, Y. K. and Gan, C.",
TITLE = "{FlexAttention} for Efficient High-resolution Vision-Language Models",
BOOKTITLE = ECCV24,
YEAR = "2024",
PAGES = "XXV: 286--302",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235437"}
@inproceedings{bb240521,
AUTHOR = "Li, X. and Ding, J. and Chen, Z. Y. and Elhoseiny, M.",
TITLE = "{UNI3DL}: A Unified Model for {3D} Vision-Language Understanding",
BOOKTITLE = ECCV24,
YEAR = "2024",
PAGES = "XXIII: 74--92",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235438"}
@inproceedings{bb240522,
AUTHOR = "Hao, T. X. and Ding, X. H. and Feng, J. X. and Yang, Y. H. and Chen, H. and Ding, G.",
TITLE = "Quantized Prompt for Efficient Generalization of Vision-Language Models",
BOOKTITLE = ECCV24,
YEAR = "2024",
PAGES = "XIX: 54--73",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235439"}
@inproceedings{bb240523,
AUTHOR = "Xu, H. B. and Ke, X. and Li, Y. Z. and Xu, R. and Wu, H. Q. and Lin, X. F. and Guo, W. Z.",
TITLE = "Vision-Language Action Knowledge Learning for Semantic-aware Action
Quality Assessment",
BOOKTITLE = ECCV24,
YEAR = "2024",
PAGES = "XLII: 423--440",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235440"}
@inproceedings{bb240524,
AUTHOR = "Zhu, Z. Y. and Zhang, Z. and Ma, X. J. and Niu, X. S. and Chen, Y. X. and Jia, B. X. and Deng, Z. D. and Huang, S. Y. and Li, Q.",
TITLE = "Unifying {3D} Vision-Language Understanding via Promptable Queries",
BOOKTITLE = ECCV24,
YEAR = "2024",
PAGES = "XLIV: 188--206",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235441"}
@inproceedings{bb240525,
AUTHOR = "Zhang, J. M. and Ma, X. J. and Wang, X. and Qiu, L. Y. and Wang, J. Q. and Jiang, Y. G. and Sang, J.",
TITLE = "Adversarial Prompt Tuning for Vision-Language Models",
BOOKTITLE = ECCV24,
YEAR = "2024",
PAGES = "XLV: 56--72",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235442"}
@inproceedings{bb240526,
AUTHOR = "Wu, G. and Zhang, X. and Li, Z. and Chen, Z. W. and Liang, J. J. and Yang, J. and Li, X.",
TITLE = "Cascade Prompt Learning for Vision-Language Model Adaptation",
BOOKTITLE = ECCV24,
YEAR = "2024",
PAGES = "L: 304--321",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235443"}
@inproceedings{bb240527,
AUTHOR = "Gao, S. and Jia, X. J. and Ren, X. H. and Tsang, I. and Guo, Q.",
TITLE = "Boosting Transferability in Vision-Language Attacks via Diversification
Along the Intersection Region of Adversarial Trajectory",
BOOKTITLE = ECCV24,
YEAR = "2024",
PAGES = "LVII: 442--460",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235444"}
@inproceedings{bb240528,
AUTHOR = "Jiang, H. B. and Yue, J. P. and Luo, H. and Ding, Z. and Lu, Z. Q.",
TITLE = "Reinforcement Learning Friendly Vision-Language Model for {Minecraft}",
BOOKTITLE = ECCV24,
YEAR = "2024",
PAGES = "LXVIII: 1--17",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235445"}
@inproceedings{bb240529,
AUTHOR = "Nguyen, A. T. and Tai, K. S. and Chen, B. C. and Shukla, S. N. and Yu, H. C. and Torr, P. H. S. and Tian, T. P. and Lim, S. N.",
TITLE = "{uCAP}: An Unsupervised Prompting Method for Vision-Language Models",
BOOKTITLE = ECCV24,
YEAR = "2024",
PAGES = "LXXIV: 425--439",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235446"}
@inproceedings{bb240530,
AUTHOR = "Zhang, Y. and Yu, K. and Wu, S. Q. and He, Z. H.",
TITLE = "Conceptual Codebook Learning for Vision-Language Models",
BOOKTITLE = ECCV24,
YEAR = "2024",
PAGES = "LXXVII: 235--251",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235447"}
@inproceedings{bb240531,
AUTHOR = "Chatterjee, A. and Luo, Y. R. and Gokhale, T. and Yang, Y. Z. and Baral, C.",
TITLE = "{REVISION}: Rendering Tools Enable Spatial Fidelity in Vision-Language
Models",
BOOKTITLE = ECCV24,
YEAR = "2024",
PAGES = "XXX: 339--357",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235448"}
@inproceedings{bb240532,
AUTHOR = "Sharma, P. and Shaham, T. R. and Baradad, M. and Rodriguez Munoz, A. and Duggal, S. and Isola, P. and Torralba, A. and Fu, S.",
TITLE = "A Vision Check-up for Language Models",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "14410--14419",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235449"}
@inproceedings{bb240533,
AUTHOR = "Parodi, F. and Matelsky, J. K. and Regla Vargas, A. and Foglia, E. E. and Lim, C. and Weinberg, D. and Kording, K. P. and Herrick, H. M. and Platt, M. L.",
TITLE = "Vision-language models for decoding provider attention during
neonatal resuscitation",
BOOKTITLE = CVPM24,
YEAR = "2024",
PAGES = "343--353",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235450"}
@inproceedings{bb240534,
AUTHOR = "Zhang, Y. B. and Zhu, W. J. and Tang, H. and Ma, Z. Y. and Zhou, K. Y. and Zhang, L.",
TITLE = "Dual Memory Networks: A Versatile Adaptation Approach for
Vision-Language Models",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "28718--28728",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235451"}
@inproceedings{bb240535,
AUTHOR = "Guo, Y. C. and Gu, X. D.",
TITLE = "{JoAPR}: Cleaning the Lens of Prompt Learning for Vision-Language
Models",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "28695--28705",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235452"}
@inproceedings{bb240536,
AUTHOR = "Han, J. and Lin, Z. W. and Sun, Z. Y. and Gao, Y. G. and Yan, K. and Ding, S. H. and Gao, Y. and Xia, G. S.",
TITLE = "Anchor-based Robust Finetuning of Vision-Language Models",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "26909--26918",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235453"}
@inproceedings{bb240537,
AUTHOR = "Cao, Q. L. and Xu, Z. Q. and Chen, Y. T. and Ma, C. and Yang, X. K.",
TITLE = "Domain Prompt Learning with Quaternion Networks",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "26627--26636",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235454"}
@inproceedings{bb240538,
AUTHOR = "Li, L. and Guan, H. Y. and Qiu, J. N. and Spratling, M.",
TITLE = "One Prompt Word is Enough to Boost Adversarial Robustness for
Pre-Trained Vision-Language Models",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "24408--24419",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235455"}
@inproceedings{bb240539,
AUTHOR = "Zanella, M. and Fuchs, C. and de Vleeschouwer, C. and Ben Ayed, I.",
TITLE = "Realistic Test-Time Adaptation of Vision-Language Models",
BOOKTITLE = CVPR25,
YEAR = "2025",
PAGES = "25103--25112",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235456"}
@inproceedings{bb240540,
AUTHOR = "Fuchs, C. and Zanella, M. and de Vleeschouwer, C.",
TITLE = "Online {Gaussian} Test-Time Adaptation of Vision-Language Models",
BOOKTITLE = "MULA25",
YEAR = "2025",
PAGES = "128--137",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235457"}
@inproceedings{bb240541,
AUTHOR = "Zanella, M. and Ben Ayed, I.",
TITLE = "On the Test-Time Zero-Shot Generalization of Vision-Language Models:
Do we Really need Prompt Learning?",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "23783--23793",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235458"}
@inproceedings{bb240542,
AUTHOR = "Yang, S. and Tian, Z. and Jiang, L. and Jia, J. Y.",
TITLE = "Unified Language-Driven Zero-Shot Domain Adaptation",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "23407--23415",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235459"}
@inproceedings{bb240543,
AUTHOR = "Cui, J. Q. and Zhu, B. and Wen, X. and Qi, X. J. and Yu, B. and Zhang, H. W.",
TITLE = "Classes Are Not Equal: An Empirical Study on Image Recognition
Fairness",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "23283--23292",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235460"}
@inproceedings{bb240544,
AUTHOR = "Stojnic, V. and Kalantidis, Y. and Tolias, G.",
TITLE = "Label Propagation for Zero-shot Classification with Vision-Language
Models",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "23209--23218",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235461"}
@inproceedings{bb240545,
AUTHOR = "Yuan, T. and Zhang, X. and Liu, K. and Liu, B. and Chen, C. and Jin, J. and Jiao, Z. Z.",
TITLE = "Towards Surveillance Video-and-Language Understanding: New Dataset,
Baselines, and Challenges",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "22052--22061",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235462"}
@inproceedings{bb240546,
AUTHOR = "Chen, Y. F. and Chen, D. P. and Liu, R. J. and Zhou, S. and Xue, W. Y. and Peng, W.",
TITLE = "Align Before Adapt: Leveraging Entity-to-Region Alignments for
Generalizable Video Action Recognition",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "18688--18698",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235463"}
@inproceedings{bb240547,
AUTHOR = "Mittal, H. and Agarwal, N. and Lo, S. Y. and Lee, K.",
TITLE = "Can't make an Omelette without Breaking some Eggs: Plausible Action
Anticipation using Large Video-Language Models",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "18580--18590",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235464"}
@inproceedings{bb240548,
AUTHOR = "Kahatapitiya, K. and Arnab, A. and Nagrani, A. and Ryoo, M. S.",
TITLE = "{VicTR}: Video-conditioned Text Representations for Activity
Recognition",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "18547--18558",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235465"}
@inproceedings{bb240549,
AUTHOR = "Wu, T. Y. and Ho, C. H. and Vasconcelos, N. M.",
TITLE = "{ProTeCt}: Prompt Tuning for Taxonomic Open Set Classification",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "16531--16540",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235466"}
@inproceedings{bb240550,
AUTHOR = "Zhao, G. and Li, G. B. and Chen, W. and Yu, Y. Z.",
TITLE = "{OVER-NAV}: Elevating Iterative Vision-and-Language Navigation with
Open-Vocabulary Detection and {StructurEd} Representation",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "16296--16306",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235467"}
@inproceedings{bb240551,
AUTHOR = "Li, X. and Wu, Y. F. and Jiang, X. H. and Guo, Z. H. and Gong, M. M. and Cao, H. Y. and Liu, Y. S. and Jiang, D. Q. and Sun, X.",
TITLE = "Enhancing Visual Document Understanding with Contrastive Learning in
Large Visual-Language Models",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "15546--15555",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235468"}
@inproceedings{bb240552,
AUTHOR = "Pham, K. and Huynh, C. and Lim, S. N. and Shrivastava, A.",
TITLE = "Composing Object Relations and Attributes for Image-Text Matching",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "14354--14363",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235469"}
@inproceedings{bb240553,
AUTHOR = "Xu, Z. L. and Zhu, Y. and Deng, S. Q. and Mittal, A. and Chen, Y. B. and Wang, M. and Favaro, P. and Tighe, J. and Modolo, D.",
TITLE = "Benchmarking Zero-Shot Recognition with Vision-Language Models:
Challenges on Granularity and Specificity",
BOOKTITLE = WhatNext24,
YEAR = "2024",
PAGES = "1827--1836",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235470"}
@inproceedings{bb240554,
AUTHOR = "Luo, Z. W. and Gustafsson, F. K. and Zhao, Z. and Sjolund, J. and Schon, T. B.",
TITLE = "Photo-Realistic Image Restoration in the Wild with Controlled
Vision-Language Models",
BOOKTITLE = NTIRE24,
YEAR = "2024",
PAGES = "6641--6651",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235471"}
@inproceedings{bb240555,
AUTHOR = "Huang, C. Q. and Jiang, A. and Feng, J. H. and Zhang, Y. and Wang, X. C. and Wang, Y. F.",
TITLE = "Adapting Visual-Language Models for Generalizable Anomaly Detection
in Medical Images",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "11375--11385",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235472"}
@inproceedings{bb240556,
AUTHOR = "Bang, J. and Ahn, S. and Lee, J. G.",
TITLE = "Active Prompt Learning in Vision Language Models",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "26994--27004",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235473"}
@inproceedings{bb240557,
AUTHOR = "Pan, C. and Yaman, B. and Nesti, T. and Mallik, A. and Allievi, A. G. and Velipasalar, S. and Ren, L.",
TITLE = "{VLP}: Vision Language Planning for Autonomous Driving",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "14760--14769",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235474"}
@inproceedings{bb240558,
AUTHOR = "Liang, M. and Su, J. C. and Schulter, S. and Garg, S. and Zhao, S. Y. and Wu, Y. and Chandraker, M.",
TITLE = "{AIDE}: An Automatic Data Engine for Object Detection in Autonomous
Driving",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "14695--14706",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235475"}
@inproceedings{bb240559,
AUTHOR = "Li, Z. and Li, X. and Fu, X. and Zhang, X. and Wang, W. Q. and Chen, S. and Yang, J.",
TITLE = "{PromptKD}: Unsupervised Prompt Distillation for Vision-Language Models",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "26607--26616",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235476"}
@inproceedings{bb240560,
AUTHOR = "Khandelwal, A.",
TITLE = "{PromptSync}: Bridging Domain Gaps in Vision-Language Models through
Class-Aware Prototype Alignment and Discrimination",
BOOKTITLE = ZeroShot24,
YEAR = "2024",
PAGES = "7819--7828",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235477"}
@inproceedings{bb240561,
AUTHOR = "Hirohashi, Y. and Hirakawa, T. and Yamashita, T. and Fujiyoshi, H.",
TITLE = "Prompt Learning with One-Shot Setting based Feature Space Analysis in
Vision-and-Language Models",
BOOKTITLE = ZeroShot24,
YEAR = "2024",
PAGES = "7761--7770",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235478"}
@inproceedings{bb240562,
AUTHOR = "Zhang, L. and Awal, R. and Agrawal, A.",
TITLE = "Contrasting Intra-Modal and Ranking Cross-Modal Hard Negatives to
Enhance Visio-Linguistic Compositional Understanding",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "13774--13784",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235479"}
@inproceedings{bb240563,
AUTHOR = "Rosasco, A. and Berti, S. and Pasquale, G. and Malafronte, D. and Sato, S. and Segawa, H. and Inada, T. and Natale, L.",
TITLE = "{ConCon-Chi}: Concept-Context Chimera Benchmark for Personalized
Vision-Language Tasks",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "22239--22248",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235480"}
@inproceedings{bb240564,
AUTHOR = "Cheng, S. and Guo, Z. C. and Wu, J. and Fang, K. and Li, P. and Liu, H. P. and Liu, Y.",
TITLE = "{EgoThink}: Evaluating First-Person Perspective Thinking Capability of
Vision-Language Models",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "14291--14302",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235481"}
@inproceedings{bb240565,
AUTHOR = "Kil, J. and Song, C. H. and Zheng, B. and Deng, X. and Su, Y. and Chao, W. L.",
TITLE = "Dual-View Visual Contextualization for Web Navigation",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "14445--14454",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235482"}
@inproceedings{bb240566,
AUTHOR = "Guo, Y. Y. and Wang, G. Z. and Kankanhalli, M.",
TITLE = "{PELA}: Learning Parameter-Efficient Models with Low-Rank Approximation",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "15699--15709",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235483"}
@inproceedings{bb240567,
AUTHOR = "Farina, M. and Mancini, M. and Cunegatti, E. and Liu, G. W. and Iacca, G. and Ricci, E.",
TITLE = "{MULTIFLOW}: Shifting Towards Task-Agnostic Vision-Language Pruning",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "16185--16195",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235484"}
@inproceedings{bb240568,
AUTHOR = "Mu, F. Z. and Mo, S. C. and Li, Y.",
TITLE = "{SnAG}: Scalable and Accurate Video Grounding",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "18930--18940",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235485"}
@inproceedings{bb240569,
AUTHOR = "Cao, Y. H. and Ji, K. X. and Huang, Z. Y. and Zheng, C. Y. and Liu, J. J. and Wang, J. and Chen, J. D. and Yang, M.",
TITLE = "Towards Better Vision-Inspired Vision-Language Models",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "13537--13547",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235486"}
@inproceedings{bb240570,
AUTHOR = "Shi, K. Y. and Dong, Q. and Goncalves, L. and Tu, Z. W. and Soatto, S.",
TITLE = "Non-autoregressive Sequence-to-Sequence Vision-Language Models",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "13603--13612",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235487"}
@inproceedings{bb240571,
AUTHOR = "Man, Y. Z. and Gui, L. Y. and Wang, Y. X.",
TITLE = "Situational Awareness Matters in {3D} Vision Language Reasoning",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "13678--13688",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235488"}
@inproceedings{bb240572,
AUTHOR = "Zheng, C. H. and Zhang, J. and Kembhavi, A. and Krishna, R.",
TITLE = "Iterated Learning Improves Compositionality in Large Vision-Language
Models",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "13785--13795",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235489"}
@inproceedings{bb240573,
AUTHOR = "Song, C. H. and Hwang, T. and Yoon, J. Y. and Choi, S. and Gu, Y. H.",
TITLE = "{SyncMask}: Synchronized Attentional Masking for Fashion-centric
Vision-Language Pretraining",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "13948--13957",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235490"}
@inproceedings{bb240574,
AUTHOR = "Pramanick, S. and Han, G. X. and Hou, R. and Nag, S. and Lim, S. N. and Ballas, N. and Wang, Q. F. and Chellappa, R. and Almahairi, A.",
TITLE = "Jack of All Tasks, Master of Many: Designing General-purpose
Coarse-to-Fine Vision-Language Model",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "14076--14088",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235491"}
@inproceedings{bb240575,
AUTHOR = "Zeng, Y. and Huang, Y. and Zhang, J. J. and Jie, Z. Q. and Chai, Z. H. and Wang, L.",
TITLE = "Investigating Compositional Challenges in Vision-Language Models for
Visual Grounding",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "14141--14151",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235492"}
@inproceedings{bb240576,
AUTHOR = "Karmanov, A. and Guan, D. and Lu, S. J. and El Saddik, A. and Xing, E.",
TITLE = "Efficient Test-Time Adaptation of Vision-Language Models",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "14162--14171",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235493"}
@inproceedings{bb240577,
AUTHOR = "Sameni, S. and Kafle, K. and Tan, H. and Jenni, S.",
TITLE = "Building Vision-Language Models on Solid Foundations with Masked
Distillation",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "14216--14226",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235494"}
@inproceedings{bb240578,
AUTHOR = "Peng, W. and Xie, S. C. and You, Z. and Lan, S. Y. and Wu, Z. X.",
TITLE = "Synthesize, Diagnose, and Optimize: Towards Fine-Grained
Vision-Language Understanding",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "13279--13288",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235495"}
@inproceedings{bb240579,
AUTHOR = "Zhao, Y. and Zhao, L. and Zhou, X. Y. and Wu, J. L. and Chu, C. T. and Miao, H. and Schroff, F. and Adam, H. and Liu, T. and Gong, B. Q. and Krahenbuhl, P. and Yuan, L. Z.",
TITLE = "Distilling Vision-Language Models on Millions of Videos",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "13106--13116",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235496"}
@inproceedings{bb240580,
AUTHOR = "Chen, J. N. and Yu, Q. H. and Shen, X. H. and Yuille, A. L. and Chen, L. C.",
TITLE = "{ViTamin}: Designing Scalable Vision Models in the Vision-Language Era",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "12954--12966",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235497"}
@inproceedings{bb240581,
AUTHOR = "Liu, S. H. and Yu, S. and Lin, Z. Q. and Pathak, D. and Ramanan, D.",
TITLE = "Language Models as Black-Box Optimizers for Vision-Language Models",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "12687--12697",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235498"}
@inproceedings{bb240582,
AUTHOR = "Howard, P. and Madasu, A. and Le, T. and Moreno, G. L. and Bhiwandiwalla, A. and Lal, V.",
TITLE = "{SocialCounterfactuals}: Probing and Mitigating Intersectional Social
Biases in Vision-Language Models with Counterfactual Examples",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "11975--11985",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235499"}
@inproceedings{bb240583,
AUTHOR = "Jiang, Y. K. and Huang, Z. Z. and Zhang, R. Z. and Zhang, X. F. and Zhang, S. T.",
TITLE = "{ZePT}: Zero-Shot Pan-Tumor Segmentation via Query-Disentangling and
Self-Prompting",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "11386--11397",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235500"}
@inproceedings{bb240584,
AUTHOR = "Kim, Y. and Mo, S. and Kim, M. and Lee, K. and Lee, J. and Shin, J.",
TITLE = "Discovering and Mitigating Visual Biases Through Keyword Explanation",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "11082--11092",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235501"}
@inproceedings{bb240585,
AUTHOR = "Li, R. and Fischer, T. and Segu, M. and Pollefeys, M. and Van Gool, L. J. and Tombari, F.",
TITLE = "Know Your Neighbors: Improving Single-View Reconstruction via Spatial
Vision-Language Reasoning",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "9848--9858",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235502"}
@inproceedings{bb240586,
AUTHOR = "Zeng, Z. and Wang, D. and Yang, F. Y. and Park, H. and Soatto, S. and Lao, D. and Wong, A.",
TITLE = "{WorDepth}: Variational Language Prior for Monocular Depth Estimation",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "9708--9719",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235503"}
@inproceedings{bb240587,
AUTHOR = "Hu, Y. S. and Stretcu, O. and Lu, C. T. and Viswanathan, K. and Hata, K. and Luo, E. and Krishna, R. and Fuxman, A.",
TITLE = "Visual Program Distillation: Distilling Tools and Programmatic
Reasoning into Vision-Language Models",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "9590--9601",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235504"}
@inproceedings{bb240588,
AUTHOR = "Zanella, M. and Fuchs, C. and Ben Ayed, I. and de Vleeschouwer, C.",
TITLE = "Vocabulary-Free Few-Shot Learning for Vision-Language Models",
BOOKTITLE = "MULA25",
YEAR = "2025",
PAGES = "149--158",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235505"}
@inproceedings{bb240589,
AUTHOR = "Silva Rodriguez, J. and Hajimiri, S. and Ben Ayed, I. and Dolz, J.",
TITLE = "A Closer Look at the Few-Shot Adaptation of Large Vision-Language
Models",
BOOKTITLE = CVPR24,
YEAR = "2024",
PAGES = "23681--23690",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235506"}
@inproceedings{bb240590,
AUTHOR = "Zanella, M. and Ben Ayed, I.",
TITLE = "Low-Rank Few-Shot Adaptation of Vision-Language Models",
BOOKTITLE = Prompting24,
YEAR = "2024",
PAGES = "1593--1603",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235507"}
@inproceedings{bb240591,
AUTHOR = "Yang, C. and Xu, R. and Guo, Y. and Huang, P. X. and Chen, Y. and Ding, W. and Wang, Z. Y. and Zhou, H.",
TITLE = "Improving Vision-and-Language Reasoning via Spatial Relations
Modeling",
BOOKTITLE = WACV24,
YEAR = "2024",
PAGES = "758--767",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235508"}
@inproceedings{bb240592,
AUTHOR = "Shen, S. and Yang, S. and Zhang, T. J. and Zhai, B. and Gonzalez, J. E. and Keutzer, K. and Darrell, T. J.",
TITLE = "Multitask Vision-Language Prompt Tuning",
BOOKTITLE = WACV24,
YEAR = "2024",
PAGES = "5644--5655",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235509"}
@inproceedings{bb240593,
AUTHOR = "Zhang, G. and Zhang, Y. R. and Zhang, K. and Tresp, V.",
TITLE = "Can Vision-Language Models be a Good Guesser? Exploring {VLMs} for
Times and Location Reasoning",
BOOKTITLE = WACV24,
YEAR = "2024",
PAGES = "625--634",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235510"}
@inproceedings{bb240594,
AUTHOR = "Ganz, R. and Nuriel, O. and Aberdam, A. and Kittenplon, Y. and Mazor, S. and Litman, R.",
TITLE = "Towards Models that Can See and Read",
BOOKTITLE = ICCV23,
YEAR = "2023",
PAGES = "21661--21671",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235511"}
@inproceedings{bb240595,
AUTHOR = "Zhang, H. and Liu, D. and Lv, Z. and Su, B. and Tao, D. C.",
TITLE = "Exploring Temporal Concurrency for Video-Language Representation
Learning",
BOOKTITLE = ICCV23,
YEAR = "2023",
PAGES = "15522--15532",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235512"}
@inproceedings{bb240596,
AUTHOR = "Shukor, M. and Dancette, C. and Cord, M.",
TITLE = "{eP-ALM}: Efficient Perceptual Augmentation of Language Models",
BOOKTITLE = ICCV23,
YEAR = "2023",
PAGES = "21999--22012",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235513"}
@inproceedings{bb240597,
AUTHOR = "Schulter, S. and Kumar, B. G. V. and Suh, Y. M. and Dafnis, K. M. and Zhang, Z. X. and Zhao, S. Y. and Metaxas, D. N.",
TITLE = "{OmniLabel}: A Challenging Benchmark for Language-Based Object
Detection",
BOOKTITLE = ICCV23,
YEAR = "2023",
PAGES = "11919--11928",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235514"}
@inproceedings{bb240598,
AUTHOR = "Chen, Z. L. and Huang, X. and Guan, Q. L. and Lin, L. and Luo, W. Q.",
TITLE = "A Retrospect to Multi-prompt Learning across Vision and Language",
BOOKTITLE = ICCV23,
YEAR = "2023",
PAGES = "22133--22144",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235515"}
@inproceedings{bb240599,
AUTHOR = "Derakhshani, M. M. and Sanchez, E. and Bulat, A. and da Costa, V. G. T. and Snoek, C. G. M. and Tzimiropoulos, G. and Martinez, B.",
TITLE = "{Bayesian} Prompt Learning for Image-Language Model Generalization",
BOOKTITLE = ICCV23,
YEAR = "2023",
PAGES = "15191--15200",
BIBSOURCE = "http://www.visionbib.com/bibliography/applicat803vlm3.html#TT235516"}
Last update: Feb 26, 2026 at 10:58:24