@inproceedings{bb157100,
  author    = {Gani, H. and Saadi, N. and Hussein, N. and Nandakumar, K.},
  title     = {Multi-Attribute Vision Transformers are Efficient and Robust Learners},
  booktitle = ICIP24,
  year      = {2024},
  pages     = {766-772},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT152973}
}
@inproceedings{bb157101,
  author    = {Cho, J.H. and Krahenbuhl, P.},
  title     = {Language-Conditioned Detection Transformer},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {16593-16603},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT152974}
}
@inproceedings{bb157102,
  author    = {Lin, S. and Lyu, P. and Liu, D.R. and Tang, T. and Liang, X.D. and Song, A. and Chang, X.J.},
  title     = {MLP Can Be a Good Transformer Learner},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {19489-19498},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT152975}
}
@inproceedings{bb157103,
  author    = {Weng, H.H. and Huang, D. and Qiao, Y. and Hu, Z. and Lin, C.Y. and Zhang, T. and Chen, C.L.P.},
  title     = {Desigen: A Pipeline for Controllable Design Template Generation},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {12721-12732},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT152976}
}
@inproceedings{bb157104,
  author    = {Park, S. and Byun, H.R.},
  title     = {Fair-VPT: Fair Visual Prompt Tuning for Image Classification},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {12268-12278},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT152977}
}
@inproceedings{bb157105,
  author    = {Xu, H.Y. and Xiang, L. and Ye, H.Y. and Yao, D. and Chu, P.Z. and Li, B.C.},
  title     = {Permutation Equivariance of Transformers and its Applications},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {5987-5996},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT152978}
}
@inproceedings{bb157106,
  author    = {Zhang, Y.Y. and Ding, X.H. and Gong, K.X. and Ge, Y.X. and Shan, Y. and Yue, X.Y.},
  title     = {Multimodal Pathway: Improve Transformers with Irrelevant Data from Other Modalities},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {6108-6117},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT152979}
}
@inproceedings{bb157107,
  author    = {Kobayashi, T.},
  title     = {Mean-Shift Feature Transformer},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {6047-6056},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT152980}
}
@inproceedings{bb157108,
  author    = {Shi, X.Y. and Hao, Z.C. and Yu, Z.F.},
  title     = {SpikingResformer: Bridging ResNet and Vision Transformer in Spiking Neural Networks},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {5610-5619},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT152981}
}
@inproceedings{bb157109,
  author    = {Ye, H.C. and Yu, C. and Ye, P. and Xia, R. and Tang, Y.S. and Lu, J.W. and Chen, T. and Zhang, B.},
  title     = {Once for Both: Single Stage of Importance and Sparsity Search for Vision Transformer Compression},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {5578-5588},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT152982}
}
@inproceedings{bb157110,
  author    = {Zhang, J. and Herrmann, C. and Hur, J. and Chen, E. and Jampani, V. and Sun, D.Q. and Yang, M.H.},
  title     = {Telling Left from Right: Identifying Geometry-Aware Semantic Correspondence},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {3076-3085},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT152983}
}
@inproceedings{bb157111,
  author    = {Huang, N.C. and Chang, C.C. and Lin, W.C. and Taka, E. and Marculescu, D. and Wu, K.C.A.},
  title     = {ELSA: Exploiting Layer-wise N:M Sparsity for Vision Transformer Acceleration},
  booktitle = ECV24,
  year      = {2024},
  pages     = {8006-8015},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT152984}
}
@inproceedings{bb157112,
  author    = {Devulapally, A. and Khan, M.F.F. and Advani, S. and Narayanan, V.},
  title     = {Multi-Modal Fusion of Event and RGB for Monocular Depth Estimation Using a Unified Transformer-based Architecture},
  booktitle = MULA24,
  year      = {2024},
  pages     = {2081-2089},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT152985}
}
@inproceedings{bb157113,
  author    = {Yang, Z.D. and Li, Z. and Zeng, A.L. and Li, Z.X. and Yuan, C. and Li, Y.},
  title     = {ViTKD: Feature-based Knowledge Distillation for Vision Transformers},
  booktitle = PBDL24,
  year      = {2024},
  pages     = {1379-1388},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT152986}
}
@inproceedings{bb157114,
  author    = {Mehri, F. and Baghshah, M.S. and Pilehvar, M.T.},
  title     = {LibraGrad: Balancing Gradient Flow for Universally Better Vision Transformer Attributions},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {67-78},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT152987}
}
@inproceedings{bb157115,
  author    = {Mehri, F. and Fayyaz, M. and Baghshah, M.S. and Pilehvar, M.T.},
  title     = {SkipPLUS: Skip the First Few Layers to Better Explain Vision Transformers},
  booktitle = FaDE-TCV24,
  year      = {2024},
  pages     = {204-215},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT152988}
}
@inproceedings{bb157116,
  author    = {Jain, S. and Dutta, T.},
  title     = {Towards Understanding and Improving Adversarial Robustness of Vision Transformers},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {24736-24745},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT152989}
}
@inproceedings{bb157117,
  author    = {Yang, S. and Bai, J. and Gao, K. and Yang, Y. and Li, Y.M. and Xia, S.T.},
  title     = {Not All Prompts Are Secure: A Switchable Backdoor Attack Against Pre-trained Vision Transfomers},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {24431-24441},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT152990}
}
@inproceedings{bb157118,
  author    = {Steitz, J.M.O. and Roth, S.},
  title     = {Adapters Strike Back},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {23449-23459},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT152991}
}
@inproceedings{bb157119,
  author    = {Rangwani, H. and Mondal, P. and Mishra, M. and Asokan, A.R. and Babu, R.V.},
  title     = {DeiT-LT: Distillation Strikes Back for Vision Transformer Training on Long-Tailed Datasets},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {23396-23406},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT152992}
}
@inproceedings{bb157120,
  author    = {Liu, J.Y. and Teshome, W. and Ghimire, S. and Sznaier, M. and Camps, O.},
  title     = {Solving Masked Jigsaw Puzzles with Diffusion Vision Transformers},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {23009-23018},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT152993}
}
@inproceedings{bb157121,
  author    = {Kim, M. and Seo, P.H. and Schmid, C. and Cho, M.},
  title     = {Learning Correlation Structures for Vision Transformers},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {18941-18951},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT152994}
}
@inproceedings{bb157122,
  author    = {Shi, D.},
  title     = {TransNeXt: Robust Foveal Visual Perception for Vision Transformers},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {17773-17783},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT152995}
}
@inproceedings{bb157123,
  author    = {Agiza, A. and Neseem, M. and Reda, S.},
  title     = {MTLoRA: A Low-Rank Adaptation Approach for Efficient Multi-Task Learning},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {16196-16205},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT152996}
}
@inproceedings{bb157124,
  author    = {Dong, W. and Zhang, X. and Chen, B. and Yan, D.W. and Lin, Z.J. and Yan, Q. and Wang, P. and Yang, Y.},
  title     = {Low-Rank Rescaled Vision Transformer Fine-Tuning: A Residual Design Approach},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {16101-16110},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT152997}
}
@inproceedings{bb157125,
  author    = {Wu, J. and Kang, W.T. and Tang, H. and Hong, Y. and Yan, Y.},
  title     = {On the Faithfulness of Vision Transformer Explanations},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {10936-10945},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT152998}
}
@inproceedings{bb157126,
  author    = {Navaneet, K.L. and Koohpayegani, S.A. and Sleiman, E. and Pirsiavash, H.},
  title     = {SlowFormer: Adversarial Attack on Compute and Energy Consumption of Efficient Vision Transformers},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {24786-24797},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT152999}
}
@inproceedings{bb157127,
  author    = {Koyun, O.C. and Toreyin, B.U.},
  title     = {HaLViT: Half of the Weights are Enough},
  booktitle = LargeVM24,
  year      = {2024},
  pages     = {3669-3678},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153000}
}
@inproceedings{bb157128,
  author    = {Bafghi, R.A. and Harilal, N. and Monteleoni, C. and Raissi, M.},
  title     = {Parameter Efficient Fine-tuning of Self-supervised ViTs without Catastrophic Forgetting},
  booktitle = LargeVM24,
  year      = {2024},
  pages     = {3679-3684},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153001}
}
@comment{NOTE(review): bb157129 carries the same title and the same BIBSOURCE anchor (TT153001) as bb157128, with the author list in reverse order and a different page range. Possibly a duplicate record of the same work -- confirm against the source before citing or removing.}
@inproceedings{bb157129,
AUTHOR = "Raissi, M. and Monteleoni, C. and Harilal, N. and Bafghi, R.A.",
TITLE = "Parameter Efficient Fine-tuning of Self-supervised ViTs without
Catastrophic Forgetting",
BOOKTITLE = LargeVM24,
YEAR = "2024",
PAGES = "7864-7869",
BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT153001"}
@inproceedings{bb157130,
  author    = {Edalati, A. and Hameed, M.G.A. and Mosleh, A.},
  title     = {Generalized Kronecker-based Adapters for Parameter-efficient Fine-tuning of Vision Transformers},
  booktitle = CRV23,
  year      = {2023},
  pages     = {97-104},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153002}
}
@inproceedings{bb157131,
  author    = {Marouf, I.E. and Tartaglione, E. and Lathuiliere, S.},
  title     = {Mini but Mighty: Finetuning ViTs with Mini Adapters},
  booktitle = WACV24,
  year      = {2024},
  pages     = {1721-1730},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153003}
}
@inproceedings{bb157132,
  author    = {Kim, G. and Kim, J. and Lee, J.S.},
  title     = {Exploring Adversarial Robustness of Vision Transformers in the Spectral Perspective},
  booktitle = WACV24,
  year      = {2024},
  pages     = {3964-3973},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153004}
}
@inproceedings{bb157133,
  author    = {Han, Q. and Zhang, G.J. and Huang, J.X. and Gao, P. and Wei, Z. and Lu, S.J.},
  title     = {Efficient MAE towards Large-Scale Vision Transformers},
  booktitle = WACV24,
  year      = {2024},
  pages     = {595-604},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153005}
}
@inproceedings{bb157134,
  author    = {Park, J.W. and Kahatapitiya, K. and Kim, D.H. and Sudalairaj, S. and Fan, Q.F. and Ryoo, M.S.},
  title     = {Grafting Vision Transformers},
  booktitle = WACV24,
  year      = {2024},
  pages     = {1134-1143},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153006}
}
@inproceedings{bb157135,
  author    = {Shimizu, S. and Tamaki, T.},
  title     = {Joint learning of images and videos with a single Vision Transformer},
  booktitle = MVA23,
  year      = {2023},
  pages     = {1-6},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153007}
}
@inproceedings{bb157136,
  author    = {Chen, M.Z. and Lin, M. and Lin, Z.H. and Zhang, Y.X. and Chao, F. and Ji, R.R.},
  title     = {SMMix: Self-Motivated Image Mixing for Vision Transformers},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {17214-17224},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153008}
}
@inproceedings{bb157137,
  author    = {Kim, D. and Angelova, A. and Kuo, W.C.},
  title     = {Contrastive Feature Masking Open-Vocabulary Vision Transformer},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {15556-15566},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153009}
}
@inproceedings{bb157138,
  author    = {Li, Z.K. and Gu, Q.Y.},
  title     = {I-ViT: Integer-only Quantization for Efficient Vision Transformer Inference},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {17019-17029},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153010}
}
@inproceedings{bb157139,
  author    = {Frumkin, N. and Gope, D. and Marculescu, D.},
  title     = {Jumping through Local Minima: Quantization in the Loss Landscape of Vision Transformers},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {16932-16942},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153011}
}
@inproceedings{bb157140,
  author    = {Li, Z.K. and Xiao, J.R. and Yang, L.W. and Gu, Q.Y.},
  title     = {RepQ-ViT: Scale Reparameterization for Post-Training Quantization of Vision Transformers},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {17181-17190},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153012}
}
@inproceedings{bb157141,
  author    = {Wang, X. and Chu, X.J. and Han, C. and Zhang, X.Y.},
  title     = {SCSC: Spatial Cross-scale Convolution Module to Strengthen both CNNs and Transformers},
  booktitle = NIVT23,
  year      = {2023},
  pages     = {731-741},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153013}
}
@inproceedings{bb157142,
  author    = {Chen, Y.H. and Weng, Y.C. and Kao, C.H. and Chien, C. and Chiu, W.C. and Peng, W.H.},
  title     = {TransTIC: Transferring Transformer-based Image Compression from Human Perception to Machine Perception},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {23240-23250},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153014}
}
@inproceedings{bb157143,
  author    = {Li, Y.Y. and Hu, J. and Wen, Y. and Evangelidis, G. and Salahi, K. and Wang, Y.Z. and Tulyakov, S. and Ren, J.},
  title     = {Rethinking Vision Transformers for MobileNet Size and Speed},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {16843-16854},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153015}
}
@inproceedings{bb157144,
  author    = {Nurgazin, M. and Tu, N.A.},
  title     = {A Comparative Study of Vision Transformer Encoders and Few-shot Learning for Medical Image Classification},
  booktitle = CVAMD23,
  year      = {2023},
  pages     = {2505-2513},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153016}
}
@inproceedings{bb157145,
  author    = {Vasu, P.K.A. and Gabriel, J. and Zhu, J. and Tuzel, O. and Ranjan, A.},
  title     = {FastViT: A Fast Hybrid Vision Transformer using Structural Reparameterization},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {5762-5772},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153017}
}
@inproceedings{bb157146,
  author    = {Tang, C. and Zhang, L.L. and Jiang, H.Q. and Xu, J.H. and Cao, T. and Zhang, Q. and Yang, Y.Q. and Wang, Z. and Yang, M.},
  title     = {ElasticViT: Conflict-aware Supernet Training for Deploying Fast Vision Transformer on Diverse Mobile Devices},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {5806-5817},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153018}
}
@inproceedings{bb157147,
  author    = {Lin, W.F. and Wu, Z.H. and Chen, J.Y. and Huang, J. and Jin, L.W.},
  title     = {Scale-Aware Modulation Meet Transformer},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {5992-6003},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153019}
}
@inproceedings{bb157148,
  author    = {He, Y.F. and Lou, Z.Y. and Zhang, L. and Liu, J. and Wu, W.J. and Zhou, H. and Zhuang, B.},
  title     = {BiViT: Extremely Compressed Binary Vision Transformers},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {5628-5640},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153020}
}
@inproceedings{bb157149,
  author    = {Dutson, M. and Li, Y. and Gupta, M.},
  title     = {Eventful Transformers: Leveraging Temporal Redundancy in Vision Transformers},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {16865-16877},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153021}
}
@inproceedings{bb157150,
  author    = {Wang, Z.Q. and Fang, Y.T. and Cao, J.H. and Zhang, Q. and Wang, Z. and Xu, R.},
  title     = {Masked Spiking Transformer},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {1761-1771},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153022}
}
@inproceedings{bb157151,
  author    = {Peebles, W. and Xie, S.},
  title     = {Scalable Diffusion Models with Transformers},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {4172-4182},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153023}
}
@inproceedings{bb157152,
  author    = {Mentzer, F. and Agustson, E. and Tschannen, M.},
  title     = {M2T: Masking Transformers Twice for Faster Decoding},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {5317-5326},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153024}
}
@inproceedings{bb157153,
  author    = {Yu, R.Y. and Wang, Z.N. and Wang, Y.H. and Li, K. and Liu, C. and Duan, H.Y. and Ji, X.Y. and Chen, J.},
  title     = {LaPE: Layer-adaptive Position Embedding for Vision Transformers with Independent Layer Normalization},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {5863-5873},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153025}
}
@inproceedings{bb157154,
  author    = {Roy, A. and Verma, V.K. and Voonna, S. and Ghosh, K. and Ghosh, S. and Das, A.},
  title     = {Exemplar-Free Continual Transformer with Convolutions},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {5874-5884},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153026}
}
@inproceedings{bb157155,
  author    = {Xu, Y.X. and Li, C. and Li, D. and Sheng, X. and Jiang, F. and Tian, L. and Sirasao, A.},
  title     = {FDViT: Improve the Hierarchical Architecture of Vision Transformer},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {5927-5937},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153027}
}
@inproceedings{bb157156,
  author    = {Chen, Y.J. and Liu, H.M. and Yin, H.R. and Fan, B.},
  title     = {Building Vision Transformers with Hierarchy Aware Feature Aggregation},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {5885-5895},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153028}
}
@inproceedings{bb157157,
  author    = {Quetu, V. and Milovanovic, M. and Tartaglione, E.},
  title     = {Sparse Double Descent in Vision Transformers: Real or Phantom Threat?},
  booktitle = CIAP23,
  year      = {2023},
  pages     = {II:490-502},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153029}
}
@inproceedings{bb157158,
  author    = {Ak, K.E. and Lee, G.G. and Xu, Y. and Shen, M.W.},
  title     = {Leveraging Efficient Training and Feature Fusion in Transformers for Multimodal Classification},
  booktitle = ICIP23,
  year      = {2023},
  pages     = {1420-1424},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153030}
}
@inproceedings{bb157159,
  author    = {Sajjadi, M.S.M. and Mahendran, A. and Kipf, T. and Pot, E. and Duckworth, D. and Lucic, M. and Greff, K.},
  title     = {RUST: Latent Neural Scene Representations from Unposed Imagery},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {17297-17306},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153031}
}
@inproceedings{bb157160,
  author    = {Bowman, B. and Achille, A. and Zancato, L. and Trager, M. and Perera, P. and Paolini, G. and Soatto, S.},
  title     = {{\`A}-la-carte Prompt Tuning (APT): Combining Distinct Data Via Composable Prompting},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {14984-14993},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153032}
}
@inproceedings{bb157161,
  author    = {Nakhli, R. and Moghadam, P.A. and Mi, H.Y. and Farahani, H. and Baras, A. and Gilks, B. and Bashashati, A.},
  title     = {Sparse Multi-Modal Graph Transformer with Shared-Context Processing for Representation Learning of Giga-pixel Images},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {11547-11557},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153033}
}
@inproceedings{bb157162,
  author    = {Gartner, E. and Metz, L. and Andriluka, M. and Freeman, C.D. and Sminchisescu, C.},
  title     = {Transformer-Based Learned Optimization},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {11970-11979},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153034}
}
@inproceedings{bb157163,
  author    = {Li, J.C. and Hassani, A. and Walton, S. and Shi, H.},
  title     = {ConvMLP: Hierarchical Convolutional MLPs for Vision},
  booktitle = WFM23,
  year      = {2023},
  pages     = {6307-6316},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153035}
}
@inproceedings{bb157164,
  author    = {Walmer, M. and Suri, S. and Gupta, K. and Shrivastava, A.},
  title     = {Teaching Matters: Investigating the Role of Supervision in Vision Transformers},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {7486-7496},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153036}
}
@inproceedings{bb157165,
  author    = {Wang, S.G. and Xie, T. and Cheng, J. and Zhang, X.C. and Liu, H.J.},
  title     = {MDL-NAS: A Joint Multi-domain Learning Framework for Vision Transformer},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {20094-20104},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153037}
}
@inproceedings{bb157166,
  author    = {Ren, S. and Wei, F.Y. and Zhang, Z. and Hu, H.},
  title     = {TinyMIM: An Empirical Study of Distilling MIM Pre-trained Models},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {3687-3697},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153038}
}
@inproceedings{bb157167,
  author    = {He, J.F. and Gao, Y. and Zhang, T.Z. and Zhang, Z. and Wu, F.},
  title     = {D2Former: Jointly Learning Hierarchical Detectors and Contextual Descriptors via Agent-Based Transformers},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {2904-2914},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153039}
}
@inproceedings{bb157168,
  author    = {Chen, X.Y. and Liu, Z.J. and Tang, H.T. and Yi, L. and Zhao, H. and Han, S.},
  title     = {SparseViT: Revisiting Activation Sparsity for Efficient High-Resolution Vision Transformer},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {2061-2070},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153040}
}
@inproceedings{bb157169,
  author    = {Lin, Y.B. and Bertasius, G.},
  title     = {Siamese Vision Transformers are Scalable Audio-Visual Learners},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {XIV:303-321},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153041}
}
@inproceedings{bb157170,
  author    = {Lin, Y.B. and Sung, Y.L. and Lei, J. and Bansal, M. and Bertasius, G.},
  title     = {Vision Transformers are Parameter-Efficient Audio-Visual Learners},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {2299-2309},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153042}
}
@inproceedings{bb157171,
  author    = {Das, R. and Dukler, Y. and Ravichandran, A. and Swaminathan, A.},
  title     = {Learning Expressive Prompting With Residuals for Vision Transformers},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {3366-3377},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153043}
}
@inproceedings{bb157172,
  author    = {Zheng, M.X. and Lou, Q. and Jiang, L.},
  title     = {TrojViT: Trojan Insertion in Vision Transformers},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {4025-4034},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153044}
}
@inproceedings{bb157173,
  author    = {Li, Y.X. and Xu, C.},
  title     = {Trade-off between Robustness and Accuracy of Vision Transformers},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {7558-7568},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153045}
}
@inproceedings{bb157174,
  author    = {Tarasiou, M. and Chavez, E. and Zafeiriou, S.},
  title     = {ViTs for SITS: Vision Transformers for Satellite Image Time Series},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {10418-10428},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153046}
}
@inproceedings{bb157175,
  author    = {Yu, Z.Z. and Wu, S. and Fu, Y.G. and Zhang, S. and Lin, Y.Y.C.},
  title     = {Hint-Aug: Drawing Hints from Foundation Vision Transformers towards Boosted Few-shot Parameter-Efficient Tuning},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {11102-11112},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153047}
}
@inproceedings{bb157176,
  author    = {Hou, J. and Dai, X.L. and He, Z.J. and Dai, A. and Nie{\ss}ner, M.},
  title     = {Mask3D: Pretraining 2D Vision Transformers by Learning Masked 3D Priors},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {13510-13519},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153048}
}
@inproceedings{bb157177,
  author    = {Xu, Z.Z. and Liu, R.K. and Yang, S. and Chai, Z.H. and Yuan, C.},
  title     = {Learning Imbalanced Data with Vision Transformers},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {15793-15803},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153049}
}
@inproceedings{bb157178,
  author    = {Yang, H. and Yin, H.X. and Shen, M. and Molchanov, P. and Li, H. and Kautz, J.},
  title     = {Global Vision Transformer Pruning with Hessian-Aware Saliency},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {18547-18557},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153050}
}
@inproceedings{bb157179,
  author    = {Nakamura, R. and Kataoka, H. and Takashima, S. and Noriega, E.J.M. and Yokota, R. and Inoue, N.},
  title     = {Pre-training Vision Transformers with Very Limited Synthesized Images},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {20303-20312},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153051}
}
@inproceedings{bb157180,
  author    = {Takashima, S. and Hayamizu, R. and Inoue, N. and Kataoka, H. and Yokota, R.},
  title     = {Visual Atoms: Pre-Training Vision Transformers with Sinusoidal Waves},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {18579-18588},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153052}
}
@inproceedings{bb157181,
  author    = {Liu, Y.J. and Yang, H.R. and Dong, Z. and Keutzer, K. and Du, L. and Zhang, S.H.},
  title     = {NoisyQuant: Noisy Bias-Enhanced Post-Training Activation Quantization for Vision Transformers},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {20321-20330},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153053}
}
@inproceedings{bb157182,
  author    = {Park, J. and Johnson, J.},
  title     = {RGB No More: Minimally-Decoded JPEG Vision Transformers},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {22334-22346},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153054}
}
@inproceedings{bb157183,
  author    = {Yu, C. and Chen, T. and Gan, Z.X. and Fan, J.Y.},
  title     = {Boost Vision Transformer with GPU-Friendly Sparsity and Quantization},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {22658-22668},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153055}
}
@inproceedings{bb157184,
  author    = {Bao, F. and Nie, S. and Xue, K.W. and Cao, Y. and Li, C.X. and Su, H. and Zhu, J.},
  title     = {All are Worth Words: A ViT Backbone for Diffusion Models},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {22669-22679},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153056}
}
@inproceedings{bb157185,
  author    = {Li, B. and Hu, Y.H. and Nie, X.C. and Han, C.Y. and Jiang, X.J. and Guo, T.D. and Liu, L.Q.},
  title     = {DropKey for Vision Transformer},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {22700-22709},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153057}
}
@inproceedings{bb157186,
  author    = {Lan, S.Y. and Yang, X.T. and Yu, Z.D. and Wu, Z.X. and Alvarez, J.M. and Anandkumar, A.},
  title     = {Vision Transformers are Good Mask Auto-Labelers},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {23745-23755},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153058}
}
@inproceedings{bb157187,
  author    = {Yu, L. and Xiang, W.},
  title     = {X-Pruner: eXplainable Pruning for Vision Transformers},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {24355-24363},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153059}
}
@inproceedings{bb157188,
  author    = {Hukkelas, H. and Lindseth, F.},
  title     = {Does Image Anonymization Impact Computer Vision Training?},
  booktitle = WAD23,
  year      = {2023},
  pages     = {140-150},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153060}
}
@inproceedings{bb157189,
  author    = {Marnissi, M.A.},
  title     = {Revolutionizing Thermal Imaging: GAN-Based Vision Transformers for Image Enhancement},
  booktitle = ICIP23,
  year      = {2023},
  pages     = {2735-2739},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153061}
}
@inproceedings{bb157190,
  author    = {Marnissi, M.A. and Fathallah, A.},
  title     = {GAN-based Vision Transformer for High-Quality Thermal Image Enhancement},
  booktitle = GCV23,
  year      = {2023},
  pages     = {817-825},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153062}
}
@inproceedings{bb157191,
  author    = {Scheibenreif, L. and Mommert, M. and Borth, D.},
  title     = {Masked Vision Transformers for Hyperspectral Image Classification},
  booktitle = EarthVision23,
  year      = {2023},
  pages     = {2166-2176},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153063}
}
@inproceedings{bb157192,
  author    = {Komorowski, P. and Baniecki, H. and Biecek, P.},
  title     = {Towards Evaluating Explanations of Vision Transformers for Medical Imaging},
  booktitle = XAI4CV23,
  year      = {2023},
  pages     = {3726-3732},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153064}
}
@inproceedings{bb157193,
  author    = {Le, P.H.C. and Li, X.},
  title     = {BinaryViT: Pushing Binary Vision Transformers Towards Convolutional Models},
  booktitle = ECV23,
  year      = {2023},
  pages     = {4665-4674},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153065}
}
@inproceedings{bb157194,
  author    = {Ma, D.N. and Zhao, P.F. and Jiao, X.},
  title     = {PerfHD: Efficient ViT Architecture Performance Ranking using Hyperdimensional Computing},
  booktitle = NAS23,
  year      = {2023},
  pages     = {2230-2237},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153066}
}
@inproceedings{bb157195,
  author    = {Wang, J. and Alamayreh, O. and Tondi, B. and Barni, M.},
  title     = {Open Set Classification of GAN-based Image Manipulations via a ViT-based Hybrid Architecture},
  booktitle = WMF23,
  year      = {2023},
  pages     = {953-962},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153067}
}
@inproceedings{bb157196,
  author    = {Tian, R. and Wu, Z.X. and Dai, Q. and Hu, H. and Qiao, Y. and Jiang, Y.G.},
  title     = {ResFormer: Scaling ViTs with Multi-Resolution Training},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {22721-22731},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153068}
}
@inproceedings{bb157197,
  author    = {Guo, X.D. and Sun, Y. and Zhao, R. and Kuang, L.Q. and Han, X.},
  title     = {SWPT: Spherical Window-based Point Cloud Transformer},
  booktitle = ACCV22,
  year      = {2022},
  pages     = {I:396-412},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153069}
}
@inproceedings{bb157198,
  author    = {Wang, W.J. and Chen, G. and Zhou, H.R. and Wang, X.L.},
  title     = {OVPT: Optimal Viewset Pooling Transformer for 3d Object Recognition},
  booktitle = ACCV22,
  year      = {2022},
  pages     = {I:486-503},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153070}
}
@inproceedings{bb157199,
  author    = {Kim, D. and Kim, J.},
  title     = {Vision Transformer Compression and Architecture Exploration with Efficient Embedding Space Search},
  booktitle = ACCV22,
  year      = {2022},
  pages     = {III:524-540},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT153071}
}
Last update: Feb 26, 2026 at 10:58:24