@inproceedings{bb140000,
  author    = {Zhang, C.Z. and Zhang, M.Y. and Zhang, S.H. and Jin, D.S. and Zhou, Q. and Cai, Z.A. and Zhao, H. and Liu, X.L. and Liu, Z.W.},
  title     = {Delving Deep into the Generalization of Vision Transformers under Distribution Shifts},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {7267-7276},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT136005},
}

@inproceedings{bb140001,
  author    = {Hou, Z. and Yu, B. and Tao, D.C.},
  title     = {BatchFormer: Learning to Explore Sample Relationships for Robust Representation Learning},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {7246-7256},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT136006},
}

@inproceedings{bb140002,
  author    = {Zamir, S.W. and Arora, A. and Khan, S. and Hayat, M. and Khan, F.S. and Yang, M.H.},
  title     = {Restormer: Efficient Transformer for High-Resolution Image Restoration},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {5718-5729},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT136007},
}

@inproceedings{bb140003,
  author    = {Zhao, H.S. and Jiang, L. and Jia, J.Y. and Torr, P.H.S. and Koltun, V.},
  title     = {Point Transformer},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {16239-16248},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT136008},
}

@inproceedings{bb140004,
  author    = {Lin, K. and Wang, L.J. and Liu, Z.C.},
  title     = {Mesh Graphormer},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {12919-12928},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT136009},
}

@inproceedings{bb140005,
  author    = {Casey, E. and Perez, V. and Li, Z.},
  title     = {The Animation Transformer: Visual Correspondence via Segment Matching},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {11303-11312},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT136010},
}

@inproceedings{bb140006,
  author    = {Reizenstein, J. and Shapovalov, R. and Henzler, P. and Sbordone, L. and Labatut, P. and Novotny, D.},
  title     = {Common Objects in 3D: Large-Scale Learning and Evaluation of Real-life 3D Category Reconstruction},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {10881-10891},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT136011},
}

@inproceedings{bb140007,
  author    = {Feng, W.X. and Wang, Y.J. and Ma, L.H. and Yuan, Y. and Zhang, C.},
  title     = {Temporal Knowledge Consistency for Unsupervised Visual Representation Learning},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {10150-10160},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT136012},
}

@inproceedings{bb140008,
  author    = {Wu, H.P. and Xiao, B. and Codella, N. and Liu, M.C. and Dai, X.Y. and Yuan, L. and Zhang, L.},
  title     = {CvT: Introducing Convolutions to Vision Transformers},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {22-31},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT136013},
}

@inproceedings{bb140009,
  author    = {Touvron, H. and Cord, M. and Sablayrolles, A. and Synnaeve, G. and Jegou, H.},
  title     = {Going deeper with Image Transformers},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {32-42},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT136014},
}

@inproceedings{bb140010,
  author    = {Zhao, J.W. and Yan, K. and Zhao, Y.F. and Guo, X.W. and Huang, F.Y. and Li, J.},
  title     = {Transformer-based Dual Relation Graph for Multi-label Image Recognition},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {163-172},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT136015},
}

@inproceedings{bb140011,
  author    = {Pan, Z.Z. and Zhuang, B. and Liu, J. and He, H.Y. and Cai, J.F.},
  title     = {Scalable Vision Transformers with Hierarchical Pooling},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {367-376},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT136016},
}

@inproceedings{bb140012,
  author    = {Yuan, L. and Chen, Y.P. and Wang, T. and Yu, W.H. and Shi, Y.J. and Jiang, Z.H. and Tay, F.E.H. and Feng, J.S. and Yan, S.C.},
  title     = {Tokens-to-Token ViT: Training Vision Transformers from Scratch on ImageNet},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {538-547},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT136017},
}

@inproceedings{bb140013,
  author    = {Wu, B. and Xu, C.F. and Dai, X.L. and Wan, A. and Zhang, P.Z. and Yan, Z.C. and Tomizuka, M. and Gonzalez, J. and Keutzer, K. and Vajda, P.},
  title     = {Visual Transformers: Where Do Transformers Really Belong in Vision Models?},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {579-589},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT136018},
}

@inproceedings{bb140014,
  author    = {Hu, R.H. and Singh, A.},
  title     = {UniT: Multimodal Multitask Learning with a Unified Transformer},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {1419-1429},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT136019},
}

@inproceedings{bb140015,
  author    = {Qiu, Y. and Yamamoto, S. and Nakashima, K. and Suzuki, R. and Iwata, K. and Kataoka, H. and Satoh, Y.},
  title     = {Describing and Localizing Multiple Changes with Transformers},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {1951-1960},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT136020},
}

@inproceedings{bb140016,
  author    = {Song, M. and Choi, J. and Han, B.H.},
  title     = {Variable-Rate Deep Image Compression through Spatially-Adaptive Feature Transform},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {2360-2369},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT136021},
}

% Lead author is Sheng, H.; the source spelling "Shenga" carried a fused affiliation marker.
@inproceedings{bb140017,
  author    = {Sheng, H. and Cai, S. and Liu, Y. and Deng, B. and Huang, J.Q. and Hua, X.S. and Zhao, M.J.},
  title     = {Improving 3D Object Detection with Channel-wise Transformer},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {2723-2732},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT136022},
}

@inproceedings{bb140018,
  author    = {Zhang, P.C. and Dai, X. and Yang, J.W. and Xiao, B. and Yuan, L. and Zhang, L. and Gao, J.F.},
  title     = {Multi-Scale Vision Longformer: A New Vision Transformer for High-Resolution Image Encoding},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {2978-2988},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT136023},
}

@inproceedings{bb140019,
  author    = {Dong, Q. and Tu, Z.W. and Liao, H. and Zhang, Y.T. and Mahadevan, V. and Soatto, S.},
  title     = {Visual Relationship Detection Using Part-and-Sum Transformers with Composite Queries},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {3530-3539},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT136024},
}

@inproceedings{bb140020,
  author    = {Fan, H.Q. and Xiong, B. and Mangalam, K. and Li, Y. and Yan, Z.C. and Malik, J. and Feichtenhofer, C.},
  title     = {Multiscale Vision Transformers},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {6804-6815},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT136025},
}

@inproceedings{bb140021,
  author    = {Mahmood, K. and Mahmood, R. and van Dijk, M.},
  title     = {On the Robustness of Vision Transformers to Adversarial Examples},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {7818-7827},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT136026},
}

@inproceedings{bb140022,
  author    = {Chen, X.L. and Xie, S. and He, K.},
  title     = {An Empirical Study of Training Self-Supervised Vision Transformers},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {9620-9629},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT136027},
}

@inproceedings{bb140023,
  author    = {Caron, M. and Touvron, H. and Misra, I. and Jegou, H. and Mairal, J. and Bojanowski, P. and Joulin, A.},
  title     = {Emerging Properties in Self-Supervised Vision Transformers},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {9630-9640},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT136028},
}

@inproceedings{bb140024,
  author    = {Yuan, Y. and Weng, X. and Ou, Y. and Kitani, K.},
  title     = {AgentFormer: Agent-Aware Transformers for Socio-Temporal Multi-Agent Forecasting},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {9793-9803},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT136029},
}

@inproceedings{bb140025,
  author    = {Wu, K. and Peng, H.W. and Chen, M.H. and Fu, J.L. and Chao, H.Y.},
  title     = {Rethinking and Improving Relative Position Encoding for Vision Transformer},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {10013-10021},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT136030},
}

@inproceedings{bb140026,
  author    = {Bhojanapalli, S. and Chakrabarti, A. and Glasner, D. and Li, D. and Unterthiner, T. and Veit, A.},
  title     = {Understanding Robustness of Transformers for Image Classification},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {10211-10221},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT136031},
}

@inproceedings{bb140027,
  author    = {Yan, B. and Peng, H. and Fu, J.L. and Wang, D. and Lu, H.C.},
  title     = {Learning Spatio-Temporal Transformer for Visual Tracking},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {10428-10437},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT136032},
}

@inproceedings{bb140028,
  author    = {Heo, B. and Yun, S. and Han, D.Y. and Chun, S. and Choe, J. and Oh, S.J.},
  title     = {Rethinking Spatial Dimensions of Vision Transformers},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {11916-11925},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT136033},
}

@inproceedings{bb140029,
  author    = {Voskou, A. and Panousis, K.P. and Kosmopoulos, D. and Metaxas, D.N. and Chatzis, S.},
  title     = {Stochastic Transformer Networks with Linear Competing Units: Application to end-to-end SL Translation},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {11926-11935},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT136034},
}

@inproceedings{bb140030,
  author    = {Ranftl, R. and Bochkovskiy, A. and Koltun, V.},
  title     = {Vision Transformers for Dense Prediction},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {12159-12168},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT136035},
}

@inproceedings{bb140031,
  author    = {Chen, M.H. and Peng, H.W. and Fu, J.L. and Ling, H.B.},
  title     = {AutoFormer: Searching Transformers for Visual Recognition},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {12250-12260},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT136036},
}

@inproceedings{bb140032,
  author    = {Yuan, K. and Guo, S.P. and Liu, Z.W. and Zhou, A. and Yu, F.W. and Wu, W.},
  title     = {Incorporating Convolution Designs into Visual Transformers},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {559-568},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT136037},
}

@inproceedings{bb140033,
  author    = {Chen, Z. and Xie, L.X. and Niu, J.W. and Liu, X.F. and Wei, L. and Tian, Q.},
  title     = {Visformer: The Vision-friendly Transformer},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {569-578},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT136038},
}

@inproceedings{bb140034,
  author    = {Wang, W. and Xie, E. and Li, X. and Fan, D.P. and Song, K. and Liang, D. and Lu, T. and Luo, P. and Shao, L.},
  title     = {Pyramid Vision Transformer: A Versatile Backbone for Dense Prediction without Convolutions},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {548-558},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT136039},
}

@inproceedings{bb140035,
  author    = {Yao, Z.L. and Cao, Y. and Lin, Y.T. and Liu, Z. and Zhang, Z. and Hu, H.},
  title     = {Leveraging Batch Normalization for Vision Transformers},
  booktitle = NeruArch21,
  year      = {2021},
  pages     = {413-422},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT136040},
}

@inproceedings{bb140036,
  author    = {Zhang, Z.X. and Lu, X.Q. and Cao, G.J. and Yang, Y.T. and Jiao, L.C. and Liu, F.},
  title     = {ViT-YOLO: Transformer-Based YOLO for Object Detection},
  booktitle = VisDrone21,
  year      = {2021},
  pages     = {2799-2808},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT136041},
}

@inproceedings{bb140037,
  author    = {Graham, B. and El Nouby, A. and Touvron, H. and Stock, P. and Joulin, A. and Jegou, H. and Douze, M.},
  title     = {LeViT: a Vision Transformer in ConvNet's Clothing for Faster Inference},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {12239-12249},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT136042},
}

@inproceedings{bb140038,
  author    = {Horvath, J. and Baireddy, S. and Hao, H.X. and Montserrat, D.M. and Delp, E.J.},
  title     = {Manipulation Detection in Satellite Images Using Vision Transformer},
  booktitle = WMF21,
  year      = {2021},
  pages     = {1032-1041},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT136043},
}

@inproceedings{bb140039,
  author    = {Horvath, J. and Montserrat, D.M. and Hao, H.X. and Delp, E.J.},
  title     = {Manipulation Detection in Satellite Images Using Deep Belief Networks},
  booktitle = WMF20,
  year      = {2020},
  pages     = {2832-2840},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT136044},
}

@inproceedings{bb140040,
  author    = {Beal, J. and Wu, H.Y. and Park, D.H. and Zhai, A. and Kislyuk, D.},
  title     = {Billion-Scale Pretraining with Vision Transformers for Multi-Task Visual Representations},
  booktitle = WACV22,
  year      = {2022},
  pages     = {1431-1440},
  bibsource = {http://www.visionbib.com/bibliography/pattern651vit2.html#TT136045},
}

@article{bb140041,
  author    = {Hu, H.Q. and Lu, X.F. and Zhang, X.P. and Zhang, T.X. and Sun, G.L.},
  title     = {Inheritance Attention Matrix-Based Universal Adversarial Perturbations on Vision Transformers},
  journal   = SPLetters,
  volume    = {28},
  year      = {2021},
  pages     = {1923-1927},
  bibsource = {http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136046},
}

@article{bb140042,
  author    = {Xue, Z.X. and Tan, X. and Yu, X. and Liu, B. and Yu, A. and Zhang, P.Q.},
  title     = {Deep Hierarchical Vision Transformer for Hyperspectral and LiDAR Data Classification},
  journal   = IP,
  volume    = {31},
  year      = {2022},
  pages     = {3095-3110},
  bibsource = {http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136047},
}

@article{bb140043,
  author    = {Heo, J. and Wang, Y. and Park, J.},
  title     = {Occlusion-aware spatial attention transformer for occluded object recognition},
  journal   = PRL,
  volume    = {159},
  year      = {2022},
  pages     = {70-76},
  bibsource = {http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136048},
}

@article{bb140044,
  author    = {Yu, X.H. and Wang, J. and Zhao, Y. and Gao, Y.S.},
  title     = {Mix-ViT: Mixing attentive vision transformer for ultra-fine-grained visual categorization},
  journal   = PR,
  volume    = {135},
  year      = {2023},
  pages     = {109131},
  bibsource = {http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136049},
}

@article{bb140045,
  author    = {Lin, X. and Sun, S.Z. and Huang, W. and Sheng, B. and Li, P. and Feng, D.D.},
  title     = {EAPT: Efficient Attention Pyramid Transformer for Image Processing},
  journal   = MultMed,
  volume    = {25},
  year      = {2023},
  pages     = {50-61},
  bibsource = {http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136050},
}

@article{bb140046,
  author    = {Yu, L. and Xiang, W. and Fang, J. and Chen, Y.P.P. and Chi, L.},
  title     = {eX-ViT: A Novel explainable vision transformer for weakly supervised semantic segmentation},
  journal   = PR,
  volume    = {142},
  year      = {2023},
  pages     = {109666},
  bibsource = {http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136051},
}

@article{bb140047,
  author    = {Wu, G. and Zheng, W.S. and Lu, Y.T. and Tian, Q.},
  title     = {PSLT: A Light-Weight Vision Transformer With Ladder Self-Attention and Progressive Shift},
  journal   = PAMI,
  volume    = {45},
  year      = {2023},
  number    = {9},
  month     = {September},
  pages     = {11120-11135},
  bibsource = {http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136052},
}

@article{bb140048,
  author    = {Li, K.C. and Wang, Y. and Zhang, J.H. and Gao, P. and Song, G. and Liu, Y. and Li, H.S. and Qiao, Y.},
  title     = {UniFormer: Unifying Convolution and Self-Attention for Visual Recognition},
  journal   = PAMI,
  volume    = {45},
  year      = {2023},
  number    = {10},
  month     = {October},
  pages     = {12581-12600},
  bibsource = {http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136053},
}

@article{bb140049,
  author    = {Li, H.L. and Xue, M.Q. and Song, J. and Zhang, H.F. and Huang, W.Q. and Liang, L. and Song, M.L.},
  title     = {Constituent Attention for Vision Transformers},
  journal   = CVIU,
  volume    = {237},
  year      = {2023},
  pages     = {103838},
  bibsource = {http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136054},
}

@article{bb140050,
  author    = {Qin, R. and Wang, C.Z. and Wu, Y.M. and Du, H. and Lv, M.Y.},
  title     = {A U-Shaped Convolution-Aided Transformer with Double Attention for Hyperspectral Image Classification},
  journal   = RS,
  volume    = {16},
  year      = {2024},
  number    = {2},
  pages     = {288},
  bibsource = {http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136055},
}

@inproceedings{bb140051,
  author    = {Cai, H. and Li, J. and Hu, M. and Gan, C. and Han, S.},
  title     = {EfficientViT: Lightweight Multi-Scale Attention for High-Resolution Dense Prediction},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {17256-17267},
  bibsource = {http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136056},
}

@inproceedings{bb140052,
  author    = {Ryu, J. and Han, D.Y. and Lim, J.W.},
  title     = {Gramian Attention Heads are Strong yet Efficient Vision Learners},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {5818-5828},
  bibsource = {http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136057},
}

% Title given without the trailing asterisk, which was a footnote marker and not part of the title.
@inproceedings{bb140053,
  author    = {Xu, R.H. and Zhang, H. and Hu, W.Z. and Zhang, S.L. and Wang, X.Y.},
  title     = {ParCNetV2: Oversized Kernel with Enhanced Attention},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {5729-5739},
  bibsource = {http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136058},
}

@inproceedings{bb140054,
  author    = {Zhao, B.Y. and Yu, Z. and Lan, S.Y. and Cheng, Y. and Anandkumar, A. and Lao, Y.J. and Alvarez, J.M.},
  title     = {Fully Attentional Networks with Self-emerging Token Labeling},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {5562-5572},
  bibsource = {http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136059},
}

@inproceedings{bb140055,
  author    = {Guo, Y. and Stutz, D. and Schiele, B.},
  title     = {Robustifying Token Attention for Vision Transformers},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {17511-17522},
  bibsource = {http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136060},
}

@inproceedings{bb140056,
  author    = {Zhao, Y. and Tang, H.D. and Jiang, Y.Y. and A, Y. and Wu, Q. and Wang, J.},
  title     = {Parameter-Efficient Vision Transformer with Linear Attention},
  booktitle = ICIP23,
  year      = {2023},
  pages     = {1275-1279},
  bibsource = {http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136061},
}

@inproceedings{bb140057,
  author    = {Shi, L. and Huang, H.D. and Song, B. and Tan, M. and Zhao, W.Z. and Xia, T. and Ren, P.J.},
  title     = {TAQ: Top-K Attention-Aware Quantization for Vision Transformers},
  booktitle = ICIP23,
  year      = {2023},
  pages     = {1750-1754},
  bibsource = {http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136062},
}

@inproceedings{bb140058,
  author    = {Baili, N. and Frigui, H.},
  title     = {ADA-VIT: Attention-Guided Data Augmentation for Vision Transformers},
  booktitle = ICIP23,
  year      = {2023},
  pages     = {385-389},
  bibsource = {http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136063},
}

@inproceedings{bb140059,
  author    = {Ding, M.Y. and Shen, Y. and Fan, L.J. and Chen, Z.F. and Chen, Z. and Luo, P. and Tenenbaum, J. and Gan, C.},
  title     = {Visual Dependency Transformers: Dependency Tree Emerges from Reversed Attention},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {14528-14539},
  bibsource = {http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136064},
}

@inproceedings{bb140060,
  author    = {Song, J.C. and Mou, C. and Wang, S.Q. and Ma, S.W. and Zhang, J.},
  title     = {Optimization-Inspired Cross-Attention Transformer for Compressive Sensing},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {6174-6184},
  bibsource = {http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136065},
}

@inproceedings{bb140061,
  author    = {Hassani, A. and Walton, S. and Li, J.C. and Li, S. and Shi, H.},
  title     = {Neighborhood Attention Transformer},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {6185-6194},
  bibsource = {http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136066},
}

@inproceedings{bb140062,
  author    = {Liu, Z.J. and Yang, X.Y. and Tang, H.T. and Yang, S. and Han, S.},
  title     = {FlatFormer: Flattened Window Attention for Efficient Point Cloud Transformer},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {1200-1211},
  bibsource = {http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136067},
}

@inproceedings{bb140063,
  author    = {Pan, X. and Ye, T.Z. and Xia, Z.F. and Song, S. and Huang, G.},
  title     = {Slide-Transformer: Hierarchical Vision Transformer with Local Self-Attention},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {2082-2091},
  bibsource = {http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136068},
}

@inproceedings{bb140064,
  author    = {Zhu, L. and Wang, X.J. and Ke, Z.H. and Zhang, W. and Lau, R.},
  title     = {BiFormer: Vision Transformer with Bi-Level Routing Attention},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {10323-10333},
  bibsource = {http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136069},
}

@inproceedings{bb140065,
  author    = {Long, S. and Zhao, Z. and Pi, J. and Wang, S.S. and Wang, J.D.},
  title     = {Beyond Attentive Tokens: Incorporating Token Importance and Diversity for Efficient Vision Transformers},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {10334-10343},
  bibsource = {http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136070},
}

@inproceedings{bb140066,
  author    = {Liu, X.Y. and Peng, H. and Zheng, N.X. and Yang, Y.Q. and Hu, H. and Yuan, Y.X.},
  title     = {EfficientViT: Memory Efficient Vision Transformer with Cascaded Group Attention},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {14420-14430},
  bibsource = {http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136071},
}

@inproceedings{bb140067,
  author    = {You, H.R. and Xiong, Y. and Dai, X.L. and Wu, B. and Zhang, P.Z. and Fan, H.Q. and Vajda, P. and Lin, Y.Y.C.},
  title     = {Castling-ViT: Compressing Self-Attention via Switching Towards Linear-Angular Attention at Vision Transformer Inference},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {14431-14442},
  bibsource = {http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136072},
}

@inproceedings{bb140068,
  author    = {Grainger, R. and Paniagua, T. and Song, X. and Cuntoor, N. and Lee, M.W. and Wu, T.F.},
  title     = {PaCa-ViT: Learning Patch-to-Cluster Attention in Vision Transformers},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {18568-18578},
  bibsource = {http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136073},
}

@inproceedings{bb140069,
  author    = {Wei, C. and Duke, B. and Jiang, R. and Aarabi, P. and Taylor, G.W. and Shkurti, F.},
  title     = {Sparsifiner: Learning Sparse Instance-Dependent Attention for Efficient Vision Transformers},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {22680-22689},
  bibsource = {http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136074},
}

@inproceedings{bb140070,
  author    = {Bhattacharyya, M. and Chattopadhyay, S. and Nag, S.},
  title     = {DeCAtt: Efficient Vision Transformers with Decorrelated Attention Heads},
  booktitle = ECV23,
  year      = {2023},
  pages     = {4695-4699},
  bibsource = {http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136075},
}

@inproceedings{bb140071,
  author    = {Tatsunami, Y. and Taki, M.},
  title     = {RaftMLP: How Much Can Be Done Without Attention and with Less Spatial Locality?},
  booktitle = ACCV22,
  year      = {2022},
  pages     = {VI:459-475},
  bibsource = {http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136076},
}

@inproceedings{bb140072,
  author    = {Bolya, D. and Fu, C.Y. and Dai, X.L. and Zhang, P.Z. and Hoffman, J.},
  title     = {Hydra Attention: Efficient Attention with Many Heads},
  booktitle = CADK22,
  year      = {2022},
  pages     = {35-49},
  bibsource = {http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136077},
}

@inproceedings{bb140073,
  author    = {Chen, X.Y. and Hu, Q. and Li, K. and Zhong, C. and Wang, G.H.},
  title     = {Accumulated Trivial Attention Matters in Vision Transformers on Small Datasets},
  booktitle = WACV23,
  year      = {2023},
  pages     = {3973-3981},
  bibsource = {http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136078},
}

@inproceedings{bb140074,
  author    = {Lan, H. and Wang, X. and Shen, H. and Liang, P. and Wei, X.},
  title     = {Couplformer: Rethinking Vision Transformer with Coupling Attention},
  booktitle = WACV23,
  year      = {2023},
  pages     = {6464-6473},
  bibsource = {http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136079},
}

% Venue is spelled out: the source held the quoted literal ICPR22 rather than a string macro,
% so the raw abbreviation would have been printed as the proceedings title.
@inproceedings{bb140075,
  author    = {Debnath, B. and Po, O. and Chowdhury, F.A. and Chakradhar, S.},
  title     = {Cosine Similarity based Few-Shot Video Classifier with Attention-based Aggregation},
  booktitle = {International Conference on Pattern Recognition ({ICPR} 2022)},
  year      = {2022},
  pages     = {1273-1279},
  bibsource = {http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136080},
}

@inproceedings{bb140076,
  author    = {Mari, C.R. and Gonzalez, D.V. and Bou Balust, E.},
  title     = {Multi-Scale Transformer-Based Feature Combination for Image Retrieval},
  booktitle = ICIP22,
  year      = {2022},
  pages     = {3166-3170},
  bibsource = {http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136081},
}

@inproceedings{bb140077,
  author    = {Furukawa, R. and Hotta, K.},
  title     = {Local Embedding for Axial Attention},
  booktitle = ICIP22,
  year      = {2022},
  pages     = {2586-2590},
  bibsource = {http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136082},
}

@inproceedings{bb140078,
  author    = {Kakogeorgiou, I. and Gidaris, S. and Psomas, B. and Avrithis, Y. and Bursuc, A. and Karantzalos, K. and Komodakis, N.},
  title     = {What to Hide from Your Students: Attention-Guided Masked Image Modeling},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {XXX:300-318},
  bibsource = {http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136083},
}

@inproceedings{bb140079,
  author    = {Ding, M.Y. and Xiao, B. and Codella, N. and Luo, P. and Wang, J.D. and Yuan, L.},
  title     = {DaViT: Dual Attention Vision Transformers},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {XXIV:74-92},
  bibsource = {http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136084},
}

@inproceedings{bb140080,
  author    = {Wang, P.C. and Wang, X. and Wang, F. and Lin, M. and Chang, S.N. and Li, H. and Jin, R.},
  title     = {KVT: k-NN Attention for Boosting Vision Transformers},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {XXIV:285-302},
  bibsource = {http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136085},
}

@inproceedings{bb140081,
  author    = {Rao, Y.M. and Zhao, W.L. and Zhou, J. and Lu, J.W.},
  title     = {AMixer: Adaptive Weight Mixing for Self-Attention Free Vision Transformers},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {XXI:50-67},
  bibsource = {http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136086},
}

@inproceedings{bb140082,
  author    = {Li, A. and Jiao, J. and Li, N. and Qi, W. and Xu, W. and Pang, M.},
  title     = {Conmw Transformer: A General Vision Transformer Backbone With Merged-Window Attention},
  booktitle = ICIP22,
  year      = {2022},
  pages     = {1551-1555},
  bibsource = {http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136087},
}

@inproceedings{bb140083,
  author    = {Zhang, Q.M. and Xu, Y.F. and Zhang, J. and Tao, D.C.},
  title     = {VSA: Learning Varied-Size Window Attention in Vision Transformers},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {XXV:466-483},
  bibsource = {http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136088},
}

% Conference entry for the self-attention weighted explanation method paper (Mallick et al., 2022).
% BOOKTITLE is the unquoted macro ICIP22; its string definition is not visible in this part of the file.
% The second author's two-word surname is kept exactly as recorded.
@inproceedings{bb140084,
        AUTHOR = "Mallick, R. and Benois Pineau, J. and Zemmari, A.",
        TITLE = "I Saw: A Self-Attention Weighted Method for Explanation of Visual
Transformers",
        BOOKTITLE = ICIP22,
        YEAR = "2022",
        PAGES = "3271--3275",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136089"}

% Conference entry for the cyclic shifting window attention tracking paper (Song et al., 2022).
% BOOKTITLE is the unquoted macro CVPR22; its string definition is not visible in this part of the file.
@inproceedings{bb140085,
        AUTHOR = "Song, Z. K. and Yu, J. Q. and Chen, Y. P. P. and Yang, W.",
        TITLE = "Transformer Tracking with Cyclic Shifting Window Attention",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "8781--8790",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136090"}

% Conference entry for the Lite Vision Transformer paper (Yang et al., 2022).
% BOOKTITLE is the unquoted macro CVPR22; its string definition is not visible in this part of the file.
@inproceedings{bb140086,
        AUTHOR = "Yang, C. L. and Wang, Y. L. and Zhang, J. M. and Zhang, H. and Wei, Z. J. and Lin, Z. and Yuille, A. L.",
        TITLE = "Lite Vision Transformer with Enhanced Self-Attention",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "11988--11998",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136091"}

% Conference entry for the deformable attention vision transformer paper (Xia et al., 2022).
% BOOKTITLE is the unquoted macro CVPR22; its string definition is not visible in this part of the file.
@inproceedings{bb140087,
        AUTHOR = "Xia, Z. F. and Pan, X. and Song, S. and Li, L. E. and Huang, G.",
        TITLE = "Vision Transformer with Deformable Attention",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "4784--4793",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136092"}

% Conference entry for the Paramixer paper (Yu et al., 2022).
% BOOKTITLE is the unquoted macro CVPR22; its string definition is not visible in this part of the file.
@inproceedings{bb140088,
        AUTHOR = "Yu, T. and Khalitov, R. and Cheng, L. and Yang, Z. R.",
        TITLE = "Paramixer: Parameterizing Mixing Links in Sparse Factors Works Better
than Dot-Product Self-Attention",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "681--690",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136093"}

% Conference entry for the masked-attention mask transformer segmentation paper (Cheng et al., 2022).
% BOOKTITLE is the unquoted macro CVPR22; its string definition is not visible in this part of the file.
@inproceedings{bb140089,
        AUTHOR = "Cheng, B. and Misra, I. and Schwing, A. G. and Kirillov, A. and Girdhar, R.",
        TITLE = "Masked-attention Mask Transformer for Universal Image Segmentation",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "1280--1289",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136094"}

% Conference entry for the sequential transformers attention model paper (Rangrej et al., 2022).
% BOOKTITLE is the unquoted macro CVPR22; its string definition is not visible in this part of the file.
@inproceedings{bb140090,
        AUTHOR = "Rangrej, S. B. and Srinidhi, C. L. and Clark, J. J.",
        TITLE = "Consistency driven Sequential Transformers Attention Model for
Partially Observable Scenes",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "2508--2517",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136095"}

% Conference entry for the CrossViT paper (Chen et al., 2021).
% BOOKTITLE is the unquoted macro ICCV21; its string definition is not visible in this part of the file.
@inproceedings{bb140091,
        AUTHOR = "Chen, C. F. R. and Fan, Q. F. and Panda, R.",
        TITLE = "{CrossViT}: Cross-Attention Multi-Scale Vision Transformer for Image
Classification",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "347--356",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136096"}

% Conference entry for the generic attention-model explainability paper (Chefer et al., 2021).
% BOOKTITLE is the unquoted macro ICCV21; its string definition is not visible in this part of the file.
@inproceedings{bb140092,
        AUTHOR = "Chefer, H. and Gur, S. and Wolf, L. B.",
        TITLE = "Generic Attention-model Explainability for Interpreting Bi-Modal and
Encoder-Decoder Transformers",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "387--396",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136097"}

% Conference entry for the co-scale conv-attentional image transformers paper (Xu et al., 2021).
% BOOKTITLE is the unquoted macro ICCV21; its string definition is not visible in this part of the file.
@inproceedings{bb140093,
        AUTHOR = "Xu, W. J. and Xu, Y. F. and Chang, T. and Tu, Z. W.",
        TITLE = "Co-Scale Conv-Attentional Image Transformers",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "9961--9970",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136098"}

% Conference entry for the transformer-based attention networks paper (Yang et al., 2021).
% BOOKTITLE is the unquoted macro ICCV21; its string definition is not visible in this part of the file.
@inproceedings{bb140094,
        AUTHOR = "Yang, G. L. and Tang, H. and Ding, M. L. and Sebe, N. and Ricci, E.",
        TITLE = "Transformer-Based Attention Networks for Continuous Pixel-Wise
Prediction",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "16249--16259",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136099"}

% Conference entry for the paper on rethinking self-attention in vision transformers (Kim et al., 2021).
% BOOKTITLE is the unquoted macro ECV21; its string definition is not visible in this part of the file.
% NOTE(review): ECV21 differs from the ECCV22/ICCV21 macros used by neighbouring entries; presumably a
% separate venue macro rather than a typo -- confirm it is defined.
@inproceedings{bb140095,
        AUTHOR = "Kim, K. and Wu, B. C. and Dai, X. L. and Zhang, P. Z. and Yan, Z. C. and Vajda, P. and Kim, S.",
        TITLE = "Rethinking the Self-Attention in Vision Transformers",
        BOOKTITLE = ECV21,
        YEAR = "2021",
        PAGES = "3065--3069",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136100"}

% Journal entry for the video transformers survey (Selva et al., 2023).
% JOURNAL is the unquoted macro PAMI; its string definition is not visible in this part of the file.
% MONTH uses the predefined three-letter month macro so each style can render it in its own form.
@article{bb140096,
        AUTHOR = "Selva, J. and Johansen, A. S. and Escalera, S. and Nasrollahi, K. and Moeslund, T. B. and Clapes, A.",
        TITLE = "Video Transformers: A Survey",
        JOURNAL = PAMI,
        VOLUME = "45",
        YEAR = "2023",
        NUMBER = "11",
        MONTH = nov,
        PAGES = "12922--12943",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vidt3.html#TT136101"}

% Journal entry for a fine-grained classification vision transformer paper (Zhang et al., 2024).
% JOURNAL is the unquoted macro PR; its string definition is not visible in this part of the file.
% PAGES holds a single number rather than a range; presumably an article number -- confirm.
@article{bb140097,
        AUTHOR = "Zhang, Z. C. and Chen, Z. D. and Wang, Y. X. and Luo, X. and Xu, X. S.",
        TITLE = "A vision transformer for fine-grained classification by reducing
noise and enhancing discriminative information",
        JOURNAL = PR,
        VOLUME = "145",
        YEAR = "2024",
        PAGES = "109979",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vidt3.html#TT136102"}

% Journal entry for the ViTA video depth estimation paper (Xian et al., 2024).
% JOURNAL is the unquoted macro MultMed; its string definition is not visible in this part of the file.
@article{bb140098,
        AUTHOR = "Xian, K. and Peng, J. and Cao, Z. G. and Zhang, J. M. and Lin, G. S.",
        TITLE = "{ViTA}: Video Transformer Adaptor for Robust Video Depth Estimation",
        JOURNAL = MultMed,
        VOLUME = "26",
        YEAR = "2024",
        PAGES = "3302--3316",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vidt3.html#TT136103"}

% Conference entry for the sparse video tubes paper (Piergiovanni et al., 2023).
% BOOKTITLE is the unquoted macro CVPR23; its string definition is not visible in this part of the file.
@inproceedings{bb140099,
        AUTHOR = "Piergiovanni, A. and Kuo, W. C. and Angelova, A.",
        TITLE = "Rethinking Video {ViTs}: Sparse Video Tubes for Joint Image and Video
Learning",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "2214--2224",
        BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vidt3.html#TT136104"}

Last update: Mar 25, 2024 at 16:07:51