% Vision-transformer references bb140000 to bb140099; each entry records the
% page it was taken from in its BIBSOURCE field.
% Venue and journal values written without quotes (CVPR22, ICCV21, PAMI, ...)
% are string macros that are not defined in this part of the file; they must be
% declared elsewhere before these entries are processed.
% Conventions used below: initials are space-separated so that abbreviating
% styles keep every initial, page ranges use "--", and months use the
% standard three-letter macros.

@inproceedings{bb140000,
  AUTHOR    = "Zhang, C. Z. and Zhang, M. Y. and Zhang, S. H. and Jin, D. S. and Zhou, Q. and Cai, Z. A. and Zhao, H. and Liu, X. L. and Liu, Z. W.",
  TITLE     = "Delving Deep into the Generalization of Vision Transformers under Distribution Shifts",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "7267--7276",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136005"
}

@inproceedings{bb140001,
  AUTHOR    = "Hou, Z. and Yu, B. and Tao, D. C.",
  TITLE     = "BatchFormer: Learning to Explore Sample Relationships for Robust Representation Learning",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "7246--7256",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136006"
}

@inproceedings{bb140002,
  AUTHOR    = "Zamir, S. W. and Arora, A. and Khan, S. and Hayat, M. and Khan, F. S. and Yang, M. H.",
  TITLE     = "Restormer: Efficient Transformer for High-Resolution Image Restoration",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "5718--5729",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136007"
}

@inproceedings{bb140003,
  AUTHOR    = "Zhao, H. S. and Jiang, L. and Jia, J. Y. and Torr, P. H. S. and Koltun, V.",
  TITLE     = "Point Transformer",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "16239--16248",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136008"
}

@inproceedings{bb140004,
  AUTHOR    = "Lin, K. and Wang, L. J. and Liu, Z. C.",
  TITLE     = "Mesh Graphormer",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "12919--12928",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136009"
}

@inproceedings{bb140005,
  AUTHOR    = "Casey, E. and Perez, V. and Li, Z.",
  TITLE     = "The Animation Transformer: Visual Correspondence via Segment Matching",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "11303--11312",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136010"
}

@inproceedings{bb140006,
  AUTHOR    = "Reizenstein, J. and Shapovalov, R. and Henzler, P. and Sbordone, L. and Labatut, P. and Novotny, D.",
  TITLE     = "Common Objects in 3D: Large-Scale Learning and Evaluation of Real-life 3D Category Reconstruction",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "10881--10891",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136011"
}

@inproceedings{bb140007,
  AUTHOR    = "Feng, W. X. and Wang, Y. J. and Ma, L. H. and Yuan, Y. and Zhang, C.",
  TITLE     = "Temporal Knowledge Consistency for Unsupervised Visual Representation Learning",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "10150--10160",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136012"
}

@inproceedings{bb140008,
  AUTHOR    = "Wu, H. P. and Xiao, B. and Codella, N. and Liu, M. C. and Dai, X. Y. and Yuan, L. and Zhang, L.",
  TITLE     = "CvT: Introducing Convolutions to Vision Transformers",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "22--31",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136013"
}

@inproceedings{bb140009,
  AUTHOR    = "Touvron, H. and Cord, M. and Sablayrolles, A. and Synnaeve, G. and Jegou, H.",
  TITLE     = "Going deeper with Image Transformers",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "32--42",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136014"
}

@inproceedings{bb140010,
  AUTHOR    = "Zhao, J. W. and Yan, K. and Zhao, Y. F. and Guo, X. W. and Huang, F. Y. and Li, J.",
  TITLE     = "Transformer-based Dual Relation Graph for Multi-label Image Recognition",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "163--172",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136015"
}

@inproceedings{bb140011,
  AUTHOR    = "Pan, Z. Z. and Zhuang, B. and Liu, J. and He, H. Y. and Cai, J. F.",
  TITLE     = "Scalable Vision Transformers with Hierarchical Pooling",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "367--376",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136016"
}

@inproceedings{bb140012,
  AUTHOR    = "Yuan, L. and Chen, Y. P. and Wang, T. and Yu, W. H. and Shi, Y. J. and Jiang, Z. H. and Tay, F. E. H. and Feng, J. S. and Yan, S. C.",
  TITLE     = "Tokens-to-Token ViT: Training Vision Transformers from Scratch on ImageNet",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "538--547",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136017"
}

@inproceedings{bb140013,
  AUTHOR    = "Wu, B. and Xu, C. F. and Dai, X. L. and Wan, A. and Zhang, P. Z. and Yan, Z. C. and Tomizuka, M. and Gonzalez, J. and Keutzer, K. and Vajda, P.",
  TITLE     = "Visual Transformers: Where Do Transformers Really Belong in Vision Models?",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "579--589",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136018"
}

@inproceedings{bb140014,
  AUTHOR    = "Hu, R. H. and Singh, A.",
  TITLE     = "UniT: Multimodal Multitask Learning with a Unified Transformer",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "1419--1429",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136019"
}

@inproceedings{bb140015,
  AUTHOR    = "Qiu, Y. and Yamamoto, S. and Nakashima, K. and Suzuki, R. and Iwata, K. and Kataoka, H. and Satoh, Y.",
  TITLE     = "Describing and Localizing Multiple Changes with Transformers",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "1951--1960",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136020"
}

@inproceedings{bb140016,
  AUTHOR    = "Song, M. and Choi, J. and Han, B. H.",
  TITLE     = "Variable-Rate Deep Image Compression through Spatially-Adaptive Feature Transform",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "2360--2369",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136021"
}

@inproceedings{bb140017,
  AUTHOR    = "Shenga, H. and Cai, S. and Liu, Y. and Deng, B. and Huang, J. Q. and Hua, X. S. and Zhao, M. J.",
  TITLE     = "Improving 3D Object Detection with Channel-wise Transformer",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "2723--2732",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136022"
}

@inproceedings{bb140018,
  AUTHOR    = "Zhang, P. C. and Dai, X. and Yang, J. W. and Xiao, B. and Yuan, L. and Zhang, L. and Gao, J. F.",
  TITLE     = "Multi-Scale Vision Longformer: A New Vision Transformer for High-Resolution Image Encoding",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "2978--2988",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136023"
}

@inproceedings{bb140019,
  AUTHOR    = "Dong, Q. and Tu, Z. W. and Liao, H. and Zhang, Y. T. and Mahadevan, V. and Soatto, S.",
  TITLE     = "Visual Relationship Detection Using Part-and-Sum Transformers with Composite Queries",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "3530--3539",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136024"
}

@inproceedings{bb140020,
  AUTHOR    = "Fan, H. Q. and Xiong, B. and Mangalam, K. and Li, Y. and Yan, Z. C. and Malik, J. and Feichtenhofer, C.",
  TITLE     = "Multiscale Vision Transformers",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "6804--6815",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136025"
}

@inproceedings{bb140021,
  AUTHOR    = "Mahmood, K. and Mahmood, R. and van Dijk, M.",
  TITLE     = "On the Robustness of Vision Transformers to Adversarial Examples",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "7818--7827",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136026"
}

@inproceedings{bb140022,
  AUTHOR    = "Chen, X. L. and Xie, S. and He, K.",
  TITLE     = "An Empirical Study of Training Self-Supervised Vision Transformers",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "9620--9629",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136027"
}

@inproceedings{bb140023,
  AUTHOR    = "Caron, M. and Touvron, H. and Misra, I. and Jegou, H. and Mairal, J. and Bojanowski, P. and Joulin, A.",
  TITLE     = "Emerging Properties in Self-Supervised Vision Transformers",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "9630--9640",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136028"
}

@inproceedings{bb140024,
  AUTHOR    = "Yuan, Y. and Weng, X. and Ou, Y. and Kitani, K.",
  TITLE     = "AgentFormer: Agent-Aware Transformers for Socio-Temporal Multi-Agent Forecasting",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "9793--9803",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136029"
}

@inproceedings{bb140025,
  AUTHOR    = "Wu, K. and Peng, H. W. and Chen, M. H. and Fu, J. L. and Chao, H. Y.",
  TITLE     = "Rethinking and Improving Relative Position Encoding for Vision Transformer",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "10013--10021",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136030"
}

@inproceedings{bb140026,
  AUTHOR    = "Bhojanapalli, S. and Chakrabarti, A. and Glasner, D. and Li, D. and Unterthiner, T. and Veit, A.",
  TITLE     = "Understanding Robustness of Transformers for Image Classification",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "10211--10221",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136031"
}

@inproceedings{bb140027,
  AUTHOR    = "Yan, B. and Peng, H. and Fu, J. L. and Wang, D. and Lu, H. C.",
  TITLE     = "Learning Spatio-Temporal Transformer for Visual Tracking",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "10428--10437",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136032"
}

@inproceedings{bb140028,
  AUTHOR    = "Heo, B. and Yun, S. and Han, D. Y. and Chun, S. and Choe, J. and Oh, S. J.",
  TITLE     = "Rethinking Spatial Dimensions of Vision Transformers",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "11916--11925",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136033"
}

@inproceedings{bb140029,
  AUTHOR    = "Voskou, A. and Panousis, K. P. and Kosmopoulos, D. and Metaxas, D. N. and Chatzis, S.",
  TITLE     = "Stochastic Transformer Networks with Linear Competing Units: Application to end-to-end SL Translation",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "11926--11935",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136034"
}

@inproceedings{bb140030,
  AUTHOR    = "Ranftl, R. and Bochkovskiy, A. and Koltun, V.",
  TITLE     = "Vision Transformers for Dense Prediction",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "12159--12168",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136035"
}

@inproceedings{bb140031,
  AUTHOR    = "Chen, M. H. and Peng, H. W. and Fu, J. L. and Ling, H. B.",
  TITLE     = "AutoFormer: Searching Transformers for Visual Recognition",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "12250--12260",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136036"
}

@inproceedings{bb140032,
  AUTHOR    = "Yuan, K. and Guo, S. P. and Liu, Z. W. and Zhou, A. and Yu, F. W. and Wu, W.",
  TITLE     = "Incorporating Convolution Designs into Visual Transformers",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "559--568",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136037"
}

@inproceedings{bb140033,
  AUTHOR    = "Chen, Z. and Xie, L. X. and Niu, J. W. and Liu, X. F. and Wei, L. and Tian, Q.",
  TITLE     = "Visformer: The Vision-friendly Transformer",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "569--578",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136038"
}

@inproceedings{bb140034,
  AUTHOR    = "Wang, W. and Xie, E. and Li, X. and Fan, D. P. and Song, K. and Liang, D. and Lu, T. and Luo, P. and Shao, L.",
  TITLE     = "Pyramid Vision Transformer: A Versatile Backbone for Dense Prediction without Convolutions",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "548--558",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136039"
}

@inproceedings{bb140035,
  AUTHOR    = "Yao, Z. L. and Cao, Y. and Lin, Y. T. and Liu, Z. and Zhang, Z. and Hu, H.",
  TITLE     = "Leveraging Batch Normalization for Vision Transformers",
  BOOKTITLE = NeruArch21,
  YEAR      = "2021",
  PAGES     = "413--422",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136040"
}

@inproceedings{bb140036,
  AUTHOR    = "Zhang, Z. X. and Lu, X. Q. and Cao, G. J. and Yang, Y. T. and Jiao, L. C. and Liu, F.",
  TITLE     = "ViT-YOLO: Transformer-Based YOLO for Object Detection",
  BOOKTITLE = VisDrone21,
  YEAR      = "2021",
  PAGES     = "2799--2808",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136041"
}

@inproceedings{bb140037,
  AUTHOR    = "Graham, B. and El Nouby, A. and Touvron, H. and Stock, P. and Joulin, A. and Jegou, H. and Douze, M.",
  TITLE     = "LeViT: a Vision Transformer in ConvNet's Clothing for Faster Inference",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "12239--12249",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136042"
}

@inproceedings{bb140038,
  AUTHOR    = "Horvath, J. and Baireddy, S. and Hao, H. X. and Montserrat, D. M. and Delp, E. J.",
  TITLE     = "Manipulation Detection in Satellite Images Using Vision Transformer",
  BOOKTITLE = WMF21,
  YEAR      = "2021",
  PAGES     = "1032--1041",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136043"
}

@inproceedings{bb140039,
  AUTHOR    = "Horvath, J. and Montserrat, D. M. and Hao, H. X. and Delp, E. J.",
  TITLE     = "Manipulation Detection in Satellite Images Using Deep Belief Networks",
  BOOKTITLE = WMF20,
  YEAR      = "2020",
  PAGES     = "2832--2840",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136044"
}

@inproceedings{bb140040,
  AUTHOR    = "Beal, J. and Wu, H. Y. and Park, D. H. and Zhai, A. and Kislyuk, D.",
  TITLE     = "Billion-Scale Pretraining with Vision Transformers for Multi-Task Visual Representations",
  BOOKTITLE = WACV22,
  YEAR      = "2022",
  PAGES     = "1431--1440",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vit2.html#TT136045"
}

@article{bb140041,
  AUTHOR    = "Hu, H. Q. and Lu, X. F. and Zhang, X. P. and Zhang, T. X. and Sun, G. L.",
  TITLE     = "Inheritance Attention Matrix-Based Universal Adversarial Perturbations on Vision Transformers",
  JOURNAL   = SPLetters,
  VOLUME    = "28",
  YEAR      = "2021",
  PAGES     = "1923--1927",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136046"
}

@article{bb140042,
  AUTHOR    = "Xue, Z. X. and Tan, X. and Yu, X. and Liu, B. and Yu, A. and Zhang, P. Q.",
  TITLE     = "Deep Hierarchical Vision Transformer for Hyperspectral and LiDAR Data Classification",
  JOURNAL   = IP,
  VOLUME    = "31",
  YEAR      = "2022",
  PAGES     = "3095--3110",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136047"
}

@article{bb140043,
  AUTHOR    = "Heo, J. and Wang, Y. and Park, J.",
  TITLE     = "Occlusion-aware spatial attention transformer for occluded object recognition",
  JOURNAL   = PRL,
  VOLUME    = "159",
  YEAR      = "2022",
  PAGES     = "70--76",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136048"
}

@article{bb140044,
  AUTHOR    = "Yu, X. H. and Wang, J. and Zhao, Y. and Gao, Y. S.",
  TITLE     = "Mix-ViT: Mixing attentive vision transformer for ultra-fine-grained visual categorization",
  JOURNAL   = PR,
  VOLUME    = "135",
  YEAR      = "2023",
  PAGES     = "109131",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136049"
}

@article{bb140045,
  AUTHOR    = "Lin, X. and Sun, S. Z. and Huang, W. and Sheng, B. and Li, P. and Feng, D. D.",
  TITLE     = "EAPT: Efficient Attention Pyramid Transformer for Image Processing",
  JOURNAL   = MultMed,
  VOLUME    = "25",
  YEAR      = "2023",
  PAGES     = "50--61",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136050"
}

@article{bb140046,
  AUTHOR    = "Yu, L. and Xiang, W. and Fang, J. and Chen, Y. P. P. and Chi, L.",
  TITLE     = "eX-ViT: A Novel explainable vision transformer for weakly supervised semantic segmentation",
  JOURNAL   = PR,
  VOLUME    = "142",
  YEAR      = "2023",
  PAGES     = "109666",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136051"
}

@article{bb140047,
  AUTHOR    = "Wu, G. and Zheng, W. S. and Lu, Y. T. and Tian, Q.",
  TITLE     = "PSLT: A Light-Weight Vision Transformer With Ladder Self-Attention and Progressive Shift",
  JOURNAL   = PAMI,
  VOLUME    = "45",
  YEAR      = "2023",
  NUMBER    = "9",
  MONTH     = sep,
  PAGES     = "11120--11135",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136052"
}

@article{bb140048,
  AUTHOR    = "Li, K. C. and Wang, Y. and Zhang, J. H. and Gao, P. and Song, G. and Liu, Y. and Li, H. S. and Qiao, Y.",
  TITLE     = "UniFormer: Unifying Convolution and Self-Attention for Visual Recognition",
  JOURNAL   = PAMI,
  VOLUME    = "45",
  YEAR      = "2023",
  NUMBER    = "10",
  MONTH     = oct,
  PAGES     = "12581--12600",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136053"
}

@article{bb140049,
  AUTHOR    = "Li, H. L. and Xue, M. Q. and Song, J. and Zhang, H. F. and Huang, W. Q. and Liang, L. and Song, M. L.",
  TITLE     = "Constituent Attention for Vision Transformers",
  JOURNAL   = CVIU,
  VOLUME    = "237",
  YEAR      = "2023",
  PAGES     = "103838",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136054"
}

@article{bb140050,
  AUTHOR    = "Qin, R. and Wang, C. Z. and Wu, Y. M. and Du, H. and Lv, M. Y.",
  TITLE     = "A U-Shaped Convolution-Aided Transformer with Double Attention for Hyperspectral Image Classification",
  JOURNAL   = RS,
  VOLUME    = "16",
  YEAR      = "2024",
  NUMBER    = "2",
  PAGES     = "288",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136055"
}

@inproceedings{bb140051,
  AUTHOR    = "Cai, H. and Li, J. and Hu, M. and Gan, C. and Han, S.",
  TITLE     = "EfficientViT: Lightweight Multi-Scale Attention for High-Resolution Dense Prediction",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "17256--17267",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136056"
}

@inproceedings{bb140052,
  AUTHOR    = "Ryu, J. and Han, D. Y. and Lim, J. W.",
  TITLE     = "Gramian Attention Heads are Strong yet Efficient Vision Learners",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "5818--5828",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136057"
}

@inproceedings{bb140053,
  AUTHOR    = "Xu, R. H. and Zhang, H. and Hu, W. Z. and Zhang, S. L. and Wang, X. Y.",
  TITLE     = "ParCNetV2: Oversized Kernel with Enhanced Attention*",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "5729--5739",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136058"
}

@inproceedings{bb140054,
  AUTHOR    = "Zhao, B. Y. and Yu, Z. and Lan, S. Y. and Cheng, Y. and Anandkumar, A. and Lao, Y. J. and Alvarez, J. M.",
  TITLE     = "Fully Attentional Networks with Self-emerging Token Labeling",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "5562--5572",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136059"
}

@inproceedings{bb140055,
  AUTHOR    = "Guo, Y. and Stutz, D. and Schiele, B.",
  TITLE     = "Robustifying Token Attention for Vision Transformers",
  BOOKTITLE = ICCV23,
  YEAR      = "2023",
  PAGES     = "17511--17522",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136060"
}

@inproceedings{bb140056,
  AUTHOR    = "Zhao, Y. and Tang, H. D. and Jiang, Y. Y. and A, Y. and Wu, Q. and Wang, J.",
  TITLE     = "Parameter-Efficient Vision Transformer with Linear Attention",
  BOOKTITLE = ICIP23,
  YEAR      = "2023",
  PAGES     = "1275--1279",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136061"
}

@inproceedings{bb140057,
  AUTHOR    = "Shi, L. and Huang, H. D. and Song, B. and Tan, M. and Zhao, W. Z. and Xia, T. and Ren, P. J.",
  TITLE     = "TAQ: Top-K Attention-Aware Quantization for Vision Transformers",
  BOOKTITLE = ICIP23,
  YEAR      = "2023",
  PAGES     = "1750--1754",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136062"
}

@inproceedings{bb140058,
  AUTHOR    = "Baili, N. and Frigui, H.",
  TITLE     = "ADA-VIT: Attention-Guided Data Augmentation for Vision Transformers",
  BOOKTITLE = ICIP23,
  YEAR      = "2023",
  PAGES     = "385--389",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136063"
}

@inproceedings{bb140059,
  AUTHOR    = "Ding, M. Y. and Shen, Y. and Fan, L. J. and Chen, Z. F. and Chen, Z. and Luo, P. and Tenenbaum, J. and Gan, C.",
  TITLE     = "Visual Dependency Transformers: Dependency Tree Emerges from Reversed Attention",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "14528--14539",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136064"
}

@inproceedings{bb140060,
  AUTHOR    = "Song, J. C. and Mou, C. and Wang, S. Q. and Ma, S. W. and Zhang, J.",
  TITLE     = "Optimization-Inspired Cross-Attention Transformer for Compressive Sensing",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "6174--6184",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136065"
}

@inproceedings{bb140061,
  AUTHOR    = "Hassani, A. and Walton, S. and Li, J. C. and Li, S. and Shi, H.",
  TITLE     = "Neighborhood Attention Transformer",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "6185--6194",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136066"
}

@inproceedings{bb140062,
  AUTHOR    = "Liu, Z. J. and Yang, X. Y. and Tang, H. T. and Yang, S. and Han, S.",
  TITLE     = "FlatFormer: Flattened Window Attention for Efficient Point Cloud Transformer",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "1200--1211",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136067"
}

@inproceedings{bb140063,
  AUTHOR    = "Pan, X. and Ye, T. Z. and Xia, Z. F. and Song, S. and Huang, G.",
  TITLE     = "Slide-Transformer: Hierarchical Vision Transformer with Local Self-Attention",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "2082--2091",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136068"
}

@inproceedings{bb140064,
  AUTHOR    = "Zhu, L. and Wang, X. J. and Ke, Z. H. and Zhang, W. and Lau, R.",
  TITLE     = "BiFormer: Vision Transformer with Bi-Level Routing Attention",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "10323--10333",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136069"
}

@inproceedings{bb140065,
  AUTHOR    = "Long, S. and Zhao, Z. and Pi, J. and Wang, S. S. and Wang, J. D.",
  TITLE     = "Beyond Attentive Tokens: Incorporating Token Importance and Diversity for Efficient Vision Transformers",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "10334--10343",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136070"
}

@inproceedings{bb140066,
  AUTHOR    = "Liu, X. Y. and Peng, H. and Zheng, N. X. and Yang, Y. Q. and Hu, H. and Yuan, Y. X.",
  TITLE     = "EfficientViT: Memory Efficient Vision Transformer with Cascaded Group Attention",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "14420--14430",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136071"
}

@inproceedings{bb140067,
  AUTHOR    = "You, H. R. and Xiong, Y. and Dai, X. L. and Wu, B. and Zhang, P. Z. and Fan, H. Q. and Vajda, P. and Lin, Y. Y. C.",
  TITLE     = "Castling-ViT: Compressing Self-Attention via Switching Towards Linear-Angular Attention at Vision Transformer Inference",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "14431--14442",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136072"
}

@inproceedings{bb140068,
  AUTHOR    = "Grainger, R. and Paniagua, T. and Song, X. and Cuntoor, N. and Lee, M. W. and Wu, T. F.",
  TITLE     = "PaCa-ViT: Learning Patch-to-Cluster Attention in Vision Transformers",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "18568--18578",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136073"
}

@inproceedings{bb140069,
  AUTHOR    = "Wei, C. and Duke, B. and Jiang, R. and Aarabi, P. and Taylor, G. W. and Shkurti, F.",
  TITLE     = "Sparsifiner: Learning Sparse Instance-Dependent Attention for Efficient Vision Transformers",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "22680--22689",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136074"
}

@inproceedings{bb140070,
  AUTHOR    = "Bhattacharyya, M. and Chattopadhyay, S. and Nag, S.",
  TITLE     = "DeCAtt: Efficient Vision Transformers with Decorrelated Attention Heads",
  BOOKTITLE = ECV23,
  YEAR      = "2023",
  PAGES     = "4695--4699",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136075"
}

@inproceedings{bb140071,
  AUTHOR    = "Tatsunami, Y. and Taki, M.",
  TITLE     = "RaftMLP: How Much Can Be Done Without Attention and with Less Spatial Locality?",
  BOOKTITLE = ACCV22,
  YEAR      = "2022",
  PAGES     = "VI:459--475",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136076"
}

@inproceedings{bb140072,
  AUTHOR    = "Bolya, D. and Fu, C. Y. and Dai, X. L. and Zhang, P. Z. and Hoffman, J.",
  TITLE     = "Hydra Attention: Efficient Attention with Many Heads",
  BOOKTITLE = CADK22,
  YEAR      = "2022",
  PAGES     = "35--49",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136077"
}

@inproceedings{bb140073,
  AUTHOR    = "Chen, X. Y. and Hu, Q. and Li, K. and Zhong, C. and Wang, G. H.",
  TITLE     = "Accumulated Trivial Attention Matters in Vision Transformers on Small Datasets",
  BOOKTITLE = WACV23,
  YEAR      = "2023",
  PAGES     = "3973--3981",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136078"
}

@inproceedings{bb140074,
  AUTHOR    = "Lan, H. and Wang, X. and Shen, H. and Liang, P. and Wei, X.",
  TITLE     = "Couplformer: Rethinking Vision Transformer with Coupling Attention",
  BOOKTITLE = WACV23,
  YEAR      = "2023",
  PAGES     = "6464--6473",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136079"
}

% NOTE(review): BOOKTITLE in the next entry is a quoted literal, whereas every
% other entry here uses a bare macro name. Left unchanged because the macro
% definitions are not visible from this part of the file; confirm whether an
% ICPR22 macro exists and, if so, drop the quotes.
@inproceedings{bb140075,
  AUTHOR    = "Debnath, B. and Po, O. and Chowdhury, F. A. and Chakradhar, S.",
  TITLE     = "Cosine Similarity based Few-Shot Video Classifier with Attention-based Aggregation",
  BOOKTITLE = "ICPR22",
  YEAR      = "2022",
  PAGES     = "1273--1279",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136080"
}

@inproceedings{bb140076,
  AUTHOR    = "Mari, C. R. and Gonzalez, D. V. and Bou Balust, E.",
  TITLE     = "Multi-Scale Transformer-Based Feature Combination for Image Retrieval",
  BOOKTITLE = ICIP22,
  YEAR      = "2022",
  PAGES     = "3166--3170",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136081"
}

@inproceedings{bb140077,
  AUTHOR    = "Furukawa, R. and Hotta, K.",
  TITLE     = "Local Embedding for Axial Attention",
  BOOKTITLE = ICIP22,
  YEAR      = "2022",
  PAGES     = "2586--2590",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136082"
}

@inproceedings{bb140078,
  AUTHOR    = "Kakogeorgiou, I. and Gidaris, S. and Psomas, B. and Avrithis, Y. and Bursuc, A. and Karantzalos, K. and Komodakis, N.",
  TITLE     = "What to Hide from Your Students: Attention-Guided Masked Image Modeling",
  BOOKTITLE = ECCV22,
  YEAR      = "2022",
  PAGES     = "XXX:300--318",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136083"
}

@inproceedings{bb140079,
  AUTHOR    = "Ding, M. Y. and Xiao, B. and Codella, N. and Luo, P. and Wang, J. D. and Yuan, L.",
  TITLE     = "DaViT: Dual Attention Vision Transformers",
  BOOKTITLE = ECCV22,
  YEAR      = "2022",
  PAGES     = "XXIV:74--92",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136084"
}

@inproceedings{bb140080,
  AUTHOR    = "Wang, P. C. and Wang, X. and Wang, F. and Lin, M. and Chang, S. N. and Li, H. and Jin, R.",
  TITLE     = "KVT: k-NN Attention for Boosting Vision Transformers",
  BOOKTITLE = ECCV22,
  YEAR      = "2022",
  PAGES     = "XXIV:285--302",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136085"
}

@inproceedings{bb140081,
  AUTHOR    = "Rao, Y. M. and Zhao, W. L. and Zhou, J. and Lu, J. W.",
  TITLE     = "AMixer: Adaptive Weight Mixing for Self-Attention Free Vision Transformers",
  BOOKTITLE = ECCV22,
  YEAR      = "2022",
  PAGES     = "XXI:50--67",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136086"
}

@inproceedings{bb140082,
  AUTHOR    = "Li, A. and Jiao, J. and Li, N. and Qi, W. and Xu, W. and Pang, M.",
  TITLE     = "Conmw Transformer: A General Vision Transformer Backbone With Merged-Window Attention",
  BOOKTITLE = ICIP22,
  YEAR      = "2022",
  PAGES     = "1551--1555",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136087"
}

@inproceedings{bb140083,
  AUTHOR    = "Zhang, Q. M. and Xu, Y. F. and Zhang, J. and Tao, D. C.",
  TITLE     = "VSA: Learning Varied-Size Window Attention in Vision Transformers",
  BOOKTITLE = ECCV22,
  YEAR      = "2022",
  PAGES     = "XXV:466--483",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136088"
}

@inproceedings{bb140084,
  AUTHOR    = "Mallick, R. and Benois Pineau, J. and Zemmari, A.",
  TITLE     = "I Saw: A Self-Attention Weighted Method for Explanation of Visual Transformers",
  BOOKTITLE = ICIP22,
  YEAR      = "2022",
  PAGES     = "3271--3275",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136089"
}

@inproceedings{bb140085,
  AUTHOR    = "Song, Z. K. and Yu, J. Q. and Chen, Y. P. P. and Yang, W.",
  TITLE     = "Transformer Tracking with Cyclic Shifting Window Attention",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "8781--8790",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136090"
}

@inproceedings{bb140086,
  AUTHOR    = "Yang, C. L. and Wang, Y. L. and Zhang, J. M. and Zhang, H. and Wei, Z. J. and Lin, Z. and Yuille, A. L.",
  TITLE     = "Lite Vision Transformer with Enhanced Self-Attention",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "11988--11998",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136091"
}

@inproceedings{bb140087,
  AUTHOR    = "Xia, Z. F. and Pan, X. and Song, S. and Li, L. E. and Huang, G.",
  TITLE     = "Vision Transformer with Deformable Attention",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "4784--4793",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136092"
}

@inproceedings{bb140088,
  AUTHOR    = "Yu, T. and Khalitov, R. and Cheng, L. and Yang, Z. R.",
  TITLE     = "Paramixer: Parameterizing Mixing Links in Sparse Factors Works Better than Dot-Product Self-Attention",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "681--690",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136093"
}

@inproceedings{bb140089,
  AUTHOR    = "Cheng, B. and Misra, I. and Schwing, A. G. and Kirillov, A. and Girdhar, R.",
  TITLE     = "Masked-attention Mask Transformer for Universal Image Segmentation",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "1280--1289",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136094"
}

@inproceedings{bb140090,
  AUTHOR    = "Rangrej, S. B. and Srinidhi, C. L. and Clark, J. J.",
  TITLE     = "Consistency driven Sequential Transformers Attention Model for Partially Observable Scenes",
  BOOKTITLE = CVPR22,
  YEAR      = "2022",
  PAGES     = "2508--2517",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136095"
}

@inproceedings{bb140091,
  AUTHOR    = "Chen, C. F. R. and Fan, Q. F. and Panda, R.",
  TITLE     = "CrossViT: Cross-Attention Multi-Scale Vision Transformer for Image Classification",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "347--356",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136096"
}

@inproceedings{bb140092,
  AUTHOR    = "Chefer, H. and Gur, S. and Wolf, L. B.",
  TITLE     = "Generic Attention-model Explainability for Interpreting Bi-Modal and Encoder-Decoder Transformers",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "387--396",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136097"
}

@inproceedings{bb140093,
  AUTHOR    = "Xu, W. J. and Xu, Y. F. and Chang, T. and Tu, Z. W.",
  TITLE     = "Co-Scale Conv-Attentional Image Transformers",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "9961--9970",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136098"
}

@inproceedings{bb140094,
  AUTHOR    = "Yang, G. L. and Tang, H. and Ding, M. L. and Sebe, N. and Ricci, E.",
  TITLE     = "Transformer-Based Attention Networks for Continuous Pixel-Wise Prediction",
  BOOKTITLE = ICCV21,
  YEAR      = "2021",
  PAGES     = "16249--16259",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136099"
}

@inproceedings{bb140095,
  AUTHOR    = "Kim, K. and Wu, B. C. and Dai, X. L. and Zhang, P. Z. and Yan, Z. C. and Vajda, P. and Kim, S.",
  TITLE     = "Rethinking the Self-Attention in Vision Transformers",
  BOOKTITLE = ECV21,
  YEAR      = "2021",
  PAGES     = "3065--3069",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651atvit4.html#TT136100"
}

@article{bb140096,
  AUTHOR    = "Selva, J. and Johansen, A. S. and Escalera, S. and Nasrollahi, K. and Moeslund, T. B. and Clapes, A.",
  TITLE     = "Video Transformers: A Survey",
  JOURNAL   = PAMI,
  VOLUME    = "45",
  YEAR      = "2023",
  NUMBER    = "11",
  MONTH     = nov,
  PAGES     = "12922--12943",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vidt3.html#TT136101"
}

@article{bb140097,
  AUTHOR    = "Zhang, Z. C. and Chen, Z. D. and Wang, Y. X. and Luo, X. and Xu, X. S.",
  TITLE     = "A vision transformer for fine-grained classification by reducing noise and enhancing discriminative information",
  JOURNAL   = PR,
  VOLUME    = "145",
  YEAR      = "2024",
  PAGES     = "109979",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vidt3.html#TT136102"
}

@article{bb140098,
  AUTHOR    = "Xian, K. and Peng, J. and Cao, Z. G. and Zhang, J. M. and Lin, G. S.",
  TITLE     = "ViTA: Video Transformer Adaptor for Robust Video Depth Estimation",
  JOURNAL   = MultMed,
  VOLUME    = "26",
  YEAR      = "2024",
  PAGES     = "3302--3316",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vidt3.html#TT136103"
}

@inproceedings{bb140099,
  AUTHOR    = "Piergiovanni, A. and Kuo, W. C. and Angelova, A.",
  TITLE     = "Rethinking Video ViTs: Sparse Video Tubes for Joint Image and Video Learning",
  BOOKTITLE = CVPR23,
  YEAR      = "2023",
  PAGES     = "2214--2224",
  BIBSOURCE = "http://www.visionbib.com/bibliography/pattern651vidt3.html#TT136104"
}