@comment{
  Entries bb219400 through bb219499.
  - JOURNAL and BOOKTITLE values are bare macro names (CVPR23, PAMI, ...).
    No string definitions for them are visible in this span; presumably
    they are declared elsewhere in the file -- confirm before using this
    span on its own.
  - BIBSOURCE is a non-standard field holding the URL of the source record.
  - PAGES values of the form IV:262--278 carry a roman-numeral prefix;
    presumably this is the proceedings volume -- confirm.
  - Page ranges use the double hyphen, MONTH uses the standard
    three-letter macros, and initials are space-separated.
}

@inproceedings{bb219400,
  AUTHOR     = "Aodha, O. M. and Brostow, G. J. and Pollefeys, M.",
  TITLE      = "Segmenting video into classes of algorithm-suitability",
  BOOKTITLE  = CVPR10,
  YEAR       = "2010",
  PAGES      = "1054--1061",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214403"
}

@article{bb219401,
  AUTHOR     = "Suresha, M. and Kuppa, S. and Raghukumar, D. S.",
  TITLE      = "A study on deep learning spatiotemporal models and feature extraction techniques for video understanding",
  JOURNAL    = MultInfoRetr,
  VOLUME     = "9",
  YEAR       = "2020",
  NUMBER     = "2",
  MONTH      = jun,
  PAGES      = "81--101",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214404"
}

@article{bb219402,
  AUTHOR     = "Kavoosifar, M. R. and Apiletti, D. and Baralis, E. and Garza, P. and Huet, B.",
  TITLE      = "Effective video hyperlinking by means of enriched feature sets and monomodal query combinations",
  JOURNAL    = MultInfoRetr,
  VOLUME     = "9",
  YEAR       = "2020",
  NUMBER     = "3",
  MONTH      = sep,
  PAGES      = "215--227",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214405"
}

@article{bb219403,
  AUTHOR     = "Tang, P. J. and Tan, Y. L. and Li, J. Z. and Tan, B.",
  TITLE      = "Translating video into language by enhancing visual and language representations",
  JOURNAL    = JVCIR,
  VOLUME     = "72",
  YEAR       = "2020",
  PAGES      = "102875",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214406"
}

@article{bb219404,
  AUTHOR     = "Yu, J. and Jiang, X. and Qin, Z. and Zhang, W. and Hu, Y. and Wu, Q.",
  TITLE      = "Learning Dual Encoding Model for Adaptive Visual Understanding in Visual Dialogue",
  JOURNAL    = IP,
  VOLUME     = "30",
  YEAR       = "2021",
  PAGES      = "220--233",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214407"
}

@article{bb219405,
  AUTHOR     = "Duan, J. H. and Xu, H. and Lin, X. Z. and Zhu, S. C. and Du, Y. Z.",
  TITLE      = "Multi-semantic long-range dependencies capturing for efficient video representation learning",
  JOURNAL    = IVC,
  VOLUME     = "104",
  YEAR       = "2020",
  PAGES      = "103988",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214408"
}

@article{bb219406,
  AUTHOR     = "Tan, H. L. and Zhu, H. Y. and Lim, J. H. and Tan, C.",
  TITLE      = "A comprehensive survey of procedural video datasets",
  JOURNAL    = CVIU,
  VOLUME     = "202",
  YEAR       = "2021",
  PAGES      = "103107",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214409"
}

@article{bb219407,
  AUTHOR     = "Lin, J. and Gan, C. and Wang, K. and Han, S.",
  TITLE      = "{TSM}: Temporal Shift Module for Efficient and Scalable Video Understanding on Edge Devices",
  JOURNAL    = PAMI,
  VOLUME     = "44",
  YEAR       = "2022",
  NUMBER     = "5",
  MONTH      = may,
  PAGES      = "2760--2774",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214410"
}

@inproceedings{bb219408,
  AUTHOR     = "Lin, J. and Gan, C. and Han, S.",
  TITLE      = "{TSM}: Temporal Shift Module for Efficient Video Understanding",
  BOOKTITLE  = ICCV19,
  YEAR       = "2019",
  PAGES      = "7082--7092",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214411"
}

@article{bb219409,
  AUTHOR     = "Zhou, W. and Hou, Y. and Ouyang, K. W. and Zhou, S. L.",
  TITLE      = "Exploring complementary information of self-supervised pretext tasks for unsupervised video pre-training",
  JOURNAL    = IET-CV,
  VOLUME     = "16",
  YEAR       = "2022",
  NUMBER     = "3",
  PAGES      = "255--265",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214412"
}

@article{bb219410,
  AUTHOR     = "Li, Z. Q. and Wang, W. M. and Li, Z. Y. and Huang, Y. F. and Sato, Y.",
  TITLE      = "Spatio-Temporal Perturbations for Video Attribution",
  JOURNAL    = CirSysVideo,
  VOLUME     = "32",
  YEAR       = "2022",
  NUMBER     = "4",
  MONTH      = apr,
  PAGES      = "2043--2056",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214413"
}

@article{bb219411,
  AUTHOR     = "Tao, L. and Wang, X. T. and Yamasaki, T.",
  TITLE      = "An Improved Inter-Intra Contrastive Learning Framework on Self-Supervised Video Representation",
  JOURNAL    = CirSysVideo,
  VOLUME     = "32",
  YEAR       = "2022",
  NUMBER     = "8",
  MONTH      = aug,
  PAGES      = "5266--5280",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214414"
}

@article{bb219412,
  AUTHOR     = "Huang, L. and Zhang, C. and Zhang, H. Y.",
  TITLE      = "Self-Adaptive Training: Bridging Supervised and Self-Supervised Learning",
  JOURNAL    = PAMI,
  VOLUME     = "46",
  YEAR       = "2024",
  NUMBER     = "3",
  MONTH      = mar,
  PAGES      = "1362--1377",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214415"
}

@inproceedings{bb219413,
  AUTHOR     = "Huang, L. and You, S. and Zheng, M. K. and Wang, F. and Qian, C. and Yamasaki, T.",
  TITLE      = "Learning Where to Learn in Cross-View Self-Supervised Learning",
  BOOKTITLE  = CVPR22,
  YEAR       = "2022",
  PAGES      = "14431--14440",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214416"
}

@article{bb219414,
  AUTHOR     = "Hu, Y. and Yin, D. C. and Wang, Y. W. and Chen, Z. Z. and Luo, C.",
  TITLE      = "Decomposing style, content, and motion for videos",
  JOURNAL    = JVCIR,
  VOLUME     = "89",
  YEAR       = "2022",
  PAGES      = "103686",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214417"
}

@article{bb219415,
  AUTHOR     = "Hong, M. Y. and Zhang, X. F. and Li, G. R. and Huang, Q. M.",
  TITLE      = "Fine-Grained Feature Generation for Generalized Zero-Shot Video Classification",
  JOURNAL    = IP,
  VOLUME     = "32",
  YEAR       = "2023",
  PAGES      = "1599--1612",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214418"
}

@article{bb219416,
  AUTHOR     = "Jin, X. and Feng, R. and Sun, S. and Feng, R. and He, T. Y. and Chen, Z. B.",
  TITLE      = "Semantical video coding: Instill static-dynamic clues into structured bitstream for {AI} tasks",
  JOURNAL    = JVCIR,
  VOLUME     = "93",
  YEAR       = "2023",
  PAGES      = "103816",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214419"
}

@article{bb219417,
  AUTHOR     = "Schiappa, M. C. and Rawat, Y. S. and Shah, M.",
  TITLE      = "Self-Supervised Learning for Videos: A Survey",
  JOURNAL    = Surveys,
  VOLUME     = "55",
  YEAR       = "2023",
  NUMBER     = "13s",
  MONTH      = jul,
  REVIEWNOTE = "PAGES held the placeholder xx-yy in the original record; replace with the real page range or article number",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214420"
}

@article{bb219418,
  AUTHOR     = "Yang, X. M. and Xiong, S. and Wu, K. W. and Shan, D. F. and Xie, Z.",
  TITLE      = "Attentive spatial-temporal contrastive learning for self-supervised video representation",
  JOURNAL    = IVC,
  VOLUME     = "137",
  YEAR       = "2023",
  PAGES      = "104765",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214421"
}

@article{bb219419,
  AUTHOR     = "Miao, J. X. and Wei, Y. C. and Wang, X. H. and Yang, Y.",
  TITLE      = "Temporal Pixel-Level Semantic Understanding Through the {VSPW} Dataset",
  JOURNAL    = PAMI,
  VOLUME     = "45",
  YEAR       = "2023",
  NUMBER     = "9",
  MONTH      = sep,
  PAGES      = "11297--11308",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214422"
}

@article{bb219420,
  AUTHOR     = "Hu, D. and Wang, Z. and Nie, F. P. and Wang, R. and Li, X. L.",
  TITLE      = "Self-Supervised Learning for Heterogeneous Audiovisual Scene Analysis",
  JOURNAL    = MultMed,
  VOLUME     = "25",
  YEAR       = "2023",
  PAGES      = "3534--3545",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214423"
}

@article{bb219421,
  AUTHOR     = "Namitha, K. and Geetha, M. and Athi, N.",
  TITLE      = "An Improved Interaction Estimation and Optimization Method for Surveillance Video Synopsis",
  JOURNAL    = MultMedMag,
  VOLUME     = "30",
  YEAR       = "2023",
  NUMBER     = "3",
  MONTH      = jul,
  PAGES      = "25--36",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214424"
}

@article{bb219422,
  AUTHOR     = "Assefa, M. and Jiang, W. and Alemu, K. G. and Yilma, G. and Adhikari, D. and Ayalew, M. and Seid, A. M. and Erbad, A.",
  TITLE      = "Actor-Aware Self-Supervised Learning for Semi-Supervised Video Representation Learning",
  JOURNAL    = CirSysVideo,
  VOLUME     = "33",
  YEAR       = "2023",
  NUMBER     = "11",
  MONTH      = nov,
  PAGES      = "6679--6692",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214425"
}

@article{bb219423,
  AUTHOR     = "Hu, Y. F. and Gao, J. Y. and Xu, C. S.",
  TITLE      = "Learning Multi-Expert Distribution Calibration for Long-Tailed Video Classification",
  JOURNAL    = MultMed,
  VOLUME     = "26",
  YEAR       = "2024",
  PAGES      = "555--567",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214426"
}

@article{bb219424,
  AUTHOR     = "Chen, Z. and Wang, H. L. and Chen, C. W.",
  TITLE      = "Self-Supervised Video Representation Learning by Serial Restoration With Elastic Complexity",
  JOURNAL    = MultMed,
  VOLUME     = "26",
  YEAR       = "2024",
  PAGES      = "2235--2248",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214427"
}

@inproceedings{bb219425,
  AUTHOR     = "Tian, Y. and Lu, G. and Zhai, G. T. and Gao, Z. Y.",
  TITLE      = "Non-Semantics Suppressed Mask Learning for Unsupervised Video Semantic Compression",
  BOOKTITLE  = ICCV23,
  YEAR       = "2023",
  PAGES      = "13564--13576",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214428"
}

@inproceedings{bb219426,
  AUTHOR     = "Li, K. C. and Wang, Y. L. and He, Y. and Li, Y. Z. and Wang, Y. and Wang, L. M. and Qiao, Y.",
  TITLE      = "{UniFormerV2}: Unlocking the Potential of Image {ViTs} for Video Understanding",
  BOOKTITLE  = ICCV23,
  YEAR       = "2023",
  PAGES      = "1632--1643",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214429"
}

@inproceedings{bb219427,
  AUTHOR     = "Afham, M. and Shukla, S. N. and Poursaeed, O. and Zhang, P. and Shah, A. and Lim, S.",
  TITLE      = "Revisiting Kernel Temporal Segmentation as an Adaptive Tokenizer for Long-form Video Understanding",
  BOOKTITLE  = REDLCV23,
  YEAR       = "2023",
  PAGES      = "1181--1186",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214430"
}

@inproceedings{bb219428,
  AUTHOR     = "Strafforello, O. and Schutte, K. and van Gemert, J. C.",
  TITLE      = "Are current long-term video understanding datasets long-term?",
  BOOKTITLE  = CVEU23,
  YEAR       = "2023",
  PAGES      = "2959--2968",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214431"
}

@inproceedings{bb219429,
  AUTHOR     = "Zhao, Y. C. and Luo, C. and Tang, C. X. and Chen, D. D. and Codella, N. and Zha, Z. J.",
  TITLE      = "Streaming Video Model",
  BOOKTITLE  = CVPR23,
  YEAR       = "2023",
  PAGES      = "14602--14612",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214432"
}

@inproceedings{bb219430,
  AUTHOR     = "Maiya, S. R. and Girish, S. and Ehrlich, M. and Wang, H. Y. and Lee, K. S. and Poirson, P. and Wu, P. X. and Wang, C. and Shrivastava, A.",
  TITLE      = "{NIRVANA}: Neural Implicit Representations of Videos with Adaptive Networks and Autoregressive Patch-Wise Modeling",
  BOOKTITLE  = CVPR23,
  YEAR       = "2023",
  PAGES      = "14378--14387",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214433"
}

@inproceedings{bb219431,
  AUTHOR     = "Zhang, Y. T. and Bai, Y. and Liu, C. and Wang, H. and Li, S. and Fu, Y.",
  TITLE      = "Frame Flexible Network",
  BOOKTITLE  = CVPR23,
  YEAR       = "2023",
  PAGES      = "10504--10513",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214434"
}

@inproceedings{bb219432,
  AUTHOR     = "Dessalene, E. and Maynord, M. and Fermuller, C. and Aloimonos, Y. F.",
  TITLE      = "Therbligs in Action: Video Understanding through Motion Primitives",
  BOOKTITLE  = CVPR23,
  YEAR       = "2023",
  PAGES      = "10618--10626",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214435"
}

@inproceedings{bb219433,
  AUTHOR     = "Zhao, Y. and Misra, I. and Krahenbuhl, P. and Girdhar, R.",
  TITLE      = "Learning Video Representations from Large Language Models",
  BOOKTITLE  = CVPR23,
  YEAR       = "2023",
  PAGES      = "6586--6597",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214436"
}

@inproceedings{bb219434,
  AUTHOR     = "Wang, R. and Chen, D. D. and Wu, Z. X. and Chen, Y. P. and Dai, X. and Liu, M. C. and Yuan, L. and Jiang, Y. G.",
  TITLE      = "Masked Video Distillation: Rethinking Masked Feature Modeling for Self-supervised Video Representation Learning",
  BOOKTITLE  = CVPR23,
  YEAR       = "2023",
  PAGES      = "6312--6322",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214437"
}

@inproceedings{bb219435,
  AUTHOR     = "Yang, X. T. and Chu, F. J. and Feiszli, M. and Goyal, R. and Torresani, L. and Tran, D.",
  TITLE      = "Relational Space-Time Query in Long-Form Videos",
  BOOKTITLE  = CVPR23,
  YEAR       = "2023",
  PAGES      = "6398--6408",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214438"
}

@inproceedings{bb219436,
  AUTHOR     = "Foo, L. G. and Gong, J. and Fan, Z. P. and Liu, J.",
  TITLE      = "System-Status-Aware Adaptive Network for Online Streaming Video Understanding",
  BOOKTITLE  = CVPR23,
  YEAR       = "2023",
  PAGES      = "10514--10523",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214439"
}

@inproceedings{bb219437,
  AUTHOR     = "Dong, S. and Hu, H. Z. and Lian, D. Z. and Luo, W. X. and Qian, Y. C. and Gao, S. H.",
  TITLE      = "Weakly Supervised Video Representation Learning with Unaligned Text for Sequential Videos",
  BOOKTITLE  = CVPR23,
  YEAR       = "2023",
  PAGES      = "2437--2447",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214440"
}

@inproceedings{bb219438,
  AUTHOR     = "Wang, J. and Zhu, W. T. and Wang, P. and Yu, X. and Liu, L. and Omar, M. and Hamid, R.",
  TITLE      = "Selective Structured State-Spaces for Long-Form Video Understanding",
  BOOKTITLE  = CVPR23,
  YEAR       = "2023",
  PAGES      = "6387--6397",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214441"
}

@inproceedings{bb219439,
  AUTHOR     = "Zhang, H. and Liu, D. and Zheng, Q. and Su, B.",
  TITLE      = "Modeling Video as Stochastic Processes for Fine-Grained Video Representation Learning",
  BOOKTITLE  = CVPR23,
  YEAR       = "2023",
  PAGES      = "2225--2234",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214442"
}

@inproceedings{bb219440,
  AUTHOR     = "Kumar, Y. and Mishra, A.",
  TITLE      = "Few-Shot Referring Relationships in Videos",
  BOOKTITLE  = CVPR23,
  YEAR       = "2023",
  PAGES      = "2289--2298",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214443"
}

@inproceedings{bb219441,
  AUTHOR     = "Harzig, P. and Einfalt, M. and Lienhart, R.",
  TITLE      = "Synchronized Audio-Visual Frames with Fractional Positional Encoding for Transformers in Video-to-Text Translation",
  BOOKTITLE  = ICIP22,
  YEAR       = "2022",
  PAGES      = "2041--2045",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214444"
}

@inproceedings{bb219442,
  AUTHOR     = "Wiles, O. and Carreira, J. and Barr, I. and Zisserman, A. and Malinowski, M.",
  TITLE      = "Compressed Vision for Efficient Video Understanding",
  BOOKTITLE  = ACCV22,
  YEAR       = "2022",
  PAGES      = "VII:679--695",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214445"
}

@inproceedings{bb219443,
  AUTHOR     = "Rho, D. and Cho, J. and Ko, J. H. and Park, E.",
  TITLE      = "Neural Residual Flow Fields for Efficient Video Representations",
  BOOKTITLE  = ACCV22,
  YEAR       = "2022",
  PAGES      = "II:458--474",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214446"
}

@inproceedings{bb219444,
  AUTHOR     = "Tian, F. R. and Fan, J. W. and Yu, X. and Du, S. Y. and Song, M. and Zhao, Y.",
  TITLE      = "{TCVM}: Temporal Contrasting Video Montage Framework for Self-Supervised Video Representation Learning",
  BOOKTITLE  = ACCV22,
  YEAR       = "2022",
  PAGES      = "II:526--542",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214447"
}

@inproceedings{bb219445,
  AUTHOR     = "Huang, Z. M. and Jia, C. M. and Wang, S. S. and Ma, S. W.",
  TITLE      = "A Compressive Prior Guided Mask Predictive Coding Approach for Video Analysis",
  BOOKTITLE  = ACCV22,
  YEAR       = "2022",
  PAGES      = "IV:469--484",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214448"
}

@inproceedings{bb219446,
  AUTHOR     = "Li, L. and Zhuang, L. S. and Gao, S. H. and Wang, S.",
  TITLE      = "Havit: Hybrid-attention Based Vision Transformer for Video Classification",
  BOOKTITLE  = ACCV22,
  YEAR       = "2022",
  PAGES      = "IV:502--517",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214449"
}

@inproceedings{bb219447,
  AUTHOR     = "Zhang, H. L. and Pirsiavash, H. and Liu, X.",
  TITLE      = "{MASTAF}: A Model-Agnostic Spatio-Temporal Attention Fusion Network for Few-shot Video Classification",
  BOOKTITLE  = WACV23,
  YEAR       = "2023",
  PAGES      = "2507--2516",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214450"
}

@inproceedings{bb219448,
  AUTHOR     = "Senocak, A. and Kim, J. and Oh, T. H. and Li, D. Z. and Kweon, I. S.",
  TITLE      = "Event-Specific Audio-Visual Fusion Layers: A Simple and New Perspective on Video Understanding",
  BOOKTITLE  = WACV23,
  YEAR       = "2023",
  PAGES      = "2236--2246",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214451"
}

@inproceedings{bb219449,
  AUTHOR     = "Xia, B. Y. and Wu, W. H. and Wang, H. R. and Su, R. and He, D. L. and Yang, H. and Fan, X. R. and Ouyang, W. L.",
  TITLE      = "{NSNet}: Non-saliency Suppression Sampler for Efficient Video Recognition",
  BOOKTITLE  = ECCV22,
  YEAR       = "2022",
  PAGES      = "XXXIV:705--723",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214452"
}

@inproceedings{bb219450,
  AUTHOR     = "Xia, B. Y. and Wang, Z. H. and Wu, W. H. and Wang, H. R. and Han, J. G.",
  TITLE      = "Temporal Saliency Query Network for Efficient Video Recognition",
  BOOKTITLE  = ECCV22,
  YEAR       = "2022",
  PAGES      = "XXXIV:741--759",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214453"
}

@inproceedings{bb219451,
  AUTHOR     = "Islam, M. M. and Bertasius, G.",
  TITLE      = "Long Movie Clip Classification with State-Space Video Models",
  BOOKTITLE  = ECCV22,
  YEAR       = "2022",
  PAGES      = "XXXV:87--104",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214454"
}

@inproceedings{bb219452,
  AUTHOR     = "Habibian, A. and Yahia, H. B. and Abati, D. and Gavves, E. and Porikli, F. M.",
  TITLE      = "Delta Distillation for Efficient Video Processing",
  BOOKTITLE  = ECCV22,
  YEAR       = "2022",
  PAGES      = "XXXV:213--229",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214455"
}

@inproceedings{bb219453,
  AUTHOR     = "Li, Z. Z. and Wang, M. M. and Pi, H. J. and Xu, K. and Mei, J. B. and Liu, Y.",
  TITLE      = "{E-NeRV}: Expedite Neural Video Representation with Disentangled Spatial-Temporal Context",
  BOOKTITLE  = ECCV22,
  YEAR       = "2022",
  PAGES      = "XXXV:267--284",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214456"
}

@inproceedings{bb219454,
  AUTHOR     = "Kosman, E. and di Castro, D.",
  TITLE      = "{GraphVid}: It only Takes a Few Nodes to Understand a Video",
  BOOKTITLE  = ECCV22,
  YEAR       = "2022",
  PAGES      = "XXXV:195--212",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214457"
}

@inproceedings{bb219455,
  AUTHOR     = "Ju, C. and Han, T. and Zheng, K. and Zhang, Y. and Xie, W.",
  TITLE      = "Prompting Visual-Language Models for Efficient Video Understanding",
  BOOKTITLE  = ECCV22,
  YEAR       = "2022",
  PAGES      = "XXXV:105--124",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214458"
}

@inproceedings{bb219456,
  AUTHOR     = "Liang, S. X. and Shen, X. and Huang, J. Q. and Hua, X. S.",
  TITLE      = "Delving into Details: Synopsis-to-Detail Networks for Video Recognition",
  BOOKTITLE  = ECCV22,
  YEAR       = "2022",
  PAGES      = "IV:262--278",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214459"
}

@inproceedings{bb219457,
  AUTHOR     = "Ur Rehman, Y. A. and Gao, Y. and Shen, J. J. and de Gusmao, P. P. B. and Lane, N.",
  TITLE      = "Federated Self-supervised Learning for Video Understanding",
  BOOKTITLE  = ECCV22,
  YEAR       = "2022",
  PAGES      = "XXXI:506--522",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214460"
}

@inproceedings{bb219458,
  AUTHOR     = "Dadashzadeh, A. and Whone, A. and Mirmehdi, M.",
  TITLE      = "Auxiliary Learning for Self-Supervised Video Representation via Similarity-based Knowledge Distillation",
  BOOKTITLE  = L3D-IVU22,
  YEAR       = "2022",
  PAGES      = "4230--4239",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214461"
}

@inproceedings{bb219459,
  AUTHOR     = "Li, Y. and Vasconcelos, N. M.",
  TITLE      = "Improving Video Model Transfer with Dynamic Representation Learning",
  BOOKTITLE  = CVPR22,
  YEAR       = "2022",
  PAGES      = "19258--19269",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214462"
}

@inproceedings{bb219460,
  AUTHOR     = "Guo, S. and Xiong, Z. and Zhong, Y. J. and Wang, L. M. and Guo, X. B. and Han, B. and Huang, W. L.",
  TITLE      = "Cross-Architecture Self-supervised Video Representation Learning",
  BOOKTITLE  = CVPR22,
  YEAR       = "2022",
  PAGES      = "19248--19257",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214463"
}

@inproceedings{bb219461,
  AUTHOR     = "Xu, X. Y. and Li, Y. L. and Lu, C.",
  TITLE      = "Learning to Anticipate Future with Dynamic Context Removal",
  BOOKTITLE  = CVPR22,
  YEAR       = "2022",
  PAGES      = "12724--12734",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214464"
}

@inproceedings{bb219462,
  AUTHOR     = "Gadre, S. Y. and Ehsani, K. and Song, S. and Mottaghi, R.",
  TITLE      = "Continuous Scene Representations for Embodied {AI}",
  BOOKTITLE  = CVPR22,
  YEAR       = "2022",
  PAGES      = "14829--14839",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214465"
}

@inproceedings{bb219463,
  AUTHOR     = "Liang, C. and Wang, W. G. and Zhou, T. F. and Yang, Y.",
  TITLE      = "Visual Abductive Reasoning",
  BOOKTITLE  = CVPR22,
  YEAR       = "2022",
  PAGES      = "15544--15554",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214466"
}

@inproceedings{bb219464,
  AUTHOR     = "Kinfu, K. A. and Vidal, R.",
  TITLE      = "Analysis and Extensions of Adversarial Training for Video Classification",
  BOOKTITLE  = RoSe22,
  YEAR       = "2022",
  PAGES      = "3415--3424",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214467"
}

@inproceedings{bb219465,
  AUTHOR     = "Xiao, F. and Kundu, K. and Tighe, J. and Modolo, D.",
  TITLE      = "Hierarchical Self-supervised Representation Learning for Movie Understanding",
  BOOKTITLE  = CVPR22,
  YEAR       = "2022",
  PAGES      = "9717--9726",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214468"
}

@inproceedings{bb219466,
  AUTHOR     = "Li, L. L. and Zhou, T. F. and Wang, W. G. and Yang, L. and Li, J. W. and Yang, Y.",
  TITLE      = "Locality-Aware Inter-and Intra-Video Reconstruction for Self-Supervised Correspondence Learning",
  BOOKTITLE  = CVPR22,
  YEAR       = "2022",
  PAGES      = "8709--8720",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214469"
}

@inproceedings{bb219467,
  AUTHOR     = "Jiang, Y. F. and Gong, X. Y. and Wu, J. and Shi, H. and Yan, Z. C. and Wang, Z. Y.",
  TITLE      = "{Auto-X3D}: Ultra-Efficient Video Understanding via Finer-Grained Neural Architecture Search",
  BOOKTITLE  = WACV22,
  YEAR       = "2022",
  PAGES      = "2354--2363",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214470"
}

@inproceedings{bb219468,
  AUTHOR     = "Chen, N. L. and Chu, L. and Pan, H. and Lu, Y. and Wang, W. P.",
  TITLE      = "Self-Supervised Image Representation Learning with Geometric Set Consistency",
  BOOKTITLE  = CVPR22,
  YEAR       = "2022",
  PAGES      = "19270--19280",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214471"
}

@inproceedings{bb219469,
  AUTHOR     = "Lin, Y. Z. and Guo, X. and Lu, Y.",
  TITLE      = "Self-Supervised Video Representation Learning with Meta-Contrastive Network",
  BOOKTITLE  = ICCV21,
  YEAR       = "2021",
  PAGES      = "8219--8229",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214472"
}

@inproceedings{bb219470,
  AUTHOR     = "Guo, X. D. and Guo, X. and Lu, Y.",
  TITLE      = "{SSAN}: Separable Self-Attention Network for Video Representation Learning",
  BOOKTITLE  = CVPR21,
  YEAR       = "2021",
  PAGES      = "12613--12622",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214473"
}

@inproceedings{bb219471,
  AUTHOR     = "Yang, X. T. and Fan, H. Q. and Torresani, L. and Davis, L. S. and Wang, H.",
  TITLE      = "Beyond Short Clips: End-to-End Video-Level Learning with Collaborative Memories",
  BOOKTITLE  = CVPR21,
  YEAR       = "2021",
  PAGES      = "7563--7572",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214474"
}

@inproceedings{bb219472,
  AUTHOR     = "Wu, C. Y. and Krahenbuhl, P.",
  TITLE      = "Towards Long-Form Video Understanding",
  BOOKTITLE  = CVPR21,
  YEAR       = "2021",
  PAGES      = "1884--1894",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214475"
}

@inproceedings{bb219473,
  AUTHOR     = "Zhang, C. H. and Gupta, A. and Zisserman, A.",
  TITLE      = "Temporal Query Networks for Fine-grained Video Understanding",
  BOOKTITLE  = CVPR21,
  YEAR       = "2021",
  PAGES      = "4484--4494",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214476"
}

@inproceedings{bb219474,
  AUTHOR     = "Kangaspunta, J. and Piergiovanni, A. and Jonschkowski, R. and Ryoo, M. and Angelova, A.",
  TITLE      = "Adaptive Intermediate Representations for Video Understanding",
  BOOKTITLE  = MULA21,
  YEAR       = "2021",
  PAGES      = "1602--1612",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214477"
}

@inproceedings{bb219475,
  AUTHOR     = "Duan, H. D. and Zhao, Y. and Xiong, Y. J. and Liu, W. T. and Lin, D.",
  TITLE      = "Omni-sourced Webly-supervised Learning for Video Recognition",
  BOOKTITLE  = ECCV20,
  YEAR       = "2020",
  PAGES      = "XV:670--688",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214478"
}

@inproceedings{bb219476,
  AUTHOR     = "Jha, A. and Kumar, A. and Pande, S. and Banerjee, B. and Chaudhuri, S.",
  TITLE      = "{MT-UNET}: A Novel {U-Net} Based Multi-Task Architecture For Visual Scene Understanding",
  BOOKTITLE  = ICIP20,
  YEAR       = "2020",
  PAGES      = "2191--2195",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214479"
}

@inproceedings{bb219477,
  AUTHOR     = "Diba, A. and Fayyaz, M. and Sharma, V. and Paluri, M. and Gall, J. and Stiefelhagen, R. and Van Gool, L. J.",
  TITLE      = "Large Scale Holistic Video Understanding",
  BOOKTITLE  = ECCV20,
  YEAR       = "2020",
  PAGES      = "V:593--610",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214480"
}

@inproceedings{bb219478,
  AUTHOR     = "Voigtlaender, P. and Changpinyo, S. and Pont Tuset, J. and Soricut, R. and Ferrari, V.",
  TITLE      = "Connecting Vision and Language with Video Localized Narratives",
  BOOKTITLE  = CVPR23,
  YEAR       = "2023",
  PAGES      = "2461--2471",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214481"
}

@inproceedings{bb219479,
  AUTHOR     = "Pont Tuset, J. and Uijlings, J. and Changpinyo, S. and Soricut, R. and Ferrari, V.",
  TITLE      = "Connecting Vision and Language with Localized Narratives",
  BOOKTITLE  = ECCV20,
  YEAR       = "2020",
  PAGES      = "V:647--664",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214482"
}

@inproceedings{bb219480,
  AUTHOR     = "Hu, A. and Cotter, F. and Mohan, N. and Gurau, C. and Kendall, A.",
  TITLE      = "Probabilistic Future Prediction for Video Scene Understanding",
  BOOKTITLE  = ECCV20,
  YEAR       = "2020",
  PAGES      = "XVI:767--785",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214483"
}

@inproceedings{bb219481,
  AUTHOR     = "Mavroudi, E. and Haro, B. B. and Vidal, R.",
  TITLE      = "Representation Learning on Visual-Symbolic Graphs for Video Understanding",
  BOOKTITLE  = ECCV20,
  YEAR       = "2020",
  PAGES      = "XXIX:71--90",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214484"
}

@inproceedings{bb219482,
  AUTHOR     = "Sener, F. and Singhania, D. and Yao, A.",
  TITLE      = "Temporal Aggregate Representations for Long-range Video Understanding",
  BOOKTITLE  = ECCV20,
  YEAR       = "2020",
  PAGES      = "XVI:154--171",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214485"
}

@inproceedings{bb219483,
  AUTHOR     = "Tosi, F. and Aleotti, F. and Ramirez, P. Z. and Poggi, M. and Salti, S. and di Stefano, L. and Mattoccia, S.",
  TITLE      = "Distilled Semantics for Comprehensive Scene Understanding from Videos",
  BOOKTITLE  = CVPR20,
  YEAR       = "2020",
  PAGES      = "4653--4664",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214486"
}

@inproceedings{bb219484,
  AUTHOR     = "Piergiovanni, A. J. and Angelova, A. and Ryoo, M. S.",
  TITLE      = "Evolving Losses for Unsupervised Video Representation Learning",
  BOOKTITLE  = CVPR20,
  YEAR       = "2020",
  PAGES      = "130--139",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214487"
}

@inproceedings{bb219485,
  AUTHOR     = "Xiong, Y. and Huang, Q. and Guo, L. and Zhou, H. and Zhou, B. and Lin, D.",
  TITLE      = "A Graph-Based Framework to Bridge Movies and Synopses",
  BOOKTITLE  = ICCV19,
  YEAR       = "2019",
  PAGES      = "4591--4600",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214488"
}

@inproceedings{bb219486,
  AUTHOR     = "Kanehira, A. and Takemoto, K. and Inayoshi, S. and Harada, T.",
  TITLE      = "Multimodal Explanations by Predicting Counterfactuality in Videos",
  BOOKTITLE  = CVPR19,
  YEAR       = "2019",
  PAGES      = "8586--8594",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214489"
}

@inproceedings{bb219487,
  AUTHOR     = "Kanehira, A. and Harada, T.",
  TITLE      = "Learning to Explain With Complemental Examples",
  BOOKTITLE  = CVPR19,
  YEAR       = "2019",
  PAGES      = "8595--8603",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214490"
}

@inproceedings{bb219488,
  AUTHOR     = "Zhou, L. and Kalantidis, Y. and Chen, X. L. and Corso, J. J. and Rohrbach, M.",
  TITLE      = "Grounded Video Description",
  BOOKTITLE  = CVPR19,
  YEAR       = "2019",
  PAGES      = "6571--6580",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214491"
}

@inproceedings{bb219489,
  AUTHOR     = "Liu, X. Y. and Lee, J. Y. and Jin, H. L.",
  TITLE      = "Learning Video Representations From Correspondence Proposals",
  BOOKTITLE  = CVPR19,
  YEAR       = "2019",
  PAGES      = "4268--4276",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214492"
}

@inproceedings{bb219490,
  AUTHOR     = "Xiong, B. and Kalantidis, Y. and Ghadiyaram, D. and Grauman, K.",
  TITLE      = "Less Is More: Learning Highlight Detection From Video Duration",
  BOOKTITLE  = CVPR19,
  YEAR       = "2019",
  PAGES      = "1258--1267",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214493"
}

@inproceedings{bb219491,
  AUTHOR     = "Zhang, D. and Dai, X. and Wang, X. and Wang, Y. F. and Davis, L. S.",
  TITLE      = "{MAN}: Moment Alignment Network for Natural Language Moment Retrieval via Iterative Graph Adjustment",
  BOOKTITLE  = CVPR19,
  YEAR       = "2019",
  PAGES      = "1247--1257",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214494"
}

@inproceedings{bb219492,
  AUTHOR     = "Fan, L. and Huang, W. and Gan, C. and Ermon, S. and Gong, B. and Huang, J.",
  TITLE      = "End-to-End Learning of Motion Representation for Video Understanding",
  BOOKTITLE  = CVPR18,
  YEAR       = "2018",
  PAGES      = "6016--6025",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214495"
}

@inproceedings{bb219493,
  AUTHOR     = "Huang, D. and Ramanathan, V. and Mahajan, D. and Torresani, L. and Paluri, M. and Fei Fei, L. and Niebles, J. C.",
  TITLE      = "What Makes a Video a Video: Analyzing Temporal Information in Video Understanding Models and Datasets",
  BOOKTITLE  = CVPR18,
  YEAR       = "2018",
  PAGES      = "7366--7375",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214496"
}

@inproceedings{bb219494,
  AUTHOR     = "Mahdisoltani, F. and Memisevic, R. and Fleet, D. J.",
  TITLE      = "Hierarchical Video Understanding",
  BOOKTITLE  = WiCV-E18,
  YEAR       = "2018",
  PAGES      = "IV:659--663",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214497"
}

@inproceedings{bb219495,
  AUTHOR     = "Shin, K. S. and Jeon, J. and Lee, S. and Lim, B. and Jeong, M. S. and Nang, J.",
  TITLE      = "Approach for Video Classification with Multi-label on {YouTube-8M} Dataset",
  BOOKTITLE  = Large-Scale18,
  YEAR       = "2018",
  PAGES      = "IV:317--324",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214498"
}

@inproceedings{bb219496,
  AUTHOR     = "Skalic, M. and Austin, D.",
  TITLE      = "Building A Size Constrained Predictive Models for Video Classification",
  BOOKTITLE  = Large-Scale18,
  YEAR       = "2018",
  PAGES      = "IV:297--305",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214499"
}

@inproceedings{bb219497,
  AUTHOR     = "Garg, S.",
  TITLE      = "Learning Video Features for Multi-label Classification",
  BOOKTITLE  = Large-Scale18,
  YEAR       = "2018",
  PAGES      = "IV:325--337",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214500"
}

@inproceedings{bb219498,
  AUTHOR     = "Cho, C. and Antin, B. and Arora, S. and Ashrafi, S. and Duan, P. and Huynh, D. T. and James, L. and Nguyen, H. T. and Solgi, M. and Than, C. V.",
  TITLE      = "Large-Scale Video Classification with Feature Space Augmentation Coupled with Learned Label Relations and Ensembling",
  BOOKTITLE  = Large-Scale18,
  YEAR       = "2018",
  PAGES      = "IV:338--346",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214501"
}

@inproceedings{bb219499,
  AUTHOR     = "Lin, R. C. and Xiao, J. and Fan, J. P.",
  TITLE      = "{NeXtVLAD}: An Efficient Neural Network to Aggregate Frame-Level Features for Large-Scale Video Classification",
  BOOKTITLE  = Large-Scale18,
  YEAR       = "2018",
  PAGES      = "IV:206--218",
  BIBSOURCE  = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214502"
}