@inproceedings{bb219400,
        AUTHOR = "Mac Aodha, O. and Brostow, G.J. and Pollefeys, M.",
        TITLE = "Segmenting video into classes of algorithm-suitability",
        BOOKTITLE = CVPR10,
        YEAR = "2010",
        PAGES = "1054--1061",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214403"}

@article{bb219401,
        AUTHOR = "Suresha, M. and Kuppa, S. and Raghukumar, D.S.",
        TITLE = "A study on deep learning spatiotemporal models and feature extraction
techniques for video understanding",
        JOURNAL = MultInfoRetr,
        VOLUME = "9",
        YEAR = "2020",
        NUMBER = "2",
        MONTH = jun,
        PAGES = "81--101",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214404"}

@article{bb219402,
        AUTHOR = "Kavoosifar, M.R. and Apiletti, D. and Baralis, E. and Garza, P. and Huet, B.",
        TITLE = "Effective video hyperlinking by means of enriched feature sets and
monomodal query combinations",
        JOURNAL = MultInfoRetr,
        VOLUME = "9",
        YEAR = "2020",
        NUMBER = "3",
        MONTH = sep,
        PAGES = "215--227",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214405"}

@article{bb219403,
  author    = {Tang, P.J. and Tan, Y.L. and Li, J.Z. and Tan, B.},
  title     = {Translating video into language by enhancing visual and language representations},
  journal   = JVCIR,
  volume    = {72},
  year      = {2020},
  pages     = {102875},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT214406}
}

@article{bb219404,
        AUTHOR = "Yu, J. and Jiang, X. and Qin, Z. and Zhang, W. and Hu, Y. and Wu, Q.",
        TITLE = "Learning Dual Encoding Model for Adaptive Visual Understanding in
Visual Dialogue",
        JOURNAL = IP,
        VOLUME = "30",
        YEAR = "2021",
        PAGES = "220--233",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214407"}

@article{bb219405,
  author    = {Duan, J.H. and Xu, H. and Lin, X.Z. and Zhu, S.C. and Du, Y.Z.},
  title     = {Multi-semantic long-range dependencies capturing for efficient video representation learning},
  journal   = IVC,
  volume    = {104},
  year      = {2020},
  pages     = {103988},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT214408}
}

@article{bb219406,
  author    = {Tan, H.L. and Zhu, H.Y. and Lim, J.H. and Tan, C.},
  title     = {A comprehensive survey of procedural video datasets},
  journal   = CVIU,
  volume    = {202},
  year      = {2021},
  pages     = {103107},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT214409}
}

@article{bb219407,
        AUTHOR = "Lin, J. and Gan, C. and Wang, K. and Han, S.",
        TITLE = "{TSM}: Temporal Shift Module for Efficient and Scalable Video
Understanding on Edge Devices",
        JOURNAL = PAMI,
        VOLUME = "44",
        YEAR = "2022",
        NUMBER = "5",
        MONTH = may,
        PAGES = "2760--2774",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214410"}

@inproceedings{bb219408,
        AUTHOR = "Lin, J. and Gan, C. and Han, S.",
        TITLE = "{TSM}: Temporal Shift Module for Efficient Video Understanding",
        BOOKTITLE = ICCV19,
        YEAR = "2019",
        PAGES = "7082--7092",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214411"}

@article{bb219409,
        AUTHOR = "Zhou, W. and Hou, Y. and Ouyang, K.W. and Zhou, S.L.",
        TITLE = "Exploring complementary information of self-supervised pretext tasks
for unsupervised video pre-training",
        JOURNAL = IET-CV,
        VOLUME = "16",
        YEAR = "2022",
        NUMBER = "3",
        PAGES = "255--265",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214412"}

@article{bb219410,
        AUTHOR = "Li, Z.Q. and Wang, W.M. and Li, Z.Y. and Huang, Y.F. and Sato, Y.",
        TITLE = "Spatio-Temporal Perturbations for Video Attribution",
        JOURNAL = CirSysVideo,
        VOLUME = "32",
        YEAR = "2022",
        NUMBER = "4",
        MONTH = apr,
        PAGES = "2043--2056",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214413"}

@article{bb219411,
        AUTHOR = "Tao, L. and Wang, X.T. and Yamasaki, T.",
        TITLE = "An Improved Inter-Intra Contrastive Learning Framework on
Self-Supervised Video Representation",
        JOURNAL = CirSysVideo,
        VOLUME = "32",
        YEAR = "2022",
        NUMBER = "8",
        MONTH = aug,
        PAGES = "5266--5280",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214414"}

@article{bb219412,
        AUTHOR = "Huang, L. and Zhang, C. and Zhang, H.Y.",
        TITLE = "Self-Adaptive Training: Bridging Supervised and Self-Supervised
Learning",
        JOURNAL = PAMI,
        VOLUME = "46",
        YEAR = "2024",
        NUMBER = "3",
        MONTH = mar,
        PAGES = "1362--1377",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214415"}

@inproceedings{bb219413,
        AUTHOR = "Huang, L. and You, S. and Zheng, M.K. and Wang, F. and Qian, C. and Yamasaki, T.",
        TITLE = "Learning Where to Learn in Cross-View Self-Supervised Learning",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "14431--14440",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214416"}

@article{bb219414,
  author    = {Hu, Y. and Yin, D.C. and Wang, Y.W. and Chen, Z.Z. and Luo, C.},
  title     = {Decomposing style, content, and motion for videos},
  journal   = JVCIR,
  volume    = {89},
  year      = {2022},
  pages     = {103686},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT214417}
}

@article{bb219415,
        AUTHOR = "Hong, M.Y. and Zhang, X.F. and Li, G.R. and Huang, Q.M.",
        TITLE = "Fine-Grained Feature Generation for Generalized Zero-Shot Video
Classification",
        JOURNAL = IP,
        VOLUME = "32",
        YEAR = "2023",
        PAGES = "1599--1612",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214418"}

@article{bb219416,
        AUTHOR = "Jin, X. and Feng, R. and Sun, S. and Feng, R. and He, T.Y. and Chen, Z.B.",
        TITLE = "Semantical video coding: Instill static-dynamic clues into structured
bitstream for {AI} tasks",
        JOURNAL = JVCIR,
        VOLUME = "93",
        YEAR = "2023",
        PAGES = "103816",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214419"}

@article{bb219417,
        AUTHOR = "Schiappa, M.C. and Rawat, Y.S. and Shah, M.",
        TITLE = "Self-Supervised Learning for Videos: A Survey",
        JOURNAL = Surveys,
        VOLUME = "55",
        YEAR = "2023",
        NUMBER = "13s",
        MONTH = jul,
        PAGES-NOTE = "page range not known; the source carried the placeholder xx-yy",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214420"}

@article{bb219418,
  author    = {Yang, X.M. and Xiong, S. and Wu, K.W. and Shan, D.F. and Xie, Z.},
  title     = {Attentive spatial-temporal contrastive learning for self-supervised video representation},
  journal   = IVC,
  volume    = {137},
  year      = {2023},
  pages     = {104765},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT214421}
}

@article{bb219419,
        AUTHOR = "Miao, J.X. and Wei, Y.C. and Wang, X.H. and Yang, Y.",
        TITLE = "Temporal Pixel-Level Semantic Understanding Through the {VSPW} Dataset",
        JOURNAL = PAMI,
        VOLUME = "45",
        YEAR = "2023",
        NUMBER = "9",
        MONTH = sep,
        PAGES = "11297--11308",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214422"}

@article{bb219420,
        AUTHOR = "Hu, D. and Wang, Z. and Nie, F.P. and Wang, R. and Li, X.L.",
        TITLE = "Self-Supervised Learning for Heterogeneous Audiovisual Scene Analysis",
        JOURNAL = MultMed,
        VOLUME = "25",
        YEAR = "2023",
        PAGES = "3534--3545",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214423"}

@article{bb219421,
        AUTHOR = "Namitha, K. and Geetha, M. and Athi, N.",
        TITLE = "An Improved Interaction Estimation and Optimization Method for
Surveillance Video Synopsis",
        JOURNAL = MultMedMag,
        VOLUME = "30",
        YEAR = "2023",
        NUMBER = "3",
        MONTH = jul,
        PAGES = "25--36",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214424"}

@article{bb219422,
        AUTHOR = "Assefa, M. and Jiang, W. and Alemu, K.G. and Yilma, G. and Adhikari, D. and Ayalew, M. and Seid, A.M. and Erbad, A.",
        TITLE = "Actor-Aware Self-Supervised Learning for Semi-Supervised Video
Representation Learning",
        JOURNAL = CirSysVideo,
        VOLUME = "33",
        YEAR = "2023",
        NUMBER = "11",
        MONTH = nov,
        PAGES = "6679--6692",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214425"}

@article{bb219423,
        AUTHOR = "Hu, Y.F. and Gao, J.Y. and Xu, C.S.",
        TITLE = "Learning Multi-Expert Distribution Calibration for Long-Tailed Video
Classification",
        JOURNAL = MultMed,
        VOLUME = "26",
        YEAR = "2024",
        PAGES = "555--567",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214426"}

@article{bb219424,
        AUTHOR = "Chen, Z. and Wang, H.L. and Chen, C.W.",
        TITLE = "Self-Supervised Video Representation Learning by Serial Restoration
With Elastic Complexity",
        JOURNAL = MultMed,
        VOLUME = "26",
        YEAR = "2024",
        PAGES = "2235--2248",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214427"}

@inproceedings{bb219425,
        AUTHOR = "Tian, Y. and Lu, G. and Zhai, G.T. and Gao, Z.Y.",
        TITLE = "Non-Semantics Suppressed Mask Learning for Unsupervised Video
Semantic Compression",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "13564--13576",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214428"}

@inproceedings{bb219426,
        AUTHOR = "Li, K.C. and Wang, Y.L. and He, Y. and Li, Y.Z. and Wang, Y. and Wang, L.M. and Qiao, Y.",
        TITLE = "{UniFormerV2}: Unlocking the Potential of Image {ViTs} for Video
Understanding",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "1632--1643",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214429"}

@inproceedings{bb219427,
        AUTHOR = "Afham, M. and Shukla, S.N. and Poursaeed, O. and Zhang, P. and Shah, A. and Lim, S.",
        TITLE = "Revisiting Kernel Temporal Segmentation as an Adaptive Tokenizer for
Long-form Video Understanding",
        BOOKTITLE = REDLCV23,
        YEAR = "2023",
        PAGES = "1181--1186",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214430"}

@inproceedings{bb219428,
        AUTHOR = "Strafforello, O. and Schutte, K. and van Gemert, J.C.",
        TITLE = "Are current long-term video understanding datasets long-term?",
        BOOKTITLE = CVEU23,
        YEAR = "2023",
        PAGES = "2959--2968",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214431"}

@inproceedings{bb219429,
        AUTHOR = "Zhao, Y.C. and Luo, C. and Tang, C.X. and Chen, D.D. and Codella, N. and Zha, Z.J.",
        TITLE = "Streaming Video Model",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "14602--14612",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214432"}

@inproceedings{bb219430,
        AUTHOR = "Maiya, S.R. and Girish, S. and Ehrlich, M. and Wang, H.Y. and Lee, K.S. and Poirson, P. and Wu, P.X. and Wang, C. and Shrivastava, A.",
        TITLE = "{NIRVANA}: Neural Implicit Representations of Videos with Adaptive
Networks and Autoregressive Patch-Wise Modeling",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "14378--14387",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214433"}

@inproceedings{bb219431,
        AUTHOR = "Zhang, Y.T. and Bai, Y. and Liu, C. and Wang, H. and Li, S. and Fu, Y.",
        TITLE = "Frame Flexible Network",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "10504--10513",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214434"}

@inproceedings{bb219432,
        AUTHOR = "Dessalene, E. and Maynord, M. and Ferm{\"u}ller, C. and Aloimonos, Y.F.",
        TITLE = "Therbligs in Action: Video Understanding through Motion Primitives",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "10618--10626",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214435"}

@inproceedings{bb219433,
        AUTHOR = "Zhao, Y. and Misra, I. and Kr{\"a}henb{\"u}hl, P. and Girdhar, R.",
        TITLE = "Learning Video Representations from Large Language Models",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "6586--6597",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214436"}

@inproceedings{bb219434,
        AUTHOR = "Wang, R. and Chen, D.D. and Wu, Z.X. and Chen, Y.P. and Dai, X. and Liu, M.C. and Yuan, L. and Jiang, Y.G.",
        TITLE = "Masked Video Distillation: Rethinking Masked Feature Modeling for
Self-supervised Video Representation Learning",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "6312--6322",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214437"}

@inproceedings{bb219435,
        AUTHOR = "Yang, X.T. and Chu, F.J. and Feiszli, M. and Goyal, R. and Torresani, L. and Tran, D.",
        TITLE = "Relational Space-Time Query in Long-Form Videos",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "6398--6408",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214438"}

@inproceedings{bb219436,
        AUTHOR = "Foo, L.G. and Gong, J. and Fan, Z.P. and Liu, J.",
        TITLE = "System-Status-Aware Adaptive Network for Online Streaming Video
Understanding",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "10514--10523",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214439"}

@inproceedings{bb219437,
        AUTHOR = "Dong, S. and Hu, H.Z. and Lian, D.Z. and Luo, W.X. and Qian, Y.C. and Gao, S.H.",
        TITLE = "Weakly Supervised Video Representation Learning with Unaligned Text
for Sequential Videos",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "2437--2447",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214440"}

@inproceedings{bb219438,
        AUTHOR = "Wang, J. and Zhu, W.T. and Wang, P. and Yu, X. and Liu, L. and Omar, M. and Hamid, R.",
        TITLE = "Selective Structured State-Spaces for Long-Form Video Understanding",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "6387--6397",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214441"}

@inproceedings{bb219439,
        AUTHOR = "Zhang, H. and Liu, D. and Zheng, Q. and Su, B.",
        TITLE = "Modeling Video as Stochastic Processes for Fine-Grained Video
Representation Learning",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "2225--2234",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214442"}

@inproceedings{bb219440,
        AUTHOR = "Kumar, Y. and Mishra, A.",
        TITLE = "Few-Shot Referring Relationships in Videos",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "2289--2298",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214443"}

@inproceedings{bb219441,
        AUTHOR = "Harzig, P. and Einfalt, M. and Lienhart, R.",
        TITLE = "Synchronized Audio-Visual Frames with Fractional Positional Encoding
for Transformers in Video-to-Text Translation",
        BOOKTITLE = ICIP22,
        YEAR = "2022",
        PAGES = "2041--2045",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214444"}

@inproceedings{bb219442,
        AUTHOR = "Wiles, O. and Carreira, J. and Barr, I. and Zisserman, A. and Malinowski, M.",
        TITLE = "Compressed Vision for Efficient Video Understanding",
        BOOKTITLE = ACCV22,
        YEAR = "2022",
        PAGES = "VII:679--695",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214445"}

@inproceedings{bb219443,
        AUTHOR = "Rho, D. and Cho, J. and Ko, J.H. and Park, E.",
        TITLE = "Neural Residual Flow Fields for Efficient Video Representations",
        BOOKTITLE = ACCV22,
        YEAR = "2022",
        PAGES = "II:458--474",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214446"}

@inproceedings{bb219444,
        AUTHOR = "Tian, F.R. and Fan, J.W. and Yu, X. and Du, S.Y. and Song, M. and Zhao, Y.",
        TITLE = "{TCVM}: Temporal Contrasting Video Montage Framework for Self-Supervised
Video Representation Learning",
        BOOKTITLE = ACCV22,
        YEAR = "2022",
        PAGES = "II:526--542",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214447"}

@inproceedings{bb219445,
        AUTHOR = "Huang, Z.M. and Jia, C.M. and Wang, S.S. and Ma, S.W.",
        TITLE = "A Compressive Prior Guided Mask Predictive Coding Approach for Video
Analysis",
        BOOKTITLE = ACCV22,
        YEAR = "2022",
        PAGES = "IV:469--484",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214448"}

@inproceedings{bb219446,
        AUTHOR = "Li, L. and Zhuang, L.S. and Gao, S.H. and Wang, S.",
        TITLE = "Havit: Hybrid-attention Based Vision Transformer for Video
Classification",
        BOOKTITLE = ACCV22,
        YEAR = "2022",
        PAGES = "IV:502--517",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214449"}

@inproceedings{bb219447,
        AUTHOR = "Zhang, H.L. and Pirsiavash, H. and Liu, X.",
        TITLE = "{MASTAF}: A Model-Agnostic Spatio-Temporal Attention Fusion Network for
Few-shot Video Classification",
        BOOKTITLE = WACV23,
        YEAR = "2023",
        PAGES = "2507--2516",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214450"}

@inproceedings{bb219448,
        AUTHOR = "Senocak, A. and Kim, J. and Oh, T.H. and Li, D.Z. and Kweon, I.S.",
        TITLE = "Event-Specific Audio-Visual Fusion Layers:
A Simple and New Perspective on Video Understanding",
        BOOKTITLE = WACV23,
        YEAR = "2023",
        PAGES = "2236--2246",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214451"}

@inproceedings{bb219449,
        AUTHOR = "Xia, B.Y. and Wu, W.H. and Wang, H.R. and Su, R. and He, D.L. and Yang, H. and Fan, X.R. and Ouyang, W.L.",
        TITLE = "{NSNet}: Non-saliency Suppression Sampler for Efficient Video Recognition",
        BOOKTITLE = ECCV22,
        YEAR = "2022",
        PAGES = "XXXIV:705--723",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214452"}

@inproceedings{bb219450,
        AUTHOR = "Xia, B.Y. and Wang, Z.H. and Wu, W.H. and Wang, H.R. and Han, J.G.",
        TITLE = "Temporal Saliency Query Network for Efficient Video Recognition",
        BOOKTITLE = ECCV22,
        YEAR = "2022",
        PAGES = "XXXIV:741--759",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214453"}

@inproceedings{bb219451,
        AUTHOR = "Islam, M.M. and Bertasius, G.",
        TITLE = "Long Movie Clip Classification with State-Space Video Models",
        BOOKTITLE = ECCV22,
        YEAR = "2022",
        PAGES = "XXXV:87--104",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214454"}

@inproceedings{bb219452,
        AUTHOR = "Habibian, A. and Yahia, H.B. and Abati, D. and Gavves, E. and Porikli, F.M.",
        TITLE = "Delta Distillation for Efficient Video Processing",
        BOOKTITLE = ECCV22,
        YEAR = "2022",
        PAGES = "XXXV:213--229",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214455"}

@inproceedings{bb219453,
        AUTHOR = "Li, Z.Z. and Wang, M.M. and Pi, H.J. and Xu, K. and Mei, J.B. and Liu, Y.",
        TITLE = "{E-NeRV}: Expedite Neural Video Representation with Disentangled
Spatial-Temporal Context",
        BOOKTITLE = ECCV22,
        YEAR = "2022",
        PAGES = "XXXV:267--284",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214456"}

@inproceedings{bb219454,
        AUTHOR = "Kosman, E. and di Castro, D.",
        TITLE = "{GraphVid}: It only Takes a Few Nodes to Understand a Video",
        BOOKTITLE = ECCV22,
        YEAR = "2022",
        PAGES = "XXXV:195--212",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214457"}

@inproceedings{bb219455,
        AUTHOR = "Ju, C. and Han, T. and Zheng, K. and Zhang, Y. and Xie, W.",
        TITLE = "Prompting Visual-Language Models for Efficient Video Understanding",
        BOOKTITLE = ECCV22,
        YEAR = "2022",
        PAGES = "XXXV:105--124",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214458"}

@inproceedings{bb219456,
        AUTHOR = "Liang, S.X. and Shen, X. and Huang, J.Q. and Hua, X.S.",
        TITLE = "Delving into Details: Synopsis-to-Detail Networks for Video Recognition",
        BOOKTITLE = ECCV22,
        YEAR = "2022",
        PAGES = "IV:262--278",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214459"}

@inproceedings{bb219457,
        AUTHOR = "Ur Rehman, Y.A. and Gao, Y. and Shen, J.J. and de Gusm{\~a}o, P.P.B. and Lane, N.",
        TITLE = "Federated Self-supervised Learning for Video Understanding",
        BOOKTITLE = ECCV22,
        YEAR = "2022",
        PAGES = "XXXI:506--522",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214460"}

@inproceedings{bb219458,
        AUTHOR = "Dadashzadeh, A. and Whone, A. and Mirmehdi, M.",
        TITLE = "Auxiliary Learning for Self-Supervised Video Representation via
Similarity-based Knowledge Distillation",
        BOOKTITLE = L3D-IVU22,
        YEAR = "2022",
        PAGES = "4230--4239",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214461"}

@inproceedings{bb219459,
        AUTHOR = "Li, Y. and Vasconcelos, N.M.",
        TITLE = "Improving Video Model Transfer with Dynamic Representation Learning",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "19258--19269",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214462"}

@inproceedings{bb219460,
        AUTHOR = "Guo, S. and Xiong, Z. and Zhong, Y.J. and Wang, L.M. and Guo, X.B. and Han, B. and Huang, W.L.",
        TITLE = "Cross-Architecture Self-supervised Video Representation Learning",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "19248--19257",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214463"}

@inproceedings{bb219461,
        AUTHOR = "Xu, X.Y. and Li, Y.L. and Lu, C.",
        TITLE = "Learning to Anticipate Future with Dynamic Context Removal",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "12724--12734",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214464"}

@inproceedings{bb219462,
        AUTHOR = "Gadre, S.Y. and Ehsani, K. and Song, S. and Mottaghi, R.",
        TITLE = "Continuous Scene Representations for Embodied {AI}",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "14829--14839",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214465"}

@inproceedings{bb219463,
        AUTHOR = "Liang, C. and Wang, W.G. and Zhou, T.F. and Yang, Y.",
        TITLE = "Visual Abductive Reasoning",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "15544--15554",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214466"}

@inproceedings{bb219464,
        AUTHOR = "Kinfu, K.A. and Vidal, R.",
        TITLE = "Analysis and Extensions of Adversarial Training for Video
Classification",
        BOOKTITLE = RoSe22,
        YEAR = "2022",
        PAGES = "3415--3424",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214467"}

@inproceedings{bb219465,
        AUTHOR = "Xiao, F. and Kundu, K. and Tighe, J. and Modolo, D.",
        TITLE = "Hierarchical Self-supervised Representation Learning for Movie
Understanding",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "9717--9726",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214468"}

@inproceedings{bb219466,
        AUTHOR = "Li, L.L. and Zhou, T.F. and Wang, W.G. and Yang, L. and Li, J.W. and Yang, Y.",
        TITLE = "Locality-Aware Inter-and Intra-Video Reconstruction for
Self-Supervised Correspondence Learning",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "8709--8720",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214469"}

@inproceedings{bb219467,
        AUTHOR = "Jiang, Y.F. and Gong, X.Y. and Wu, J. and Shi, H. and Yan, Z.C. and Wang, Z.Y.",
        TITLE = "{Auto-X3D}: Ultra-Efficient Video Understanding via Finer-Grained
Neural Architecture Search",
        BOOKTITLE = WACV22,
        YEAR = "2022",
        PAGES = "2354--2363",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214470"}

@inproceedings{bb219468,
        AUTHOR = "Chen, N.L. and Chu, L. and Pan, H. and Lu, Y. and Wang, W.P.",
        TITLE = "Self-Supervised Image Representation Learning with Geometric Set
Consistency",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "19270--19280",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214471"}

@inproceedings{bb219469,
        AUTHOR = "Lin, Y.Z. and Guo, X. and Lu, Y.",
        TITLE = "Self-Supervised Video Representation Learning with Meta-Contrastive
Network",
        BOOKTITLE = ICCV21,
        YEAR = "2021",
        PAGES = "8219--8229",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214472"}

@inproceedings{bb219470,
        AUTHOR = "Guo, X.D. and Guo, X. and Lu, Y.",
        TITLE = "{SSAN}: Separable Self-Attention Network for Video Representation
Learning",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "12613--12622",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214473"}

@inproceedings{bb219471,
        AUTHOR = "Yang, X.T. and Fan, H.Q. and Torresani, L. and Davis, L.S. and Wang, H.",
        TITLE = "Beyond Short Clips:
End-to-End Video-Level Learning with Collaborative Memories",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "7563--7572",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214474"}

@inproceedings{bb219472,
        AUTHOR = "Wu, C.Y. and Kr{\"a}henb{\"u}hl, P.",
        TITLE = "Towards Long-Form Video Understanding",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "1884--1894",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214475"}

@inproceedings{bb219473,
        AUTHOR = "Zhang, C.H. and Gupta, A. and Zisserman, A.",
        TITLE = "Temporal Query Networks for Fine-grained Video Understanding",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "4484--4494",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214476"}

@inproceedings{bb219474,
        AUTHOR = "Kangaspunta, J. and Piergiovanni, A. and Jonschkowski, R. and Ryoo, M. and Angelova, A.",
        TITLE = "Adaptive Intermediate Representations for Video Understanding",
        BOOKTITLE = MULA21,
        YEAR = "2021",
        PAGES = "1602--1612",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214477"}

@inproceedings{bb219475,
        AUTHOR = "Duan, H.D. and Zhao, Y. and Xiong, Y.J. and Liu, W.T. and Lin, D.",
        TITLE = "Omni-sourced Webly-supervised Learning for Video Recognition",
        BOOKTITLE = ECCV20,
        YEAR = "2020",
        PAGES = "XV:670--688",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214478"}

@inproceedings{bb219476,
        AUTHOR = "Jha, A. and Kumar, A. and Pande, S. and Banerjee, B. and Chaudhuri, S.",
        TITLE = "{MT-UNET}: A Novel {U-Net} Based Multi-Task Architecture For Visual Scene
Understanding",
        BOOKTITLE = ICIP20,
        YEAR = "2020",
        PAGES = "2191--2195",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214479"}

@inproceedings{bb219477,
        AUTHOR = "Diba, A. and Fayyaz, M. and Sharma, V. and Paluri, M. and Gall, J. and Stiefelhagen, R. and Van Gool, L.J.",
        TITLE = "Large Scale Holistic Video Understanding",
        BOOKTITLE = ECCV20,
        YEAR = "2020",
        PAGES = "V:593--610",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214480"}

@inproceedings{bb219478,
        AUTHOR = "Voigtlaender, P. and Changpinyo, S. and Pont-Tuset, J. and Soricut, R. and Ferrari, V.",
        TITLE = "Connecting Vision and Language with Video Localized Narratives",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "2461--2471",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214481"}

@inproceedings{bb219479,
        AUTHOR = "Pont-Tuset, J. and Uijlings, J. and Changpinyo, S. and Soricut, R. and Ferrari, V.",
        TITLE = "Connecting Vision and Language with Localized Narratives",
        BOOKTITLE = ECCV20,
        YEAR = "2020",
        PAGES = "V:647--664",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214482"}

@inproceedings{bb219480,
        AUTHOR = "Hu, A. and Cotter, F. and Mohan, N. and Gurau, C. and Kendall, A.",
        TITLE = "Probabilistic Future Prediction for Video Scene Understanding",
        BOOKTITLE = ECCV20,
        YEAR = "2020",
        PAGES = "XVI:767--785",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214483"}

@inproceedings{bb219481,
        AUTHOR = "Mavroudi, E. and Haro, B.B. and Vidal, R.",
        TITLE = "Representation Learning on Visual-Symbolic Graphs for Video
Understanding",
        BOOKTITLE = ECCV20,
        YEAR = "2020",
        PAGES = "XXIX:71--90",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214484"}

@inproceedings{bb219482,
        AUTHOR = "Sener, F. and Singhania, D. and Yao, A.",
        TITLE = "Temporal Aggregate Representations for Long-range Video Understanding",
        BOOKTITLE = ECCV20,
        YEAR = "2020",
        PAGES = "XVI:154--171",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214485"}

@inproceedings{bb219483,
        AUTHOR = "Tosi, F. and Aleotti, F. and Ramirez, P.Z. and Poggi, M. and Salti, S. and di Stefano, L. and Mattoccia, S.",
        TITLE = "Distilled Semantics for Comprehensive Scene Understanding from Videos",
        BOOKTITLE = CVPR20,
        YEAR = "2020",
        PAGES = "4653--4664",
        BIBSOURCE = "http://www.visionbib.com/bibliography/applicat825vu1.html#TT214486"}

@inproceedings{bb219484,
  author    = {Piergiovanni, A.J. and Angelova, A. and Ryoo, M.S.},
  title     = {Evolving Losses for Unsupervised Video Representation Learning},
  booktitle = CVPR20,
  year      = {2020},
  pages     = {130-139},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT214487},
}

@inproceedings{bb219485,
  author    = {Xiong, Y. and Huang, Q. and Guo, L. and Zhou, H. and Zhou, B. and Lin, D.},
  title     = {A Graph-Based Framework to Bridge Movies and Synopses},
  booktitle = ICCV19,
  year      = {2019},
  pages     = {4591-4600},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT214488},
}

@inproceedings{bb219486,
  author    = {Kanehira, A. and Takemoto, K. and Inayoshi, S. and Harada, T.},
  title     = {Multimodal Explanations by Predicting Counterfactuality in Videos},
  booktitle = CVPR19,
  year      = {2019},
  pages     = {8586-8594},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT214489},
}

@inproceedings{bb219487,
  author    = {Kanehira, A. and Harada, T.},
  title     = {Learning to Explain With Complemental Examples},
  booktitle = CVPR19,
  year      = {2019},
  pages     = {8595-8603},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT214490},
}

@inproceedings{bb219488,
  author    = {Zhou, L. and Kalantidis, Y. and Chen, X.L. and Corso, J.J. and Rohrbach, M.},
  title     = {Grounded Video Description},
  booktitle = CVPR19,
  year      = {2019},
  pages     = {6571-6580},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT214491},
}

@inproceedings{bb219489,
  author    = {Liu, X.Y. and Lee, J.Y. and Jin, H.L.},
  title     = {Learning Video Representations From Correspondence Proposals},
  booktitle = CVPR19,
  year      = {2019},
  pages     = {4268-4276},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT214492},
}

@inproceedings{bb219490,
  author    = {Xiong, B. and Kalantidis, Y. and Ghadiyaram, D. and Grauman, K.},
  title     = {Less Is More: Learning Highlight Detection From Video Duration},
  booktitle = CVPR19,
  year      = {2019},
  pages     = {1258-1267},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT214493},
}

% The acronym MAN is brace-protected so sentence-casing styles do not print it as "Man".
@inproceedings{bb219491,
  author    = {Zhang, D. and Dai, X. and Wang, X. and Wang, Y.F. and Davis, L.S.},
  title     = {{MAN}: Moment Alignment Network for Natural Language Moment Retrieval via Iterative Graph Adjustment},
  booktitle = CVPR19,
  year      = {2019},
  pages     = {1247-1257},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT214494},
}

@inproceedings{bb219492,
  author    = {Fan, L. and Huang, W. and Gan, C. and Ermon, S. and Gong, B. and Huang, J.},
  title     = {End-to-End Learning of Motion Representation for Video Understanding},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {6016-6025},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT214495},
}

% Sixth author's surname is hyphenated (Fei-Fei); the hyphen is restored here.
@inproceedings{bb219493,
  author    = {Huang, D. and Ramanathan, V. and Mahajan, D. and Torresani, L. and Paluri, M. and Fei-Fei, L. and Niebles, J.C.},
  title     = {What Makes a Video a Video: Analyzing Temporal Information in Video Understanding Models and Datasets},
  booktitle = CVPR18,
  year      = {2018},
  pages     = {7366-7375},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT214496},
}

@inproceedings{bb219494,
  author    = {Mahdisoltani, F. and Memisevic, R. and Fleet, D.J.},
  title     = {Hierarchical Video Understanding},
  booktitle = WiCV-E18,
  year      = {2018},
  pages     = {IV:659-663},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT214497},
}

% The dataset name YouTube-8M is brace-protected so sentence-casing styles keep its capitals.
@inproceedings{bb219495,
  author    = {Shin, K.S. and Jeon, J. and Lee, S. and Lim, B. and Jeong, M.S. and Nang, J.},
  title     = {Approach for Video Classification with Multi-label on {YouTube-8M} Dataset},
  booktitle = Large-Scale18,
  year      = {2018},
  pages     = {IV:317-324},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT214498},
}

@inproceedings{bb219496,
  author    = {Skalic, M. and Austin, D.},
  title     = {Building A Size Constrained Predictive Models for Video Classification},
  booktitle = Large-Scale18,
  year      = {2018},
  pages     = {IV:297-305},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT214499},
}

@inproceedings{bb219497,
  author    = {Garg, S.},
  title     = {Learning Video Features for Multi-label Classification},
  booktitle = Large-Scale18,
  year      = {2018},
  pages     = {IV:325-337},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT214500},
}

@inproceedings{bb219498,
  author    = {Cho, C. and Antin, B. and Arora, S. and Ashrafi, S. and Duan, P. and Huynh, D.T. and James, L. and Nguyen, H.T. and Solgi, M. and Than, C.V.},
  title     = {Large-Scale Video Classification with Feature Space Augmentation Coupled with Learned Label Relations and Ensembling},
  booktitle = Large-Scale18,
  year      = {2018},
  pages     = {IV:338-346},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT214501},
}

% The method name NeXtVLAD is brace-protected so sentence-casing styles do not print it as "Nextvlad".
@inproceedings{bb219499,
  author    = {Lin, R.C. and Xiao, J. and Fan, J.P.},
  title     = {{NeXtVLAD}: An Efficient Neural Network to Aggregate Frame-Level Features for Large-Scale Video Classification},
  booktitle = Large-Scale18,
  year      = {2018},
  pages     = {IV:206-218},
  bibsource = {http://www.visionbib.com/bibliography/applicat825vu1.html#TT214502},
}

Last update: Apr 18, 2024 at 11:38:49