@inproceedings{bb102600,
  author    = {Zhang, H. and Gao, T.W. and Shao, J. and Wu, Z.X.},
  title     = {BlockDance: Reuse Structurally Similar Spatio-Temporal Features to Accelerate Diffusion Transformers},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {12891-12900},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99349},
}
@inproceedings{bb102601,
  author    = {Wang, Y.Q. and Ren, S. and Lin, Z.J. and Han, Y.J. and Guo, H.Y. and Yang, Z. and Zou, D. and Feng, J.S. and Liu, X.H.},
  title     = {Parallelized Autoregressive Visual Generation},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {12955-12965},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99350},
}
@inproceedings{bb102602,
  author    = {Lee, Y.C. and Lu, E. and Rumbley, S. and Geyer, M. and Huang, J.B. and Dekel, T. and Cole, F.},
  title     = {Generative Omnimatte: Learning to Decompose Video into Layers},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {12522-12532},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99351},
}
@inproceedings{bb102603,
  author    = {Wang, L. and Li, S.M. and Yang, F. and Wang, J. and Zhang, Z.H. and Liu, Y.H. and Wang, Y.X. and Yang, J.},
  title     = {Not All Parameters Matter: Masking Diffusion Models for Enhancing Generation Ability},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {12880-12890},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99352},
}
@inproceedings{bb102604,
  author    = {Ge, Y.Y. and Li, Y.Z. and Ge, Y.X. and Shan, Y.},
  title     = {Divot: Diffusion Powers Video Tokenizer for Comprehension and Generation},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {13606-13617},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99353},
}
@inproceedings{bb102605,
  author    = {Pallotta, E. and Azar, S.M. and Li, S. and Zatsarynna, O. and Gall, J.},
  title     = {SyncVP: Joint Diffusion for Synchronous Multi-Modal Video Prediction},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {13787-13797},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99354},
}
@inproceedings{bb102606,
  author    = {Gu, Y.C. and Zhou, Y. and Ye, Y.F. and Nie, Y.X. and Yu, L.C. and Ma, P. and Lin, K.Q. and Shou, M.Z.},
  title     = {ROICtrl: Boosting Instance Control for Visual Generation},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {23658-23667},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99355},
}
@inproceedings{bb102607,
  author    = {Wu, Z. and Siarohin, A. and Menapace, W. and Skorokhodov, I. and Fang, Y.W. and Chordia, V. and Gilitschenski, I. and Tulyakov, S.},
  title     = {Mind the Time: Temporally-Controlled Multi-Event Video Generation},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {23989-24000},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99356},
}
@inproceedings{bb102608,
  author    = {Zhou, D.Y. and Sun, Q. and Peng, Y. and Yan, K. and Dong, R. and Wang, D. and Ge, Z. and Duan, N. and Zhang, X.Y.},
  title     = {Taming Teacher Forcing for Masked Autoregressive Video Generation},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {7374-7384},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99357},
}
@inproceedings{bb102609,
  author    = {Gao, M. and Pan, Y. and Gao, H.A. and Zhang, Z.Z. and Li, W. and Dong, H. and Tang, H. and Yi, L. and Zhao, H.},
  title     = {PartRM: Modeling Part-Level Dynamics with Large Cross-State Reconstruction Model},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {7004-7014},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99358},
}
@inproceedings{bb102610,
  author    = {Zhang, Q.H. and Zhai, S.F. and Martin, M.A.B. and Miao, K. and Toshev, A. and Susskind, J. and Gu, J.},
  title     = {World-consistent Video Diffusion with Explicit 3D Modeling},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {21685-21695},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99359},
}
@inproceedings{bb102611,
  author    = {Ji, L.B. and Zhong, L. and Wei, P.F. and Li, C.J.},
  title     = {PoseTraj: Pose-Aware Trajectory Control in Video Diffusion},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {22776-22785},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99360},
}
@inproceedings{bb102612,
  author    = {Shao, J.H. and Yang, Y.B. and Zhou, H.Y. and Zhang, Y.M. and Shen, Y.J. and Guizilini, V. and Wang, Y. and Poggi, M. and Liao, Y.},
  title     = {Learning Temporally Consistent Video Depth from Video Diffusion Priors},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {22841-22852},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99361},
}
@inproceedings{bb102613,
  author    = {Li, Z.L. and Rahmani, H. and Ke, Q.H. and Liu, J.},
  title     = {LongDiff: Training-Free Long Video Generation in One Go},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {17789-17798},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99362},
}
@inproceedings{bb102614,
  author    = {Li, Z.J. and Lin, B. and Ye, Y. and Chen, L. and Cheng, X.H. and Yuan, S.H. and Yuan, L.},
  title     = {WF-VAE: Enhancing Video VAE by Wavelet-Driven Energy Flow for Latent Video Diffusion Model},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {17778-17788},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99363},
}
@inproceedings{bb102615,
  author    = {Li, J. and Liu, C.K. and Wu, J.J.},
  title     = {Lifting Motion to the 3D World via 2D Diffusion},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {17518-17528},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99364},
}
@inproceedings{bb102616,
  author    = {Wang, C.Y. and Zhuang, P. and Ngo, T.D. and Menapace, W. and Siarohin, A. and Vasilkovsky, M. and Skorokhodov, I. and Tulyakov, S. and Wonka, P. and Lee, H.Y.},
  title     = {4Real-Video: Learning Generalizable Photo-Realistic 4D Video Diffusion},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {17723-17732},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99365},
}
@inproceedings{bb102617,
  author    = {Akkerman, R. and Feng, H. and Black, M.J. and Tzionas, D. and Abrevaya, V.F.},
  title     = {InterDyn: Controllable Interactive Dynamics with Video Diffusion Models},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {12467-12479},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99366},
}
@inproceedings{bb102618,
  author    = {Hyung, J. and Kim, K. and Hong, S. and Kim, M.J. and Choo, J.},
  title     = {Spatiotemporal Skip Guidance for Enhanced Video Diffusion Sampling},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {11006-11015},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99367},
}
@inproceedings{bb102619,
  author    = {Liu, Z. and Ye, W. and Luximon, Y. and Wan, P.F. and Zhang, D.},
  title     = {Unleashing the Potential of Multi-modal Foundation Models and Video Diffusion for 4D Dynamic Physical Scene Simulation},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {11016-11025},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99368},
}
@inproceedings{bb102620,
  author    = {Liu, F. and Zhang, S.W. and Wang, X.F. and Wei, Y.J. and Qiu, H. and Zhao, Y.Z. and Zhang, Y.Y. and Ye, Q.X. and Wan, F.},
  title     = {Timestep Embedding Tells: It's Time to Cache for Video Diffusion Model},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {7353-7363},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99369},
}
@inproceedings{bb102621,
  author    = {Sun, M.Z. and Wang, W. and Li, G. and Liu, J.W. and Sun, J.H. and Feng, W.Q. and Lao, S.S. and Zhou, S. and He, Q. and Liu, J.},
  title     = {AR-Diffusion: Asynchronous Video Generation with Auto-Regressive Diffusion},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {7364-7373},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99370},
}
@inproceedings{bb102622,
  author    = {Jeong, H. and Huang, C.H.P. and Ye, J.C. and Mitra, N.J. and Ceylan, D.},
  title     = {Track4Gen: Teaching Video Diffusion Models to Track Points Improves Video Generation},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {7276-7287},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99371},
}
@inproceedings{bb102623,
  author    = {Ma, X. and Wang, Y.H. and Jia, G. and Chen, X.Y. and Wong, T.T. and Li, Y.F. and Chen, C.},
  title     = {Consistent and Controllable Image Animation with Motion Diffusion Models},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {7288-7298},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99372},
}
@inproceedings{bb102624,
  author    = {Wu, S. and Xu, C.R. and Huang, B.B. and Geiger, A. and Chen, A.},
  title     = {GenFusion: Closing the Loop between Reconstruction and Generation via Videos},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {6078-6088},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99373},
}
@inproceedings{bb102625,
  author    = {Liu, J.X. and Lin, S. and Li, Y.X. and Yang, M.H.},
  title     = {DynamicScaler: Seamless and Scalable Video Generation for Panoramic Scenes},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {6144-6153},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99374},
}
@inproceedings{bb102626,
  author    = {Yan, X. and Cai, Y.X. and Wang, Q.Y. and Zhou, Y. and Huang, W.H. and Yang, H.},
  title     = {Long Video Diffusion Generation with Segmented Cross-Attention and Content-Rich Video Data Curation},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {3184-3194},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99375},
}
@inproceedings{bb102627,
  author    = {Wang, H.Y. and Liu, F. and Chi, J.W. and Duan, Y.},
  title     = {VideoScene: Distilling Video Diffusion Model to Generate 3D Scenes in One Step},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {16475-16485},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99376},
}
@inproceedings{bb102628,
  author    = {Burgert, R. and Xu, Y.C. and Xian, W.Q. and Pilarski, O. and Clausen, P. and He, M.M. and Ma, L. and Deng, Y.T. and Li, L.X. and Mousavi, M. and Ryoo, M. and Debevec, P. and Yu, N.},
  title     = {Go-with-the-Flow: Motion-Controllable Video Diffusion Models Using Real-Time Warped Noise},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {13-23},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99377},
}
@inproceedings{bb102629,
  author    = {Lu, R.J. and Chen, Y.X. and Ni, J.F. and Jia, B.X. and Liu, Y. and Wan, D. and Zeng, G. and Huang, S.Y.},
  title     = {MOVIS: Enhancing Multi-Object Novel View Synthesis for Indoor Scenes},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {26767-26778},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99378},
}
@inproceedings{bb102630,
  author    = {Yang, L. and Qi, L. and Li, X.T. and Li, S. and Jampani, V. and Yang, M.H.},
  title     = {Unified Dense Prediction of Video Diffusion},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {28963-28973},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99379},
}
@inproceedings{bb102631,
  author    = {Wu, R. and Gao, R.Q. and Poole, B. and Trevithick, A. and Zheng, C.X. and Barron, J.T. and Holynski, A.},
  title     = {CAT4D: Create Anything in 4D with Multi-View Video Diffusion Models},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {26057-26068},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99380},
}
@inproceedings{bb102632,
  author    = {Tan, J.T. and Yu, H. and Huang, J. and Xiao, J. and Zhao, F.},
  title     = {FreePCA: Integrating Consistency Information across Long-short Frames in Training-free Long Video Generation via Principal Component Analysis},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {27979-27988},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99381},
}
@inproceedings{bb102633,
  author    = {Zhai, S. and Ye, Z.C. and Liu, J.L. and Xie, W.J. and Hu, J.Q. and Peng, Z. and Xue, H. and Chen, D. and Wang, X.M. and Yang, L. and Wang, N. and Liu, H.M. and Zhang, G.F.},
  title     = {StarGen: A Spatiotemporal Autoregression Framework with Video Diffusion Model for Scalable and Controllable Scene Generation},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {26822-26833},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99382},
}
@inproceedings{bb102634,
  author    = {Soucek, T. and Gatti, P. and Wray, M. and Laptev, I. and Damen, D. and Sivic, J.},
  title     = {ShowHowTo: Generating Scene-Conditioned Step-by-Step Visual Instructions},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {27435-27445},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99383},
}
@inproceedings{bb102635,
  author    = {Cai, S.Q. and Chan, E.R. and Zhang, Y.Z. and Guibas, L.J. and Wu, J.J. and Wetzstein, G.},
  title     = {Diffusion Self-Distillation for Zero-Shot Customized Image Generation},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {18434-18443},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99384},
}
@inproceedings{bb102636,
  author    = {Yin, T.W. and Zhang, Q. and Zhang, R. and Freeman, W.T. and Durand, F. and Shechtman, E. and Huang, X.},
  title     = {From Slow Bidirectional to Fast Autoregressive Video Diffusion Models},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {22963-22974},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99385},
}
@inproceedings{bb102637,
  author    = {Zhang, Z.H. and Liao, J.C. and Li, M.H. and Dai, Z. and Qiu, B.X. and Zhu, S. and Qin, L. and Wang, W.Z.},
  title     = {TORA: Trajectory-oriented Diffusion Transformer for Video Generation},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {2063-2073},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99386},
}
@inproceedings{bb102638,
  author    = {Jin, W. and Dai, Q. and Luo, C. and Baek, S.H. and Cho, S.},
  title     = {FloVD: Optical Flow Meets Video Diffusion Model for Enhanced Camera-Controlled Video Synthesis},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {2040-2049},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99387},
}
@inproceedings{bb102639,
  author    = {Wu, X. and Singer, U. and Lin, Z.J. and Madotto, A. and Xia, X. and Xu, Y.F. and Crook, P. and Dong, X.L. and Moon, S.},
  title     = {Corgi: Cached Memory Guided Video Generation},
  booktitle = WACV25,
  year      = {2025},
  pages     = {4585-4594},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99388},
}
@inproceedings{bb102640,
  author    = {Rahman, A. and Perera, M.V. and Patel, V.M.},
  title     = {Frame by Familiar Frame: Understanding Replication in Video Diffusion Models},
  booktitle = WACV25,
  year      = {2025},
  pages     = {2766-2776},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99389},
}
@inproceedings{bb102641,
  author    = {Li, P.X. and Chen, K. and Liu, Z. and Gao, R. and Hong, L.Q. and Yeung, D.Y. and Lu, H.C. and Jia, X.},
  title     = {TrackDiffusion: Tracklet-Conditioned Video Generation via Diffusion Models},
  booktitle = WACV25,
  year      = {2025},
  pages     = {3539-3548},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99390},
}
@inproceedings{bb102642,
  author    = {Yang, J.W. and Sun, J.M. and Yang, Y.L. and Yang, J. and Shan, Y. and Cao, Y.P. and Gao, L.},
  title     = {DMiT: Deformable Mipmapped Tri-plane Representation for Dynamic Scenes},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {LV: 436-453},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99391},
}
@inproceedings{bb102643,
  author    = {Zatsarynna, O. and Bahrami, E. and Farha, Y.A. and Francesca, G. and Gall, J.},
  title     = {Gated Temporal Diffusion for Stochastic Long-term Dense Anticipation},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {LV: 454-472},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99392},
}
@inproceedings{bb102644,
  author    = {Hong, F.Z. and Guzov, V. and Kim, H.J. and Ye, Y.T. and Newcombe, R. and Liu, Z.W. and Ma, L.},
  title     = {EgoLM: Multi-Modal Language Model of Egocentric Motions},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {5344-5354},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99393},
}
@inproceedings{bb102645,
  author    = {Zhang, M.Y. and Jin, D. and Gu, C.Y. and Hong, F.Z. and Cai, Z.A. and Huang, J.F. and Zhang, C.Z. and Guo, X.Y. and Yang, L. and He, Y. and Liu, Z.W.},
  title     = {Large Motion Model for Unified Multi-modal Motion Generation},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {XIII: 397-421},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99394},
}
@inproceedings{bb102646,
  author    = {Lu, J.C. and Huang, Z. and Yang, Z. and Zhang, J.H. and Zhang, L.},
  title     = {{WoVoGen}: World Volume-aware Diffusion for Controllable Multi-camera Driving Scene Generation},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {LXXX: 329-345},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99395},
}
@inproceedings{bb102647,
  author    = {Sampieri, A. and Palma, A. and Spinelli, I. and Galasso, F.},
  title     = {Length-aware Motion Synthesis via Latent Diffusion},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {LIII: 107-124},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99396},
}
@inproceedings{bb102648,
  author    = {Zhu, L. and Zheng, Y.L. and Zhang, Y.J. and Wang, X. and Wang, L.Z. and Huang, H.},
  title     = {Temporal Residual Guided Diffusion Framework for Event-driven Video Reconstruction},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {XL: 411-427},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99397},
}
@inproceedings{bb102649,
  author    = {Jin, P. and Li, H. and Cheng, Z. and Li, K. and Yu, R. and Liu, C. and Ji, X.Y. and Yuan, L. and Chen, J.},
  title     = {Local Action-guided Motion Diffusion Model for Text-to-motion Generation},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {XXV: 392-409},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99398},
}
@inproceedings{bb102650,
  author    = {Chi, S.G. and Chi, H.G. and Ma, H. and Agarwal, N. and Siddiqui, F. and Ramani, K. and Lee, K.},
  title     = {{M2D2M}: Multi-Motion Generation from Text with Discrete Diffusion Models},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {XIV: 18-36},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99399},
}
@inproceedings{bb102651,
  author    = {Kara, O. and Singh, K.K. and Liu, F. and Ceylan, D. and Rehg, J.M. and Hinz, T.},
  title     = {ShotAdapter: Text-to-Multi-Shot Video Generation with Diffusion Models},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {28405-28415},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99400},
}
@inproceedings{bb102652,
  author    = {Cai, S.Q. and Ceylan, D. and Gadelha, M. and Huang, C.H.P. and Wang, T.F.Y. and Wetzstein, G.},
  title     = {Generative Rendering: Controllable 4D-Guided Video Generation with 2D Diffusion Models},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {7611-7620},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99401},
}
@inproceedings{bb102653,
  author    = {Zhao, Y.P. and Zhang, P. and Wang, C. and Lam, E.Y.},
  title     = {Controllable Unsupervised Event-Based Video Generation},
  booktitle = ICIP24,
  year      = {2024},
  pages     = {2278-2284},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99402},
}
@inproceedings{bb102654,
  author    = {Qazi, T. and Lall, B.},
  title     = {Thermal Videodiff (TVD): A Diffusion Architecture for Thermal Video Synthesis},
  booktitle = ICIP24,
  year      = {2024},
  pages     = {2438-2444},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99403},
}
@inproceedings{bb102655,
  author    = {Zhang, K.W. and Zhou, Y.F. and Xu, X.D. and Dai, B. and Pan, X.G.},
  title     = {DiffMorpher: Unleashing the Capability of Diffusion Models for Image Morphing},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {7912-7921},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99404},
}
@inproceedings{bb102656,
  author    = {Chen, H.X. and Zhang, Y. and Cun, X.D. and Xia, M.H. and Wang, X.T. and Weng, C. and Shan, Y.},
  title     = {VideoCrafter2: Overcoming Data Limitations for High-Quality Video Diffusion Models},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {7310-7320},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99405},
}
@inproceedings{bb102657,
  author    = {Ren, Y.X. and Zhou, Y. and Yang, J. and Shi, J. and Liu, D. and Liu, F. and Kwon, M. and Shrivastava, A.},
  title     = {{Customize-A-Video}: One-shot Motion Customization of Text-to-video Diffusion Models},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {LXXXIX: 332-349},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99406},
}
@inproceedings{bb102658,
  author    = {Zhang, J.T. and Liu, Y.H. and Tai, Y.W. and Tang, C.K.},
  title     = {C3Net: Compound Conditioned ControlNet for Multimodal Content Generation},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {26876-26885},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99407},
}
@inproceedings{bb102659,
  author    = {Li, Z.Q. and Tucker, R. and Snavely, N. and Holynski, A.},
  title     = {Generative Image Dynamics},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {24142-24153},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99408},
}
@inproceedings{bb102660,
  author    = {Zhuang, S. and Li, K. and Chen, X.Y. and Wang, Y.H. and Liu, Z.W. and Qiao, Y. and Wang, Y.},
  title     = {Vlogger: Make Your Dream A Vlog},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {8806-8817},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99409},
}
@inproceedings{bb102661,
  author    = {Zeng, Y. and Wei, G.Q. and Zheng, J. and Zou, J.X. and Wei, Y. and Zhang, Y.C. and Li, H.},
  title     = {Make Pixels Dance: High-Dynamic Video Generation},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {8850-8860},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99410},
}
@inproceedings{bb102662,
  author    = {Zhang, Z.C. and Hu, J. and Cheng, W.T. and Paudel, D. and Yang, J.F.},
  title     = {ExtDM: Distribution Extrapolation Diffusion Model for Video Prediction},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {19310-19320},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99411},
}
@inproceedings{bb102663,
  author    = {Skorokhodov, I. and Menapace, W. and Siarohin, A. and Tulyakov, S.},
  title     = {Hierarchical Patch Diffusion Models for High-Resolution Video Generation},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {7569-7579},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99412},
}
@inproceedings{bb102664,
  author    = {Jiang, Y.M. and Wu, T.X. and Yang, S. and Si, C.Y. and Lin, D. and Qiao, Y. and Loy, C.C. and Liu, Z.W.},
  title     = {VideoBooth: Diffusion-based Video Generation with Image Prompts},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {6689-6700},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99413},
}
@inproceedings{bb102665,
  author    = {Wei, Y.J. and Zhang, S.W. and Qing, Z.W. and Yuan, H.J. and Liu, Z.H. and Liu, Y. and Zhang, Y.Y. and Zhou, J.R. and Shan, H.M.},
  title     = {{DreamVideo}: Composing Your Dream Videos with Customized Subject and Motion},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {6537-6549},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99414},
}
@inproceedings{bb102666,
  author    = {Gal, R. and Vinker, Y. and Alaluf, Y. and Bermano, A. and Cohen Or, D. and Shamir, A. and Chechik, G.},
  title     = {Breathing Life Into Sketches Using Text-to-Video Priors},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {4325-4336},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99415},
}
@inproceedings{bb102667,
  author    = {Jain, Y. and Nasery, A. and Vineet, V. and Behl, H.},
  title     = {Peekaboo: Interactive Video Generation via Masked-Diffusion},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {8079-8088},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99416},
}
@inproceedings{bb102668,
  author    = {Yatim, D. and Fridman, R. and Bar Tal, O. and Kasten, Y. and Dekel, T.},
  title     = {Space-Time Diffusion Features for Zero-Shot Text-Driven Motion Transfer},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {8466-8476},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99417},
}
@inproceedings{bb102669,
  author    = {Chen, S. and Xu, M.M. and Ren, J.W. and Cong, Y. and He, S. and Xie, Y.P. and Sinha, A. and Luo, P. and Xiang, T. and Perez Rua, J.M.},
  title     = {GenTron: Diffusion Transformers for Image and Video Generation},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {6441-6451},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99418},
}
@inproceedings{bb102670,
  author    = {Lee, T. and Kwon, S. and Kim, T.},
  title     = {Grid Diffusion Models for Text-to-Video Generation},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {8734-8743},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99419},
}
@inproceedings{bb102671,
  author    = {Eldesokey, A. and Wonka, P.},
  title     = {LatentMan: Generating Consistent Animated Characters using Image Diffusion Models},
  booktitle = GCV24,
  year      = {2024},
  pages     = {7510-7519},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99420},
}
@inproceedings{bb102672,
  author    = {Yuan, X. and Baek, J. and Xu, K. and Tov, O. and Fei, H.L.},
  title     = {Inflation with Diffusion: Efficient Temporal Adaptation for Text-to-Video Super-Resolution},
  booktitle = VAQuality24,
  year      = {2024},
  pages     = {489-496},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99421},
}
@inproceedings{bb102673,
  author    = {Wu, J.Z.J. and Ge, Y.X. and Wang, X.T. and Lei, S.W.X. and Gu, Y.C. and Shi, Y.F. and Hsu, W. and Shan, Y. and Qie, X.H. and Shou, M.Z.},
  title     = {Tune-A-Video: One-Shot Tuning of Image Diffusion Models for Text-to-Video Generation},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {7589-7599},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99422},
}
@inproceedings{bb102674,
  author    = {Zhu, Y.Z. and Liu, X.C. and Liu, Q.},
  title     = {{SlimFlow}: Training Smaller One-step Diffusion Models with Rectified Flow},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {LXXXII: 342-359},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99423},
}
@inproceedings{bb102675,
  author    = {Zhong, L. and Xie, Y.M. and Jampani, V. and Sun, D.Q. and Jiang, H.},
  title     = {{SMooDi}: Stylized Motion Diffusion Model},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {I: 405-421},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99424},
}
@inproceedings{bb102676,
  author    = {Zhou, W.Y. and Dou, Z.Y. and Cao, Z. and Liao, Z.Y.C. and Wang, J.B. and Wang, W.J. and Liu, Y. and Komura, T. and Wang, W.P. and Liu, L.J.},
  title     = {EMDM: Efficient Motion Diffusion Model for Fast and High-quality Motion Generation},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {II: 18-38},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99425},
}
@inproceedings{bb102677,
  author    = {Gupta, A. and Yu, L.J. and Sohn, K. and Gu, X. and Hahn, M. and Li, F.F. and Essa, I. and Jiang, L. and Lezama, J.},
  title     = {Photorealistic Video Generation with Diffusion Models},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {LXXIX: 393-411},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99426},
}
@inproceedings{bb102678,
  author    = {Han, J.L. and Kokkinos, F. and Torr, P.H.S.},
  title     = {{VFusion3D}: Learning Scalable {3D} Generative Models from Video Diffusion Models},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {II: 333-350},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99427},
}
@inproceedings{bb102679,
  author    = {Jagpal, D. and Chen, X. and Namboodiri, V.P.},
  title     = {EIDT-V: Exploiting Intersections in Diffusion Trajectories for Model-Agnostic, Zero-Shot, Training-Free Text-to-Video Generation},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {18219-18228},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99428},
}
@inproceedings{bb102680,
  author    = {Xue, Q.Y. and Yin, X.Y. and Yang, B. and Gao, W.},
  title     = {PhyT2V: LLM-Guided Iterative Self-Refinement for Physics-Grounded Text-to-Video Generation},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {18826-18836},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99429},
}
@inproceedings{bb102681,
  author    = {Qi, T.H. and Yuan, J.L. and Feng, W.Q. and Fang, S.C. and Liu, J.W. and Zhou, S. and He, Q. and Xie, H.T. and Zhang, Y.D.},
  title     = {Mask2DiT: Dual Mask-based Diffusion Transformer for Multi-Scene Long Video Generation},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {18837-18846},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99430},
}
@inproceedings{bb102682,
  author    = {Shi, F.Y. and Gu, J.X. and Xu, H. and Xu, S. and Zhang, W. and Wang, L.M.},
  title     = {BIVDiff: A Training-Free Framework for General-Purpose Video Synthesis via Bridging Image and Video Diffusion Models},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {7393-7402},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99431},
}
@inproceedings{bb102683,
  author    = {Ge, S.W. and Nah, S.J. and Liu, G.L. and Poon, T. and Tao, A. and Catanzaro, B. and Jacobs, D. and Huang, J.B. and Liu, M.Y. and Balaji, Y.},
  title     = {Preserve Your Own Correlation: A Noise Prior for Video Diffusion Models},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {22873-22884},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99432},
}
@inproceedings{bb102684,
  author    = {Zhu, Z.X. and Feng, X.L. and Chen, D.D. and Yuan, J.S. and Qiao, C.M. and Hua, G.},
  title     = {Exploring Pre-trained Text-to-video Diffusion Models for Referring Video Object Segmentation},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {XII: 452-469},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99433},
}
@inproceedings{bb102685,
  author    = {Guo, Y.W. and Yang, C. and Rao, A. and Agrawala, M. and Lin, D. and Dai, B.},
  title     = {{SparseCtrl}: Adding Sparse Controls to Text-to-video Diffusion Models},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {XLII: 330-348},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99434},
}
@inproceedings{bb102686,
  author    = {Zhao, R. and Gu, Y.C. and Wu, J.Z.J. and Zhang, D.J.H. and Liu, J.W. and Wu, W.J. and Keppo, J. and Shou, M.Z.},
  title     = {{MotionDirector}: Motion Customization of Text-to-video Diffusion Models},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {LVI: 273-290},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99435},
}
@inproceedings{bb102687,
  author    = {Liang, J.Y. and Fan, Y.C. and Zhang, K. and Timofte, R. and Van Gool, L.J. and Ranjan, R.},
  title     = {{MoVideo}: Motion-aware Video Generation with Diffusion Model},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {XLIV: 56-74},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99436},
}
@inproceedings{bb102688,
  author    = {Huang, T. and Jiang, G.Q. and Ze, Y.J. and Xu, H.Z.},
  title     = {Diffusion Reward: Learning Rewards via Conditional Video Diffusion},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {XLII: 478-495},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99437},
}
@inproceedings{bb102689,
  author    = {Niu, M. and Cun, X.D. and Wang, X.T. and Zhang, Y. and Shan, Y. and Zheng, Y.Q.},
  title     = {MOFA-Video: Controllable Image Animation via Generative Motion Field Adaptions in Frozen Image-to-Video Diffusion Model},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {XIX: 111-128},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99438},
}
@inproceedings{bb102690,
  author    = {Wu, T.X. and Si, C.Y. and Jiang, Y.M. and Huang, Z.Q. and Liu, Z.W.},
  title     = {{FreeInit}: Bridging Initialization Gap in Video Diffusion Models},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {III: 378-394},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99439},
}
@inproceedings{bb102691,
  author    = {Xing, J. and Xia, M.H. and Zhang, Y. and Chen, H.X. and Yu, W.B. and Liu, H.Y. and Liu, G. and Wang, X.T. and Shan, Y. and Wong, T.T.},
  title     = {{DynamiCrafter}: Animating Open-domain Images with Video Diffusion Priors},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {XLVI: 399-417},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99440},
}
@inproceedings{bb102692,
  author    = {Kim, K. and Lee, H. and Park, J. and Kim, S. and Lee, K. and Kim, S. and Yoo, J.},
  title     = {Hybrid Video Diffusion Models with {2D} Triplane and {3D} Wavelet Representation},
  booktitle = ECCV24,
  year      = {2024},
  pages     = {LII: 148-165},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99441},
}
@inproceedings{bb102693,
  author    = {Yuan, H.J. and Zhang, S.W. and Wang, X. and Wei, Y.J. and Feng, T. and Pan, Y. and Zhang, Y.Y. and Liu, Z.W. and Albanie, S. and Ni, D.},
  title     = {InstructVideo: Instructing Video Diffusion Models with Human Feedback},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {6463-6474},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99442},
}
@inproceedings{bb102694,
  author    = {Ni, H. and Egger, B. and Lohit, S. and Cherian, A. and Wang, Y. and Koike Akino, T. and Huang, S.X. and Marks, T.K.},
  title     = {TI2V-Zero: Zero-Shot Image Conditioning for Text-to-Video Diffusion Models},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {9015-9025},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99443},
}
@inproceedings{bb102695,
  author    = {Jeong, H. and Park, G.Y. and Ye, J.C.},
  title     = {VMC: Video Motion Customization Using Temporal Attention Adaption for Text-to-Video Diffusion Models},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {9212-9221},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99444},
}
@inproceedings{bb102696,
  author    = {Motamed, S. and van Gansbeke, W. and Van Gool, L.J.},
  title     = {Investigating the Effectiveness of Cross-Attention to Unlock Zero-Shot Editing of Text-to-Video Diffusion Models},
  booktitle = GCV24,
  year      = {2024},
  pages     = {7406-7415},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99445},
}
@inproceedings{bb102697,
  author    = {Fei, H. and Wu, S.Q. and Ji, W. and Zhang, H.W. and Chua, T.S.},
  title     = {Dysen-VDM: Empowering Dynamics-Aware Text-to-Video Diffusion with LLMs},
  booktitle = CVPR24,
  year      = {2024},
  pages     = {7641-7653},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99446},
}
@inproceedings{bb102698,
  author    = {Blattmann, A. and Rombach, R. and Ling, H. and Dockhorn, T. and Kim, S.W. and Fidler, S. and Kreis, K.},
  title     = {Align Your Latents: High-Resolution Video Synthesis with Latent Diffusion Models},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {22563-22575},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99447},
}
@inproceedings{bb102699,
  author    = {Rombach, R. and Blattmann, A. and Lorenz, D. and Esser, P. and Ommer, B.},
  title     = {High-Resolution Image Synthesis with Latent Diffusion Models},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {10674-10685},
  bibsource = {http://www.visionbib.com/bibliography/describe490difvid3.html#TT99448},
}
Last update: Nov 26, 2025 at 20:24:09