@article{bb376600,
        AUTHOR = "Erro, D.",
        TITLE = "Two-Band Radial Postfiltering in Cepstral Domain with Application to
Speech Synthesis",
        JOURNAL = SPLetters,
        VOLUME = "23",
        YEAR = "2016",
        NUMBER = "2",
        MONTH = feb,
        PAGES = "202--206",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370703"}

@article{bb376601,
        AUTHOR = "Hu, Y. J. and Ling, Z. H.",
        TITLE = "{DBN}-based Spectral Feature Representation for Statistical Parametric
Speech Synthesis",
        JOURNAL = SPLetters,
        VOLUME = "23",
        YEAR = "2016",
        NUMBER = "3",
        MONTH = mar,
        PAGES = "321--325",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370704"}

@article{bb376602,
        AUTHOR = "Tsiaras, V. and Maia, R. and Diakoloukas, V. and Stylianou, Y. and Digalakis, V.",
        TITLE = "Global Variance in Speech Synthesis With Linear Dynamical Models",
        JOURNAL = SPLetters,
        VOLUME = "23",
        YEAR = "2016",
        NUMBER = "8",
        MONTH = aug,
        PAGES = "1057--1061",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370705"}

@article{bb376603,
        AUTHOR = "Wang, F. Z. and Nagano, H. and Kashino, K. and Igarashi, T.",
        TITLE = "Visualizing Video Sounds With Sound Word Animation to Enrich User
Experience",
        JOURNAL = MultMed,
        VOLUME = "19",
        YEAR = "2017",
        NUMBER = "2",
        MONTH = feb,
        PAGES = "418--429",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370706"}

@article{bb376604,
        AUTHOR = "Sharma, B. and Prasanna, S. R. M.",
        TITLE = "Enhancement of Spectral Tilt in Synthesized Speech",
        JOURNAL = SPLetters,
        VOLUME = "24",
        YEAR = "2017",
        NUMBER = "4",
        MONTH = apr,
        PAGES = "382--386",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370707"}

@article{bb376605,
        AUTHOR = "Singh, R. and Jimenez, A. and Oland, A.",
        TITLE = "Voice disguise by mimicry: deriving statistical articulometric evidence
to evaluate claimed impersonation",
        JOURNAL = IET-Bio,
        VOLUME = "6",
        YEAR = "2017",
        NUMBER = "4",
        MONTH = jul,
        PAGES = "282--289",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370708"}

@article{bb376606,
        AUTHOR = "Lee, K. S.",
        TITLE = "Restricted {Boltzmann} Machine-Based Voice Conversion for Nonparallel
Corpus",
        JOURNAL = SPLetters,
        VOLUME = "24",
        YEAR = "2017",
        NUMBER = "8",
        MONTH = aug,
        PAGES = "1103--1107",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370709"}

@article{bb376607,
        AUTHOR = "Reddy, M. K. and Rao, K. S.",
        TITLE = "Robust Pitch Extraction Method for the {HMM}-Based Speech Synthesis
System",
        JOURNAL = SPLetters,
        VOLUME = "24",
        YEAR = "2017",
        NUMBER = "8",
        MONTH = aug,
        PAGES = "1133--1137",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370710"}

@article{bb376608,
        AUTHOR = "Liu, Z. C. and Ling, Z. H. and Dai, L. R.",
        TITLE = "Statistical Parametric Speech Synthesis Using Generalized
Distillation Framework",
        JOURNAL = SPLetters,
        VOLUME = "25",
        YEAR = "2018",
        NUMBER = "5",
        MONTH = may,
        PAGES = "695--699",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370711"}

@article{bb376609,
        AUTHOR = "Drugman, T. and Huybrechts, G. and Klimkov, V. and Moinet, A.",
        TITLE = "Traditional Machine Learning for Pitch Detection",
        JOURNAL = SPLetters,
        VOLUME = "25",
        YEAR = "2018",
        NUMBER = "11",
        MONTH = nov,
        PAGES = "1745--1749",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370712"}

@article{bb376610,
        AUTHOR = "Arik, S. O. and Jun, H. and Diamos, G.",
        TITLE = "Fast Spectrogram Inversion Using Multi-Head Convolutional Neural
Networks",
        JOURNAL = SPLetters,
        VOLUME = "26",
        YEAR = "2019",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "94--98",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370713"}

@article{bb376611,
        AUTHOR = "Masuyama, Y. and Yatabe, K. and Oikawa, Y.",
        TITLE = "{Griffin-Lim} Like Phase Recovery via Alternating Direction Method of
Multipliers",
        JOURNAL = SPLetters,
        VOLUME = "26",
        YEAR = "2019",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "184--188",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370714"}

@article{bb376612,
        AUTHOR = "Kwon, O. and Jang, I. and Ahn, C. and Kang, H.",
        TITLE = "An Effective Style Token Weight Control Technique for End-to-End
Emotional Speech Synthesis",
        JOURNAL = SPLetters,
        VOLUME = "26",
        YEAR = "2019",
        NUMBER = "9",
        MONTH = sep,
        PAGES = "1383--1387",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370715"}

@article{bb376613,
        AUTHOR = "Liu, Q. and Jackson, P. J. B. and Wang, W.",
        TITLE = "A Speech Synthesis Approach for High Quality Speech Separation and
Generation",
        JOURNAL = SPLetters,
        VOLUME = "26",
        YEAR = "2019",
        NUMBER = "12",
        MONTH = dec,
        PAGES = "1872--1876",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370716"}

@article{bb376614,
        AUTHOR = "Cotescu, M. and Drugman, T. and Huybrechts, G. and Lorenzo Trueba, J. and Moinet, A.",
        TITLE = "Voice Conversion for Whispered Speech Synthesis",
        JOURNAL = SPLetters,
        VOLUME = "27",
        YEAR = "2020",
        PAGES = "186--190",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370717"}

@article{bb376615,
        AUTHOR = "Aylett, M. P. and Vinciarelli, A. and Wester, M.",
        TITLE = "Speech Synthesis for the Generation of Artificial Personality",
        JOURNAL = AffCom,
        VOLUME = "11",
        YEAR = "2020",
        NUMBER = "2",
        MONTH = apr,
        PAGES = "361--372",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370718"}

@article{bb376616,
        AUTHOR = "Rao, M. V. A. and Ghosh, P. K.",
        TITLE = "{SFNet}: A Computationally Efficient Source Filter Model Based Neural
Speech Synthesis",
        JOURNAL = SPLetters,
        VOLUME = "27",
        YEAR = "2020",
        PAGES = "1170--1174",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370719"}

@article{bb376617,
        AUTHOR = "Zhou, Y. and Tian, X. and Li, H.",
        TITLE = "Multi-Task {WaveRNN} With an Integrated Architecture for Cross-Lingual
Voice Conversion",
        JOURNAL = SPLetters,
        VOLUME = "27",
        YEAR = "2020",
        PAGES = "1310--1314",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370720"}

@article{bb376618,
        AUTHOR = "Yang, J. C. and Lin, P. and He, Q. H.",
        TITLE = "Constant-{Q} magnitude-phase coefficients extraction for synthetic speech
detection",
        JOURNAL = IET-Bio,
        VOLUME = "9",
        YEAR = "2020",
        NUMBER = "5",
        MONTH = sep,
        PAGES = "216--221",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370721"}

@article{bb376619,
        AUTHOR = "Liu, R. and Sisman, B. and Bao, F. and Gao, G. and Li, H.",
        TITLE = "Modeling Prosodic Phrasing With Multi-Task Learning in {Tacotron}-Based
{TTS}",
        JOURNAL = SPLetters,
        VOLUME = "27",
        YEAR = "2020",
        PAGES = "1470--1474",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370722"}

@article{bb376620,
        AUTHOR = "Qi, J. and Du, J. and Siniscalchi, S. M. and Ma, X. and Lee, C.",
        TITLE = "On Mean Absolute Error for Deep Neural Network Based Vector-to-Vector
Regression",
        JOURNAL = SPLetters,
        VOLUME = "27",
        YEAR = "2020",
        PAGES = "1485--1489",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370723"}

@article{bb376621,
        AUTHOR = "Yang, S. and Wang, Y. and Xie, L.",
        TITLE = "Adversarial Feature Learning and Unsupervised Clustering Based Speech
Synthesis for Found Data With Acoustic and Textual Noise",
        JOURNAL = SPLetters,
        VOLUME = "27",
        YEAR = "2020",
        PAGES = "1730--1734",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370724"}

@article{bb376622,
        AUTHOR = "Lee, J. Y. and Cheon, S. J. and Choi, B. J. and Kim, N. S.",
        TITLE = "Memory Attention: Robust Alignment Using Gating Mechanism for
End-to-End Speech Synthesis",
        JOURNAL = SPLetters,
        VOLUME = "27",
        YEAR = "2020",
        PAGES = "2004--2008",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370725"}

@article{bb376623,
        AUTHOR = "Zhang, Y. and Jiang, F. and Duan, Z. Y.",
        TITLE = "One-Class Learning Towards Synthetic Voice Spoofing Detection",
        JOURNAL = SPLetters,
        VOLUME = "28",
        YEAR = "2021",
        PAGES = "937--941",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370726"}

@article{bb376624,
        AUTHOR = "Saeki, T. and Takamichi, S. and Saruwatari, H.",
        TITLE = "Incremental Text-to-Speech Synthesis Using Pseudo Lookahead With
Large Pretrained Language Model",
        JOURNAL = SPLetters,
        VOLUME = "28",
        YEAR = "2021",
        PAGES = "857--861",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370727"}

@article{bb376625,
        AUTHOR = "Comanducci, L. and Bestagini, P. and Tagliasacchi, M. and Sarti, A. and Tubaro, S.",
        TITLE = "Reconstructing Speech From {CNN} Embeddings",
        JOURNAL = SPLetters,
        VOLUME = "28",
        YEAR = "2021",
        PAGES = "952--956",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370728"}

@article{bb376626,
        AUTHOR = "Hua, G. and Teoh, A. B. J. and Zhang, H. J.",
        TITLE = "Towards End-to-End Synthetic Speech Detection",
        JOURNAL = SPLetters,
        VOLUME = "28",
        YEAR = "2021",
        PAGES = "1265--1269",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370729"}

@article{bb376627,
        AUTHOR = "Cheon, S. J. and Choi, B. J. and Kim, M. and Lee, H. and Kim, N. S.",
        TITLE = "A Controllable Multi-Lingual Multi-Speaker Multi-Style Text-to-Speech
Synthesis With Multivariate Information Minimization",
        JOURNAL = SPLetters,
        VOLUME = "29",
        YEAR = "2022",
        PAGES = "55--59",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370730"}

@article{bb376628,
        AUTHOR = "Bilbao, S.",
        TITLE = "{3D} Interpolation in Wave-Based Acoustic Simulation",
        JOURNAL = SPLetters,
        VOLUME = "29",
        YEAR = "2022",
        PAGES = "384--388",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370731"}

@article{bb376629,
        AUTHOR = "Saleem, N. and Gao, J. and Irfan, M. and Verdu, E. and Fuente, J. P.",
        TITLE = "{E2E-V2SResNet}: Deep residual convolutional neural networks for
end-to-end video driven speech synthesis",
        JOURNAL = IVC,
        VOLUME = "119",
        YEAR = "2022",
        PAGES = "104389",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370732"}

@article{bb376630,
        AUTHOR = "Sun, X. and Li, J. Y. and Tao, J. H.",
        TITLE = "Emotional Conversation Generation Orientated Syntactically
Constrained Bidirectional-Asynchronous Framework",
        JOURNAL = AffCom,
        VOLUME = "13",
        YEAR = "2022",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "187--198",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370733"}

@article{bb376631,
        AUTHOR = "Liu, S. G. and Li, S. and Cheng, H. N.",
        TITLE = "Towards an End-to-End Visual-to-Raw-Audio Generation With {GAN}",
        JOURNAL = CirSysVideo,
        VOLUME = "32",
        YEAR = "2022",
        NUMBER = "3",
        MONTH = mar,
        PAGES = "1299--1312",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370734"}

@article{bb376632,
        AUTHOR = "Li, C. T. and Yang, F. and Yang, J.",
        TITLE = "The Role of Long-Term Dependency in Synthetic Speech Detection",
        JOURNAL = SPLetters,
        VOLUME = "29",
        YEAR = "2022",
        PAGES = "1142--1146",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370735"}

@article{bb376633,
        AUTHOR = "Cui, S. S. and Huang, B. Y. and Huang, J. W. and Kang, X. G.",
        TITLE = "Synthetic Speech Detection Based on Local Autoregression and Variance
Statistics",
        JOURNAL = SPLetters,
        VOLUME = "29",
        YEAR = "2022",
        PAGES = "1462--1466",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370736"}

@article{bb376634,
        AUTHOR = "Lei, Y. and Yang, S. and Zhu, X. F. and Xie, L. and Su, D.",
        TITLE = "Cross-Speaker Emotion Transfer Through Information Perturbation in
Emotional Speech Synthesis",
        JOURNAL = SPLetters,
        VOLUME = "29",
        YEAR = "2022",
        PAGES = "1948--1952",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370737"}

@article{bb376635,
        AUTHOR = "Choi, B. J. and Jeong, M. and Lee, J. Y. and Kim, N. S.",
        TITLE = "{SNAC}: Speaker-Normalized Affine Coupling Layer in Flow-Based
Architecture for Zero-Shot Multi-Speaker Text-to-Speech",
        JOURNAL = SPLetters,
        VOLUME = "29",
        YEAR = "2022",
        PAGES = "2502--2506",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370738"}

@article{bb376636,
        AUTHOR = "Choi, B. J. and Jeong, M. and Kim, M. and Kim, N. S.",
        TITLE = "Variable-Length Speaker Conditioning in Flow-Based Text-to-Speech",
        JOURNAL = SPLetters,
        VOLUME = "31",
        YEAR = "2024",
        PAGES = "899--903",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370739"}

@article{bb376637,
        AUTHOR = "Chen, L. C. and Chen, P. H. and Tsai, R. T. H. and Tsao, Y.",
        TITLE = "{EPG2S}: Speech Generation and Speech Enhancement Based on
Electropalatography and Audio Signals Using Multimodal Learning",
        JOURNAL = SPLetters,
        VOLUME = "29",
        YEAR = "2022",
        PAGES = "2582--2586",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370740"}

@article{bb376638,
        AUTHOR = "Zhou, K. and Sisman, B. and Rana, R. and Schuller, B. W. and Li, H. Z.",
        TITLE = "Emotion Intensity and its Control for Emotional Voice Conversion",
        JOURNAL = AffCom,
        VOLUME = "14",
        YEAR = "2023",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "31--48",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370741"}

@article{bb376639,
        AUTHOR = "Huang, B. and Cui, S. and Huang, J. W. and Kang, X.",
        TITLE = "Discriminative Frequency Information Learning for End-to-End Speech
Anti-Spoofing",
        JOURNAL = SPLetters,
        VOLUME = "30",
        YEAR = "2023",
        PAGES = "185--189",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370742"}

@article{bb376640,
        AUTHOR = "Zhao, W. and Wang, Z. and Xu, L.",
        TITLE = "{Mandarin} Text-to-Speech Front-End With Lightweight Distilled
Convolution Network",
        JOURNAL = SPLetters,
        VOLUME = "30",
        YEAR = "2023",
        PAGES = "249--253",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370743"}

@article{bb376641,
        AUTHOR = "Ma, K. J. and Feng, Y. F. and Chen, B. J. and Zhao, G. Y.",
        TITLE = "End-to-End Dual-Branch Network Towards Synthetic Speech Detection",
        JOURNAL = SPLetters,
        VOLUME = "30",
        YEAR = "2023",
        PAGES = "359--363",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370744"}

@article{bb376642,
        AUTHOR = "Mira, R. and Vougioukas, K. and Ma, P. C. and Petridis, S. and Schuller, B. W. and Pantic, M.",
        TITLE = "End-to-End Video-to-Speech Synthesis Using Generative Adversarial
Networks",
        JOURNAL = Cyber,
        VOLUME = "53",
        YEAR = "2023",
        NUMBER = "6",
        MONTH = jun,
        PAGES = "3454--3466",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370745"}

@article{bb376643,
        AUTHOR = "Yoon, H. C. and Kim, C. and Um, S. and Yoon, H. W. and Kang, H. G.",
        TITLE = "{SC-CNN}: Effective Speaker Conditioning Method for Zero-Shot
Multi-Speaker Text-to-Speech Systems",
        JOURNAL = SPLetters,
        VOLUME = "30",
        YEAR = "2023",
        PAGES = "593--597",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370746"}

@inproceedings{bb376644,
        AUTHOR = "Gu, Y. W. and Zhao, X. F. and Yi, X. W. and Xiao, J. C.",
        TITLE = "Voice Conversion Using Learnable Similarity-guided Masked Autoencoder",
        BOOKTITLE = IWDW22,
        YEAR = "2022",
        PAGES = "53--67",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370747"}

@article{bb376645,
        AUTHOR = "Zhang, M. Y. and Zhou, X. H. and Wu, Z. Z. and Li, H. Z.",
        TITLE = "Towards Zero-Shot Multi-Speaker Multi-Accent Text-to-Speech Synthesis",
        JOURNAL = SPLetters,
        VOLUME = "30",
        YEAR = "2023",
        PAGES = "947--951",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370748"}

@article{bb376646,
        AUTHOR = "Ly, E. and Villegas, J.",
        TITLE = "{Cartesian} Genetic Programming Parameterization in the Context of
Audio Synthesis",
        JOURNAL = SPLetters,
        VOLUME = "30",
        YEAR = "2023",
        PAGES = "1077--1081",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370749"}

@article{bb376647,
        AUTHOR = "Mingote, V. and Gimeno, P. and Vicente, L. and Khurana, S. and Laurent, A. and Duret, J.",
        TITLE = "Direct Text to Speech Translation System Using Acoustic Units",
        JOURNAL = SPLetters,
        VOLUME = "30",
        YEAR = "2023",
        PAGES = "1262--1266",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370750"}

@article{bb376648,
        AUTHOR = "Wang, Z. C. and Chen, Y. Z. and Xie, L. and Tian, Q. and Wang, Y. P.",
        TITLE = "{LM-VC}: Zero-Shot Voice Conversion via Speech Generation Based on
Language Models",
        JOURNAL = SPLetters,
        VOLUME = "30",
        YEAR = "2023",
        PAGES = "1157--1161",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370751"}

@article{bb376649,
        AUTHOR = "van Niekerk, B. and Carbonneau, M. A. and Kamper, H.",
        TITLE = "Rhythm Modeling for Voice Conversion",
        JOURNAL = SPLetters,
        VOLUME = "30",
        YEAR = "2023",
        PAGES = "1297--1301",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370752"}

@article{bb376650,
        AUTHOR = "Zhou, K. and Sisman, B. and Rana, R. and Schuller, B. W. and Li, H. Z.",
        TITLE = "Speech Synthesis With Mixed Emotions",
        JOURNAL = AffCom,
        VOLUME = "14",
        YEAR = "2023",
        NUMBER = "4",
        MONTH = oct,
        PAGES = "3120--3134",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370753"}

@article{bb376651,
        AUTHOR = "Liu, Y. and Wei, L. F. and Qian, X. Y. and Zhang, T. H. and Chen, S. L. and Yin, X. C.",
        TITLE = "{M3TTS}: Multi-modal text-to-speech of multi-scale style control for
dubbing",
        JOURNAL = PRL,
        VOLUME = "179",
        YEAR = "2024",
        PAGES = "158--164",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370754"}

@article{bb376652,
        AUTHOR = "Jeong, M. and Kim, M. and Lee, J. Y. and Kim, N. S.",
        TITLE = "Efficient Parallel Audio Generation Using Group Masked Language
Modeling",
        JOURNAL = SPLetters,
        VOLUME = "31",
        YEAR = "2024",
        PAGES = "979--983",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370755"}

@article{bb376653,
        AUTHOR = "Yi, J. Y. and Wang, C. L. and Tao, J. H. and Zhang, C. Y. and Fan, C. H. and Tian, Z. K. and Ma, H. X. and Fu, R.",
        TITLE = "{SceneFake}:
An initial dataset and benchmarks for scene fake audio detection",
        JOURNAL = PR,
        VOLUME = "152",
        YEAR = "2024",
        PAGES = "110468",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370756"}

@article{bb376654,
        AUTHOR = "Tan, X. and Chen, J. W. and Liu, H. and Cong, J. and Zhang, C. and Liu, Y. Q. and Wang, X. and Leng, Y. and Yi, Y. H. and He, L. and Zhao, S. and Qin, T. and Soong, F. and Liu, T. Y.",
        TITLE = "{NaturalSpeech}:
End-to-End Text-to-Speech Synthesis With Human-Level Quality",
        JOURNAL = PAMI,
        VOLUME = "46",
        YEAR = "2024",
        NUMBER = "6",
        MONTH = jun,
        PAGES = "4234--4245",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370757"}

@article{bb376655,
        AUTHOR = "Zhou, J. and Li, Y. and Fan, C. H. and Tao, L. and Kwan, H. K.",
        TITLE = "Multi-Level Information Aggregation Based Graph Attention Networks
Towards Fake Speech Detection",
        JOURNAL = SPLetters,
        VOLUME = "31",
        YEAR = "2024",
        PAGES = "1580--1584",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370758"}

@article{bb376656,
        AUTHOR = "Cao, D. Y. and Zhang, Z. Y. and Zhang, J. Y.",
        TITLE = "{NeuralVC}: Any-to-Any Voice Conversion Using Neural Networks Decoder
for Real-Time Voice Conversion",
        JOURNAL = SPLetters,
        VOLUME = "31",
        YEAR = "2024",
        PAGES = "2070--2074",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370759"}

@article{bb376657,
        AUTHOR = "Valin, J. M. and Mustafa, A. and Buthe, J.",
        TITLE = "Very Low Complexity Speech Synthesis Using Framewise Autoregressive
{GAN} ({FARGAN}) With Pitch Prediction",
        JOURNAL = SPLetters,
        VOLUME = "31",
        YEAR = "2024",
        PAGES = "2115--2119",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370760"}

@article{bb376658,
        AUTHOR = "Xue, J. and Fan, C. H. and Yi, J. Y. and Zhou, J. and Lv, Z.",
        TITLE = "Dynamic Ensemble Teacher-Student Distillation Framework for
Light-Weight Fake Audio Detection",
        JOURNAL = SPLetters,
        VOLUME = "31",
        YEAR = "2024",
        PAGES = "2305--2309",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370761"}

@article{bb376659,
        AUTHOR = "Cheng, X. Y. and Wang, Y. F. and Liu, C. and Hu, D. H. and Su, Z.",
        TITLE = "{HiFi-GANw}: Watermarked Speech Synthesis via Fine-Tuning of {HiFi-GAN}",
        JOURNAL = SPLetters,
        VOLUME = "31",
        YEAR = "2024",
        PAGES = "2440--2444",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370762"}

@article{bb376660,
        AUTHOR = "Zhang, Y. M. and Du, R. and Tan, Z. H. and Wang, W. W. and Ma, Z. Y.",
        TITLE = "Generating Accurate and Diverse Audio Captions Through Variational
Autoencoder Framework",
        JOURNAL = SPLetters,
        VOLUME = "31",
        YEAR = "2024",
        PAGES = "2520--2524",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370763"}

@article{bb376661,
        AUTHOR = "Huang, W. C. and Wu, Y. C. and Toda, T.",
        TITLE = "Multi-Speaker Text-to-Speech Training With Speaker Anonymized Data",
        JOURNAL = SPLetters,
        VOLUME = "31",
        YEAR = "2024",
        PAGES = "2995--2999",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370764"}

@article{bb376662,
        AUTHOR = "Lee, J. and Shin, Y. and Chang, J. H.",
        TITLE = "Differentiable Duration Refinement Using Internal Division for
Non-Autoregressive Text-to-Speech",
        JOURNAL = SPLetters,
        VOLUME = "31",
        YEAR = "2024",
        PAGES = "3154--3158",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370765"}

@article{bb376663,
        AUTHOR = "Xu, X. and Ma, Z. Y. and Wu, M. Y. and Yu, K.",
        TITLE = "Towards Weakly Supervised Text-to-Audio Grounding",
        JOURNAL = MultMed,
        VOLUME = "26",
        YEAR = "2024",
        PAGES = "11126--11138",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370766"}

@article{bb376664,
        AUTHOR = "Kim, M. and Jeong, M. and Lee, J. Y. and Kim, N. S.",
        TITLE = "{SegINR}: Segment-Wise Implicit Neural Representation for Sequence
Alignment in Neural Text-to-Speech",
        JOURNAL = SPLetters,
        VOLUME = "32",
        YEAR = "2025",
        PAGES = "646--650",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370767"}

@article{bb376665,
        AUTHOR = "Zheng, J. J. and Zhou, J. and Zheng, W. M. and Tao, L. and Kwan, H. K.",
        TITLE = "Controllable Multi-Speaker Emotional Speech Synthesis With an Emotion
Representation of High Generalization Capability",
        JOURNAL = AffCom,
        VOLUME = "16",
        YEAR = "2025",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "68--82",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370768"}

@article{bb376666,
        AUTHOR = "Chen, K. and Huang, Z. H. and He, L. and Yan, Y. H.",
        TITLE = "{UnitDiff}: A Unit-Diffusion Model for Code-Switching Speech Synthesis",
        JOURNAL = SPLetters,
        VOLUME = "32",
        YEAR = "2025",
        PAGES = "1051--1055",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370769"}

@article{bb376667,
        AUTHOR = "Chang, Y. and Ko, Y. J.",
        TITLE = "Soft engagement with pseudo initiatives for multi-party dialogue
generation",
        JOURNAL = PRL,
        VOLUME = "191",
        YEAR = "2025",
        PAGES = "103--109",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370770"}

@article{bb376668,
        AUTHOR = "He, Y. L. and Wang, H. X. and Qiu, Y. Q. and Cao, H.",
        TITLE = "{ASSMark}: Dual Defense Against Speech Synthesis Attack via Adversarial
Robust Watermarking",
        JOURNAL = SPLetters,
        VOLUME = "32",
        YEAR = "2025",
        PAGES = "1870--1874",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370771"}

@article{bb376669,
        AUTHOR = "Wang, R. and Chen, L. P. and Lee, K. A. and Ling, Z. H.",
        TITLE = "Asynchronous Voice Anonymization by Learning From Speaker-Adversarial
Speech",
        JOURNAL = SPLetters,
        VOLUME = "32",
        YEAR = "2025",
        PAGES = "1905--1909",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370772"}

@article{bb376670,
        AUTHOR = "Feng, Y. and Zhang, X. B. and Feng, F. Y. and Zhang, G. L. and Xu, L. T.",
        TITLE = "Robust and Imperceptible Watermarking Framework for Generative Audio
Models",
        JOURNAL = SPLetters,
        VOLUME = "32",
        YEAR = "2025",
        PAGES = "3196--3200",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370773"}

@article{bb376671,
        AUTHOR = "Lee, J. and Song, N. S. and Chang, J. H.",
        TITLE = "Vector Field Decomposition-Based Flow Matching for Zero-Shot
Cross-Lingual Text-to-Speech",
        JOURNAL = SPLetters,
        VOLUME = "32",
        YEAR = "2025",
        PAGES = "3560--3564",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370774"}

@article{bb376672,
        AUTHOR = "Wang, H. and Yang, Y. F. and Liu, S. and Li, J. and Meng, L. and Liu, Y. Q. and Zhou, J. M. and Sun, H. Q. and Lu, Y. and Qin, Y.",
        TITLE = "{StreamMel}: Real-Time Zero-Shot Text-to-Speech Via Interleaved
Continuous Autoregressive Modeling",
        JOURNAL = SPLetters,
        VOLUME = "32",
        YEAR = "2025",
        PAGES = "3530--3534",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370775"}

@article{bb376673,
        AUTHOR = "Li, L. and Cong, G. X. and Qi, Y. K. and Zha, Z. J. and Wu, Q. and Sheng, Q. Z. and Huang, Q. M. and Yang, M. H.",
        TITLE = "Dubbing Movies via Hierarchical Phoneme Modeling and Acoustic
Diffusion Denoising",
        JOURNAL = PAMI,
        VOLUME = "47",
        YEAR = "2025",
        NUMBER = "11",
        MONTH = nov,
        PAGES = "10361--10377",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370776"}

@article{bb376674,
        AUTHOR = "Gao, X. X. and Zhang, H. and Chen, N. F.",
        TITLE = "Prompt-Unseen-Emotion: Mixed Emotional Speech Synthesis With
Prompt-{LLM} Contextual Knowledge",
        JOURNAL = SPLetters,
        VOLUME = "32",
        YEAR = "2025",
        PAGES = "4259--4263",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370777"}

@article{bb376675,
        AUTHOR = "Lee, K. and Hong, S. and Chun, S. Y.",
        TITLE = "Robust watermarks for audio diffusion models by quadrature amplitude
modulation",
        JOURNAL = PRL,
        VOLUME = "198",
        YEAR = "2025",
        PAGES = "22--28",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370778"}

@article{bb376676,
        AUTHOR = "Inoue, S. and Zhou, K. and Wang, S. and Li, H. Z.",
        TITLE = "Hierarchical Control of Emotion Rendering in Speech Synthesis",
        JOURNAL = AffCom,
        VOLUME = "16",
        YEAR = "2025",
        NUMBER = "4",
        MONTH = oct,
        PAGES = "3316--3328",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370779"}

@article{bb376677,
        AUTHOR = "Cha, H. and Um, S. and Kim, M. and Kim, C. and Lee, S. and Kang, H. G.",
        TITLE = "Content-Aware Style Augmentation for Zero-Shot Voice Conversion With
Short Target Speech",
        JOURNAL = SPLetters,
        VOLUME = "33",
        YEAR = "2026",
        PAGES = "66--70",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370780"}

@article{bb376678,
        AUTHOR = "Haji Ali, M. and Menapace, W. and Siarohin, A. and Balakrishnan, G. and Ordonez, V.",
        TITLE = "Taming Data and Transformers for Audio Generation",
        JOURNAL = IJCV,
        VOLUME = "134",
        YEAR = "2026",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "87",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370781"}

@inproceedings{bb376679,
        AUTHOR = "Liu, J. and Geddes, J. and Guo, Z. Y. and Jiang, H. and Nandwana, M. K.",
        TITLE = "Smooth Cache: A Universal Inference Acceleration Technique for
Diffusion Transformers",
        BOOKTITLE = LargeVM25,
        YEAR = "2025",
        PAGES = "3220--3229",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370782"}

@inproceedings{bb376680,
        AUTHOR = "Kushwaha, S.S. and Tian, Y.P.",
        TITLE = "{VinTAGe}: Joint Video and Text Conditioning for Holistic Audio
Generation",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "13529-13539",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370783"}

@inproceedings{bb376681,
  author    = {Kim, J.H. and Choi, J. and Kim, J.H. and Jung, C. and Chung, J.S.},
  title     = {From Faces to Voices: Learning Hierarchical Representations for High-quality Video-to-Speech},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {15874-15884},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT370784},
}

@inproceedings{bb376682,
        AUTHOR = "Cong, G.X. and Pan, J. and Li, L. and Qi, Y.K. and Peng, Y.X. and van den Hengel, A.J. and Yang, J. and Huang, Q.M.",
        TITLE = "{EmoDubber}: Towards High Quality and Emotion Controllable Movie
Dubbing",
        BOOKTITLE = CVPR25,
        YEAR = "2025",
        PAGES = "15863-15873",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370785"}

@inproceedings{bb376683,
  author    = {Zhang, Z.D. and Li, L. and Yan, C.G. and Liu, C.S. and van den Hengel, A.J. and Qi, Y.K.},
  title     = {Prosody-Enhanced Acoustic Pre-training and Acoustic-Disentangled Prosody Adapting for Movie Dubbing},
  booktitle = CVPR25,
  year      = {2025},
  pages     = {172-182},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT370786},
}

@inproceedings{bb376684,
        AUTHOR = "Rai, A. and Sridhar, S.",
        TITLE = "{EgoSonics}: Generating Synchronized Audio for Silent Egocentric Videos",
        BOOKTITLE = WACV25,
        YEAR = "2025",
        PAGES = "4935-4946",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370787"}

@inproceedings{bb376685,
        AUTHOR = "Yadav, A.K.S. and Bhagtani, K. and Salvi, D. and Bestagini, P. and Delp, E.J.",
        TITLE = "{FairSSD}: Understanding Bias in Synthetic Speech Detectors",
        BOOKTITLE = WMF24,
        YEAR = "2024",
        PAGES = "4418-4428",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370788"}

@inproceedings{bb376686,
  author    = {Cuccovillo, L. and Gerhardt, M. and Aichroth, P.},
  title     = {Audio Transformer for Synthetic Speech Detection via Multi-Formant Analysis},
  booktitle = WMF24,
  year      = {2024},
  pages     = {4409-4417},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT370789},
}

@inproceedings{bb376687,
  author    = {Cong, G.X. and Li, L. and Qi, Y.K. and Zha, Z.J. and Wu, Q. and Wang, W.Y. and Jiang, B. and Yang, M.H. and Huang, Q.M.},
  title     = {Learning to Dub Movies via Hierarchical Prosody Models},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {14687-14697},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT370790},
}

@inproceedings{bb376688,
        AUTHOR = "Hsu, W.N. and Remez, T. and Shi, B. and Donley, J. and Adi, Y.",
        TITLE = "{ReVISE}: Self-Supervised Speech Resynthesis with Visual Input for
Universal and Generalized Speech Regeneration",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "18796-18806",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370791"}

@inproceedings{bb376689,
        AUTHOR = "Sun, C.Z. and Jia, S. and Hou, S.W. and Lyu, S.W.",
        TITLE = "{AI}-Synthesized Voice Detection Using Neural Vocoder Artifacts",
        BOOKTITLE = WMF23,
        YEAR = "2023",
        PAGES = "904-912",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370792"}

@inproceedings{bb376690,
  author    = {Noufi, C. and May, L. and Berger, J.},
  title     = {The Role of Vocal Persona in Natural and Synthesized Speech},
  booktitle = FG23,
  year      = {2023},
  pages     = {1-4},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT370793},
}

@inproceedings{bb376691,
        AUTHOR = "Hwang, I.S. and Lee, S.H. and Lee, S.W.",
        TITLE = "{StyleVC}: Non-Parallel Voice Conversion with Adversarial Style
Generalization",
        BOOKTITLE = "ICPR22",
        YEAR = "2022",
        PAGES = "23-30",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370794"}

@inproceedings{bb376692,
  author    = {Wang, W.B. and Song, Y. and Jha, S.},
  title     = {Autolv: Automatic Lecture Video Generator},
  booktitle = ICIP22,
  year      = {2022},
  pages     = {1086-1090},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT370795},
}

@inproceedings{bb376693,
  author    = {Borzi, S. and Giudice, O. and Stanco, F. and Allegra, D.},
  title     = {Is synthetic voice detection research going into the right direction?},
  booktitle = WMF22,
  year      = {2022},
  pages     = {71-80},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT370796},
}

@inproceedings{bb376694,
  author    = {Hassid, M. and Ramanovich, M.T. and Shillingford, B. and Wang, M. and Jia, Y. and Remez, T.},
  title     = {More than Words: In-the-Wild Visually-Driven Prosody for Text-to-Speech},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {10577-10587},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT370797},
}

@inproceedings{bb376695,
        AUTHOR = "Kwak, I.Y. and Kwag, S. and Lee, J. and Huh, J.H. and Lee, C.H. and Jeon, Y.B. and Hwang, J.H. and Yoon, J.W.",
        TITLE = "{ResMax}: Detecting Voice Spoofing Attacks with Residual Network and
Max Feature Map",
        BOOKTITLE = ICPR21,
        YEAR = "2021",
        PAGES = "4837-4844",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370798"}

@inproceedings{bb376696,
  author    = {Wang, D.H. and Wang, R. and Dong, L. and Yan, D. and Ren, Y.M.},
  title     = {Efficient Generation of Speech Adversarial Examples with Generative Model},
  booktitle = IWDW20,
  year      = {2020},
  pages     = {251-264},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT370799},
}

@inproceedings{bb376697,
  author    = {Zhou, H. and Liu, Z. and Xu, X. and Luo, P. and Wang, X.},
  title     = {Vision-Infused Deep Audio Inpainting},
  booktitle = ICCV19,
  year      = {2019},
  pages     = {283-292},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT370800},
}

@inproceedings{bb376698,
  author    = {Bailer, W. and Wijnants, M. and Lievens, H. and Claes, S.},
  title     = {Multimedia Analytics Challenges and Opportunities for Creating Interactive Radio Content},
  booktitle = MMMod20,
  year      = {2020},
  pages     = {II:375-387},
  bibsource = {http://www.visionbib.com/bibliography/other1024ss1.html#TT370801},
}

@inproceedings{bb376699,
        AUTHOR = "Huang, T. and Wang, H.X. and Chen, Y. and He, P.S.",
        TITLE = "{GRU-SVM} Model for Synthetic Speech Detection",
        BOOKTITLE = IWDW19,
        YEAR = "2019",
        PAGES = "115-125",
        BIBSOURCE = "http://www.visionbib.com/bibliography/other1024ss1.html#TT370802"}

Last update: Feb 26, 2026 at 10:58:24