% Bibliography records bb255600 through bb255699 (origin: visionbib.com; the
% bibsource field of each record holds the page anchor it was taken from).
%
% Layout: one record per block, one field per line.
% Bare values such as MultMed, PAMI, CVPR22 or SP:IC are string macros whose
% definitions are not part of this section -- presumably supplied by a
% companion macro file (TODO confirm it is loaded before this file).
% Month values use the standard three-letter month macros (jan ... dec).
% Acronyms and coined names inside titles are brace-protected so that
% sentence-casing styles do not lower-case them.
% The booktitle ICPR22 is a literal string (it was quoted in the original
% data), unlike the macro-valued booktitles of the other records.

@article{bb255600,
  author    = {Petridis, S. and Pantic, M.},
  title     = {Audiovisual Discrimination Between Speech and Laughter: Why and When Visual Information Might Help},
  journal   = MultMed,
  volume    = {13},
  year      = {2011},
  number    = {2},
  pages     = {216--234},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250413}
}

@article{bb255601,
  author    = {Petridis, S. and Pantic, M.},
  title     = {Prediction-Based Audiovisual Fusion for Classification of Non-Linguistic Vocalisations},
  journal   = AffCom,
  volume    = {7},
  year      = {2016},
  number    = {1},
  month     = jan,
  pages     = {45--58},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250414}
}

@inproceedings{bb255602,
  author    = {Petridis, S. and Pantic, M.},
  title     = {Fusion of audio and visual cues for laughter detection},
  booktitle = CIVR08,
  year      = {2008},
  pages     = {329--338},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250415}
}

@inproceedings{bb255603,
  author    = {Petridis, S. and Pantic, M. and Cohn, J. F.},
  title     = {Prediction-based classification for audiovisual discrimination between laughter and speech},
  booktitle = FG11,
  year      = {2011},
  pages     = {619--626},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250416}
}

@article{bb255604,
  author    = {Moustakas, K. and Tzovaras, D. and Dybkjaer, L. and Bernsen, N. and Aran, O.},
  title     = {Using Modality Replacement to Facilitate Communication between Visually and Hearing-Impaired People},
  journal   = MultMedMag,
  volume    = {18},
  year      = {2011},
  number    = {2},
  month     = apr,
  pages     = {26--37},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250417}
}

@article{bb255605,
  author    = {Tariquzzaman, M. and Kim, J. Y. and Na, S. Y. and Kim, H. G. and Har, D. S.},
  title     = {A Visual Signal Reliability for Robust Audio-Visual Speaker Identification},
  journal   = IEICE,
  volume    = {E94-D},
  year      = {2011},
  number    = {10},
  month     = oct,
  pages     = {2052--2055},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250418}
}

@article{bb255606,
  author    = {Lee, J. S. and de Simone, F. and Ebrahimi, T.},
  title     = {Efficient video coding based on audio-visual focus of attention},
  journal   = JVCIR,
  volume    = {22},
  year      = {2011},
  number    = {8},
  month     = nov,
  pages     = {704--711},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250419}
}

@article{bb255607,
  author    = {Tiawongsombat, P. and Jeong, M. H. and Yun, J. S. and You, B. J. and Oh, S. R.},
  title     = {Robust visual speakingness detection using bi-level {HMM}},
  journal   = PR,
  volume    = {45},
  year      = {2012},
  number    = {2},
  month     = feb,
  pages     = {783--793},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250420}
}

@article{bb255608,
  author    = {Noulas, A. and Englebienne, G. and Krose, B. J. A.},
  title     = {Multimodal Speaker Diarization},
  journal   = PAMI,
  volume    = {34},
  year      = {2012},
  number    = {1},
  month     = jan,
  pages     = {79--93},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250421}
}

@article{bb255609,
  author    = {Blauth, D. A. and Minotto, V. P. and Jung, C. R. and Lee, B. and Kalker, T.},
  title     = {Voice activity detection and speaker localization using audiovisual cues},
  journal   = PRL,
  volume    = {33},
  year      = {2012},
  number    = {4},
  month     = mar,
  pages     = {373--380},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250422}
}

@inproceedings{bb255610,
  author    = {Montazzolli, S. and Jung, C. R. and Gelb, D.},
  title     = {Audiovisual voice activity detection using off-the-shelf cameras},
  booktitle = ICIP15,
  year      = {2015},
  pages     = {3886--3890},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250423}
}

@article{bb255611,
  author    = {Minotto, V. P. and Jung, C. R. and Lee, B.},
  title     = {Simultaneous-Speaker Voice Activity Detection and Localization Using Mid-Fusion of {SVM} and {HMMs}},
  journal   = MultMed,
  volume    = {16},
  year      = {2014},
  number    = {4},
  month     = jun,
  pages     = {1032--1044},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250424}
}

@article{bb255612,
  author    = {Minotto, V. P. and Jung, C. R. and Lee, B.},
  title     = {Multimodal Multi-Channel On-Line Speaker Diarization Using Sensor Fusion Through {SVM}},
  journal   = MultMed,
  volume    = {17},
  year      = {2015},
  number    = {10},
  month     = oct,
  pages     = {1694--1705},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250425}
}

@article{bb255613,
  author    = {Nicolaou, M. A. and Gunes, H. and Pantic, M.},
  title     = {Output-associative {RVM} regression for dimensional and continuous emotion prediction},
  journal   = IVC,
  volume    = {30},
  year      = {2012},
  number    = {3},
  month     = mar,
  pages     = {186--196},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250426}
}

% NOTE(review): bb255614 shares its title, author set and bibsource anchor
% with bb255613. The original record listed the authors in reverse order
% (Pantic, Gunes, Nicolaou); the order is aligned with bb255613 here --
% confirm against the published proceedings paper.
@inproceedings{bb255614,
  author    = {Nicolaou, M. A. and Gunes, H. and Pantic, M.},
  title     = {Output-associative {RVM} regression for dimensional and continuous emotion prediction},
  booktitle = FG11,
  year      = {2011},
  pages     = {16--23},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250426}
}

@inproceedings{bb255615,
  author    = {Nicolaou, M. A. and Gunes, H. and Pantic, M.},
  title     = {Designing frameworks for automatic affect prediction and classification in dimensional space},
  booktitle = Gesture11,
  year      = {2011},
  pages     = {20--26},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250427}
}

@article{bb255616,
  author    = {Nicolaou, M. A. and Gunes, H. and Pantic, M.},
  title     = {Continuous Prediction of Spontaneous Affect from Multiple Cues and Modalities in Valence-Arousal Space},
  journal   = AffCom,
  volume    = {2},
  year      = {2011},
  number    = {2},
  pages     = {92--105},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250428}
}

@inproceedings{bb255617,
  author    = {Nicolaou, M. A. and Gunes, H. and Pantic, M.},
  title     = {Audio-Visual Classification and Fusion of Spontaneous Affective Data in Likelihood Space},
  booktitle = ICPR10,
  year      = {2010},
  pages     = {3695--3699},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250429}
}

@article{bb255618,
  author    = {Nicolaou, M. A. and Pavlovic, V. and Pantic, M.},
  title     = {Dynamic Probabilistic {CCA} for Analysis of Affective Behavior and Fusion of Continuous Annotations},
  journal   = PAMI,
  volume    = {36},
  year      = {2014},
  number    = {7},
  month     = jul,
  pages     = {1299--1311},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250430}
}

@inproceedings{bb255619,
  author    = {Nicolaou, M. A. and Pavlovic, V. and Pantic, M.},
  title     = {Dynamic Probabilistic {CCA} for Analysis of Affective Behaviour},
  booktitle = ECCV12,
  year      = {2012},
  pages     = {VII: 98--111},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250431}
}

@article{bb255620,
  author    = {Wang, L. J. and Qian, Y. and Scott, M. R. and Chen, G. and Soong, F. K.},
  title     = {Computer-Assisted Audiovisual Language Learning},
  journal   = Computer,
  volume    = {45},
  year      = {2012},
  number    = {6},
  month     = jun,
  pages     = {38--47},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250432}
}

@article{bb255621,
  author    = {Wu, Q. X. and Wang, Z. Y. and Deng, F. Q. and Chi, Z. and Feng, D. D.},
  title     = {Realistic Human Action Recognition with Multimodal Feature Selection and Fusion},
  journal   = SMCS,
  volume    = {43},
  year      = {2013},
  number    = {4},
  pages     = {875--885},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250433}
}

@article{bb255622,
  author    = {Wu, Q. X. and Wang, Z. Y. and Deng, F. Q. and Xia, Y. and Kang, W. X. and Feng, D. D.},
  title     = {Discriminative two-level feature selection for realistic human action recognition},
  journal   = JVCIR,
  volume    = {24},
  year      = {2013},
  number    = {7},
  pages     = {1064--1074},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250434}
}

@inproceedings{bb255623,
  author    = {Wu, Q. X. and Wang, Z. Y. and Deng, F. Q. and Feng, D. D.},
  title     = {Realistic Human Action Recognition with Audio Context},
  booktitle = DICTA10,
  year      = {2010},
  pages     = {288--293},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250435}
}

@inproceedings{bb255624,
  author    = {Wu, Q. X. and Lu, S. Y. and Wang, Z. Y. and Deng, F. Q. and Kang, W. X. and Feng, D. D.},
  title     = {Structure Context of Local Features in Realistic Human Action Recognition},
  booktitle = VECTaR11,
  year      = {2011},
  pages     = {1496--1501},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250436}
}

@article{bb255625,
  author    = {Mirzaei, M. R. and Ghorshi, S. and Mortazavi, M.},
  title     = {Audio-visual speech recognition techniques in augmented reality environments},
  journal   = VC,
  volume    = {30},
  year      = {2014},
  number    = {3},
  month     = mar,
  pages     = {245--257},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250437}
}

@article{bb255626,
  author    = {Bredin, H. and Roy, A. and Le, V. B. and Barras, C.},
  title     = {Person instance graphs for mono-, cross- and multi-modal person recognition in multimedia data: application to speaker identification in {TV} broadcast},
  journal   = MultInfoRetr,
  volume    = {3},
  year      = {2014},
  number    = {3},
  month     = sep,
  pages     = {161--175},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250438}
}

@article{bb255627,
  author    = {Ozasa, Y. and Nakano, M. and Ariki, Y. and Iwahashi, N.},
  title     = {Discriminating Unknown Objects from Known Objects Using Image and Speech Information},
  journal   = IEICE,
  volume    = {E98-D},
  year      = {2015},
  number    = {3},
  month     = mar,
  pages     = {704--711},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250439}
}

@inproceedings{bb255628,
  author    = {Ozasa, Y. and Ariki, Y. and Nakano, M. and Iwahashi, N.},
  title     = {Disambiguation in Unknown Object Detection by Integrating Image and Speech Recognition Confidences},
  booktitle = ACCV12,
  year      = {2012},
  pages     = {I:85--96},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250440}
}

@inproceedings{bb255629,
  author    = {Nishimura, H. and Ozasa, Y. and Ariki, Y. and Nakano, M.},
  title     = {Selection of Unknown Objects Specified by Speech Using Models Constructed from Web Images},
  booktitle = ICPR14,
  year      = {2014},
  pages     = {477--482},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250441}
}

@inproceedings{bb255630,
  author    = {Nishimura, H. and Ozasa, Y. and Ariki, Y. and Nakano, M.},
  title     = {Object Recognition by Integrated Information Using Web Images},
  booktitle = ACPR13,
  year      = {2013},
  pages     = {657--661},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250442}
}

@inproceedings{bb255631,
  author    = {Ozasa, Y. and Enami, N. and Ariki, Y.},
  title     = {Color saliency for object identification},
  booktitle = FCV15,
  year      = {2015},
  pages     = {1--5},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250443}
}

@article{bb255632,
  author    = {Harte, N. and Gillen, E.},
  title     = {{TCD-TIMIT}: An Audio-Visual Corpus of Continuous Speech},
  journal   = MultMed,
  volume    = {17},
  year      = {2015},
  number    = {5},
  month     = may,
  pages     = {603--615},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250444}
}

@article{bb255633,
  author    = {Katsaggelos, A. K. and Bahaadini, S. and Molina, R.},
  title     = {Audiovisual Fusion: Challenges and New Approaches},
  journal   = PIEEE,
  volume    = {103},
  year      = {2015},
  number    = {9},
  month     = sep,
  pages     = {1635--1653},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250445}
}

@article{bb255634,
  author    = {Mezai, L. and Hachouf, F.},
  title     = {Score-Level Fusion of Face and Voice Using Particle Swarm Optimization and Belief Functions},
  journal   = HMS,
  volume    = {45},
  year      = {2015},
  number    = {6},
  month     = dec,
  pages     = {761--772},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250446}
}

@article{bb255635,
  author    = {Wu, P. and Liu, H. and Li, X. and Fan, T. and Zhang, X.},
  title     = {A Novel Lip Descriptor for Audio-Visual Keyword Spotting Based on Adaptive Decision Fusion},
  journal   = MultMed,
  volume    = {18},
  year      = {2016},
  number    = {3},
  month     = mar,
  pages     = {326--338},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250447}
}

@article{bb255636,
  author    = {Dilpazir, H. and Muhammad, Z. and Minhas, Q. and Ahmed, F. and Malik, H. and Mahmood, H.},
  title     = {Multivariate mutual information for audio video fusion},
  journal   = SIViP,
  volume    = {10},
  year      = {2016},
  number    = {7},
  month     = oct,
  pages     = {1265--1272},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250448}
}

@article{bb255637,
  author    = {Beyan, C. and Capozzi, F. and Becchio, C. and Murino, V.},
  title     = {Prediction of the Leadership Style of an Emergent Leader Using Audio and Visual Nonverbal Features},
  journal   = MultMed,
  volume    = {20},
  year      = {2018},
  number    = {2},
  month     = feb,
  pages     = {441--456},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250449}
}

@article{bb255638,
  author    = {Fernandez Lopez, A. and Sukno, F. M.},
  title     = {Survey on automatic lip-reading in the era of deep learning},
  journal   = IVC,
  volume    = {78},
  year      = {2018},
  pages     = {53--72},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250450}
}

@article{bb255639,
  author    = {Stafylakis, T. and Khan, M. H. and Tzimiropoulos, G.},
  title     = {Pushing the boundaries of audiovisual word recognition using Residual Networks and {LSTMs}},
  journal   = CVIU,
  volume    = {176--177},
  year      = {2018},
  pages     = {22--32},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250451}
}

@inproceedings{bb255640,
  author    = {Stafylakis, T. and Tzimiropoulos, G.},
  title     = {Zero-Shot Keyword Spotting for Visual Speech Recognition In-the-wild},
  booktitle = ECCV18,
  year      = {2018},
  pages     = {II: 536--552},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250452}
}

@article{bb255641,
  author    = {Liu, X. and Geng, J. J. and Ling, H. B. and Cheung, Y. M.},
  title     = {Attention guided deep audio-face fusion for efficient speaker naming},
  journal   = PR,
  volume    = {88},
  year      = {2019},
  pages     = {557--568},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250453}
}

@article{bb255642,
  author    = {Tsiami, A. and Koutras, P. and Katsamanis, A. and Vatakis, A. and Maragos, P.},
  title     = {A behaviorally inspired fusion approach for computational audiovisual saliency modeling},
  journal   = SP:IC,
  volume    = {76},
  year      = {2019},
  pages     = {186--200},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250454}
}

@article{bb255643,
  author    = {Hsiao, S. and Sun, H. and Hsieh, M. and Tsai, M. and Tsao, Y. and Lee, C.},
  title     = {Toward Automating Oral Presentation Scoring During Principal Certification Program Using Audio-Video Low-Level Behavior Profiles},
  journal   = AffCom,
  volume    = {10},
  year      = {2019},
  number    = {4},
  month     = oct,
  pages     = {552--567},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250455}
}

% NOTE(review): the pages value of bb255644 is the placeholder xx-yy carried
% over unchanged from the source data; replace it with the real article
% number or page range once known.
@article{bb255644,
  author    = {Ma, Y. and Hong, H. and Li, H. and Zhao, H. and Li, Y. S. and Sun, L. and Gu, C. and Zhu, X. H.},
  title     = {Non-Contact Speech Recovery Technology Using a 24 {GHz} Portable Auditory Radar and Webcam},
  journal   = RS,
  volume    = {12},
  year      = {2020},
  number    = {4},
  pages     = {xx-yy},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250456}
}

@inproceedings{bb255645,
  author    = {Xu, B. and Wang, J. and Lu, C. and Guo, Y.},
  title     = {Watch to Listen Clearly: Visual Speech Enhancement Driven Multi-modality Speech Recognition},
  booktitle = WACV20,
  year      = {2020},
  pages     = {1626--1635},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250457}
}

@article{bb255646,
  author    = {Pu, J. and Panagakis, Y. and Pantic, M.},
  title     = {Active Speaker Detection and Localization in Videos Using Low-Rank and Kernelized Sparsity},
  journal   = SPLetters,
  volume    = {27},
  year      = {2020},
  pages     = {865--869},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250458}
}

@article{bb255647,
  author    = {Tao, F. and Busso, C.},
  title     = {End-to-End Audiovisual Speech Recognition System With Multitask Learning},
  journal   = MultMed,
  volume    = {23},
  year      = {2021},
  pages     = {1--11},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250459}
}

@article{bb255648,
  author    = {Liu, L. and Feng, G. and Beautemps, D. and Zhang, X. P.},
  title     = {Re-Synchronization Using the Hand Preceding Model for Multi-Modal Fusion in Automatic Continuous Cued Speech Recognition},
  journal   = MultMed,
  volume    = {23},
  year      = {2021},
  pages     = {292--305},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250460}
}

@article{bb255649,
  author    = {Beyan, C. and Shahid, M. and Murino, V.},
  title     = {{RealVAD}: A Real-World Dataset and A Method for Voice Activity Detection by Body Motion Analysis},
  journal   = MultMed,
  volume    = {23},
  year      = {2021},
  pages     = {2071--2085},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250461}
}

@article{bb255650,
  author    = {Qian, X. Y. and Liu, Q. and Wang, J. D. and Li, H. Z.},
  title     = {Three-Dimensional Speaker Localization: Audio-Refined Visual Scaling Factor Estimation},
  journal   = SPLetters,
  volume    = {28},
  year      = {2021},
  pages     = {1405--1409},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250462}
}

@article{bb255651,
  author    = {Zheng, A. and Hu, M. and Jiang, B. and Huang, Y. and Yan, Y. and Luo, B.},
  title     = {Adversarial-Metric Learning for Audio-Visual Cross-Modal Matching},
  journal   = MultMed,
  volume    = {24},
  year      = {2022},
  pages     = {338--351},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250463}
}

@article{bb255652,
  author    = {Xu, J. H. and Zhang, B. and Wang, Z. Y. and Wang, Y. and Chen, F. and Gao, J. B. and Feng, D. D.},
  title     = {Affective Audio Annotation of Public Speeches with Convolutional Clustering Neural Network},
  journal   = AffCom,
  volume    = {13},
  year      = {2022},
  number    = {1},
  month     = jan,
  pages     = {238--249},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250464}
}

@article{bb255653,
  author    = {Afouras, T. and Chung, J. S. and Senior, A. and Vinyals, O. and Zisserman, A.},
  title     = {Deep Audio-Visual Speech Recognition},
  journal   = PAMI,
  volume    = {44},
  year      = {2022},
  number    = {12},
  month     = dec,
  pages     = {8717--8727},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250465}
}

@inproceedings{bb255654,
  author    = {Rahimi, A. and Afouras, T. and Zisserman, A.},
  title     = {Reading to Listen at the Cocktail Party: Multi-Modal Speech Separation},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {10483--10492},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250466}
}

@article{bb255655,
  author    = {Narain, J. and Johnson, K. T. and Quatieri, T. F. and Picard, R. W. and Maes, P.},
  title     = {Modeling Real-World Affective and Communicative Nonverbal Vocalizations From Minimally Speaking Individuals},
  journal   = AffCom,
  volume    = {13},
  year      = {2022},
  number    = {4},
  month     = oct,
  pages     = {2238--2253},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250467}
}

@article{bb255656,
  author    = {Gong, Y. and Liu, A. H. and Rouditchenko, A. and Glass, J.},
  title     = {{UAVM}: Towards Unifying Audio and Visual Models},
  journal   = SPLetters,
  volume    = {29},
  year      = {2022},
  pages     = {2437--2441},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250468}
}

@inproceedings{bb255657,
  author    = {Oya, T. and Iwase, S. and Morishima, S.},
  title     = {The Sound of Bounding-Boxes},
  booktitle = {ICPR22},
  year      = {2022},
  pages     = {9--15},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250469}
}

@article{bb255658,
  author    = {Zhou, J. X. and Guo, D. and Wang, M.},
  title     = {Contrastive Positive Sample Propagation Along the Audio-Visual Event Line},
  journal   = PAMI,
  volume    = {45},
  year      = {2023},
  number    = {6},
  month     = jun,
  pages     = {7239--7257},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250470}
}

@inproceedings{bb255659,
  author    = {Zhou, J. X. and Zheng, L. and Zhong, Y. and Hao, S. J. and Wang, M.},
  title     = {Positive Sample Propagation along the Audio-Visual Event Line},
  booktitle = CVPR21,
  year      = {2021},
  pages     = {8432--8440},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250471}
}

@article{bb255660,
  author    = {Sen, T. K. and Naven, G. and Gerstner, L. and Bagley, D. and Baten, R. A. and Rahman, W. and Hasan, M. K. and Haut, K. and Mamun, A. A. and Samrose, S. and Solbu, A. and Barnes, R. E. and Frank, M. G. and Hoque, E.},
  title     = {{DBATES}: Dataset for Discerning Benefits of Audio, Textual, and Facial Expression Features in Competitive Debate Speeches},
  journal   = AffCom,
  volume    = {14},
  year      = {2023},
  number    = {2},
  month     = apr,
  pages     = {1028--1043},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250472}
}

@article{bb255661,
  author    = {Sharma, G. and Dhall, A. and Cai, J. F.},
  title     = {Audio-Visual Automatic Group Affect Analysis},
  journal   = AffCom,
  volume    = {14},
  year      = {2023},
  number    = {2},
  month     = apr,
  pages     = {1056--1069},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250473}
}

@article{bb255662,
  author    = {Cheng, W. L. and Tang, W. and Huang, Y. and Luo, Y. and Wang, L.},
  title     = {A Reconstruction-Based Visual-Acoustic-Semantic Embedding Method for Speech-Image Retrieval},
  journal   = MultMed,
  volume    = {25},
  year      = {2023},
  pages     = {4067--4080},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250474}
}

@article{bb255663,
  author    = {Kefalas, T. and Fotiadou, E. and Georgopoulos, M. and Panagakis, Y. and Ma, P. C. and Petridis, S. and Stafylakis, T. and Pantic, M.},
  title     = {{KAN-AV} dataset for audio-visual face and speech analysis in the wild},
  journal   = IVC,
  volume    = {140},
  year      = {2023},
  pages     = {104839},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250475}
}

@article{bb255664,
  author    = {Wang, X. M. and Mi, J. C. and Li, B. Q. and Zhao, Y. X. and Meng, J. X.},
  title     = {{CATNet}: Cross-modal fusion for audio-visual speech recognition},
  journal   = PRL,
  volume    = {178},
  year      = {2024},
  pages     = {216--222},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250476}
}

@article{bb255665,
  author    = {Zhu, D. D. and Zhang, K. W. and Zhang, N. and Zhou, Q. Q. and Min, X. K. and Zhai, G. T. and Yang, X. K.},
  title     = {Unified Audio-Visual Saliency Model for Omnidirectional Videos With Spatial Audio},
  journal   = MultMed,
  volume    = {26},
  year      = {2024},
  pages     = {764--775},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250477}
}

@article{bb255666,
  author    = {Qian, X. Y. and Xue, W. and Zhang, Q. and Tao, R. J. and Li, H. Z.},
  title     = {Deep Cross-Modal Retrieval Between Spatial Image and Acoustic Speech},
  journal   = MultMed,
  volume    = {26},
  year      = {2024},
  pages     = {4480--4489},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250478}
}

@article{bb255667,
  author    = {Xie, J. W. and Liu, Z. and Li, G. Y. and Song, Y. J.},
  title     = {Audio-visual saliency prediction with multisensory perception and integration},
  journal   = IVC,
  volume    = {143},
  year      = {2024},
  pages     = {104955},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250479}
}

@inproceedings{bb255668,
  author    = {Rachavarapu, K. K. and Rajagopalan, A. N.},
  title     = {Boosting Positive Segments for Weakly-Supervised Audio-Visual Video Parsing},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {10158--10168},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250480}
}

@inproceedings{bb255669,
  author    = {Chen, J. and Wang, W. G. and Liu, S. and Li, H. S. and Yang, Y.},
  title     = {Omnidirectional Information Gathering for Knowledge Transfer-based Audio-Visual Navigation},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {10959--10969},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250481}
}

@inproceedings{bb255670,
  author    = {Cheng, X. and Jin, T. and Huang, R. J. and Li, L. J. and Lin, W. and Wang, Z. and Wang, Y. and Liu, H. and Yin, A. and Zhao, Z.},
  title     = {{MixSpeech}: Cross-Modality Self-Learning with Audio-Visual Stream Mixup for Visual Speech Translation and Recognition},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {15689--15699},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250482}
}

@inproceedings{bb255671,
  author    = {Georgescu, M. I. and Fonseca, E. and Ionescu, R. T. and Lucic, M. and Schmid, C. and Arnab, A.},
  title     = {Audiovisual Masked Autoencoders},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {16098--16108},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250483}
}

@inproceedings{bb255672,
  author    = {Chen, M. F. and Su, K. and Shlizerman, E.},
  title     = {Be Everywhere - Hear Everything ({BEE}): Audio Scene Reconstruction by Sparse Audio-Visual Samples},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {7819--7828},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250484}
}

@inproceedings{bb255673,
  author    = {Xie, H. X. and Lee, M. X. and Chen, T. J. and Chen, H. J. and Liu, H. I. and Shuai, H. H. and Cheng, W. H.},
  title     = {Most Important Person-guided Dual-branch Cross-Patch Attention for Group Affect Recognition},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {20541--20551},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250485}
}

@inproceedings{bb255674,
  author    = {Djilali, Y. A. D. and Narayan, S. and Boussaid, H. and Almazrouei, E. and Debbah, M.},
  title     = {{Lip2Vec}: Efficient and Robust Visual Speech Recognition via Latent-to-Latent Visual to Audio Representation Mapping},
  booktitle = ICCV23,
  year      = {2023},
  pages     = {13744--13755},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250486}
}

@inproceedings{bb255675,
  author    = {Chen, G. Y. and Zhang, D. and Liu, T. and Du, X. Y.},
  title     = {Local-Global Contrast for Learning Voice-Face Representations},
  booktitle = ICIP23,
  year      = {2023},
  pages     = {51--55},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250487}
}

@inproceedings{bb255676,
  author    = {Hong, J. and Kim, M. and Choi, J. and Ro, Y. M.},
  title     = {Watch or Listen: Robust Audio-Visual Speech Recognition with Visual Corruption Modeling and Reliability Scoring},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {18783--18794},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250488}
}

@inproceedings{bb255677,
  author    = {Gao, J. Y. and Chen, M. Y. and Xu, C. S.},
  title     = {Collecting Cross-Modal Presence-Absence Evidence for Weakly-Supervised Audio-Visual Event Perception},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {18827--18836},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250489}
}

@inproceedings{bb255678,
  author    = {Porgali, B. and Albiero, V. and Ryda, J. and Ferrer, C. C. and Hazirbas, C.},
  title     = {The {Casual Conversations} v2 Dataset: A diverse, large benchmark for measuring fairness and robustness in audio/vision/speech models},
  booktitle = FaDE-TCV23,
  year      = {2023},
  pages     = {10--17},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250490}
}

@inproceedings{bb255679,
  author    = {Xiong, J. W. and Wang, G. and Zhang, P. and Huang, W. and Zha, Y. F. and Zhai, G. T.},
  title     = {{CASP-Net}: Rethinking Video Saliency Prediction from an Audio-Visual Consistency Perceptual Perspective},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {6441--6450},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250491}
}

@inproceedings{bb255680,
  author    = {Huang, C. and Tian, Y. and Kumar, A. and Xu, C. L.},
  title     = {Egocentric Audio-Visual Object Localization},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {22910--22921},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250492}
}

@inproceedings{bb255681,
  author    = {Liao, J. and Duan, H. and Feng, K. and Zhao, W. B. and Yang, Y. B. and Chen, L. Y.},
  title     = {A Light Weight Model for Active Speaker Detection},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {22932--22941},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250493}
}

@inproceedings{bb255682,
  author    = {Seo, P. H. and Nagrani, A. and Schmid, C.},
  title     = {{AVFormer}: Injecting Vision into Frozen Speech Models for Zero-Shot {AV-ASR}},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {22922--22931},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250494}
}

@inproceedings{bb255683,
  author    = {Feng, D. and Yang, S. and Shan, S. G. and Chen, X. L.},
  title     = {Audio-Driven Deformation Flow for Effective Lip Reading},
  booktitle = {ICPR22},
  year      = {2022},
  pages     = {274--280},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250495}
}

@inproceedings{bb255684,
  author    = {Varshney, M. and Yadav, R. and Namboodiri, V. P. and Hegde, R. M.},
  title     = {Learning Speaker-specific Lip-to-Speech Generation},
  booktitle = {ICPR22},
  year      = {2022},
  pages     = {491--498},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250496}
}

@inproceedings{bb255685,
  author    = {Shi, C. and Yang, S.},
  title     = {Spatial and Visual Perspective-Taking via View Rotation and Relation Reasoning for Embodied Reference Understanding},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {XXXVI:201--218},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250497}
}

@inproceedings{bb255686,
  author    = {Hayes, T. and Zhang, S. Y. and Yin, X. and Pang, G. and Sheng, S. and Yang, H. and Ge, S. W. and Hu, Q. Y. and Parikh, D.},
  title     = {{MUGEN}: A Playground for Video-Audio-Text Multimodal Understanding and {GENeration}},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {VIII:431--449},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250498}
}

@inproceedings{bb255687,
  author    = {van Horn, G. and Qian, R. and Wilber, K. and Adam, H. and Aodha, O. M. and Belongie, S.},
  title     = {Exploring Fine-Grained Audiovisual Categorization with the {SSW60} Dataset},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {VIII:271--289},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250499}
}

@inproceedings{bb255688,
  author    = {Yu, S. and Wu, P. and Liang, P. P. and Salakhutdinov, R. and Morency, L. P.},
  title     = {{PACS}: A Dataset for Physical Audiovisual {CommonSense} Reasoning},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {XXXVII:292--309},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250500}
}

@inproceedings{bb255689,
  author    = {Cheng, H. Y. and Liu, Z. Y. and Zhou, H. and Qian, C. and Wu, W. and Wang, L. M.},
  title     = {Joint-Modal Label Denoising for Weakly-Supervised Audio-Visual Video Parsing},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {XXXIV:431--448},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250501}
}

@inproceedings{bb255690,
  author    = {Zhang, Z. Q. and Zhang, J. and Zhang, J. S. and Wu, M. H. and Fang, X. and Dai, L. R.},
  title     = {Learning Contextually Fused Audio-Visual Representations for Audio-Visual Speech Recognition},
  booktitle = ICIP22,
  year      = {2022},
  pages     = {1346--1350},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250502}
}

@inproceedings{bb255691,
  author    = {Mo, S. T. and Morgado, P.},
  title     = {Localizing Visual Sounds the Easy Way},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {XXXVII:218--234},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250503}
}

@inproceedings{bb255692,
  author    = {Montesinos, J. F. and Kadandale, V. S. and Haro, G.},
  title     = {{VoViT}: Low Latency Graph-Based Audio-Visual Voice Separation Transformer},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {XXXVII:310--326},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250504}
}

@inproceedings{bb255693,
  author    = {Tzinis, E. and Wisdom, S. and Remez, T. and Hershey, J. R.},
  title     = {{AudioScopeV2}: Audio-Visual Attention Architectures for Calibrated Open-Domain On-Screen Sound Separation},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {XXXVII:368--385},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250505}
}

@inproceedings{bb255694,
  author    = {Zhou, J. X. and Wang, J. Y. and Zhang, J. Y. and Sun, W. X. and Zhang, J. and Birchfield, S. and Guo, D. and Kong, L. P. and Wang, M. and Zhong, Y.},
  title     = {Audio-Visual Segmentation},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {XXXVII:386--403},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250506}
}

@inproceedings{bb255695,
  author    = {Alcazar, J. L. and Cordes, M. and Zhao, C. and Ghanem, B.},
  title     = {End-to-End Active Speaker Detection},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {XXXVII:126--143},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250507}
}

@inproceedings{bb255696,
  author    = {Chen, C. G. and Gao, R. H. and Calamia, P. and Grauman, K.},
  title     = {Visual Acoustic Matching},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {18836--18846},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250508}
}

@inproceedings{bb255697,
  author    = {Lee, S. and Kim, H. I. and Ro, Y. M.},
  title     = {Weakly Paired Associative Learning for Sound and Image Representations via Bimodal Associative Memory},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {10524--10533},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250509}
}

@inproceedings{bb255698,
  author    = {Vasudevan, A. B. and Dai, D. X. and Van Gool, L. J.},
  title     = {Sound and Visual Representation Learning with Multiple Pretraining Tasks},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {14596--14606},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250510}
}

@inproceedings{bb255699,
  author    = {Xia, Y. and Zhao, Z.},
  title     = {Cross-modal Background Suppression for Audio-Visual Event Localization},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {19957--19966},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250511}
}