% Bibliography entries bb241300 through bb241399.
% Each entry records its origin in the bibsource field (a visionbib.com URL).
% Values written without delimiters in journal/booktitle (PAMI, MultMed,
% ECCV22, ...) are string-macro references; their definitions are not part of
% this section and must be loaded before these entries.
% Layout: one entry per paragraph, one field per line, original field order.

@article{bb241300,
  author    = {Heracleous, P. and Aboutabit, N. and Beautemps, D.},
  title     = {Lip Shape and Hand Position Fusion for Automatic Vowel Recognition in Cued Speech for French},
  journal   = SPLetters,
  volume    = {16},
  year      = {2009},
  number    = {5},
  month     = {May},
  pages     = {339-342},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236211}
}

@article{bb241301,
  author    = {Zhang, C. and Yin, P. and Rui, Y. and Cutler, R. and Viola, P. and Sun, X.D. and Pinto, N. and Zhang, Z.Y.},
  title     = {Boosting-Based Multimodal Speaker Detection for Distributed Meeting Videos},
  journal   = MultMed,
  volume    = {10},
  year      = {2008},
  number    = {8},
  month     = {December},
  pages     = {1541-1552},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236212}
}

@article{bb241302,
  author    = {Lee, J.S. and Park, C.H.},
  title     = {Robust Audio-Visual Speech Recognition Based on Late Integration},
  journal   = MultMed,
  volume    = {10},
  year      = {2008},
  number    = {5},
  month     = {August},
  pages     = {767-779},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236213}
}

@article{bb241303,
  author    = {Saenko, K. and Livescu, K. and Glass, J. and Darrell, T.J.},
  title     = {Multistream Articulatory Feature-Based Models for Visual Speech Recognition},
  journal   = PAMI,
  volume    = {31},
  year      = {2009},
  number    = {9},
  month     = {September},
  pages     = {1700-1707},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236214}
}

@inproceedings{bb241304,
  author    = {Saenko, K. and Livescu, K. and Siracusa, M. and Wilson, K. and Glass, J. and Darrell, T.J.},
  title     = {Visual Speech Recognition with Loosely Synchronized Feature Streams},
  booktitle = ICCV05,
  year      = {2005},
  pages     = {II: 1424-1431},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236215}
}

@article{bb241305,
  author    = {Schuller, B. and Muller, R. and Eyben, F. and Gast, J. and Hornler, B. and Wollmer, M. and Rigoll, G. and Hothker, A. and Konosu, H.},
  title     = {Being bored? Recognising natural interest by extensive audiovisual integration for real-life application},
  journal   = IVC,
  volume    = {27},
  year      = {2009},
  number    = {12},
  month     = {November},
  pages     = {1760-1774},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236216}
}

@inproceedings{bb241306,
  author    = {Eyben, F. and Wollmer, M. and Valstar, M.F. and Gunes, H. and Schuller, B. and Pantic, M.},
  title     = {String-based audiovisual fusion of behavioural events for the assessment of dimensional affect},
  booktitle = FG11,
  year      = {2011},
  pages     = {322-329},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236217}
}

@inproceedings{bb241307,
  author    = {Althoff, F. and McGlaun, G. and Lang, M.K. and Rigoll, G.},
  title     = {Evaluating Multimodal Interaction Patterns in Various Application Scenarios},
  booktitle = GW03,
  year      = {2003},
  pages     = {421-435},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236218}
}

@article{bb241308,
  author    = {Casanovas, A.L. and Monaci, G. and Vandergheynst, P. and Gribonval, R.},
  title     = {Blind Audiovisual Source Separation Based on Sparse Redundant Representations},
  journal   = MultMed,
  volume    = {12},
  year      = {2010},
  number    = {5},
  pages     = {358-371},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236219}
}

@inproceedings{bb241309,
  author    = {Casanovas, A.L. and Monaci, G. and Vandergheynst, P.},
  title     = {Blind Audiovisual Source Separation using Sparse Representations},
  booktitle = ICIP07,
  year      = {2007},
  pages     = {III: 301-304},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236220}
}

@article{bb241310,
  author    = {Esch, J.},
  title     = {Audiovisual Information Fusion in Human-Computer Interfaces and Intelligent Environments: A Survey},
  journal   = PIEEE,
  volume    = {98},
  year      = {2010},
  number    = {10},
  month     = {October},
  pages     = {1690-1691},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236221}
}

@article{bb241311,
  author    = {Shivappa, S.T. and Trivedi, M.M. and Rao, B.D.},
  title     = {Audiovisual Information Fusion in Human-Computer Interfaces and Intelligent Environments: A Survey},
  journal   = PIEEE,
  volume    = {98},
  year      = {2010},
  number    = {10},
  month     = {October},
  pages     = {1692-1715},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236222}
}

@article{bb241312,
  author    = {Claussen, H. and Rosca, J. and Damper, R.I.},
  title     = {Signature extraction using mutual interdependencies},
  journal   = PR,
  volume    = {44},
  year      = {2011},
  number    = {3},
  month     = {March},
  pages     = {650-661},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236223}
}

@inproceedings{bb241313,
  author    = {Higgins, J.E. and Damper, R.I.},
  title     = {An HMM-Based Subband Processing Approach to Speaker Identification},
  booktitle = AVBPA01,
  year      = {2001},
  pages     = {169},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236224}
}

@article{bb241314,
  author    = {El Sallam, A.A. and Mian, A.S.},
  title     = {Correlation based speech-video synchronization},
  journal   = PRL,
  volume    = {32},
  year      = {2011},
  number    = {6},
  month     = {April},
  pages     = {780-786},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236225}
}

@inproceedings{bb241315,
  author    = {El Sallam, A.A. and Mian, A.S.},
  title     = {Speech-Video Synchronization Using Lips Movements and Speech Envelope Correlation},
  booktitle = ICIAR09,
  year      = {2009},
  pages     = {397-407},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236226}
}

@article{bb241316,
  author    = {Petridis, S. and Pantic, M.},
  title     = {Audiovisual Discrimination Between Speech and Laughter: Why and When Visual Information Might Help},
  journal   = MultMed,
  volume    = {13},
  year      = {2011},
  number    = {2},
  pages     = {216-234},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236227}
}

@article{bb241317,
  author    = {Petridis, S. and Pantic, M.},
  title     = {Prediction-Based Audiovisual Fusion for Classification of Non-Linguistic Vocalisations},
  journal   = AffCom,
  volume    = {7},
  year      = {2016},
  number    = {1},
  month     = {January},
  pages     = {45-58},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236228}
}

@inproceedings{bb241318,
  author    = {Petridis, S. and Pantic, M.},
  title     = {Fusion of audio and visual cues for laughter detection},
  booktitle = CIVR08,
  year      = {2008},
  pages     = {329-338},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236229}
}

@inproceedings{bb241319,
  author    = {Petridis, S. and Pantic, M. and Cohn, J.F.},
  title     = {Prediction-based classification for audiovisual discrimination between laughter and speech},
  booktitle = FG11,
  year      = {2011},
  pages     = {619-626},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236230}
}

@article{bb241320,
  author    = {Moustakas, K. and Tzovaras, D. and Dybkjaer, L. and Bernsen, N. and Aran, O.},
  title     = {Using Modality Replacement to Facilitate Communication between Visually and Hearing-Impaired People},
  journal   = MultMedMag,
  volume    = {18},
  year      = {2011},
  number    = {2},
  month     = {April},
  pages     = {26-37},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236231}
}

@article{bb241321,
  author    = {Tariquzzaman, M. and Kim, J.Y. and Na, S.Y. and Kim, H.G. and Har, D.S.},
  title     = {A Visual Signal Reliability for Robust Audio-Visual Speaker Identification},
  journal   = IEICE,
  volume    = {E94-D},
  year      = {2011},
  number    = {10},
  month     = {October},
  pages     = {2052-2055},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236232}
}

@article{bb241322,
  author    = {Lee, J.S. and de Simone, F. and Ebrahimi, T.},
  title     = {Efficient video coding based on audio-visual focus of attention},
  journal   = JVCIR,
  volume    = {22},
  year      = {2011},
  number    = {8},
  month     = {November},
  pages     = {704-711},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236233}
}

@article{bb241323,
  author    = {Tiawongsombat, P. and Jeong, M.H. and Yun, J.S. and You, B.J. and Oh, S.R.},
  title     = {Robust visual speakingness detection using bi-level HMM},
  journal   = PR,
  volume    = {45},
  year      = {2012},
  number    = {2},
  month     = {February},
  pages     = {783-793},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236234}
}

@article{bb241324,
  author    = {Noulas, A. and Englebienne, G. and Krose, B.J.A.},
  title     = {Multimodal Speaker Diarization},
  journal   = PAMI,
  volume    = {34},
  year      = {2012},
  number    = {1},
  month     = {January},
  pages     = {79-93},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236235}
}

@article{bb241325,
  author    = {Blauth, D.A. and Minotto, V.P. and Jung, C.R. and Lee, B. and Kalker, T.},
  title     = {Voice activity detection and speaker localization using audiovisual cues},
  journal   = PRL,
  volume    = {33},
  year      = {2012},
  number    = {4},
  month     = {March},
  pages     = {373-380},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236236}
}

@inproceedings{bb241326,
  author    = {Montazzolli, S. and Jung, C.R. and Gelb, D.},
  title     = {Audiovisual voice activity detection using off-the-shelf cameras},
  booktitle = ICIP15,
  year      = {2015},
  pages     = {3886-3890},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236237}
}

@article{bb241327,
  author    = {Minotto, V.P. and Jung, C.R. and Lee, B.},
  title     = {Simultaneous-Speaker Voice Activity Detection and Localization Using Mid-Fusion of SVM and HMMs},
  journal   = MultMed,
  volume    = {16},
  year      = {2014},
  number    = {4},
  month     = {June},
  pages     = {1032-1044},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236238}
}

@article{bb241328,
  author    = {Minotto, V.P. and Jung, C.R. and Lee, B.},
  title     = {Multimodal Multi-Channel On-Line Speaker Diarization Using Sensor Fusion Through SVM},
  journal   = MultMed,
  volume    = {17},
  year      = {2015},
  number    = {10},
  month     = {October},
  pages     = {1694-1705},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236239}
}

@article{bb241329,
  author    = {Nicolaou, M.A. and Gunes, H. and Pantic, M.},
  title     = {Output-associative RVM regression for dimensional and continuous emotion prediction},
  journal   = IVC,
  volume    = {30},
  year      = {2012},
  number    = {3},
  month     = {March},
  pages     = {186-196},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236240}
}

% NOTE(review): bb241330 carries the same bibsource anchor (TT236240) as
% bb241329 and lists the same three authors in reverse order - confirm
% against the publication before relying on author order.
@inproceedings{bb241330,
  author    = {Pantic, M. and Gunes, H. and Nicolaou, M.A.},
  title     = {Output-associative RVM regression for dimensional and continuous emotion prediction},
  booktitle = FG11,
  year      = {2011},
  pages     = {16-23},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236240}
}

@inproceedings{bb241331,
  author    = {Nicolaou, M.A. and Gunes, H. and Pantic, M.},
  title     = {Designing frameworks for automatic affect prediction and classification in dimensional space},
  booktitle = Gesture11,
  year      = {2011},
  pages     = {20-26},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236241}
}

@article{bb241332,
  author    = {Nicolaou, M.A. and Gunes, H. and Pantic, M.},
  title     = {Continuous Prediction of Spontaneous Affect from Multiple Cues and Modalities in Valence-Arousal Space},
  journal   = AffCom,
  volume    = {2},
  year      = {2011},
  number    = {2},
  pages     = {92-105},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236242}
}

@inproceedings{bb241333,
  author    = {Nicolaou, M.A. and Gunes, H. and Pantic, M.},
  title     = {Audio-Visual Classification and Fusion of Spontaneous Affective Data in Likelihood Space},
  booktitle = ICPR10,
  year      = {2010},
  pages     = {3695-3699},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236243}
}

@article{bb241334,
  author    = {Nicolaou, M.A. and Pavlovic, V. and Pantic, M.},
  title     = {Dynamic Probabilistic CCA for Analysis of Affective Behavior and Fusion of Continuous Annotations},
  journal   = PAMI,
  volume    = {36},
  year      = {2014},
  number    = {7},
  month     = {July},
  pages     = {1299-1311},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236244}
}

@inproceedings{bb241335,
  author    = {Nicolaou, M.A. and Pavlovic, V. and Pantic, M.},
  title     = {Dynamic Probabilistic CCA for Analysis of Affective Behaviour},
  booktitle = ECCV12,
  year      = {2012},
  pages     = {VII: 98-111},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236245}
}

@article{bb241336,
  author    = {Wang, L.J. and Qian, Y. and Scott, M.R. and Chen, G. and Soong, F.K.},
  title     = {Computer-Assisted Audiovisual Language Learning},
  journal   = Computer,
  volume    = {45},
  year      = {2012},
  number    = {6},
  month     = {June},
  pages     = {38-47},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236246}
}

@article{bb241337,
  author    = {Wu, Q.X. and Wang, Z.Y. and Deng, F.Q. and Chi, Z. and Feng, D.D.},
  title     = {Realistic Human Action Recognition with Multimodal Feature Selection and Fusion},
  journal   = SMCS,
  volume    = {43},
  year      = {2013},
  number    = {4},
  pages     = {875-885},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236247}
}

@article{bb241338,
  author    = {Wu, Q.X. and Wang, Z.Y. and Deng, F.Q. and Xia, Y. and Kang, W.X. and Feng, D.D.},
  title     = {Discriminative two-level feature selection for realistic human action recognition},
  journal   = JVCIR,
  volume    = {24},
  year      = {2013},
  number    = {7},
  pages     = {1064-1074},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236248}
}

@inproceedings{bb241339,
  author    = {Wu, Q.X. and Wang, Z.Y. and Deng, F.Q. and Feng, D.D.},
  title     = {Realistic Human Action Recognition with Audio Context},
  booktitle = DICTA10,
  year      = {2010},
  pages     = {288-293},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236249}
}

@inproceedings{bb241340,
  author    = {Wu, Q.X. and Lu, S.Y. and Wang, Z.Y. and Deng, F.Q. and Kang, W.X. and Feng, D.D.},
  title     = {Structure Context of Local Features in Realistic Human Action Recognition},
  booktitle = VECTaR11,
  year      = {2011},
  pages     = {1496-1501},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236250}
}

@article{bb241341,
  author    = {Mirzaei, M.R. and Ghorshi, S. and Mortazavi, M.},
  title     = {Audio-visual speech recognition techniques in augmented reality environments},
  journal   = VC,
  volume    = {30},
  year      = {2014},
  number    = {3},
  month     = {March},
  pages     = {245-257},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236251}
}

@article{bb241342,
  author    = {Bredin, H. and Roy, A. and Le, V.B. and Barras, C.},
  title     = {Person instance graphs for mono-, cross- and multi-modal person recognition in multimedia data: application to speaker identification in TV broadcast},
  journal   = MultInfoRetr,
  volume    = {3},
  year      = {2014},
  number    = {3},
  month     = {September},
  pages     = {161-175},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236252}
}

@article{bb241343,
  author    = {Ozasa, Y. and Nakano, M. and Ariki, Y. and Iwahashi, N.},
  title     = {Discriminating Unknown Objects from Known Objects Using Image and Speech Information},
  journal   = IEICE,
  volume    = {E98-D},
  year      = {2015},
  number    = {3},
  month     = {March},
  pages     = {704-711},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236253}
}

@inproceedings{bb241344,
  author    = {Ozasa, Y. and Ariki, Y. and Nakano, M. and Iwahashi, N.},
  title     = {Disambiguation in Unknown Object Detection by Integrating Image and Speech Recognition Confidences},
  booktitle = ACCV12,
  year      = {2012},
  pages     = {I:85-96},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236254}
}

@inproceedings{bb241345,
  author    = {Nishimura, H. and Ozasa, Y. and Ariki, Y. and Nakano, M.},
  title     = {Selection of Unknown Objects Specified by Speech Using Models Constructed from Web Images},
  booktitle = ICPR14,
  year      = {2014},
  pages     = {477-482},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236255}
}

@inproceedings{bb241346,
  author    = {Nishimura, H. and Ozasa, Y. and Ariki, Y. and Nakano, M.},
  title     = {Object Recognition by Integrated Information Using Web Images},
  booktitle = ACPR13,
  year      = {2013},
  pages     = {657-661},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236256}
}

@inproceedings{bb241347,
  author    = {Ozasa, Y. and Enami, N. and Ariki, Y.},
  title     = {Color saliency for object identification},
  booktitle = FCV15,
  year      = {2015},
  pages     = {1-5},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236257}
}

@article{bb241348,
  author    = {Harte, N. and Gillen, E.},
  title     = {TCD-TIMIT: An Audio-Visual Corpus of Continuous Speech},
  journal   = MultMed,
  volume    = {17},
  year      = {2015},
  number    = {5},
  month     = {May},
  pages     = {603-615},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236258}
}

@article{bb241349,
  author    = {Katsaggelos, A.K. and Bahaadini, S. and Molina, R.},
  title     = {Audiovisual Fusion: Challenges and New Approaches},
  journal   = PIEEE,
  volume    = {103},
  year      = {2015},
  number    = {9},
  month     = {September},
  pages     = {1635-1653},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236259}
}

@article{bb241350,
  author    = {Mezai, L. and Hachouf, F.},
  title     = {Score-Level Fusion of Face and Voice Using Particle Swarm Optimization and Belief Functions},
  journal   = HMS,
  volume    = {45},
  year      = {2015},
  number    = {6},
  month     = {December},
  pages     = {761-772},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236260}
}

@article{bb241351,
  author    = {Wu, P. and Liu, H. and Li, X. and Fan, T. and Zhang, X.},
  title     = {A Novel Lip Descriptor for Audio-Visual Keyword Spotting Based on Adaptive Decision Fusion},
  journal   = MultMed,
  volume    = {18},
  year      = {2016},
  number    = {3},
  month     = {March},
  pages     = {326-338},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236261}
}

@article{bb241352,
  author    = {Dilpazir, H. and Muhammad, Z. and Minhas, Q. and Ahmed, F. and Malik, H. and Mahmood, H.},
  title     = {Multivariate mutual information for audio video fusion},
  journal   = SIViP,
  volume    = {10},
  year      = {2016},
  number    = {7},
  month     = {October},
  pages     = {1265-1272},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236262}
}

@article{bb241353,
  author    = {Beyan, C. and Capozzi, F. and Becchio, C. and Murino, V.},
  title     = {Prediction of the Leadership Style of an Emergent Leader Using Audio and Visual Nonverbal Features},
  journal   = MultMed,
  volume    = {20},
  year      = {2018},
  number    = {2},
  month     = {February},
  pages     = {441-456},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236263}
}

@article{bb241354,
  author    = {Fernandez Lopez, A. and Sukno, F.M.},
  title     = {Survey on automatic lip-reading in the era of deep learning},
  journal   = IVC,
  volume    = {78},
  year      = {2018},
  pages     = {53-72},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236264}
}

@article{bb241355,
  author    = {Stafylakis, T. and Khan, M.H. and Tzimiropoulos, G.},
  title     = {Pushing the boundaries of audiovisual word recognition using Residual Networks and LSTMs},
  journal   = CVIU,
  volume    = {176-177},
  year      = {2018},
  pages     = {22-32},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236265}
}

@inproceedings{bb241356,
  author    = {Stafylakis, T. and Tzimiropoulos, G.},
  title     = {Zero-Shot Keyword Spotting for Visual Speech Recognition In-the-wild},
  booktitle = ECCV18,
  year      = {2018},
  pages     = {II: 536-552},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236266}
}

@article{bb241357,
  author    = {Liu, X. and Geng, J.J. and Ling, H.B. and Cheung, Y.M.},
  title     = {Attention guided deep audio-face fusion for efficient speaker naming},
  journal   = PR,
  volume    = {88},
  year      = {2019},
  pages     = {557-568},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236267}
}

@article{bb241358,
  author    = {Tsiami, A. and Koutras, P. and Katsamanis, A. and Vatakis, A. and Maragos, P.},
  title     = {A behaviorally inspired fusion approach for computational audiovisual saliency modeling},
  journal   = SP:IC,
  volume    = {76},
  year      = {2019},
  pages     = {186-200},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236268}
}

@article{bb241359,
  author    = {Hsiao, S. and Sun, H. and Hsieh, M. and Tsai, M. and Tsao, Y. and Lee, C.},
  title     = {Toward Automating Oral Presentation Scoring During Principal Certification Program Using Audio-Video Low-Level Behavior Profiles},
  journal   = AffCom,
  volume    = {10},
  year      = {2019},
  number    = {4},
  month     = {October},
  pages     = {552-567},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236269}
}

% NOTE(review): the pages value of bb241360 (xx-yy) looks like a placeholder
% rather than a real range - confirm and replace with the article number or
% page range if one is available.
@article{bb241360,
  author    = {Ma, Y. and Hong, H. and Li, H. and Zhao, H. and Li, Y.S. and Sun, L. and Gu, C. and Zhu, X.H.},
  title     = {Non-Contact Speech Recovery Technology Using a 24 GHz Portable Auditory Radar and Webcam},
  journal   = RS,
  volume    = {12},
  year      = {2020},
  number    = {4},
  pages     = {xx-yy},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236270}
}

@inproceedings{bb241361,
  author    = {Xu, B. and Wang, J. and Lu, C. and Guo, Y.},
  title     = {Watch to Listen Clearly: Visual Speech Enhancement Driven Multi-modality Speech Recognition},
  booktitle = WACV20,
  year      = {2020},
  pages     = {1626-1635},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236271}
}

@article{bb241362,
  author    = {Pu, J. and Panagakis, Y. and Pantic, M.},
  title     = {Active Speaker Detection and Localization in Videos Using Low-Rank and Kernelized Sparsity},
  journal   = SPLetters,
  volume    = {27},
  year      = {2020},
  pages     = {865-869},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236272}
}

@article{bb241363,
  author    = {Tao, F. and Busso, C.},
  title     = {End-to-End Audiovisual Speech Recognition System With Multitask Learning},
  journal   = MultMed,
  volume    = {23},
  year      = {2021},
  pages     = {1-11},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236273}
}

@article{bb241364,
  author    = {Liu, L. and Feng, G. and Beautemps, D. and Zhang, X.P.},
  title     = {Re-Synchronization Using the Hand Preceding Model for Multi-Modal Fusion in Automatic Continuous Cued Speech Recognition},
  journal   = MultMed,
  volume    = {23},
  year      = {2021},
  pages     = {292-305},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236274}
}

@article{bb241365,
  author    = {Beyan, C. and Shahid, M. and Murino, V.},
  title     = {RealVAD: A Real-World Dataset and A Method for Voice Activity Detection by Body Motion Analysis},
  journal   = MultMed,
  volume    = {23},
  year      = {2021},
  pages     = {2071-2085},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236275}
}

@article{bb241366,
  author    = {Qian, X.Y. and Liu, Q. and Wang, J.D. and Li, H.Z.},
  title     = {Three-Dimensional Speaker Localization: Audio-Refined Visual Scaling Factor Estimation},
  journal   = SPLetters,
  volume    = {28},
  year      = {2021},
  pages     = {1405-1409},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236276}
}

@article{bb241367,
  author    = {Zheng, A. and Hu, M. and Jiang, B. and Huang, Y. and Yan, Y. and Luo, B.},
  title     = {Adversarial-Metric Learning for Audio-Visual Cross-Modal Matching},
  journal   = MultMed,
  volume    = {24},
  year      = {2022},
  pages     = {338-351},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236277}
}

@article{bb241368,
  author    = {Xu, J.H. and Zhang, B. and Wang, Z.Y. and Wang, Y. and Chen, F. and Gao, J.B. and Feng, D.D.},
  title     = {Affective Audio Annotation of Public Speeches with Convolutional Clustering Neural Network},
  journal   = AffCom,
  volume    = {13},
  year      = {2022},
  number    = {1},
  month     = {January},
  pages     = {238-249},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236278}
}

@article{bb241369,
  author    = {Afouras, T. and Chung, J.S. and Senior, A. and Vinyals, O. and Zisserman, A.},
  title     = {Deep Audio-Visual Speech Recognition},
  journal   = PAMI,
  volume    = {44},
  year      = {2022},
  number    = {12},
  month     = {December},
  pages     = {8717-8727},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236279}
}

@inproceedings{bb241370,
  author    = {Rahimi, A. and Afouras, T. and Zisserman, A.},
  title     = {Reading to Listen at the Cocktail Party: Multi-Modal Speech Separation},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {10483-10492},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236280}
}

@article{bb241371,
  author    = {Narain, J. and Johnson, K.T. and Quatieri, T.F. and Picard, R.W. and Maes, P.},
  title     = {Modeling Real-World Affective and Communicative Nonverbal Vocalizations From Minimally Speaking Individuals},
  journal   = AffCom,
  volume    = {13},
  year      = {2022},
  number    = {4},
  month     = {October},
  pages     = {2238-2253},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236281}
}

@article{bb241372,
  author    = {Gong, Y. and Liu, A.H. and Rouditchenko, A. and Glass, J.},
  title     = {UAVM: Towards Unifying Audio and Visual Models},
  journal   = SPLetters,
  volume    = {29},
  year      = {2022},
  pages     = {2437-2441},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236282}
}

% NOTE(review): bb241373, bb241376 and bb241377 give booktitle as the literal
% text ICPR22, whereas other venues here are bare macro references. The
% literal is kept as-is; confirm whether an ICPR22 string macro exists.
@inproceedings{bb241373,
  author    = {Oya, T. and Iwase, S. and Morishima, S.},
  title     = {The Sound of Bounding-Boxes},
  booktitle = {ICPR22},
  year      = {2022},
  pages     = {9-15},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236283}
}

@article{bb241374,
  author    = {Zhou, J.X. and Guo, D. and Wang, M.},
  title     = {Contrastive Positive Sample Propagation Along the Audio-Visual Event Line},
  journal   = PAMI,
  volume    = {45},
  year      = {2023},
  number    = {6},
  month     = {June},
  pages     = {7239-7257},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236284}
}

@inproceedings{bb241375,
  author    = {Zhou, J.X. and Zheng, L. and Zhong, Y. and Hao, S.J. and Wang, M.},
  title     = {Positive Sample Propagation along the Audio-Visual Event Line},
  booktitle = CVPR21,
  year      = {2021},
  pages     = {8432-8440},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236285}
}

@inproceedings{bb241376,
  author    = {Feng, D. and Yang, S. and Shan, S.G. and Chen, X.L.},
  title     = {Audio-Driven Deformation Flow for Effective Lip Reading},
  booktitle = {ICPR22},
  year      = {2022},
  pages     = {274-280},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236286}
}

@inproceedings{bb241377,
  author    = {Varshney, M. and Yadav, R. and Namboodiri, V.P. and Hegde, R.M.},
  title     = {Learning Speaker-specific Lip-to-Speech Generation},
  booktitle = {ICPR22},
  year      = {2022},
  pages     = {491-498},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236287}
}

@inproceedings{bb241378,
  author    = {Shi, C. and Yang, S.},
  title     = {Spatial and Visual Perspective-Taking via View Rotation and Relation Reasoning for Embodied Reference Understanding},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {XXXVI:201-218},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236288}
}

@inproceedings{bb241379,
  author    = {Hayes, T. and Zhang, S. and Yin, X. and Pang, G. and Sheng, S. and Yang, H. and Ge, S. and Hu, Q.Y. and Parikh, D.},
  title     = {MUGEN: A Playground for Video-Audio-Text Multimodal Understanding and GENeration},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {VIII:431-449},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236289}
}

@inproceedings{bb241380,
  author    = {van Horn, G. and Qian, R. and Wilber, K. and Adam, H. and Aodha, O.M. and Belongie, S.},
  title     = {Exploring Fine-Grained Audiovisual Categorization with the SSW60 Dataset},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {VIII:271-289},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236290}
}

@inproceedings{bb241381,
  author    = {Yu, S. and Wu, P. and Liang, P.P. and Salakhutdinov, R. and Morency, L.P.},
  title     = {PACS: A Dataset for Physical Audiovisual CommonSense Reasoning},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {XXXVII:292-309},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236291}
}

@inproceedings{bb241382,
  author    = {Cheng, H.Y. and Liu, Z.Y. and Zhou, H. and Qian, C. and Wu, W. and Wang, L.M.},
  title     = {Joint-Modal Label Denoising for Weakly-Supervised Audio-Visual Video Parsing},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {XXXIV:431-448},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236292}
}

@inproceedings{bb241383,
  author    = {Zhang, Z.Q. and Zhang, J. and Zhang, J.S. and Wu, M.H. and Fang, X. and Dai, L.R.},
  title     = {Learning Contextually Fused Audio-Visual Representations for Audio-Visual Speech Recognition},
  booktitle = ICIP22,
  year      = {2022},
  pages     = {1346-1350},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236293}
}

@inproceedings{bb241384,
  author    = {Mo, S.T. and Morgado, P.},
  title     = {Localizing Visual Sounds the Easy Way},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {XXXVII:218-234},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236294}
}

@inproceedings{bb241385,
  author    = {Montesinos, J.F. and Kadandale, V.S. and Haro, G.},
  title     = {VoViT: Low Latency Graph-Based Audio-Visual Voice Separation Transformer},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {XXXVII:310-326},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236295}
}

@inproceedings{bb241386,
  author    = {Tzinis, E. and Wisdom, S. and Remez, T. and Hershey, J.R.},
  title     = {AudioScopeV2: Audio-Visual Attention Architectures for Calibrated Open-Domain On-Screen Sound Separation},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {XXXVII:368-385},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236296}
}

@inproceedings{bb241387,
  author    = {Zhou, J.X. and Wang, J. and Zhang, J.Y. and Sun, W. and Zhang, J. and Birchfield, S. and Guo, D. and Kong, L. and Wang, M. and Zhong, Y.},
  title     = {Audio-Visual Segmentation},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {XXXVII:386-403},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236297}
}

@inproceedings{bb241388,
  author    = {Alcazar, J.L. and Cordes, M. and Zhao, C. and Ghanem, B.},
  title     = {End-to-End Active Speaker Detection},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {XXXVII:126-143},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236298}
}

@inproceedings{bb241389,
  author    = {Chen, C.G. and Gao, R.H. and Calamia, P. and Grauman, K.},
  title     = {Visual Acoustic Matching},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {18836-18846},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236299}
}

@inproceedings{bb241390,
  author    = {Lee, S. and Kim, H.I. and Ro, Y.M.},
  title     = {Weakly Paired Associative Learning for Sound and Image Representations via Bimodal Associative Memory},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {10524-10533},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236300}
}

@inproceedings{bb241391,
  author    = {Vasudevan, A.B. and Dai, D.X. and Van Gool, L.J.},
  title     = {Sound and Visual Representation Learning with Multiple Pretraining Tasks},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {14596-14606},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236301}
}

@inproceedings{bb241392,
  author    = {Xia, Y. and Zhao, Z.},
  title     = {Cross-modal Background Suppression for Audio-Visual Event Localization},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {19957-19966},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236302}
}

@inproceedings{bb241393,
  author    = {Jiang, H. and Murdock, C. and Ithapu, V.K.},
  title     = {Egocentric Deep Multi-Channel Audio-Visual Active Speaker Localization},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {10534-10542},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236303}
}

@inproceedings{bb241394,
  author    = {Ng, E. and Joo, H. and Hu, L.W. and Li, H. and Darrell, T.J. and Kanazawa, A. and Ginosar, S.},
  title     = {Learning to Listen: Modeling Non-Deterministic Dyadic Facial Motion},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {20363-20373},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236304}
}

@inproceedings{bb241395,
  author    = {Mercea, O.B. and Hummel, T. and Koepke, A.S. and Akata, Z.},
  title     = {Temporal and Cross-modal Attention for Audio-Visual Zero-Shot Learning},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {XX:488-505},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236305}
}

@inproceedings{bb241396,
  author    = {Mercea, O.B. and Riesch, L. and Koepke, A.S. and Akata, Z.},
  title     = {Audiovisual Generalised Zero-shot Learning with Cross-modal Attention and Language},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {10543-10553},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236306}
}

@inproceedings{bb241397,
  author    = {Karas, V. and Tellamekala, M.K. and Mallol Ragolta, A. and Valstar, M. and Schuller, B.W.},
  title     = {Time-Continuous Audiovisual Fusion with Recurrence vs Attention for In-The-Wild Affect Recognition},
  booktitle = ABAW22,
  year      = {2022},
  pages     = {2381-2390},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236307}
}

@inproceedings{bb241398,
  author    = {Yang, K. and Markovic, D. and Krenn, S. and Agrawal, V. and Richard, A.},
  title     = {Audio-Visual Speech Codecs: Rethinking Audio-Visual Speech Enhancement by Re-Synthesis},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {8217-8227},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236308}
}

@inproceedings{bb241399,
  author    = {Kim, M. and Hong, J. and Park, S.J. and Ro, Y.M.},
  title     = {Multi-modality Associative Bridging through Memory: Speech Sound Recollected from Face Video},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {296-306},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT236309}
}