% Journal article (2011): audiovisual discrimination between speech and laughter.
@article{bb255600,
  author    = {Petridis, S. and Pantic, M.},
  title     = {Audiovisual Discrimination Between Speech and Laughter: Why and When Visual Information Might Help},
  journal   = MultMed,
  volume    = {13},
  number    = {2},
  pages     = {216--234},
  year      = {2011},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250413}}

% Journal article (2016): prediction-based audiovisual fusion for non-linguistic vocalisations.
@article{bb255601,
  author    = {Petridis, S. and Pantic, M.},
  title     = {Prediction-Based Audiovisual Fusion for Classification of Non-Linguistic Vocalisations},
  journal   = AffCom,
  volume    = {7},
  number    = {1},
  pages     = {45--58},
  month     = jan,
  year      = {2016},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250414}}

% Conference paper (2008): audio and visual cue fusion for laughter detection.
@inproceedings{bb255602,
  author    = {Petridis, S. and Pantic, M.},
  title     = {Fusion of audio and visual cues for laughter detection},
  booktitle = CIVR08,
  pages     = {329--338},
  year      = {2008},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250415}}

% Conference paper (2011): prediction-based laughter vs. speech classification.
@inproceedings{bb255603,
  author    = {Petridis, S. and Pantic, M. and Cohn, J.F.},
  title     = {Prediction-based classification for audiovisual discrimination between laughter and speech},
  booktitle = FG11,
  pages     = {619--626},
  year      = {2011},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250416}}

% Magazine article (2011): modality replacement for visually and hearing-impaired communication.
@article{bb255604,
  author    = {Moustakas, K. and Tzovaras, D. and Dybkjaer, L. and Bernsen, N. and Aran, O.},
  title     = {Using Modality Replacement to Facilitate Communication between Visually and Hearing-Impaired People},
  journal   = MultMedMag,
  volume    = {18},
  number    = {2},
  pages     = {26--37},
  month     = apr,
  year      = {2011},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250417}}

% Journal article (2011): visual signal reliability for audio-visual speaker identification.
@article{bb255605,
  author    = {Tariquzzaman, M. and Kim, J.Y. and Na, S.Y. and Kim, H.G. and Har, D.S.},
  title     = {A Visual Signal Reliability for Robust Audio-Visual Speaker Identification},
  journal   = IEICE,
  volume    = {E94-D},
  number    = {10},
  pages     = {2052--2055},
  month     = oct,
  year      = {2011},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250418}}

% Journal article (2011): video coding driven by audio-visual focus of attention.
@article{bb255606,
  author    = {Lee, J.S. and de Simone, F. and Ebrahimi, T.},
  title     = {Efficient video coding based on audio-visual focus of attention},
  journal   = JVCIR,
  volume    = {22},
  number    = {8},
  pages     = {704--711},
  month     = nov,
  year      = {2011},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250419}}

% Journal article (2012): visual speakingness detection with a bi-level HMM.
% HMM is brace-protected so sentence-casing styles keep it upper-case.
@article{bb255607,
  author    = {Tiawongsombat, P. and Jeong, M.H. and Yun, J.S. and You, B.J. and Oh, S.R.},
  title     = {Robust visual speakingness detection using bi-level {HMM}},
  journal   = PR,
  volume    = {45},
  number    = {2},
  pages     = {783--793},
  month     = feb,
  year      = {2012},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250420}}

% Journal article (2012): multimodal speaker diarization.
@article{bb255608,
  author    = {Noulas, A. and Englebienne, G. and Krose, B.J.A.},
  title     = {Multimodal Speaker Diarization},
  journal   = PAMI,
  volume    = {34},
  number    = {1},
  pages     = {79--93},
  month     = jan,
  year      = {2012},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250421}}

% Journal article (2012): voice activity detection and speaker localization from audiovisual cues.
@article{bb255609,
  author    = {Blauth, D.A. and Minotto, V.P. and Jung, C.R. and Lee, B. and Kalker, T.},
  title     = {Voice activity detection and speaker localization using audiovisual cues},
  journal   = PRL,
  volume    = {33},
  number    = {4},
  pages     = {373--380},
  month     = mar,
  year      = {2012},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250422}}

% Conference paper (2015): audiovisual voice activity detection with off-the-shelf cameras.
@inproceedings{bb255610,
  author    = {Montazzolli, S. and Jung, C.R. and Gelb, D.},
  title     = {Audiovisual voice activity detection using off-the-shelf cameras},
  booktitle = ICIP15,
  pages     = {3886--3890},
  year      = {2015},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250423}}

% Journal article (2014): simultaneous-speaker voice activity detection and localization.
% SVM and HMMs are brace-protected against title down-casing.
@article{bb255611,
  author    = {Minotto, V.P. and Jung, C.R. and Lee, B.},
  title     = {Simultaneous-Speaker Voice Activity Detection and Localization Using Mid-Fusion of {SVM} and {HMMs}},
  journal   = MultMed,
  volume    = {16},
  number    = {4},
  pages     = {1032--1044},
  month     = jun,
  year      = {2014},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250424}}

% Journal article (2015): multimodal multi-channel on-line speaker diarization.
% SVM is brace-protected against title down-casing.
@article{bb255612,
  author    = {Minotto, V.P. and Jung, C.R. and Lee, B.},
  title     = {Multimodal Multi-Channel On-Line Speaker Diarization Using Sensor Fusion Through {SVM}},
  journal   = MultMed,
  volume    = {17},
  number    = {10},
  pages     = {1694--1705},
  month     = oct,
  year      = {2015},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250425}}

% Journal article (2012): output-associative RVM regression for emotion prediction.
% RVM is brace-protected against title down-casing.
@article{bb255613,
  author    = {Nicolaou, M.A. and Gunes, H. and Pantic, M.},
  title     = {Output-associative {RVM} regression for dimensional and continuous emotion prediction},
  journal   = IVC,
  volume    = {30},
  number    = {3},
  pages     = {186--196},
  month     = mar,
  year      = {2012},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250426}}

% Conference paper (2011) with the same title as bb255613.
% NOTE(review): the author order is the reverse of bb255613 - confirm against the proceedings.
% NOTE(review): bibsource uses the same anchor (TT250426) as bb255613 - confirm this is intended.
@inproceedings{bb255614,
  author    = {Pantic, M. and Gunes, H. and Nicolaou, M.A.},
  title     = {Output-associative {RVM} regression for dimensional and continuous emotion prediction},
  booktitle = FG11,
  pages     = {16--23},
  year      = {2011},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250426}}

% Conference paper (2011): frameworks for affect prediction in dimensional space.
@inproceedings{bb255615,
  author    = {Nicolaou, M.A. and Gunes, H. and Pantic, M.},
  title     = {Designing frameworks for automatic affect prediction and classification in dimensional space},
  booktitle = Gesture11,
  pages     = {20--26},
  year      = {2011},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250427}}

% Journal article (2011): continuous affect prediction in valence-arousal space.
@article{bb255616,
  author    = {Nicolaou, M.A. and Gunes, H. and Pantic, M.},
  title     = {Continuous Prediction of Spontaneous Affect from Multiple Cues and Modalities in Valence-Arousal Space},
  journal   = AffCom,
  volume    = {2},
  number    = {2},
  pages     = {92--105},
  year      = {2011},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250428}}

% Conference paper (2010): audio-visual classification and fusion of affective data in likelihood space.
@inproceedings{bb255617,
  author    = {Nicolaou, M.A. and Gunes, H. and Pantic, M.},
  title     = {Audio-Visual Classification and Fusion of Spontaneous Affective Data in Likelihood Space},
  booktitle = ICPR10,
  pages     = {3695--3699},
  year      = {2010},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250429}}

% Journal article (2014): dynamic probabilistic CCA for affective behavior analysis.
% CCA is brace-protected against title down-casing.
@article{bb255618,
  author    = {Nicolaou, M.A. and Pavlovic, V. and Pantic, M.},
  title     = {Dynamic Probabilistic {CCA} for Analysis of Affective Behavior and Fusion of Continuous Annotations},
  journal   = PAMI,
  volume    = {36},
  number    = {7},
  pages     = {1299--1311},
  month     = jul,
  year      = {2014},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250430}}

% Conference paper (2012): dynamic probabilistic CCA for affective behaviour analysis.
% The pages value keeps the source's "VII:" prefix; only the range dash is normalised.
@inproceedings{bb255619,
  author    = {Nicolaou, M.A. and Pavlovic, V. and Pantic, M.},
  title     = {Dynamic Probabilistic {CCA} for Analysis of Affective Behaviour},
  booktitle = ECCV12,
  pages     = {VII: 98--111},
  year      = {2012},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250431}}

% Magazine article (2012): computer-assisted audiovisual language learning.
@article{bb255620,
  author    = {Wang, L.J. and Qian, Y. and Scott, M.R. and Chen, G. and Soong, F.K.},
  title     = {Computer-Assisted Audiovisual Language Learning},
  journal   = Computer,
  volume    = {45},
  number    = {6},
  pages     = {38--47},
  month     = jun,
  year      = {2012},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250432}}

% Journal article (2013): human action recognition with multimodal feature selection and fusion.
@article{bb255621,
  author    = {Wu, Q.X. and Wang, Z.Y. and Deng, F.Q. and Chi, Z. and Feng, D.D.},
  title     = {Realistic Human Action Recognition with Multimodal Feature Selection and Fusion},
  journal   = SMCS,
  volume    = {43},
  number    = {4},
  pages     = {875--885},
  year      = {2013},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250433}}

% Journal article (2013): two-level feature selection for human action recognition.
@article{bb255622,
  author    = {Wu, Q.X. and Wang, Z.Y. and Deng, F.Q. and Xia, Y. and Kang, W.X. and Feng, D.D.},
  title     = {Discriminative two-level feature selection for realistic human action recognition},
  journal   = JVCIR,
  volume    = {24},
  number    = {7},
  pages     = {1064--1074},
  year      = {2013},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250434}}

% Conference paper (2010): human action recognition with audio context.
@inproceedings{bb255623,
  author    = {Wu, Q.X. and Wang, Z.Y. and Deng, F.Q. and Feng, D.D.},
  title     = {Realistic Human Action Recognition with Audio Context},
  booktitle = DICTA10,
  pages     = {288--293},
  year      = {2010},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250435}}

% Conference paper (2011): structure context of local features for action recognition.
@inproceedings{bb255624,
  author    = {Wu, Q.X. and Lu, S.Y. and Wang, Z.Y. and Deng, F.Q. and Kang, W.X. and Feng, D.D.},
  title     = {Structure Context of Local Features in Realistic Human Action Recognition},
  booktitle = VECTaR11,
  pages     = {1496--1501},
  year      = {2011},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250436}}

% Journal article (2014): audio-visual speech recognition in augmented reality environments.
@article{bb255625,
  author    = {Mirzaei, M.R. and Ghorshi, S. and Mortazavi, M.},
  title     = {Audio-visual speech recognition techniques in augmented reality environments},
  journal   = VC,
  volume    = {30},
  number    = {3},
  pages     = {245--257},
  month     = mar,
  year      = {2014},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250437}}

% Journal article (2014): person instance graphs for speaker identification in TV broadcast.
% TV is brace-protected against title down-casing.
@article{bb255626,
  author    = {Bredin, H. and Roy, A. and Le, V.B. and Barras, C.},
  title     = {Person instance graphs for mono-, cross- and multi-modal person recognition in multimedia data: application to speaker identification in {TV} broadcast},
  journal   = MultInfoRetr,
  volume    = {3},
  number    = {3},
  pages     = {161--175},
  month     = sep,
  year      = {2014},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250438}}

% Journal article (2015): discriminating unknown from known objects using image and speech.
@article{bb255627,
  author    = {Ozasa, Y. and Nakano, M. and Ariki, Y. and Iwahashi, N.},
  title     = {Discriminating Unknown Objects from Known Objects Using Image and Speech Information},
  journal   = IEICE,
  volume    = {E98-D},
  number    = {3},
  pages     = {704--711},
  month     = mar,
  year      = {2015},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250439}}

% Conference paper (2012): unknown object detection from image and speech recognition confidences.
% The pages value keeps the source's "I:" prefix; only the range dash is normalised.
@inproceedings{bb255628,
  author    = {Ozasa, Y. and Ariki, Y. and Nakano, M. and Iwahashi, N.},
  title     = {Disambiguation in Unknown Object Detection by Integrating Image and Speech Recognition Confidences},
  booktitle = ACCV12,
  pages     = {I:85--96},
  year      = {2012},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250440}}

% Conference paper (2014): selecting speech-specified unknown objects using web-image models.
@inproceedings{bb255629,
  author    = {Nishimura, H. and Ozasa, Y. and Ariki, Y. and Nakano, M.},
  title     = {Selection of Unknown Objects Specified by Speech Using Models Constructed from Web Images},
  booktitle = ICPR14,
  pages     = {477--482},
  year      = {2014},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250441}}

% Conference paper (2013): object recognition by integrated information using web images.
@inproceedings{bb255630,
  author    = {Nishimura, H. and Ozasa, Y. and Ariki, Y. and Nakano, M.},
  title     = {Object Recognition by Integrated Information Using Web Images},
  booktitle = ACPR13,
  pages     = {657--661},
  year      = {2013},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250442}}

% Conference paper (2015): color saliency for object identification.
@inproceedings{bb255631,
  author    = {Ozasa, Y. and Enami, N. and Ariki, Y.},
  title     = {Color saliency for object identification},
  booktitle = FCV15,
  pages     = {1--5},
  year      = {2015},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250443}}

% Journal article (2015): the TCD-TIMIT audio-visual continuous speech corpus.
% The corpus name is brace-protected against title down-casing.
@article{bb255632,
  author    = {Harte, N. and Gillen, E.},
  title     = {{TCD-TIMIT}: An Audio-Visual Corpus of Continuous Speech},
  journal   = MultMed,
  volume    = {17},
  number    = {5},
  pages     = {603--615},
  month     = may,
  year      = {2015},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250444}}

% Journal article (2015): challenges and new approaches in audiovisual fusion.
@article{bb255633,
  author    = {Katsaggelos, A.K. and Bahaadini, S. and Molina, R.},
  title     = {Audiovisual Fusion: Challenges and New Approaches},
  journal   = PIEEE,
  volume    = {103},
  number    = {9},
  pages     = {1635--1653},
  month     = sep,
  year      = {2015},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250445}}

% Journal article (2015): score-level fusion of face and voice.
@article{bb255634,
  author    = {Mezai, L. and Hachouf, F.},
  title     = {Score-Level Fusion of Face and Voice Using Particle Swarm Optimization and Belief Functions},
  journal   = HMS,
  volume    = {45},
  number    = {6},
  pages     = {761--772},
  month     = dec,
  year      = {2015},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250446}}

% Journal article (2016): lip descriptor for audio-visual keyword spotting.
@article{bb255635,
  author    = {Wu, P. and Liu, H. and Li, X. and Fan, T. and Zhang, X.},
  title     = {A Novel Lip Descriptor for Audio-Visual Keyword Spotting Based on Adaptive Decision Fusion},
  journal   = MultMed,
  volume    = {18},
  number    = {3},
  pages     = {326--338},
  month     = mar,
  year      = {2016},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250447}}

% Journal article (2016): multivariate mutual information for audio-video fusion.
@article{bb255636,
  author    = {Dilpazir, H. and Muhammad, Z. and Minhas, Q. and Ahmed, F. and Malik, H. and Mahmood, H.},
  title     = {Multivariate mutual information for audio video fusion},
  journal   = SIViP,
  volume    = {10},
  number    = {7},
  pages     = {1265--1272},
  month     = oct,
  year      = {2016},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250448}}

% Journal article (2018): predicting emergent-leader leadership style from audio and visual nonverbal features.
@article{bb255637,
  author    = {Beyan, C. and Capozzi, F. and Becchio, C. and Murino, V.},
  title     = {Prediction of the Leadership Style of an Emergent Leader Using Audio and Visual Nonverbal Features},
  journal   = MultMed,
  volume    = {20},
  number    = {2},
  pages     = {441--456},
  month     = feb,
  year      = {2018},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250449}}

% Journal article (2018): survey on automatic lip-reading with deep learning.
@article{bb255638,
  author    = {Fernandez Lopez, A. and Sukno, F.M.},
  title     = {Survey on automatic lip-reading in the era of deep learning},
  journal   = IVC,
  volume    = {78},
  pages     = {53--72},
  year      = {2018},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250450}}

% Journal article (2018): audiovisual word recognition with residual networks and LSTMs.
% LSTMs is brace-protected against title down-casing; the volume value is kept as given in the source.
@article{bb255639,
  author    = {Stafylakis, T. and Khan, M.H. and Tzimiropoulos, G.},
  title     = {Pushing the boundaries of audiovisual word recognition using Residual Networks and {LSTMs}},
  journal   = CVIU,
  volume    = {176-177},
  pages     = {22--32},
  year      = {2018},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250451}}

% Conference paper (2018): zero-shot keyword spotting for visual speech recognition.
% The pages value keeps the source's "II:" prefix; only the range dash is normalised.
@inproceedings{bb255640,
  author    = {Stafylakis, T. and Tzimiropoulos, G.},
  title     = {Zero-Shot Keyword Spotting for Visual Speech Recognition In-the-wild},
  booktitle = ECCV18,
  pages     = {II: 536--552},
  year      = {2018},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250452}}

% Journal article (2019): attention-guided deep audio-face fusion for speaker naming.
@article{bb255641,
  author    = {Liu, X. and Geng, J.J. and Ling, H.B. and Cheung, Y.M.},
  title     = {Attention guided deep audio-face fusion for efficient speaker naming},
  journal   = PR,
  volume    = {88},
  pages     = {557--568},
  year      = {2019},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250453}}

% Journal article (2019): behaviorally inspired fusion for audiovisual saliency modeling.
@article{bb255642,
  author    = {Tsiami, A. and Koutras, P. and Katsamanis, A. and Vatakis, A. and Maragos, P.},
  title     = {A behaviorally inspired fusion approach for computational audiovisual saliency modeling},
  journal   = SP:IC,
  volume    = {76},
  pages     = {186--200},
  year      = {2019},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250454}}

% Journal article (2019): automated oral presentation scoring from audio-video behavior profiles.
@article{bb255643,
  author    = {Hsiao, S. and Sun, H. and Hsieh, M. and Tsai, M. and Tsao, Y. and Lee, C.},
  title     = {Toward Automating Oral Presentation Scoring During Principal Certification Program Using Audio-Video Low-Level Behavior Profiles},
  journal   = AffCom,
  volume    = {10},
  number    = {4},
  pages     = {552--567},
  month     = oct,
  year      = {2019},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250455}}

% Journal article (2020): non-contact speech recovery with an auditory radar and webcam.
% GHz is brace-protected against title down-casing.
% NOTE(review): pages holds the source's placeholder "xx-yy", not real page numbers;
% it is kept verbatim - replace it with the article number once confirmed.
@article{bb255644,
  author    = {Ma, Y. and Hong, H. and Li, H. and Zhao, H. and Li, Y.S. and Sun, L. and Gu, C. and Zhu, X.H.},
  title     = {Non-Contact Speech Recovery Technology Using a 24 {GHz} Portable Auditory Radar and Webcam},
  journal   = RS,
  volume    = {12},
  number    = {4},
  pages     = {xx-yy},
  year      = {2020},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250456}}

% Conference paper (2020): visual speech enhancement driven multi-modality speech recognition.
@inproceedings{bb255645,
  author    = {Xu, B. and Wang, J. and Lu, C. and Guo, Y.},
  title     = {Watch to Listen Clearly: Visual Speech Enhancement Driven Multi-modality Speech Recognition},
  booktitle = WACV20,
  pages     = {1626--1635},
  year      = {2020},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250457}}

% Journal letter (2020): active speaker detection and localization in videos.
@article{bb255646,
  author    = {Pu, J. and Panagakis, Y. and Pantic, M.},
  title     = {Active Speaker Detection and Localization in Videos Using Low-Rank and Kernelized Sparsity},
  journal   = SPLetters,
  volume    = {27},
  pages     = {865--869},
  year      = {2020},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250458}}

% Journal article (2021): end-to-end audiovisual speech recognition with multitask learning.
@article{bb255647,
  author    = {Tao, F. and Busso, C.},
  title     = {End-to-End Audiovisual Speech Recognition System With Multitask Learning},
  journal   = MultMed,
  volume    = {23},
  pages     = {1--11},
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250459}}

% Journal article (2021): re-synchronization for multi-modal fusion in cued speech recognition.
@article{bb255648,
  author    = {Liu, L. and Feng, G. and Beautemps, D. and Zhang, X.P.},
  title     = {Re-Synchronization Using the Hand Preceding Model for Multi-Modal Fusion in Automatic Continuous Cued Speech Recognition},
  journal   = MultMed,
  volume    = {23},
  pages     = {292--305},
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250460}}

% Journal article (2021): RealVAD dataset and method for voice activity detection by body motion.
% RealVAD is brace-protected against title down-casing.
@article{bb255649,
  author    = {Beyan, C. and Shahid, M. and Murino, V.},
  title     = {{RealVAD}: A Real-World Dataset and A Method for Voice Activity Detection by Body Motion Analysis},
  journal   = MultMed,
  volume    = {23},
  pages     = {2071--2085},
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250461}}

% Journal letter (2021): three-dimensional speaker localization.
@article{bb255650,
  author    = {Qian, X.Y. and Liu, Q. and Wang, J.D. and Li, H.Z.},
  title     = {Three-Dimensional Speaker Localization: Audio-Refined Visual Scaling Factor Estimation},
  journal   = SPLetters,
  volume    = {28},
  pages     = {1405--1409},
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250462}}

% Journal article (2022): adversarial-metric learning for audio-visual cross-modal matching.
@article{bb255651,
  author    = {Zheng, A. and Hu, M. and Jiang, B. and Huang, Y. and Yan, Y. and Luo, B.},
  title     = {Adversarial-Metric Learning for Audio-Visual Cross-Modal Matching},
  journal   = MultMed,
  volume    = {24},
  pages     = {338--351},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250463}}

% Journal article (2022): affective audio annotation of public speeches.
@article{bb255652,
  author    = {Xu, J.H. and Zhang, B. and Wang, Z.Y. and Wang, Y. and Chen, F. and Gao, J.B. and Feng, D.D.},
  title     = {Affective Audio Annotation of Public Speeches with Convolutional Clustering Neural Network},
  journal   = AffCom,
  volume    = {13},
  number    = {1},
  pages     = {238--249},
  month     = jan,
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250464}}

% Journal article (2022): deep audio-visual speech recognition.
@article{bb255653,
  author    = {Afouras, T. and Chung, J.S. and Senior, A. and Vinyals, O. and Zisserman, A.},
  title     = {Deep Audio-Visual Speech Recognition},
  journal   = PAMI,
  volume    = {44},
  number    = {12},
  pages     = {8717--8727},
  month     = dec,
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250465}}

% Conference paper (2022): multi-modal speech separation.
@inproceedings{bb255654,
  author    = {Rahimi, A. and Afouras, T. and Zisserman, A.},
  title     = {Reading to Listen at the Cocktail Party: Multi-Modal Speech Separation},
  booktitle = CVPR22,
  pages     = {10483--10492},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250466}}

% Journal article (2022): modeling nonverbal vocalizations from minimally speaking individuals.
@article{bb255655,
  author    = {Narain, J. and Johnson, K.T. and Quatieri, T.F. and Picard, R.W. and Maes, P.},
  title     = {Modeling Real-World Affective and Communicative Nonverbal Vocalizations From Minimally Speaking Individuals},
  journal   = AffCom,
  volume    = {13},
  number    = {4},
  pages     = {2238--2253},
  month     = oct,
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250467}}

% Journal letter (2022): UAVM, unifying audio and visual models.
% UAVM is brace-protected against title down-casing.
@article{bb255656,
  author    = {Gong, Y. and Liu, A.H. and Rouditchenko, A. and Glass, J.},
  title     = {{UAVM}: Towards Unifying Audio and Visual Models},
  journal   = SPLetters,
  volume    = {29},
  pages     = {2437--2441},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250468}}

% Conference paper (2022): The Sound of Bounding-Boxes.
% booktitle is a literal string in the source (not a macro) and is kept that way.
@inproceedings{bb255657,
  author    = {Oya, T. and Iwase, S. and Morishima, S.},
  title     = {The Sound of Bounding-Boxes},
  booktitle = {ICPR22},
  pages     = {9--15},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250469}}

% Journal article (2023): contrastive positive sample propagation along the audio-visual event line.
@article{bb255658,
  author    = {Zhou, J.X. and Guo, D. and Wang, M.},
  title     = {Contrastive Positive Sample Propagation Along the Audio-Visual Event Line},
  journal   = PAMI,
  volume    = {45},
  number    = {6},
  pages     = {7239--7257},
  month     = jun,
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250470}}

% Conference paper (2021): positive sample propagation along the audio-visual event line.
@inproceedings{bb255659,
  author    = {Zhou, J.X. and Zheng, L. and Zhong, Y. and Hao, S.J. and Wang, M.},
  title     = {Positive Sample Propagation along the Audio-Visual Event Line},
  booktitle = CVPR21,
  pages     = {8432--8440},
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250471}}

% Journal article (2023): DBATES dataset of competitive debate speeches.
% DBATES is brace-protected against title down-casing.
@article{bb255660,
  author    = {Sen, T.K. and Naven, G. and Gerstner, L. and Bagley, D. and Baten, R.A. and Rahman, W. and Hasan, M.K. and Haut, K. and Mamun, A.A. and Samrose, S. and Solbu, A. and Barnes, R.E. and Frank, M.G. and Hoque, E.},
  title     = {{DBATES}: Dataset for Discerning Benefits of Audio, Textual, and Facial Expression Features in Competitive Debate Speeches},
  journal   = AffCom,
  volume    = {14},
  number    = {2},
  pages     = {1028--1043},
  month     = apr,
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250472}}

% Journal article (2023): audio-visual automatic group affect analysis.
@article{bb255661,
  author    = {Sharma, G. and Dhall, A. and Cai, J.F.},
  title     = {Audio-Visual Automatic Group Affect Analysis},
  journal   = AffCom,
  volume    = {14},
  number    = {2},
  pages     = {1056--1069},
  month     = apr,
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250473}}

% Journal article (2023): visual-acoustic-semantic embedding for speech-image retrieval.
@article{bb255662,
  author    = {Cheng, W.L. and Tang, W. and Huang, Y. and Luo, Y. and Wang, L.},
  title     = {A Reconstruction-Based Visual-Acoustic-Semantic Embedding Method for Speech-Image Retrieval},
  journal   = MultMed,
  volume    = {25},
  pages     = {4067--4080},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250474}}

% Journal article (2023): KAN-AV dataset for audio-visual face and speech analysis.
% KAN-AV is brace-protected against title down-casing.
% pages holds a single number rather than a range, kept as in the source.
@article{bb255663,
  author    = {Kefalas, T. and Fotiadou, E. and Georgopoulos, M. and Panagakis, Y. and Ma, P.C. and Petridis, S. and Stafylakis, T. and Pantic, M.},
  title     = {{KAN-AV} dataset for audio-visual face and speech analysis in the wild},
  journal   = IVC,
  volume    = {140},
  pages     = {104839},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250475}}

% Journal article (2024): CATNet, cross-modal fusion for audio-visual speech recognition.
% CATNet is brace-protected against title down-casing.
@article{bb255664,
  author    = {Wang, X.M. and Mi, J.C. and Li, B.Q. and Zhao, Y.X. and Meng, J.X.},
  title     = {{CATNet}: Cross-modal fusion for audio-visual speech recognition},
  journal   = PRL,
  volume    = {178},
  pages     = {216--222},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250476}}

% Journal article (2024): audio-visual saliency model for omnidirectional videos with spatial audio.
@article{bb255665,
  author    = {Zhu, D.D. and Zhang, K.W. and Zhang, N. and Zhou, Q.Q. and Min, X.K. and Zhai, G.T. and Yang, X.K.},
  title     = {Unified Audio-Visual Saliency Model for Omnidirectional Videos With Spatial Audio},
  journal   = MultMed,
  volume    = {26},
  pages     = {764--775},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250477}}

% Journal article (2024): cross-modal retrieval between spatial image and acoustic speech.
@article{bb255666,
  author    = {Qian, X.Y. and Xue, W. and Zhang, Q. and Tao, R.J. and Li, H.Z.},
  title     = {Deep Cross-Modal Retrieval Between Spatial Image and Acoustic Speech},
  journal   = MultMed,
  volume    = {26},
  pages     = {4480--4489},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250478}}

% Journal article (2024): audio-visual saliency prediction with multisensory perception.
% pages holds a single number rather than a range, kept as in the source.
@article{bb255667,
  author    = {Xie, J.W. and Liu, Z. and Li, G.Y. and Song, Y.J.},
  title     = {Audio-visual saliency prediction with multisensory perception and integration},
  journal   = IVC,
  volume    = {143},
  pages     = {104955},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250479}}

% Conference paper (2023): weakly-supervised audio-visual video parsing.
@inproceedings{bb255668,
  author    = {Rachavarapu, K.K. and Rajagopalan, A.N.},
  title     = {Boosting Positive Segments for Weakly-Supervised Audio-Visual Video Parsing},
  booktitle = ICCV23,
  pages     = {10158--10168},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250480}}

% Conference paper (2023): knowledge transfer-based audio-visual navigation.
@inproceedings{bb255669,
  author    = {Chen, J. and Wang, W.G. and Liu, S. and Li, H.S. and Yang, Y.},
  title     = {Omnidirectional Information Gathering for Knowledge Transfer-based Audio-Visual Navigation},
  booktitle = ICCV23,
  pages     = {10959--10969},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250481}}

% Conference paper (2023): MixSpeech, audio-visual stream mixup for visual speech translation and recognition.
% MixSpeech is brace-protected against title down-casing.
@inproceedings{bb255670,
  author    = {Cheng, X. and Jin, T. and Huang, R.J. and Li, L.J. and Lin, W. and Wang, Z. and Wang, Y. and Liu, H. and Yin, A. and Zhao, Z.},
  title     = {{MixSpeech}: Cross-Modality Self-Learning with Audio-Visual Stream Mixup for Visual Speech Translation and Recognition},
  booktitle = ICCV23,
  pages     = {15689--15699},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250482}}

% Conference paper (2023): audiovisual masked autoencoders.
@inproceedings{bb255671,
  author    = {Georgescu, M.I. and Fonseca, E. and Ionescu, R.T. and Lucic, M. and Schmid, C. and Arnab, A.},
  title     = {Audiovisual Masked Autoencoders},
  booktitle = ICCV23,
  pages     = {16098--16108},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250483}}

% Conference paper (2023): audio scene reconstruction from sparse audio-visual samples.
% The acronym BEE is brace-protected against title down-casing.
@inproceedings{bb255672,
  author    = {Chen, M.F. and Su, K. and Shlizerman, E.},
  title     = {Be Everywhere - Hear Everything ({BEE}): Audio Scene Reconstruction by Sparse Audio-Visual Samples},
  booktitle = ICCV23,
  pages     = {7819--7828},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250484}}

% Conference paper (2023): dual-branch cross-patch attention for group affect recognition.
@inproceedings{bb255673,
  author    = {Xie, H.X. and Lee, M.X. and Chen, T.J. and Chen, H.J. and Liu, H.I. and Shuai, H.H. and Cheng, W.H.},
  title     = {Most Important Person-guided Dual-branch Cross-Patch Attention for Group Affect Recognition},
  booktitle = ICCV23,
  pages     = {20541--20551},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250485}}

% Conference paper (2023): Lip2Vec, visual speech recognition via latent visual-to-audio mapping.
% Lip2Vec is brace-protected against title down-casing.
@inproceedings{bb255674,
  author    = {Djilali, Y.A.D. and Narayan, S. and Boussaid, H. and Almazrouei, E. and Debbah, M.},
  title     = {{Lip2Vec}: Efficient and Robust Visual Speech Recognition via Latent-to-Latent Visual to Audio Representation Mapping},
  booktitle = ICCV23,
  pages     = {13744--13755},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250486}}

% Conference paper (2023): local-global contrast for voice-face representations.
@inproceedings{bb255675,
  author    = {Chen, G.Y. and Zhang, D. and Liu, T. and Du, X.Y.},
  title     = {Local-Global Contrast for Learning Voice-Face Representations},
  booktitle = ICIP23,
  pages     = {51--55},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250487}}

% Conference paper (2023): robust audio-visual speech recognition with visual corruption modeling.
@inproceedings{bb255676,
  author    = {Hong, J. and Kim, M. and Choi, J. and Ro, Y.M.},
  title     = {Watch or Listen: Robust Audio-Visual Speech Recognition with Visual Corruption Modeling and Reliability Scoring},
  booktitle = CVPR23,
  pages     = {18783--18794},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250488}}

% Conference paper (2023): weakly-supervised audio-visual event perception.
@inproceedings{bb255677,
  author    = {Gao, J.Y. and Chen, M.Y. and Xu, C.S.},
  title     = {Collecting Cross-Modal Presence-Absence Evidence for Weakly-Supervised Audio-Visual Event Perception},
  booktitle = CVPR23,
  pages     = {18827--18836},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250489}}

% Workshop paper (2023): Casual Conversations v2 dataset for fairness and robustness measurement.
@inproceedings{bb255678,
  author    = {Porgali, B. and Albiero, V. and Ryda, J. and Ferrer, C.C. and Hazirbas, C.},
  title     = {The Casual Conversations v2 Dataset: A diverse, large benchmark for measuring fairness and robustness in audio/vision/speech models},
  booktitle = FaDE-TCV23,
  pages     = {10--17},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250490}}

% Conference paper (2023): CASP-Net, video saliency prediction from audio-visual consistency.
% CASP-Net is brace-protected against title down-casing.
@inproceedings{bb255679,
  author    = {Xiong, J.W. and Wang, G. and Zhang, P. and Huang, W. and Zha, Y.F. and Zhai, G.T.},
  title     = {{CASP-Net}: Rethinking Video Saliency Prediction from an Audio-Visual Consistency Perceptual Perspective},
  booktitle = CVPR23,
  pages     = {6441--6450},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250491}}

% Conference paper (2023): egocentric audio-visual object localization.
@inproceedings{bb255680,
  author    = {Huang, C. and Tian, Y. and Kumar, A. and Xu, C.L.},
  title     = {Egocentric Audio-Visual Object Localization},
  booktitle = CVPR23,
  pages     = {22910--22921},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250492}}

@inproceedings{bb255681,
  author    = {Liao, J. and Duan, H. and Feng, K. and Zhao, W.B. and Yang, Y.B. and Chen, L.Y.},
  title     = {A Light Weight Model for Active Speaker Detection},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {22932-22941},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250493},
}

% Braces around the model name and the AV-ASR acronym keep their
% capitalisation under styles that sentence-case titles.
@inproceedings{bb255682,
  author    = {Seo, P.H. and Nagrani, A. and Schmid, C.},
  title     = {{AVFormer}: Injecting Vision into Frozen Speech Models for Zero-Shot
{AV-ASR}},
  booktitle = CVPR23,
  year      = {2023},
  pages     = {22922-22931},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250494},
}

@inproceedings{bb255683,
  author    = {Feng, D. and Yang, S. and Shan, S.G. and Chen, X.L.},
  title     = {Audio-Driven Deformation Flow for Effective Lip Reading},
  booktitle = {ICPR22},
  year      = {2022},
  pages     = {274-280},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250495},
}

@inproceedings{bb255684,
  author    = {Varshney, M. and Yadav, R. and Namboodiri, V.P. and Hegde, R.M.},
  title     = {Learning Speaker-specific Lip-to-Speech Generation},
  booktitle = {ICPR22},
  year      = {2022},
  pages     = {491-498},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250496},
}

@inproceedings{bb255685,
  author    = {Shi, C. and Yang, S.},
  title     = {Spatial and Visual Perspective-Taking via View Rotation and Relation
Reasoning for Embodied Reference Understanding},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {XXXVI:201-218},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250497},
}

% Braces around MUGEN and the stylised word GENeration keep their
% capitalisation under styles that sentence-case titles.
@inproceedings{bb255686,
  author    = {Hayes, T. and Zhang, S.Y. and Yin, X. and Pang, G. and Sheng, S. and Yang, H. and Ge, S.W. and Hu, Q.Y. and Parikh, D.},
  title     = {{MUGEN}: A Playground for Video-Audio-Text Multimodal Understanding and
{GENeration}},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {VIII:431-449},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250498},
}

% Braces around SSW60 keep its capitalisation under styles that
% sentence-case titles.
% NOTE(review): the fifth author was stored as Aodha, O.M., which looks like
% the two-word surname Mac Aodha with Mac folded into the initials; it is
% restored here. Confirm against the published paper.
@inproceedings{bb255687,
  author    = {van Horn, G. and Qian, R. and Wilber, K. and Adam, H. and Mac Aodha, O. and Belongie, S.},
  title     = {Exploring Fine-Grained Audiovisual Categorization with the {SSW60}
Dataset},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {VIII:271-289},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250499},
}

% Braces around PACS and the camel-cased CommonSense keep their
% capitalisation under styles that sentence-case titles.
@inproceedings{bb255688,
  author    = {Yu, S. and Wu, P. and Liang, P.P. and Salakhutdinov, R. and Morency, L.P.},
  title     = {{PACS}: A Dataset for Physical Audiovisual {CommonSense} Reasoning},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {XXXVII:292-309},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250500},
}

@inproceedings{bb255689,
  author    = {Cheng, H.Y. and Liu, Z.Y. and Zhou, H. and Qian, C. and Wu, W. and Wang, L.M.},
  title     = {Joint-Modal Label Denoising for Weakly-Supervised Audio-Visual Video
Parsing},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {XXXIV:431-448},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250501},
}

@inproceedings{bb255690,
  author    = {Zhang, Z.Q. and Zhang, J. and Zhang, J.S. and Wu, M.H. and Fang, X. and Dai, L.R.},
  title     = {Learning Contextually Fused Audio-Visual Representations for
Audio-Visual Speech Recognition},
  booktitle = ICIP22,
  year      = {2022},
  pages     = {1346-1350},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250502},
}

@inproceedings{bb255691,
  author    = {Mo, S.T. and Morgado, P.},
  title     = {Localizing Visual Sounds the Easy Way},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {XXXVII:218-234},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250503},
}

% Braces around the model name keep its mixed capitalisation under styles
% that sentence-case titles.
@inproceedings{bb255692,
  author    = {Montesinos, J.F. and Kadandale, V.S. and Haro, G.},
  title     = {{VoViT}: Low Latency Graph-Based Audio-Visual Voice Separation
Transformer},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {XXXVII:310-326},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250504},
}

% Braces around the model name keep its mixed capitalisation under styles
% that sentence-case titles.
@inproceedings{bb255693,
  author    = {Tzinis, E. and Wisdom, S. and Remez, T. and Hershey, J.R.},
  title     = {{AudioScopeV2}: Audio-Visual Attention Architectures for Calibrated
Open-Domain On-Screen Sound Separation},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {XXXVII:368-385},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250505},
}

@inproceedings{bb255694,
  author    = {Zhou, J.X. and Wang, J.Y. and Zhang, J.Y. and Sun, W.X. and Zhang, J. and Birchfield, S. and Guo, D. and Kong, L.P. and Wang, M. and Zhong, Y.},
  title     = {Audio-Visual Segmentation},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {XXXVII:386-403},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250506},
}

@inproceedings{bb255695,
  author    = {Alcazar, J.L. and Cordes, M. and Zhao, C. and Ghanem, B.},
  title     = {End-to-End Active Speaker Detection},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {XXXVII:126-143},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250507},
}

@inproceedings{bb255696,
  author    = {Chen, C.G. and Gao, R.H. and Calamia, P. and Grauman, K.},
  title     = {Visual Acoustic Matching},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {18836-18846},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250508},
}

@inproceedings{bb255697,
  author    = {Lee, S. and Kim, H.I. and Ro, Y.M.},
  title     = {Weakly Paired Associative Learning for Sound and Image
Representations via Bimodal Associative Memory},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {10524-10533},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250509},
}

@inproceedings{bb255698,
  author    = {Vasudevan, A.B. and Dai, D.X. and Van Gool, L.J.},
  title     = {Sound and Visual Representation Learning with Multiple Pretraining
Tasks},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {14596-14606},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250510},
}

@inproceedings{bb255699,
  author    = {Xia, Y. and Zhao, Z.},
  title     = {Cross-modal Background Suppression for Audio-Visual Event
Localization},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {19957-19966},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT250511},
}

Last update: Mar 25, 2024 at 16:07:51