@article{bb256300,
        AUTHOR = "Nicolaou, M. A. and Pavlovic, V. and Pantic, M.",
        TITLE = "Dynamic Probabilistic {CCA} for Analysis of Affective Behavior and
Fusion of Continuous Annotations",
        JOURNAL = PAMI,
        VOLUME = "36",
        YEAR = "2014",
        NUMBER = "7",
        MONTH = jul,
        PAGES = "1299--1311",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251110"}

@inproceedings{bb256301,
        AUTHOR = "Nicolaou, M. A. and Pavlovic, V. and Pantic, M.",
        TITLE = "Dynamic Probabilistic {CCA} for Analysis of Affective Behaviour",
        BOOKTITLE = ECCV12,
        YEAR = "2012",
        PAGES = "VII: 98--111",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251111"}

@article{bb256302,
        AUTHOR = "Wang, L. J. and Qian, Y. and Scott, M. R. and Chen, G. and Soong, F. K.",
        TITLE = "Computer-Assisted Audiovisual Language Learning",
        JOURNAL = Computer,
        VOLUME = "45",
        YEAR = "2012",
        NUMBER = "6",
        MONTH = jun,
        PAGES = "38--47",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251112"}

@article{bb256303,
        AUTHOR = "Wu, Q. X. and Wang, Z. Y. and Deng, F. Q. and Chi, Z. and Feng, D. D.",
        TITLE = "Realistic Human Action Recognition with
Multimodal Feature Selection and Fusion",
        JOURNAL = SMCS,
        VOLUME = "43",
        YEAR = "2013",
        NUMBER = "4",
        PAGES = "875--885",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251113"}

@article{bb256304,
        AUTHOR = "Wu, Q. X. and Wang, Z. Y. and Deng, F. Q. and Xia, Y. and Kang, W. X. and Feng, D. D.",
        TITLE = "Discriminative two-level feature selection for realistic human action
recognition",
        JOURNAL = JVCIR,
        VOLUME = "24",
        YEAR = "2013",
        NUMBER = "7",
        PAGES = "1064--1074",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251114"}

@inproceedings{bb256305,
        AUTHOR = "Wu, Q. X. and Wang, Z. Y. and Deng, F. Q. and Feng, D. D.",
        TITLE = "Realistic Human Action Recognition with Audio Context",
        BOOKTITLE = DICTA10,
        YEAR = "2010",
        PAGES = "288--293",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251115"}

@inproceedings{bb256306,
        AUTHOR = "Wu, Q. X. and Lu, S. Y. and Wang, Z. Y. and Deng, F. Q. and Kang, W. X. and Feng, D. D.",
        TITLE = "Structure Context of Local Features in Realistic Human Action
Recognition",
        BOOKTITLE = VECTaR11,
        YEAR = "2011",
        PAGES = "1496--1501",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251116"}

@article{bb256307,
        AUTHOR = "Mirzaei, M. R. and Ghorshi, S. and Mortazavi, M.",
        TITLE = "Audio-visual speech recognition techniques in augmented reality
environments",
        JOURNAL = VC,
        VOLUME = "30",
        YEAR = "2014",
        NUMBER = "3",
        MONTH = mar,
        PAGES = "245--257",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251117"}

@article{bb256308,
        AUTHOR = "Bredin, H. and Roy, A. and Le, V. B. and Barras, C.",
        TITLE = "Person instance graphs for mono-, cross- and multi-modal person
recognition in multimedia data: application to speaker identification
in {TV} broadcast",
        JOURNAL = MultInfoRetr,
        VOLUME = "3",
        YEAR = "2014",
        NUMBER = "3",
        MONTH = sep,
        PAGES = "161--175",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251118"}

@article{bb256309,
        AUTHOR = "Ozasa, Y. and Nakano, M. and Ariki, Y. and Iwahashi, N.",
        TITLE = "Discriminating Unknown Objects from Known Objects Using Image and
Speech Information",
        JOURNAL = IEICE,
        VOLUME = "E98-D",
        YEAR = "2015",
        NUMBER = "3",
        MONTH = mar,
        PAGES = "704--711",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251119"}

@inproceedings{bb256310,
        AUTHOR = "Ozasa, Y. and Ariki, Y. and Nakano, M. and Iwahashi, N.",
        TITLE = "Disambiguation in Unknown Object Detection by Integrating Image and
Speech Recognition Confidences",
        BOOKTITLE = ACCV12,
        YEAR = "2012",
        PAGES = "I:85--96",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251120"}

@inproceedings{bb256311,
        AUTHOR = "Nishimura, H. and Ozasa, Y. and Ariki, Y. and Nakano, M.",
        TITLE = "Selection of Unknown Objects Specified by Speech Using Models
Constructed from Web Images",
        BOOKTITLE = ICPR14,
        YEAR = "2014",
        PAGES = "477--482",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251121"}

@inproceedings{bb256312,
        AUTHOR = "Nishimura, H. and Ozasa, Y. and Ariki, Y. and Nakano, M.",
        TITLE = "Object Recognition by Integrated Information Using Web Images",
        BOOKTITLE = ACPR13,
        YEAR = "2013",
        PAGES = "657--661",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251122"}

@inproceedings{bb256313,
        AUTHOR = "Ozasa, Y. and Enami, N. and Ariki, Y.",
        TITLE = "Color saliency for object identification",
        BOOKTITLE = FCV15,
        YEAR = "2015",
        PAGES = "1--5",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251123"}

@article{bb256314,
        AUTHOR = "Harte, N. and Gillen, E.",
        TITLE = "{TCD-TIMIT}: An Audio-Visual Corpus of Continuous Speech",
        JOURNAL = MultMed,
        VOLUME = "17",
        YEAR = "2015",
        NUMBER = "5",
        MONTH = may,
        PAGES = "603--615",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251124"}

@article{bb256315,
        AUTHOR = "Katsaggelos, A. K. and Bahaadini, S. and Molina, R.",
        TITLE = "Audiovisual Fusion: Challenges and New Approaches",
        JOURNAL = PIEEE,
        VOLUME = "103",
        YEAR = "2015",
        NUMBER = "9",
        MONTH = sep,
        PAGES = "1635--1653",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251125"}

@article{bb256316,
        AUTHOR = "Mezai, L. and Hachouf, F.",
        TITLE = "Score-Level Fusion of Face and Voice Using Particle Swarm
Optimization and Belief Functions",
        JOURNAL = HMS,
        VOLUME = "45",
        YEAR = "2015",
        NUMBER = "6",
        MONTH = dec,
        PAGES = "761--772",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251126"}

@article{bb256317,
        AUTHOR = "Wu, P. and Liu, H. and Li, X. and Fan, T. and Zhang, X.",
        TITLE = "A Novel Lip Descriptor for Audio-Visual Keyword Spotting Based on
Adaptive Decision Fusion",
        JOURNAL = MultMed,
        VOLUME = "18",
        YEAR = "2016",
        NUMBER = "3",
        MONTH = mar,
        PAGES = "326--338",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251127"}

@article{bb256318,
        AUTHOR = "Dilpazir, H. and Muhammad, Z. and Minhas, Q. and Ahmed, F. and Malik, H. and Mahmood, H.",
        TITLE = "Multivariate mutual information for audio video fusion",
        JOURNAL = SIViP,
        VOLUME = "10",
        YEAR = "2016",
        NUMBER = "7",
        MONTH = oct,
        PAGES = "1265--1272",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251128"}

@article{bb256319,
        AUTHOR = "Beyan, C. and Capozzi, F. and Becchio, C. and Murino, V.",
        TITLE = "Prediction of the Leadership Style of an Emergent Leader Using Audio
and Visual Nonverbal Features",
        JOURNAL = MultMed,
        VOLUME = "20",
        YEAR = "2018",
        NUMBER = "2",
        MONTH = feb,
        PAGES = "441--456",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251129"}

@article{bb256320,
        AUTHOR = "Fernandez Lopez, A. and Sukno, F. M.",
        TITLE = "Survey on automatic lip-reading in the era of deep learning",
        JOURNAL = IVC,
        VOLUME = "78",
        YEAR = "2018",
        PAGES = "53--72",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251130"}

@article{bb256321,
        AUTHOR = "Stafylakis, T. and Khan, M. H. and Tzimiropoulos, G.",
        TITLE = "Pushing the boundaries of audiovisual word recognition using Residual
Networks and {LSTMs}",
        JOURNAL = CVIU,
        VOLUME = "176-177",
        YEAR = "2018",
        PAGES = "22--32",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251131"}

@inproceedings{bb256322,
        AUTHOR = "Stafylakis, T. and Tzimiropoulos, G.",
        TITLE = "Zero-Shot Keyword Spotting for Visual Speech Recognition In-the-wild",
        BOOKTITLE = ECCV18,
        YEAR = "2018",
        PAGES = "II: 536--552",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251132"}

@article{bb256323,
        AUTHOR = "Liu, X. and Geng, J. J. and Ling, H. B. and Cheung, Y. M.",
        TITLE = "Attention guided deep audio-face fusion for efficient speaker naming",
        JOURNAL = PR,
        VOLUME = "88",
        YEAR = "2019",
        PAGES = "557--568",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251133"}

@article{bb256324,
        AUTHOR = "Tsiami, A. and Koutras, P. and Katsamanis, A. and Vatakis, A. and Maragos, P.",
        TITLE = "A behaviorally inspired fusion approach for computational audiovisual
saliency modeling",
        JOURNAL = SP:IC,
        VOLUME = "76",
        YEAR = "2019",
        PAGES = "186--200",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251134"}

@article{bb256325,
        AUTHOR = "Hsiao, S. and Sun, H. and Hsieh, M. and Tsai, M. and Tsao, Y. and Lee, C.",
        TITLE = "Toward Automating Oral Presentation Scoring During Principal
Certification Program Using Audio-Video Low-Level Behavior Profiles",
        JOURNAL = AffCom,
        VOLUME = "10",
        YEAR = "2019",
        NUMBER = "4",
        MONTH = oct,
        PAGES = "552--567",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251135"}

% NOTE(review): PAGES in the entry below is the placeholder "xx-yy", not a real
% page range; replace it with the article number or pages from the publisher record.
@article{bb256326,
        AUTHOR = "Ma, Y. and Hong, H. and Li, H. and Zhao, H. and Li, Y. S. and Sun, L. and Gu, C. and Zhu, X. H.",
        TITLE = "Non-Contact Speech Recovery Technology Using a 24 {GHz} Portable
Auditory Radar and Webcam",
        JOURNAL = RS,
        VOLUME = "12",
        YEAR = "2020",
        NUMBER = "4",
        PAGES = "xx-yy",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251136"}

@inproceedings{bb256327,
        AUTHOR = "Xu, B. and Wang, J. and Lu, C. and Guo, Y.",
        TITLE = "Watch to Listen Clearly: Visual Speech Enhancement Driven
Multi-modality Speech Recognition",
        BOOKTITLE = WACV20,
        YEAR = "2020",
        PAGES = "1626--1635",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251137"}

@article{bb256328,
        AUTHOR = "Pu, J. and Panagakis, Y. and Pantic, M.",
        TITLE = "Active Speaker Detection and Localization in Videos Using Low-Rank
and Kernelized Sparsity",
        JOURNAL = SPLetters,
        VOLUME = "27",
        YEAR = "2020",
        PAGES = "865--869",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251138"}

@article{bb256329,
        AUTHOR = "Tao, F. and Busso, C.",
        TITLE = "End-to-End Audiovisual Speech Recognition System With Multitask
Learning",
        JOURNAL = MultMed,
        VOLUME = "23",
        YEAR = "2021",
        PAGES = "1--11",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251139"}

@article{bb256330,
        AUTHOR = "Liu, L. and Feng, G. and Beautemps, D. and Zhang, X. P.",
        TITLE = "Re-Synchronization Using the Hand Preceding Model for Multi-Modal
Fusion in Automatic Continuous Cued Speech Recognition",
        JOURNAL = MultMed,
        VOLUME = "23",
        YEAR = "2021",
        PAGES = "292--305",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251140"}

@article{bb256331,
        AUTHOR = "Beyan, C. and Shahid, M. and Murino, V.",
        TITLE = "{RealVAD}: A Real-World Dataset and A Method for Voice Activity
Detection by Body Motion Analysis",
        JOURNAL = MultMed,
        VOLUME = "23",
        YEAR = "2021",
        PAGES = "2071--2085",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251141"}

@article{bb256332,
        AUTHOR = "Qian, X. Y. and Liu, Q. and Wang, J. D. and Li, H. Z.",
        TITLE = "Three-Dimensional Speaker Localization: Audio-Refined Visual Scaling
Factor Estimation",
        JOURNAL = SPLetters,
        VOLUME = "28",
        YEAR = "2021",
        PAGES = "1405--1409",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251142"}

@article{bb256333,
        AUTHOR = "Zheng, A. and Hu, M. and Jiang, B. and Huang, Y. and Yan, Y. and Luo, B.",
        TITLE = "Adversarial-Metric Learning for Audio-Visual Cross-Modal Matching",
        JOURNAL = MultMed,
        VOLUME = "24",
        YEAR = "2022",
        PAGES = "338--351",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251143"}

@article{bb256334,
        AUTHOR = "Xu, J. H. and Zhang, B. and Wang, Z. Y. and Wang, Y. and Chen, F. and Gao, J. B. and Feng, D. D.",
        TITLE = "Affective Audio Annotation of Public Speeches with Convolutional
Clustering Neural Network",
        JOURNAL = AffCom,
        VOLUME = "13",
        YEAR = "2022",
        NUMBER = "1",
        MONTH = jan,
        PAGES = "238--249",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251144"}

@article{bb256335,
        AUTHOR = "Afouras, T. and Chung, J. S. and Senior, A. and Vinyals, O. and Zisserman, A.",
        TITLE = "Deep Audio-Visual Speech Recognition",
        JOURNAL = PAMI,
        VOLUME = "44",
        YEAR = "2022",
        NUMBER = "12",
        MONTH = dec,
        PAGES = "8717--8727",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251145"}

@inproceedings{bb256336,
        AUTHOR = "Rahimi, A. and Afouras, T. and Zisserman, A.",
        TITLE = "Reading to Listen at the Cocktail Party:
Multi-Modal Speech Separation",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "10483--10492",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251146"}

@article{bb256337,
        AUTHOR = "Narain, J. and Johnson, K. T. and Quatieri, T. F. and Picard, R. W. and Maes, P.",
        TITLE = "Modeling Real-World Affective and Communicative Nonverbal
Vocalizations From Minimally Speaking Individuals",
        JOURNAL = AffCom,
        VOLUME = "13",
        YEAR = "2022",
        NUMBER = "4",
        MONTH = oct,
        PAGES = "2238--2253",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251147"}

@article{bb256338,
        AUTHOR = "Gong, Y. and Liu, A. H. and Rouditchenko, A. and Glass, J.",
        TITLE = "{UAVM}: Towards Unifying Audio and Visual Models",
        JOURNAL = SPLetters,
        VOLUME = "29",
        YEAR = "2022",
        PAGES = "2437--2441",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251148"}

@inproceedings{bb256339,
        AUTHOR = "Oya, T. and Iwase, S. and Morishima, S.",
        TITLE = "The Sound of Bounding-Boxes",
        BOOKTITLE = "ICPR22",
        YEAR = "2022",
        PAGES = "9--15",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251149"}

@article{bb256340,
        AUTHOR = "Zhou, J. X. and Guo, D. and Wang, M.",
        TITLE = "Contrastive Positive Sample Propagation Along the Audio-Visual Event
Line",
        JOURNAL = PAMI,
        VOLUME = "45",
        YEAR = "2023",
        NUMBER = "6",
        MONTH = jun,
        PAGES = "7239--7257",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251150"}

@inproceedings{bb256341,
        AUTHOR = "Zhou, J. X. and Zheng, L. and Zhong, Y. and Hao, S. J. and Wang, M.",
        TITLE = "Positive Sample Propagation along the Audio-Visual Event Line",
        BOOKTITLE = CVPR21,
        YEAR = "2021",
        PAGES = "8432--8440",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251151"}

@article{bb256342,
        AUTHOR = "Sen, T. K. and Naven, G. and Gerstner, L. and Bagley, D. and Baten, R. A. and Rahman, W. and Hasan, M. K. and Haut, K. and Mamun, A. A. and Samrose, S. and Solbu, A. and Barnes, R. E. and Frank, M. G. and Hoque, E.",
        TITLE = "{DBATES}: Dataset for Discerning Benefits of Audio, Textual, and Facial
Expression Features in Competitive Debate Speeches",
        JOURNAL = AffCom,
        VOLUME = "14",
        YEAR = "2023",
        NUMBER = "2",
        MONTH = apr,
        PAGES = "1028--1043",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251152"}

@article{bb256343,
        AUTHOR = "Sharma, G. and Dhall, A. and Cai, J. F.",
        TITLE = "Audio-Visual Automatic Group Affect Analysis",
        JOURNAL = AffCom,
        VOLUME = "14",
        YEAR = "2023",
        NUMBER = "2",
        MONTH = apr,
        PAGES = "1056--1069",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251153"}

@article{bb256344,
        AUTHOR = "Cheng, W. L. and Tang, W. and Huang, Y. and Luo, Y. and Wang, L.",
        TITLE = "A Reconstruction-Based Visual-Acoustic-Semantic Embedding Method for
Speech-Image Retrieval",
        JOURNAL = MultMed,
        VOLUME = "25",
        YEAR = "2023",
        PAGES = "4067--4080",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251154"}

@article{bb256345,
        AUTHOR = "Kefalas, T. and Fotiadou, E. and Georgopoulos, M. and Panagakis, Y. and Ma, P. C. and Petridis, S. and Stafylakis, T. and Pantic, M.",
        TITLE = "{KAN-AV} dataset for audio-visual face and speech analysis in the wild",
        JOURNAL = IVC,
        VOLUME = "140",
        YEAR = "2023",
        PAGES = "104839",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251155"}

@article{bb256346,
        AUTHOR = "Wang, X. M. and Mi, J. C. and Li, B. Q. and Zhao, Y. X. and Meng, J. X.",
        TITLE = "{CATNet}: Cross-modal fusion for audio-visual speech recognition",
        JOURNAL = PRL,
        VOLUME = "178",
        YEAR = "2024",
        PAGES = "216--222",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251156"}

@article{bb256347,
        AUTHOR = "Zhu, D. D. and Zhang, K. W. and Zhang, N. and Zhou, Q. Q. and Min, X. K. and Zhai, G. T. and Yang, X. K.",
        TITLE = "Unified Audio-Visual Saliency Model for Omnidirectional Videos With
Spatial Audio",
        JOURNAL = MultMed,
        VOLUME = "26",
        YEAR = "2024",
        PAGES = "764--775",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251157"}

@article{bb256348,
        AUTHOR = "Qian, X. Y. and Xue, W. and Zhang, Q. and Tao, R. J. and Li, H. Z.",
        TITLE = "Deep Cross-Modal Retrieval Between Spatial Image and Acoustic Speech",
        JOURNAL = MultMed,
        VOLUME = "26",
        YEAR = "2024",
        PAGES = "4480--4489",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251158"}

@article{bb256349,
        AUTHOR = "Xie, J. W. and Liu, Z. and Li, G. Y. and Song, Y. J.",
        TITLE = "Audio-visual saliency prediction with multisensory perception and
integration",
        JOURNAL = IVC,
        VOLUME = "143",
        YEAR = "2024",
        PAGES = "104955",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251159"}

@inproceedings{bb256350,
        AUTHOR = "Rachavarapu, K. K. and Rajagopalan, A. N.",
        TITLE = "Boosting Positive Segments for Weakly-Supervised Audio-Visual Video
Parsing",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "10158--10168",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251160"}

@inproceedings{bb256351,
        AUTHOR = "Chen, J. and Wang, W. G. and Liu, S. and Li, H. S. and Yang, Y.",
        TITLE = "Omnidirectional Information Gathering for Knowledge Transfer-based
Audio-Visual Navigation",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "10959--10969",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251161"}

@inproceedings{bb256352,
        AUTHOR = "Cheng, X. and Jin, T. and Huang, R. J. and Li, L. J. and Lin, W. and Wang, Z. and Wang, Y. and Liu, H. and Yin, A. and Zhao, Z.",
        TITLE = "{MixSpeech}: Cross-Modality Self-Learning with Audio-Visual Stream
Mixup for Visual Speech Translation and Recognition",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "15689--15699",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251162"}

@inproceedings{bb256353,
        AUTHOR = "Georgescu, M. I. and Fonseca, E. and Ionescu, R. T. and Lucic, M. and Schmid, C. and Arnab, A.",
        TITLE = "Audiovisual Masked Autoencoders",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "16098--16108",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251163"}

@inproceedings{bb256354,
        AUTHOR = "Chen, M. F. and Su, K. and Shlizerman, E.",
        TITLE = "Be Everywhere - Hear Everything ({BEE}): Audio Scene Reconstruction by
Sparse Audio-Visual Samples",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "7819--7828",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251164"}

@inproceedings{bb256355,
        AUTHOR = "Xie, H. X. and Lee, M. X. and Chen, T. J. and Chen, H. J. and Liu, H. I. and Shuai, H. H. and Cheng, W. H.",
        TITLE = "Most Important Person-guided Dual-branch Cross-Patch Attention for
Group Affect Recognition",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "20541--20551",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251165"}

@inproceedings{bb256356,
        AUTHOR = "Djilali, Y. A. D. and Narayan, S. and Boussaid, H. and Almazrouei, E. and Debbah, M.",
        TITLE = "{Lip2Vec}: Efficient and Robust Visual Speech Recognition via
Latent-to-Latent Visual to Audio Representation Mapping",
        BOOKTITLE = ICCV23,
        YEAR = "2023",
        PAGES = "13744--13755",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251166"}

@inproceedings{bb256357,
        AUTHOR = "Chen, G. Y. and Zhang, D. and Liu, T. and Du, X. Y.",
        TITLE = "Local-Global Contrast for Learning Voice-Face Representations",
        BOOKTITLE = ICIP23,
        YEAR = "2023",
        PAGES = "51--55",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251167"}

@inproceedings{bb256358,
        AUTHOR = "Hong, J. and Kim, M. and Choi, J. and Ro, Y. M.",
        TITLE = "Watch or Listen: Robust Audio-Visual Speech Recognition with Visual
Corruption Modeling and Reliability Scoring",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "18783--18794",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251168"}

@inproceedings{bb256359,
        AUTHOR = "Gao, J. Y. and Chen, M. Y. and Xu, C. S.",
        TITLE = "Collecting Cross-Modal Presence-Absence Evidence for
Weakly-Supervised Audio-Visual Event Perception",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "18827--18836",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251169"}

@inproceedings{bb256360,
        AUTHOR = "Porgali, B. and Albiero, V. and Ryda, J. and Ferrer, C. C. and Hazirbas, C.",
        TITLE = "The {Casual Conversations} v2 Dataset: A diverse, large benchmark for
measuring fairness and robustness in audio/vision/speech models",
        BOOKTITLE = FaDE-TCV23,
        YEAR = "2023",
        PAGES = "10--17",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251170"}

@inproceedings{bb256361,
        AUTHOR = "Xiong, J. W. and Wang, G. and Zhang, P. and Huang, W. and Zha, Y. F. and Zhai, G. T.",
        TITLE = "{CASP-Net}: Rethinking Video Saliency Prediction from an Audio-Visual
Consistency Perceptual Perspective",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "6441--6450",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251171"}

@inproceedings{bb256362,
        AUTHOR = "Huang, C. and Tian, Y. and Kumar, A. and Xu, C. L.",
        TITLE = "Egocentric Audio-Visual Object Localization",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "22910--22921",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251172"}

@inproceedings{bb256363,
        AUTHOR = "Liao, J. and Duan, H. and Feng, K. and Zhao, W. B. and Yang, Y. B. and Chen, L. Y.",
        TITLE = "A Light Weight Model for Active Speaker Detection",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "22932--22941",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251173"}

@inproceedings{bb256364,
        AUTHOR = "Seo, P. H. and Nagrani, A. and Schmid, C.",
        TITLE = "{AVFormer}: Injecting Vision into Frozen Speech Models for Zero-Shot
{AV-ASR}",
        BOOKTITLE = CVPR23,
        YEAR = "2023",
        PAGES = "22922--22931",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251174"}

@inproceedings{bb256365,
        AUTHOR = "Feng, D. and Yang, S. and Shan, S. G. and Chen, X. L.",
        TITLE = "Audio-Driven Deformation Flow for Effective Lip Reading",
        BOOKTITLE = "ICPR22",
        YEAR = "2022",
        PAGES = "274--280",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251175"}

@inproceedings{bb256366,
        AUTHOR = "Varshney, M. and Yadav, R. and Namboodiri, V. P. and Hegde, R. M.",
        TITLE = "Learning Speaker-specific Lip-to-Speech Generation",
        BOOKTITLE = "ICPR22",
        YEAR = "2022",
        PAGES = "491--498",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251176"}

@inproceedings{bb256367,
        AUTHOR = "Shi, C. and Yang, S.",
        TITLE = "Spatial and Visual Perspective-Taking via View Rotation and Relation
Reasoning for Embodied Reference Understanding",
        BOOKTITLE = ECCV22,
        YEAR = "2022",
        PAGES = "XXXVI:201--218",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251177"}

@inproceedings{bb256368,
        AUTHOR = "Hayes, T. and Zhang, S. Y. and Yin, X. and Pang, G. and Sheng, S. and Yang, H. and Ge, S. W. and Hu, Q. Y. and Parikh, D.",
        TITLE = "{MUGEN}: A Playground for Video-Audio-Text Multimodal Understanding and
{GENeration}",
        BOOKTITLE = ECCV22,
        YEAR = "2022",
        PAGES = "VIII:431--449",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251178"}

@inproceedings{bb256369,
        AUTHOR = "Van Horn, G. and Qian, R. and Wilber, K. and Adam, H. and Mac Aodha, O. and Belongie, S.",
        TITLE = "Exploring Fine-Grained Audiovisual Categorization with the {SSW60}
Dataset",
        BOOKTITLE = ECCV22,
        YEAR = "2022",
        PAGES = "VIII:271--289",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251179"}

@inproceedings{bb256370,
        AUTHOR = "Yu, S. and Wu, P. and Liang, P. P. and Salakhutdinov, R. and Morency, L. P.",
        TITLE = "{PACS}: A Dataset for Physical Audiovisual {CommonSense} Reasoning",
        BOOKTITLE = ECCV22,
        YEAR = "2022",
        PAGES = "XXXVII:292--309",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251180"}

@inproceedings{bb256371,
        AUTHOR = "Cheng, H. Y. and Liu, Z. Y. and Zhou, H. and Qian, C. and Wu, W. and Wang, L. M.",
        TITLE = "Joint-Modal Label Denoising for Weakly-Supervised Audio-Visual Video
Parsing",
        BOOKTITLE = ECCV22,
        YEAR = "2022",
        PAGES = "XXXIV:431--448",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251181"}

@inproceedings{bb256372,
        AUTHOR = "Zhang, Z. Q. and Zhang, J. and Zhang, J. S. and Wu, M. H. and Fang, X. and Dai, L. R.",
        TITLE = "Learning Contextually Fused Audio-Visual Representations for
Audio-Visual Speech Recognition",
        BOOKTITLE = ICIP22,
        YEAR = "2022",
        PAGES = "1346--1350",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251182"}

@inproceedings{bb256373,
        AUTHOR = "Mo, S. T. and Morgado, P.",
        TITLE = "Localizing Visual Sounds the Easy Way",
        BOOKTITLE = ECCV22,
        YEAR = "2022",
        PAGES = "XXXVII:218--234",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251183"}

@inproceedings{bb256374,
        AUTHOR = "Montesinos, J. F. and Kadandale, V. S. and Haro, G.",
        TITLE = "{VoViT}: Low Latency Graph-Based Audio-Visual Voice Separation
Transformer",
        BOOKTITLE = ECCV22,
        YEAR = "2022",
        PAGES = "XXXVII:310--326",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251184"}

@inproceedings{bb256375,
        AUTHOR = "Tzinis, E. and Wisdom, S. and Remez, T. and Hershey, J. R.",
        TITLE = "{AudioScopeV2}: Audio-Visual Attention Architectures for Calibrated
Open-Domain On-Screen Sound Separation",
        BOOKTITLE = ECCV22,
        YEAR = "2022",
        PAGES = "XXXVII:368--385",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251185"}

@inproceedings{bb256376,
        AUTHOR = "Zhou, J. X. and Wang, J. Y. and Zhang, J. Y. and Sun, W. X. and Zhang, J. and Birchfield, S. and Guo, D. and Kong, L. P. and Wang, M. and Zhong, Y.",
        TITLE = "Audio-Visual Segmentation",
        BOOKTITLE = ECCV22,
        YEAR = "2022",
        PAGES = "XXXVII:386--403",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251186"}

@inproceedings{bb256377,
        AUTHOR = "Alcazar, J. L. and Cordes, M. and Zhao, C. and Ghanem, B.",
        TITLE = "End-to-End Active Speaker Detection",
        BOOKTITLE = ECCV22,
        YEAR = "2022",
        PAGES = "XXXVII:126--143",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251187"}

@inproceedings{bb256378,
        AUTHOR = "Chen, C. G. and Gao, R. H. and Calamia, P. and Grauman, K.",
        TITLE = "Visual Acoustic Matching",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "18836--18846",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251188"}

@inproceedings{bb256379,
        AUTHOR = "Lee, S. and Kim, H. I. and Ro, Y. M.",
        TITLE = "Weakly Paired Associative Learning for Sound and Image
Representations via Bimodal Associative Memory",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "10524--10533",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251189"}

@inproceedings{bb256380,
        AUTHOR = "Vasudevan, A. B. and Dai, D. X. and Van Gool, L. J.",
        TITLE = "Sound and Visual Representation Learning with Multiple Pretraining
Tasks",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "14596--14606",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251190"}

@inproceedings{bb256381,
        AUTHOR = "Xia, Y. and Zhao, Z.",
        TITLE = "Cross-modal Background Suppression for Audio-Visual Event
Localization",
        BOOKTITLE = CVPR22,
        YEAR = "2022",
        PAGES = "19957--19966",
        BIBSOURCE = "http://www.visionbib.com/bibliography/people916.html#TT251191"}

@inproceedings{bb256382,
  author    = {Jiang, H. and Murdock, C. and Ithapu, V.K.},
  title     = {Egocentric Deep Multi-Channel Audio-Visual Active Speaker
Localization},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {10534-10542},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT251192},
}

@inproceedings{bb256383,
  author    = {Ng, E. and Joo, H. and Hu, L.W. and Li, H. and Darrell, T.J. and Kanazawa, A. and Ginosar, S.},
  title     = {Learning to Listen: Modeling Non-Deterministic Dyadic Facial Motion},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {20363-20373},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT251193},
}

@inproceedings{bb256384,
  author    = {Mercea, O.B. and Hummel, T. and Koepke, A.S. and Akata, Z.},
  title     = {Temporal and Cross-modal Attention for Audio-Visual Zero-Shot Learning},
  booktitle = ECCV22,
  year      = {2022},
  pages     = {XX:488-505},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT251194},
}

@inproceedings{bb256385,
  author    = {Mercea, O.B. and Riesch, L. and Koepke, A.S. and Akata, Z.},
  title     = {Audiovisual Generalised Zero-shot Learning with Cross-modal Attention
and Language},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {10543-10553},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT251195},
}

@inproceedings{bb256386,
  author    = {Karas, V. and Tellamekala, M.K. and Mallol Ragolta, A. and Valstar, M. and Schuller, B.W.},
  title     = {Time-Continuous Audiovisual Fusion with Recurrence vs Attention for
In-The-Wild Affect Recognition},
  booktitle = ABAW22,
  year      = {2022},
  pages     = {2381-2390},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT251196},
}

@inproceedings{bb256387,
  author    = {Yang, K. and Markovic, D. and Krenn, S. and Agrawal, V. and Richard, A.},
  title     = {Audio-Visual Speech Codecs: Rethinking Audio-Visual Speech
Enhancement by Re-Synthesis},
  booktitle = CVPR22,
  year      = {2022},
  pages     = {8217-8227},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT251197},
}

@inproceedings{bb256388,
  author    = {Kim, M. and Hong, J. and Park, S.J. and Ro, Y.M.},
  title     = {Multi-modality Associative Bridging through Memory:
Speech Sound Recollected from Face Video},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {296-306},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT251198},
}

@inproceedings{bb256389,
  author    = {Li, J. and Kang, D. and Pei, W.J. and Zhe, X.F. and Zhang, Y. and He, Z.Y. and Bao, L.C.},
  title     = {Audio2Gestures: Generating Diverse Gestures from Speech Audio with
Conditional Variational Autoencoders},
  booktitle = ICCV21,
  year      = {2021},
  pages     = {11273-11282},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT251199},
}

@inproceedings{bb256390,
  author    = {Ye, M. and You, Q.Z. and Ma, F.L.},
  title     = {QUALIFIER: Question-Guided Self-Attentive Multimodal Fusion Network
for Audio Visual Scene-Aware Dialog},
  booktitle = WACV22,
  year      = {2022},
  pages     = {2503-2511},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT251200},
}

@inproceedings{bb256391,
  author    = {Yao, S. and Min, X.K. and Zhai, G.T.},
  title     = {Deep Audio-Visual Fusion Neural Network for Saliency Estimation},
  booktitle = ICIP21,
  year      = {2021},
  pages     = {1604-1608},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT251201},
}

@inproceedings{bb256392,
  author    = {Krishnamurthy, S.},
  title     = {Learning Self-supervised Audio-Visual Representations for Sound
Recommendations},
  booktitle = ISVC21,
  year      = {2021},
  pages     = {II:124-138},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT251202},
}

@inproceedings{bb256393,
  author    = {Shi, W.J. and Pattichis, M.S. and Celedon Pattichis, S. and LopezLeiva, C.},
  title     = {Talking Detection in Collaborative Learning Environments},
  booktitle = CAIP21,
  year      = {2021},
  pages     = {II:242-251},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT251203},
}

@inproceedings{bb256394,
  author    = {Wang, G. and Chen, C.Z. and Fan, D.P. and Hao, A. and Qin, H.},
  title     = {From Semantic Categories to Fixations: A Novel Weakly-supervised
Visual-auditory Saliency Detection Approach},
  booktitle = CVPR21,
  year      = {2021},
  pages     = {15114-15123},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT251204},
}

@inproceedings{bb256395,
  author    = {Wen, P.S. and Xu, Q.Q. and Jiang, Y.B. and Yang, Z.Y. and He, Y. and Huang, Q.M.},
  title     = {Seeking the Shape of Sound:
An Adaptive Framework for Learning Voice-Face Association},
  booktitle = CVPR21,
  year      = {2021},
  pages     = {16342-16351},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT251205},
}

@inproceedings{bb256396,
  author    = {Monfort, M. and Jin, S. and Liu, A. and Harwath, D. and Feris, R.S. and Glass, J. and Oliva, A.},
  title     = {Spoken Moments: Learning Joint Audio-Visual Representations from
Video Descriptions},
  booktitle = CVPR21,
  year      = {2021},
  pages     = {14866-14876},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT251206},
}

@inproceedings{bb256397,
  author    = {Tian, Y.P. and Xu, C.L.},
  title     = {Can audio-visual integration strengthen robustness under multimodal
attacks?},
  booktitle = CVPR21,
  year      = {2021},
  pages     = {5597-5607},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT251207},
}

@inproceedings{bb256398,
  author    = {Morgado, P. and Vasconcelos, N.M. and Misra, I.},
  title     = {Audio-Visual Instance Discrimination with Cross-Modal Agreement},
  booktitle = CVPR21,
  year      = {2021},
  pages     = {12470-12481},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT251208},
}

@inproceedings{bb256399,
  author    = {Morgado, P. and Misra, I. and Vasconcelos, N.M.},
  title     = {Robust Audio-Visual Instance Discrimination},
  booktitle = CVPR21,
  year      = {2021},
  pages     = {12929-12940},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT251209},
}

Last update: Apr 18, 2024 at 11:38:49