@inproceedings{bb287800,
  author    = {Zhang, D. and Ghobakhlou, A. and Kasabov, N.},
  title     = {An adaptive model of person identification combining speech and image information},
  booktitle = ICARCV04,
  volume    = {I},
  pages     = {413--418},
  year      = {2004},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT282435},
}
@inproceedings{bb287801,
  author    = {Kratt, J. and Metze, F. and Stiefelhagen, R. and Waibel, A.},
  title     = {Large Vocabulary Audio-Visual Speech Recognition Using the {Janus} Speech Recognition Toolkit},
  booktitle = DAGM04,
  pages     = {488--495},
  year      = {2004},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT282436},
}
@inproceedings{bb287802,
  author    = {Hanafiah, Z. M. and Yamazaki, C. and Nakamura, A. and Kuno, Y.},
  title     = {Understanding inexplicit utterances using vision for helper robots},
  booktitle = ICPR04,
  volume    = {IV},
  pages     = {925--928},
  year      = {2004},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT282437},
}
@inproceedings{bb287803,
  author    = {Hermann, T. and Henning, T. and Ritter, H.},
  title     = {Gesture Desk an Integrated Multi-modal Gestural Workplace for Sonification},
  booktitle = GW03,
  pages     = {369--379},
  year      = {2003},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT282438},
}
@inproceedings{bb287804,
  author    = {Merola, G.},
  title     = {The Effects of the Gesture Viewpoint on the Students' Memory of Words and Stories},
  booktitle = GW07,
  pages     = {272--281},
  year      = {2007},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT282439},
}
@inproceedings{bb287805,
  author    = {Merola, G. and Poggi, I.},
  title     = {Multimodality and Gestures in the Teacher's Communication},
  booktitle = GW03,
  pages     = {101--111},
  year      = {2003},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT282440},
}
@inproceedings{bb287806,
  author    = {Kranstedt, A. and Kuhnlein, P. and Wachsmuth, I.},
  title     = {Deixis in Multimodal Human Computer Interaction: An Interdisciplinary Approach},
  booktitle = GW03,
  pages     = {112--123},
  year      = {2003},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT282441},
}
@inproceedings{bb287807,
  author    = {Saeed, K. and Kozlowski, M.},
  title     = {An Image-Based System for Spoken-Letter Recognition},
  booktitle = CAIP03,
  pages     = {494--502},
  year      = {2003},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT282442},
}
@inproceedings{bb287808,
  author    = {Ho, P. and Armington, J.},
  title     = {A Dual-Factor Authentication System Featuring Speaker Verification and Token Technology},
  booktitle = AVBPA03,
  pages     = {128--136},
  year      = {2003},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT282443},
}
@inproceedings{bb287809,
  author    = {Fox, N. A. and Reilly, R. B.},
  title     = {Audio-Visual Speaker Identification Based on the Use of Dynamic Audio and Visual Features},
  booktitle = AVBPA03,
  pages     = {743--751},
  year      = {2003},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT282444},
}
@inproceedings{bb287810,
  author    = {Czyz, J. and Bengio, S. and Marcel, C. and Vandendorpe, L.},
  title     = {Scalability Analysis of Audio-Visual Person Identity Verification},
  booktitle = AVBPA03,
  pages     = {752--760},
  year      = {2003},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT282445},
}
@inproceedings{bb287811,
  author    = {Bengio, S.},
  title     = {Multimodal Authentication Using Asynchronous {HMMs}},
  booktitle = AVBPA03,
  pages     = {770--777},
  year      = {2003},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT282446},
}
@inproceedings{bb287812,
  author    = {Lucey, S. and Chen, T. H.},
  title     = {Improved Audio-Visual Speaker Recognition via the Use of a Hybrid Combination Strategy},
  booktitle = AVBPA03,
  pages     = {929--936},
  year      = {2003},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT282447},
}
@inproceedings{bb287813,
  author    = {Krahnstoever, N. and Schapira, E. and Kettebekov, S. and Sharma, R.},
  title     = {Multimodal human-computer interaction for crisis management systems},
  booktitle = WACV02,
  pages     = {203--207},
  year      = {2002},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT282448},
}
@inproceedings{bb287814,
  author    = {Kettebekov, S. and Yeasin, M. and Sharma, R.},
  title     = {Improving continuous gesture recognition with spoken prosody},
  booktitle = CVPR03,
  volume    = {I},
  pages     = {565--570},
  year      = {2003},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT282449},
}
@inproceedings{bb287815,
  author    = {Poh, N. and Korczak, J.},
  title     = {Hybrid Biometric Person Authentication Using Face and Voice Features},
  booktitle = AVBPA01,
  pages     = {348},
  year      = {2001},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT282450},
}
@inproceedings{bb287816,
  author    = {Nakamura, S.},
  title     = {Fusion of Audio-Visual Information for Integrated Speech Processing},
  booktitle = AVBPA01,
  pages     = {127},
  year      = {2001},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT282451},
}
@inproceedings{bb287817,
  author    = {Sullivan, K. P. H. and Pelecanos, J.},
  title     = {Revisiting {Carl Bildt's} Impostor: Would a Speaker Verification System Foil Him?},
  booktitle = AVBPA01,
  pages     = {144},
  year      = {2001},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT282452},
}
% NOTE(review): "MIT AIM" looks like a report series rather than a proceedings
% title; a techreport entry type may fit better -- confirm before changing.
@inproceedings{bb287818,
  author    = {Geiger, G. and Ezzat, T. and Poggio, T.},
  title     = {Perceptual Evaluation of Video-Realistic Speech},
  booktitle = {MIT AIM},
  year      = {2003},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT282453},
}
@inproceedings{bb287819,
  author    = {Zhang, X. Z. and Mersereau, R. M. and Clements, M.},
  title     = {Bimodal fusion in audio-visual speech recognition},
  booktitle = ICIP02,
  volume    = {I},
  pages     = {964--967},
  year      = {2002},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT282454},
}
@inproceedings{bb287820,
  author    = {Graf, H. P. and Cosatto, E. and Strom, V. and Huang, F. J.},
  title     = {Visual prosody: facial movements accompanying speech},
  booktitle = AFGR02,
  pages     = {381--386},
  year      = {2002},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT282455},
}
% NOTE(review): the booktitle macro UMD looks like an institution, so this may
% be a thesis or report rather than a conference paper -- confirm entry type.
@inproceedings{bb287821,
  author    = {Qi, Y.},
  title     = {Learning Algorithms for Audio and Video Processing: Independent Component Analysis and Support Vector Machine Based Approaches},
  booktitle = UMD,
  year      = {2000},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT282456},
}
@inproceedings{bb287822,
  author    = {Nankaku, Y. and Tokuda, K. and Kitamura, T.},
  title     = {Normalized Training for {HMM}-based Visual Speech Recognition},
  booktitle = ICIP00,
  volume    = {III},
  pages     = {234--237},
  year      = {2000},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT282457},
}
@inproceedings{bb287823,
  author    = {Zhang, Y. and Levinson, S. and Huang, T. S.},
  title     = {Speaker Independent Audio-Visual Speech Recognition},
  booktitle = ICME00,
  pages     = {TP8},
  year      = {2000},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT282458},
}
@inproceedings{bb287824,
  author    = {Pan, H. and Huang, T. S.},
  title     = {A New Approach to Integrate Audio and Visual Features of Speech},
  booktitle = ICME00,
  pages     = {TP8},
  year      = {2000},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT282459},
}
@inproceedings{bb287825,
  author    = {Potamianos, G. and Verma, A. and Neti, C. and Iyengar, G. and Basu, S.},
  title     = {A Cascade Image Transform for Speaker Independent Automatic Speech Reading},
  booktitle = ICME00,
  pages     = {TP8},
  year      = {2000},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT282460},
}
@inproceedings{bb287826,
  author    = {Pan, H. and Liang, Z. P. and Huang, T. S.},
  title     = {Fusing Audio and Visual Features of Speech},
  booktitle = ICIP00,
  volume    = {III},
  pages     = {214--217},
  year      = {2000},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT282461},
}
@inproceedings{bb287827,
  author    = {Faruquie, T. A. and Majumdar, A. and Rajput, N. and Subramaniam, L. V.},
  title     = {Large Vocabulary Audio-visual Speech Recognition Using Active Shape Models},
  booktitle = ICPR00,
  volume    = {III},
  pages     = {106--109},
  year      = {2000},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT282462},
}
@inproceedings{bb287828,
  author    = {Yu, K. and Jiang, X. and Bunke, H.},
  title     = {Combining Acoustic and Visual Classifiers for the Recognition of Spoken Sentences},
  booktitle = ICPR00,
  volume    = {II},
  pages     = {491--494},
  year      = {2000},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT282463},
}
@inproceedings{bb287829,
  author    = {Nam, J. and Alghoniemy, M. and Tewfik, A. H.},
  title     = {Audio-visual content-based violent scene characterization},
  booktitle = ICIP98,
  volume    = {I},
  pages     = {353--357},
  year      = {1998},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT282464},
}
@inproceedings{bb287830,
  author    = {Luettin, J. and Dupont, S.},
  title     = {Continuous Audio-Visual Speech Recognition},
  booktitle = ECCV98,
  volume    = {II},
  pages     = {657},
  year      = {1998},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT282465},
}
@inproceedings{bb287831,
  author    = {Yang, J. and Xiao, J. and Ritter, M.},
  title     = {Automatic Selection of Visemes for Image-based Visual Speech Synthesis},
  booktitle = ICME00,
  pages     = {TP8},
  year      = {2000},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT282466},
}
@inproceedings{bb287832,
  author    = {Sharma, R. and Cai, J. Y. and Chakravarthy, S. and Poddar, I. and Sethi, Y.},
  title     = {Exploiting Speech/Gesture Co-occurrence for Improving Continuous Gesture Recognition in Weather Narration},
  booktitle = AFGR00,
  pages     = {422--427},
  year      = {2000},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT282467},
}
@inproceedings{bb287833,
  author    = {Yamamoto, E. and Nakamura, S. and Shikano, K.},
  title     = {Lip Movement Synthesis from Speech Based on Hidden {Markov} Models},
  booktitle = AFGR98,
  pages     = {154--159},
  year      = {1998},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT282468},
}
@inproceedings{bb287834,
  author    = {Roy, D. and Pentland, A. P.},
  title     = {Automatic spoken affect classification and analysis},
  booktitle = AFGR96,
  pages     = {363--367},
  year      = {1996},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT282469},
}
@inproceedings{bb287835,
  author    = {Petajan, E. D.},
  title     = {An Architecture for Automatic Lipreading to Enhance Speech Recognition},
  booktitle = CVPR85,
  pages     = {40--47},
  year      = {1985},
  bibsource = {http://www.visionbib.com/bibliography/people916.html#TT282470},
}
@article{bb287836,
  author    = {Zotkin, D. N. and Duraiswami, R. and Davis, L. S.},
  title     = {Joint Audio-Visual Tracking Using Particle Filters},
  journal   = JASP,
  volume    = {2002},
  number    = {11},
  pages     = {1154},
  month     = nov,
  year      = {2002},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282471},
}
@article{bb287837,
  author    = {Garg, A. and Pavlovic, V. and Rehg, J. M.},
  title     = {Boosted learning in dynamic {Bayesian} networks for multimodal speaker detection},
  journal   = PIEEE,
  volume    = {91},
  number    = {9},
  pages     = {1355--1369},
  month     = sep,
  year      = {2003},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282472},
}
@inproceedings{bb287838,
  author    = {Garg, A. and Pavlovic, V. and Rehg, J. M.},
  title     = {Audio-visual speaker detection using dynamic {Bayesian} networks},
  booktitle = AFGR00,
  pages     = {384--390},
  year      = {2000},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282473},
}
@inproceedings{bb287839,
  author    = {Pavlovic, V. and Garg, A. and Rehg, J. M. and Huang, T. S.},
  title     = {Multimodal Speaker Detection using Error Feedback Dynamic {Bayesian} Networks},
  booktitle = CVPR00,
  volume    = {II},
  pages     = {34--41},
  year      = {2000},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282474},
}
@inproceedings{bb287840,
  author    = {Pavlovic, V. and Berry, G. and Huang, T. S.},
  title     = {Integration of Audio/Visual Information for Use in Human-Computer Intelligent Interaction},
  booktitle = ICIP97,
  volume    = {I},
  pages     = {121--124},
  year      = {1997},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282475},
}
@inproceedings{bb287841,
  author    = {Choudhury, T. and Rehg, J. M. and Pavlovic, V. and Pentland, A. P.},
  title     = {Boosting and structure learning in dynamic {Bayesian} networks for audio-visual speaker detection},
  booktitle = ICPR02,
  volume    = {III},
  pages     = {789--794},
  year      = {2002},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282476},
}
@inproceedings{bb287842,
  author    = {Pavlovic, V.},
  title     = {Multimodal tracking and classification of audio-visual features},
  booktitle = ICIP98,
  volume    = {I},
  pages     = {343--347},
  year      = {1998},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282477},
}
@inproceedings{bb287843,
  author    = {Rehg, J. M. and Murphy, K. P. and Fieguth, P. W.},
  title     = {Vision-Based Speaker Detection Using {Bayesian} Networks},
  booktitle = CVPR99,
  volume    = {II},
  pages     = {110--116},
  year      = {1999},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282478},
}
@article{bb287844,
  author    = {Vajaria, H. and Sankar, R. and Kasturi, R.},
  title     = {Exploring Co-Occurence Between Speech and Body Movement for Audio-Guided Video Localization},
  journal   = CirSysVideo,
  volume    = {18},
  number    = {11},
  pages     = {1608--1617},
  month     = nov,
  year      = {2008},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282479},
}
@inproceedings{bb287845,
  author    = {Vajaria, H. and Islam, T. and Sarkar, S. and Sankar, R. and Kasturi, R.},
  title     = {Audio Segmentation and Speaker Localization in Meeting Videos},
  booktitle = ICPR06,
  volume    = {II},
  pages     = {1150--1153},
  year      = {2006},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282480},
}
@article{bb287846,
  author    = {Talantzis, F. and Pnevmatikakis, A. and Constantinides, A. G.},
  title     = {Audio-Visual Active Speaker Tracking in Cluttered Indoors Environments},
  journal   = SMC-B,
  volume    = {39},
  number    = {1},
  pages     = {7--15},
  month     = feb,
  year      = {2009},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282481},
}
% NOTE(review): same title and BIBSOURCE anchor as bb287846, with the author
% order reversed and a different volume/year -- confirm whether this is a
% separate publication or a duplicate record before citing both.
@article{bb287847,
  author    = {Constantinides, A. G. and Pnevmatikakis, A. and Talantzis, F.},
  title     = {Audio-Visual Active Speaker Tracking in Cluttered Indoors Environments},
  journal   = SMC-B,
  volume    = {38},
  number    = {3},
  pages     = {799--807},
  month     = jun,
  year      = {2008},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282481},
}
@article{bb287848,
  author    = {Lee, J. S. and de Simone, F. and Ebrahimi, T.},
  title     = {Efficient video coding based on audio-visual focus of attention},
  journal   = JVCIR,
  volume    = {22},
  number    = {8},
  pages     = {704--711},
  month     = nov,
  year      = {2011},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282482},
}
@article{bb287849,
  author    = {Blauth, D. A. and Minotto, V. P. and Jung, C. R. and Lee, B. and Kalker, T.},
  title     = {Voice activity detection and speaker localization using audiovisual cues},
  journal   = PRL,
  volume    = {33},
  number    = {4},
  pages     = {373--380},
  month     = mar,
  year      = {2012},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282483},
}
@inproceedings{bb287850,
  author    = {Montazzolli, S. and Jung, C. R. and Gelb, D.},
  title     = {Audiovisual voice activity detection using off-the-shelf cameras},
  booktitle = ICIP15,
  pages     = {3886--3890},
  year      = {2015},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282484},
}
@article{bb287851,
  author    = {Minotto, V. P. and Jung, C. R. and Lee, B.},
  title     = {Simultaneous-Speaker Voice Activity Detection and Localization Using Mid-Fusion of {SVM} and {HMMs}},
  journal   = MultMed,
  volume    = {16},
  number    = {4},
  pages     = {1032--1044},
  month     = jun,
  year      = {2014},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282485},
}
@article{bb287852,
  author    = {Qian, X. and Brutti, A. and Lanz, O. and Omologo, M. and Cavallaro, A.},
  title     = {Multi-Speaker Tracking From an Audio-Visual Sensing Device},
  journal   = MultMed,
  volume    = {21},
  number    = {10},
  pages     = {2576--2588},
  month     = oct,
  year      = {2019},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282486},
}
@article{bb287853,
  author    = {Pu, J. and Panagakis, Y. and Pantic, M.},
  title     = {Active Speaker Detection and Localization in Videos Using Low-Rank and Kernelized Sparsity},
  journal   = SPLetters,
  volume    = {27},
  pages     = {865--869},
  year      = {2020},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282487},
}
@article{bb287854,
  author    = {Qian, X. Y. and Liu, Q. and Wang, J. D. and Li, H. Z.},
  title     = {Three-Dimensional Speaker Localization: Audio-Refined Visual Scaling Factor Estimation},
  journal   = SPLetters,
  volume    = {28},
  pages     = {1405--1409},
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282488},
}
@article{bb287855,
  author    = {Ban, Y. T. and Alameda Pineda, X. and Girin, L. and Horaud, R.},
  title     = {Variational {Bayesian} Inference for Audio-Visual Tracking of Multiple Speakers},
  journal   = PAMI,
  volume    = {43},
  number    = {5},
  pages     = {1761--1776},
  month     = may,
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282489},
}
@inproceedings{bb287856,
  author    = {Ban, Y. T. and Girin, L. and Alameda Pineda, X. and Horaud, R.},
  title     = {Exploiting the Complementarity of Audio and Visual Data in Multi-speaker Tracking},
  booktitle = CVAVM17,
  pages     = {446--454},
  year      = {2017},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282490},
}
@article{bb287857,
  author    = {Qian, X. Y. and Brutti, A. and Lanz, O. and Omologo, M. and Cavallaro, A.},
  title     = {Audio-Visual Tracking of Concurrent Speakers},
  journal   = MultMed,
  volume    = {24},
  pages     = {942--954},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282491},
}
@article{bb287858,
  author    = {Hu, D. and Wei, Y. and Qian, R. and Lin, W. Y. and Song, R. H. and Wen, J. R.},
  title     = {Class-Aware Sounding Objects Localization via Audiovisual Correspondence},
  journal   = PAMI,
  volume    = {44},
  number    = {12},
  pages     = {9844--9859},
  month     = dec,
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282492},
}
@article{bb287859,
  author    = {Zheng, A. and Hu, M. and Jiang, B. and Huang, Y. and Yan, Y. and Luo, B.},
  title     = {Adversarial-Metric Learning for Audio-Visual Cross-Modal Matching},
  journal   = MultMed,
  volume    = {24},
  pages     = {338--351},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282493},
}
@article{bb287860,
  author    = {Wang, Y. and Qian, X. H. and Zhou, W.},
  title     = {Transformer-Prompted Network: Efficient Audio-Visual Segmentation via Transformer and Prompt Learning},
  journal   = SPLetters,
  volume    = {32},
  pages     = {516--520},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282494},
}
@article{bb287861,
  author    = {Wang, H. and Zha, Z. J. and Li, L. and Chen, X. J. and Luo, J. B.},
  title     = {Semantic and Relation Modulation for Audio-Visual Event Localization},
  journal   = PAMI,
  volume    = {45},
  number    = {6},
  pages     = {7711--7725},
  month     = jun,
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282495},
}
@article{bb287862,
  author    = {Garg, R. and Gao, R. H. and Grauman, K.},
  title     = {Visually-Guided Audio Spatialization in Video with Geometry-Aware Multi-task Learning},
  journal   = IJCV,
  volume    = {131},
  number    = {10},
  pages     = {2723--2737},
  month     = oct,
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282496},
}
@article{bb287863,
  author    = {Wang, J. X. and Li, C. L. and Zheng, A. and Tang, J. and Luo, B.},
  title     = {Looking and Hearing Into Details: Dual-Enhanced {Siamese} Adversarial Network for Audio-Visual Matching},
  journal   = MultMed,
  volume    = {25},
  pages     = {7505--7516},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282497},
}
@article{bb287864,
  author    = {Liu, C. and Li, P. and Zhang, H. and Li, L. C. and Huang, Z. and Wang, D. D. and Yu, X.},
  title     = {{BAVS}: Bootstrapping Audio-Visual Segmentation by Integrating Foundation Knowledge},
  journal   = MultMed,
  volume    = {26},
  pages     = {10015--10028},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282498},
}
@inproceedings{bb287865,
  author    = {Liu, C. and Li, P. and Yang, L. Y. and Wang, D. D. and Li, L. C. and Yu, X.},
  title     = {Robust Audio-Visual Segmentation via Audio-Guided Visual Convergent Alignment},
  booktitle = CVPR25,
  pages     = {28922--28931},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282499},
}
@inproceedings{bb287866,
  author    = {Liu, C. and Li, P. P. and Yu, Q. and Sheng, H. W. and Wang, D. D. and Li, L. C. and Yu, X.},
  title     = {Benchmarking Audio Visual Segmentation for Long-Untrimmed Videos},
  booktitle = CVPR24,
  pages     = {22712--22722},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282500},
}
@article{bb287867,
  author    = {Traa, J. and Smaragdis, P.},
  title     = {A Wrapped {Kalman} Filter for Azimuthal Speaker Tracking},
  journal   = SPLetters,
  volume    = {20},
  number    = {12},
  pages     = {1257--1260},
  year      = {2013},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282501},
}
@article{bb287868,
  author    = {Li, Y. and Liu, H. and Yang, B.},
  title     = {{STNet}: Deep Audio-Visual Fusion Network for Robust Speaker Tracking},
  journal   = MultMed,
  volume    = {27},
  pages     = {1835--1847},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282502},
}
@article{bb287869,
  author    = {Shi, Z. F. and Wu, Q. B. and Meng, F. M. and Xu, L. F. and Li, H. L.},
  title     = {Cross-Modal Cognitive Consensus Guided Audio-Visual Segmentation},
  journal   = MultMed,
  volume    = {27},
  pages     = {209--223},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282503},
}
@article{bb287870,
  author    = {Senocak, A. and Ryu, H. and Kim, J. and Oh, T. H. and Pfister, H. and Chung, J. S.},
  title     = {Toward Interactive Sound Source Localization: Better Align Sight and Sound!},
  journal   = PAMI,
  volume    = {47},
  number    = {9},
  pages     = {7643--7659},
  month     = sep,
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282504},
}
@article{bb287871,
  author    = {Jiang, Z. Y. and Chen, X. and Wang, S. and Qian, X. Y. and Li, H. Z.},
  title     = {{TPEech}: Target Speaker Extraction and Noise Suppression With Historical Dialogue Text Cues},
  journal   = SPLetters,
  volume    = {33},
  pages     = {351--355},
  year      = {2026},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282505},
}
@article{bb287872,
  author    = {Yang, W. H. and Wei, J. G. and Lu, W. H. and Song, X. Y. and Yue, X.},
  title     = {Listening for 'You': Enhancing Speech Image Retrieval via Target Speaker Extraction},
  journal   = SPLetters,
  volume    = {33},
  pages     = {201--205},
  year      = {2026},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282506},
}
@inproceedings{bb287873,
  author    = {Um, S. J. and Kim, D. J. and Lee, S. and Kim, J. U.},
  title     = {Object-aware Sound Source Localization via Audio-Visual Scene Understanding},
  booktitle = CVPR25,
  pages     = {8342--8351},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282507},
}
@inproceedings{bb287874,
  author    = {Kim, I. H. and Song, Y. and Park, J. and Kim, W. H. and Kwak, S.},
  title     = {Improving Sound Source Localization with Joint Slot Attention on Image and Audio},
  booktitle = CVPR25,
  pages     = {3121--3130},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282508},
}
@inproceedings{bb287875,
  author    = {Liu, C. and Yang, L. Y. and Li, P. and Wang, D. D. and Li, L. and Yu, X.},
  title     = {Dynamic Derivation and Elimination: Audio Visual Segmentation with Enhanced Audio Semantics},
  booktitle = CVPR25,
  pages     = {3131--3141},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282509},
}
@inproceedings{bb287876,
  author    = {Ryu, H. and Kim, S. and Chung, J. S. and Senocak, A.},
  title     = {Seeing Speech and Sound: Distinguishing and Locating Audio Sources in Visual Scenes},
  booktitle = CVPR25,
  pages     = {13540--13549},
  year      = {2025},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282510},
}
@inproceedings{bb287877,
  author    = {Wang, X. Z. and Cheng, F. and Bertasius, G.},
  title     = {{LoCoNet}: Long-Short Context Network for Active Speaker Detection},
  booktitle = CVPR24,
  pages     = {18462--18472},
  year      = {2024},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282511},
}
@inproceedings{bb287878,
  author    = {Huang, C. and Tian, Y. P. and Kumar, A. and Xu, C. L.},
  title     = {Egocentric Audio-Visual Object Localization},
  booktitle = CVPR23,
  pages     = {22910--22921},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282512},
}
@inproceedings{bb287879,
  author    = {Nugroho, M. A. and Woo, S. and Lee, S. and Kim, C.},
  title     = {Audio-Visual Glance Network for Efficient Video Recognition},
  booktitle = ICCV23,
  pages     = {10116--10125},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282513},
}
@inproceedings{bb287880,
  author    = {Liu, Y. and Tan, Y. and Lan, H. Y.},
  title     = {Self-Supervised Contrastive Learning for Audio-Visual Action Recognition},
  booktitle = ICIP23,
  pages     = {1000--1004},
  year      = {2023},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282514},
}
@inproceedings{bb287881,
  author    = {Mo, S. T. and Morgado, P.},
  title     = {Localizing Visual Sounds the Easy Way},
  booktitle = ECCV22,
  volume    = {XXXVII},
  pages     = {218--234},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282515},
}
@inproceedings{bb287882,
  author    = {Xia, Y. and Zhao, Z.},
  title     = {Cross-modal Background Suppression for Audio-Visual Event Localization},
  booktitle = CVPR22,
  pages     = {19957--19966},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282516},
}
@inproceedings{bb287883,
  author    = {Jiang, H. and Murdock, C. and Ithapu, V. K.},
  title     = {Egocentric Deep Multi-Channel Audio-Visual Active Speaker Localization},
  booktitle = CVPR22,
  pages     = {10534--10542},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282517},
}
@inproceedings{bb287884,
  author    = {Min, K. and Roy, S. and Tripathi, S. and Guha, T. and Majumdar, S.},
  title     = {Learning Long-Term Spatial-Temporal Graphs for Active Speaker Detection},
  booktitle = ECCV22,
  volume    = {XXXV},
  pages     = {371--387},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282518},
}
@inproceedings{bb287885,
  author    = {Duan, B. and Tang, H. and Wang, W. and Zong, Z. L. and Yang, G. W. and Yan, Y.},
  title     = {Audio-Visual Event Localization via Recursive Fusion by Joint Co-Attention},
  booktitle = WACV21,
  pages     = {4012--4021},
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282519},
}
@inproceedings{bb287886,
  author    = {Wu, Y. and Zhu, L. C. and Yan, Y. and Yang, Y.},
  title     = {Dual Attention Matching for Audio-Visual Event Localization},
  booktitle = ICCV19,
  pages     = {6291--6299},
  year      = {2019},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282520},
}
@inproceedings{bb287887,
  author    = {Majumder, S. and Al Halah, Z. and Grauman, K.},
  title     = {{Move2Hear}: Active Audio-Visual Source Separation},
  booktitle = ICCV21,
  pages     = {275--285},
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282521},
}
@inproceedings{bb287888,
  author    = {Majumder, S. and Grauman, K.},
  title     = {Active Audio-Visual Separation of Dynamic Sound Sources},
  booktitle = ECCV22,
  volume    = {XXIX},
  pages     = {551--569},
  year      = {2022},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282522},
}
@inproceedings{bb287889,
  author    = {Alcazar, J. L. and Heilbron, F. C. and Thabet, A. K. and Ghanem, B.},
  title     = {{MAAS}: Multi-modal Assignation for Active Speaker Detection},
  booktitle = ICCV21,
  pages     = {265--274},
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282523},
}
@inproceedings{bb287890,
  author    = {Kopuklu, O. and Taseska, M. and Rigoll, G.},
  title     = {How to Design a Three-Stage Architecture for Audio-Visual Active Speaker Detection in the Wild},
  booktitle = ICCV21,
  pages     = {1173--1183},
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282524},
}
@inproceedings{bb287891,
  author    = {Wu, Y. and Yang, Y.},
  title     = {Exploring Heterogeneous Clues for Weakly-Supervised Audio-Visual Video Parsing},
  booktitle = CVPR21,
  pages     = {1326--1335},
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282525},
}
@inproceedings{bb287892,
  author    = {Liu, H. and Sun, Y. H. and Li, Y. D. and Yang, B.},
  title     = {{3D} Audio-Visual Speaker Tracking with A Novel Particle Filter},
  booktitle = ICPR21,
  pages     = {7343--7348},
  year      = {2021},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282526},
}
@inproceedings{bb287893,
  author    = {Liu, H. and Li, Y. D. and Yang, B.},
  title     = {{3D} Audio-Visual Speaker Tracking with A Two-Layer Particle Filter},
  booktitle = ICIP19,
  pages     = {1955--1959},
  year      = {2019},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282527},
}
@inproceedings{bb287894,
  author    = {He, G. and Liu, X. and Fan, F. and You, J.},
  title     = {{Image2Audio}: Facilitating Semi-supervised Audio Emotion Recognition with Facial Expression Image},
  booktitle = VL3W20,
  pages     = {3978--3983},
  year      = {2020},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282528},
}
@inproceedings{bb287895,
  author    = {Le, N. and Heili, A. and Wu, D. and Odobez, J. M.},
  title     = {Temporally subsampled detection for accurate and efficient face tracking and diarization},
  booktitle = ICPR16,
  pages     = {1792--1797},
  year      = {2016},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282529},
}
@inproceedings{bb287896,
  author    = {Saeed, A. and Al Hamadi, A. and Heuer, M.},
  title     = {Speaker Tracking Using Multi-modal Fusion Framework},
  booktitle = ICISP12,
  pages     = {539--546},
  year      = {2012},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282530},
}
% NOTE(review): the source record carried only a placeholder page range; the
% pages field is omitted until the real range is known.
@inproceedings{bb287897,
  author    = {Kelly, D. and Pitie, F. and Kokaram, A. and Boland, F.},
  title     = {A Comparative Error Analysis of Audio-Visual Source Localization},
  booktitle = M2SFA208,
  year      = {2008},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282531},
}
% NOTE(review): the source record carried only a placeholder page range; the
% pages field is omitted until the real range is known.
@inproceedings{bb287898,
  author    = {Katsarakis, N. and Talantzis, F. and Pnevmatikakis, A. and Polymenakos, L.},
  title     = {The {AIT} {3D} Audio / Visual Person Tracker for {CLEAR} 2007},
  booktitle = MTPH07,
  year      = {2007},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282532},
}
@inproceedings{bb287899,
  author    = {Kushal, A. and Rahurkar, M. and Fei Fei, L. and Ponce, J. and Huang, T.},
  title     = {Audio-Visual Speaker Localization Using Graphical Models},
  booktitle = ICPR06,
  volume    = {I},
  pages     = {291--294},
  year      = {2006},
  bibsource = {http://www.visionbib.com/bibliography/people917avt1.html#TT282533},
}
Last update: Jan 23, 2026 at 20:54:10