{"version":"0.1","company":{"name":"YubHub","url":"https://yubhub.co","jobsUrl":"https://yubhub.co/jobs/skill/audio-processing"},"x-facet":{"type":"skill","slug":"audio-processing","display":"Audio Processing","count":14},"x-feed-size-limit":100,"x-feed-sort":"enriched_at desc","x-feed-notice":"This feed contains at most 100 jobs (the most recently enriched). For the full corpus, use the paginated /stats/by-facet endpoint or /search.","x-generator":"yubhub-xml-generator","x-rights":"Free to redistribute with attribution: \"Data by YubHub (https://yubhub.co)\"","x-schema":"Each entry in `jobs` follows https://schema.org/JobPosting. YubHub-native raw fields carry `x-` prefix.","jobs":[{"@context":"https://schema.org","@type":"JobPosting","identifier":{"@type":"PropertyValue","name":"YubHub","value":"job_859c75b7-6fc"},"title":"Engineering Manager, Multimodal (API)","description":"<p>We are seeking an Engineering Manager to lead our multimodal API product suite. Your team will be responsible for delivering innovative APIs across real-time processing, speech transcription, speech generation, and image creation.</p>\n<p>You will own the product roadmap for how we evolve our multimodal API offerings, and you will build the products that allow developers to reach millions of end users through AI audio, video, and images.</p>\n<p><strong>Responsibilities</strong></p>\n<ul>\n<li>Build, mentor, and grow a high-performing engineering team focused on multimodal API products – including our real-time API, our transcription models (Whisper), our speech generation models (TTS), and our image generation APIs (DALLE and native 4o).</li>\n<li>Collaborate closely with product managers, designers, and other stakeholders to define the strategic vision and product roadmap.</li>\n<li>Work closely with our research teams to improve our core multimodal models for API customer use cases.</li>\n<li>Guide technical and architectural decisions, emphasizing scalability, robustness, and user 
experience.</li>\n<li>Foster a culture of innovation, continuous improvement, and accountability within your team.</li>\n</ul>\n<p><strong>Qualifications</strong></p>\n<ul>\n<li>Proven experience managing engineering teams that deliver complex, high-quality products at scale.</li>\n<li>Strong technical background and proficiency in modern software engineering practices and system architecture.</li>\n<li>Excellent collaboration and communication skills to effectively coordinate across diverse teams and stakeholders.</li>\n<li>Familiarity with or strong interest in multimodal AI, including speech technologies, real-time systems, and image generation.</li>\n<li>Ability to operate effectively in a fast-paced, ambiguous startup environment.</li>\n</ul>\n<p><strong>Preferred Qualifications</strong></p>\n<ul>\n<li>Experience developing multimodal systems or APIs in AI/ML domains, especially around image generation, audio generation, or speech transcription.</li>\n<li>Familiarity with real-time streaming technologies, audio processing, and computer vision.</li>\n<li>Hands-on experience with cloud platforms and distributed architectures.</li>\n</ul>\n<p style=\"margin-top:24px;font-size:13px;color:#666;\">XML job scraping automation by <a href=\"https://yubhub.co\">YubHub</a></p>","url":"https://yubhub.co/jobs/job_859c75b7-6fc","directApply":true,"hiringOrganization":{"@type":"Organization","name":"OpenAI","sameAs":"https://openai.com/","logo":"https://logos.yubhub.co/openai.com.png"},"x-apply-url":"https://jobs.ashbyhq.com/openai/1d7f4747-54a3-4141-a39a-c6e7700e969b","x-work-arrangement":"onsite","x-experience-level":"mid","x-job-type":"Full time","x-salary-range":"$293K – $385K","x-skills-required":["multimodal AI","speech technologies","real-time systems","image generation","cloud platforms","distributed architectures"],"x-skills-preferred":["audio generation","speech transcription","real-time streaming technologies","audio processing","computer 
vision"],"datePosted":"2026-04-24T12:18:02.815Z","jobLocation":{"@type":"Place","address":{"@type":"PostalAddress","addressLocality":"San Francisco"}},"employmentType":"FULL_TIME","occupationalCategory":"Engineering","industry":"Technology","skills":"multimodal AI, speech technologies, real-time systems, image generation, cloud platforms, distributed architectures, audio generation, speech transcription, real-time streaming technologies, audio processing, computer vision","baseSalary":{"@type":"MonetaryAmount","currency":"USD","value":{"@type":"QuantitativeValue","minValue":293000,"maxValue":385000,"unitText":"YEAR"}}},{"@context":"https://schema.org","@type":"JobPosting","identifier":{"@type":"PropertyValue","name":"YubHub","value":"job_c85087bb-6a8"},"title":"AI Tutor - Danish","description":"<p>As an AI Tutor specialized in multilingual audio capabilities, you will contribute to xAI&#39;s mission by training and refining Grok to excel in voice interactions, speech recognition, and auditory experiences across diverse languages, accents, and cultural contexts.</p>\n<p>Your work will focus on curating and annotating high-quality audio data to enhance Grok&#39;s global accessibility, enabling natural spoken interactions for users worldwide, bridging language barriers through accurate speech processing, and improving the AI&#39;s handling of multilingual audio nuances.</p>\n<p>Responsibilities:</p>\n<ul>\n<li>Use proprietary software to provide labels, annotations, recordings, and inputs on projects involving multilingual audio clips, voice recordings, speech samples, and auditory elements in various languages.</li>\n</ul>\n<ul>\n<li>Support the delivery of high-quality curated audio data that ensures clear, natural spoken output, accurate representation of linguistic and prosodic details (such as intonation, rhythm, and accent), and professional audio standards.</li>\n</ul>\n<ul>\n<li>Collaborate with technical staff to develop tasks that improve AI&#39;s ability to 
handle speech modulation, accent variation, noise in real-world recordings, and multilingual audio processing.</li>\n</ul>\n<ul>\n<li>Work with technical staff to improve annotation tools for efficient audio workflows.</li>\n</ul>\n<p>Basic Qualifications:</p>\n<ul>\n<li>Native proficiency in Danish with exposure to diverse accents, dialects, or regional variations.</li>\n</ul>\n<ul>\n<li>Proficiency in English (minimum B2 level) with clear, natural vocal delivery and pronunciation suitable for audio recording purposes.</li>\n</ul>\n<ul>\n<li>Strong auditory perception to identify nuances in speech, accents, pronunciation, intonation, and audio quality across languages.</li>\n</ul>\n<ul>\n<li>Demonstrated ability to handle multilingual audio content, including evaluating speech accuracy, cultural vocal expressions, and contextual interpretation in spoken form.</li>\n</ul>\n<ul>\n<li>Demonstrated ability to transcribe audio with high accuracy across accents and varying audio quality.</li>\n</ul>\n<ul>\n<li>Comfort providing high-quality voice recordings and feedback on audio samples in multiple languages.</li>\n</ul>\n<ul>\n<li>Strong comprehension skills and the ability to make independent judgments on ambiguous or varied audio material, including noisy or accented speech.</li>\n</ul>\n<ul>\n<li>Strong communication, interpersonal, analytical, detail-oriented, and organizational skills, with the ability to articulate audio-related feedback effectively.</li>\n</ul>\n<ul>\n<li>Commitment to developing AI that masters sophisticated multilingual audio capabilities.</li>\n</ul>\n<p>Preferred Skills and Experience:</p>\n<ul>\n<li>Demonstration of exceptional attention to linguistic nuance, auditory detail, and data quality beyond standard transcription work.</li>\n</ul>\n<ul>\n<li>Deep understanding and taste of what good/useful Audio data is.</li>\n</ul>\n<ul>\n<li>Strong command of advanced transcription and annotation practices, including handling disfluencies, 
accents, and prosodic features (intonation, stress, rhythm, emotion, etc) with high consistency and accuracy.</li>\n</ul>\n<ul>\n<li>Background in linguistics (e.g., phonetics, phonology, sociolinguistics), speech sciences, cognitive science, or a related field, or equivalent practical experience, with demonstrated ability to analyze accent variation, pronunciation differences, and multilingual speech patterns.</li>\n</ul>\n<ul>\n<li>Experience working with speech/audio datasets, annotation workflows, or AI training data, including knowledge/experience with training voice models, and an understanding of how data quality impacts model performance.</li>\n</ul>\n<ul>\n<li>Professional experience in voice work, including voice acting, voice recording, podcasting with a measurable audience (e.g., X following), or similar audio production demonstrating attention to clarity and recording quality.</li>\n</ul>\n<ul>\n<li>Demonstrated ability to exercise independent judgment in ambiguous audio scenarios and make consistent, defensible annotation decisions.</li>\n</ul>\n<ul>\n<li>Portfolio (strongly preferred for advanced candidates): Voice samples, annotated transcripts, or audio-related work demonstrating quality, methodology, and attention to detail.</li>\n</ul>\n<ul>\n<li>Candidates with professional experience in voice, linguistics, speech data, or speech evaluation and research are especially encouraged to apply.</li>\n</ul>\n<p>Location and Other Expectations:</p>\n<ul>\n<li>Tutor roles may be offered as full-time, part-time, or contractor positions, depending on role needs and candidate fit.</li>\n</ul>\n<ul>\n<li>For contractor positions, hours will vary widely based on project scope and contractor availability, with no fixed commitments required. On average, most projects may require at least 10 hours per week to deliver effectively, though this is not a fixed commitment and depends on the scope of work. 
Contractors have full flexibility to set their own hours and determine the exact amount of time needed to complete deliverables.</li>\n</ul>\n<ul>\n<li>Tutor roles may be performed remotely from any location worldwide, subject to legal eligibility, time-zone compatibility, and role-specific needs.</li>\n</ul>\n<ul>\n<li>For US-based candidates, please note that we are unable to hire in Wyoming and Illinois at this time.</li>\n</ul>\n<ul>\n<li>We are unable to provide visa sponsorship.</li>\n</ul>\n<ul>\n<li>For those who will be working from a personal device, your computer must be a Chromebook, a Mac with macOS 11.0 or later, or Windows 10 or later.</li>\n</ul>\n<p>Compensation and Benefits:</p>\n<p>US-based candidates: $35/hour - $45/hour depending on factors including relevant experience, skills, education, geographic location, and qualifications. International candidates: Information will be provided to you during the recruitment process.</p>\n<p>Benefits vary based on employment type, location, and jurisdiction. Benefits for eligible U.S.-based positions include health insurance, 401(k) plan, and paid sick leave. 
Specific details and role-specific information will be provided to you during the interview process.</p>\n<p style=\"margin-top:24px;font-size:13px;color:#666;\">XML job scraping automation by <a href=\"https://yubhub.co\">YubHub</a></p>","url":"https://yubhub.co/jobs/job_c85087bb-6a8","directApply":true,"hiringOrganization":{"@type":"Organization","name":"xAI","sameAs":"https://www.xai.com/","logo":"https://logos.yubhub.co/xai.com.png"},"x-apply-url":"https://job-boards.greenhouse.io/xai/jobs/5090189007","x-work-arrangement":"remote","x-experience-level":"mid","x-job-type":"full-time|part-time|contract","x-salary-range":"$35/hour - $45/hour","x-skills-required":["Multilingual audio capabilities","Proprietary software","Audio data curation","Annotation tools","Speech recognition","Auditory experiences","Diverse languages","Accents","Cultural contexts","Linguistic and prosodic details","Professional audio standards","Speech modulation","Accent variation","Noise in real-world recordings","Multilingual audio processing","Audio workflows","Transcription","Audio quality","Voice recordings","Feedback on audio samples","Independent judgments","Ambiguous audio scenarios","Defensible annotation decisions","Portfolio","Voice samples","Annotated transcripts","Audio-related work","Quality","Methodology","Attention to detail","Professional experience in voice","Linguistics","Speech data","Speech evaluation and research"],"x-skills-preferred":["Exceptional attention to linguistic nuance","Auditory detail","Data quality","Advanced transcription and annotation practices","Disfluencies","Prosodic features","Intonation","Stress","Rhythm","Emotion","Phonetics","Phonology","Sociolinguistics","Speech sciences","Cognitive science","Pronunciation differences","Multilingual speech patterns","Speech/audio datasets","Annotation workflows","AI training data","Training voice models","Data quality impacts model performance","Voice acting","Voice recording","Podcasting","Measurable 
audience","Clarity and recording quality"],"datePosted":"2026-04-18T15:57:57.773Z","jobLocation":{"@type":"Place","address":{"@type":"PostalAddress","addressLocality":"Remote"}},"jobLocationType":"TELECOMMUTE","employmentType":"FULL_TIME","occupationalCategory":"Engineering","industry":"Technology","skills":"Multilingual audio capabilities, Proprietary software, Audio data curation, Annotation tools, Speech recognition, Auditory experiences, Diverse languages, Accents, Cultural contexts, Linguistic and prosodic details, Professional audio standards, Speech modulation, Accent variation, Noise in real-world recordings, Multilingual audio processing, Audio workflows, Transcription, Audio quality, Voice recordings, Feedback on audio samples, Independent judgments, Ambiguous audio scenarios, Defensible annotation decisions, Portfolio, Voice samples, Annotated transcripts, Audio-related work, Quality, Methodology, Attention to detail, Professional experience in voice, Linguistics, Speech data, Speech evaluation and research, Exceptional attention to linguistic nuance, Auditory detail, Data quality, Advanced transcription and annotation practices, Disfluencies, Prosodic features, Intonation, Stress, Rhythm, Emotion, Phonetics, Phonology, Sociolinguistics, Speech sciences, Cognitive science, Pronunciation differences, Multilingual speech patterns, Speech/audio datasets, Annotation workflows, AI training data, Training voice models, Data quality impacts model performance, Voice acting, Voice recording, Podcasting, Measurable audience, Clarity and recording quality"},{"@context":"https://schema.org","@type":"JobPosting","identifier":{"@type":"PropertyValue","name":"YubHub","value":"job_96e9554d-edc"},"title":"AI Tutor - Arabic","description":"<p>As an AI Tutor specialised in multilingual audio capabilities, you will contribute to xAI&#39;s mission by training and refining Grok to excel in voice interactions, speech recognition, and auditory experiences across diverse 
languages, accents, and cultural contexts.</p>\n<p>Your work will focus on curating and annotating high-quality audio data to enhance Grok&#39;s global accessibility, enabling natural spoken interactions for users worldwide, bridging language barriers through accurate speech processing, and improving the AI&#39;s handling of multilingual audio nuances.</p>\n<p>Responsibilities: Use proprietary software to provide labels, annotations, recordings, and inputs on projects involving multilingual audio clips, voice recordings, speech samples, and auditory elements in various languages. Support the delivery of high-quality curated audio data that ensures clear, natural spoken output, accurate representation of linguistic and prosodic details (such as intonation, rhythm, and accent), and professional audio standards. Collaborate with technical staff to develop tasks that improve AI&#39;s ability to handle speech modulation, accent variation, noise in real-world recordings, and multilingual audio processing. Work with technical staff to improve annotation tools for efficient audio workflows.</p>\n<p>Basic Qualifications: Native proficiency in Arabic with exposure to diverse accents, dialects, or regional variations. Proficiency in English (minimum B2 level) with clear, natural vocal delivery and pronunciation suitable for audio recording purposes. Strong auditory perception to identify nuances in speech, accents, pronunciation, intonation, and audio quality across languages. Demonstrated ability to handle multilingual audio content, including evaluating speech accuracy, cultural vocal expressions, and contextual interpretation in spoken form. Demonstrated ability to transcribe audio with high accuracy across accents and varying audio quality. Comfort providing high-quality voice recordings and feedback on audio samples in multiple languages. 
Strong comprehension skills and the ability to make independent judgments on ambiguous or varied audio material, including noisy or accented speech. Strong communication, interpersonal, analytical, detail-oriented, and organisational skills, with the ability to articulate audio-related feedback effectively. Commitment to developing AI that masters sophisticated multilingual audio capabilities.</p>\n<p>Preferred Skills and Experience: Demonstration of exceptional attention to linguistic nuance, auditory detail, and data quality beyond standard transcription work. Deep understanding and taste of what good/useful Audio data is. Strong command of advanced transcription and annotation practices, including handling disfluencies, accents, and prosodic features (intonation, stress, rhythm, emotion, etc) with high consistency and accuracy. Background in linguistics (e.g., phonetics, phonology, sociolinguistics), speech sciences, cognitive science, or a related field, or equivalent practical experience, with demonstrated ability to analyse accent variation, pronunciation differences, and multilingual speech patterns. Experience working with speech/audio datasets, annotation workflows, or AI training data, including knowledge/experience with training voice models, and an understanding of how data quality impacts model performance. Professional experience in voice work, including voice acting, voice recording, podcasting with a measurable audience (e.g., X following), or similar audio production demonstrating attention to clarity and recording quality. Demonstrated ability to exercise independent judgment in ambiguous audio scenarios and make consistent, defensible annotation decisions. Portfolio (strongly preferred for advanced candidates): Voice samples, annotated transcripts, or audio-related work demonstrating quality, methodology, and attention to detail. 
Candidates with professional experience in voice, linguistics, speech data, or speech evaluation and research are especially encouraged to apply.</p>\n<p>Location and Other Expectations: Tutor roles may be offered as full-time, part-time, or contractor positions, depending on role needs and candidate fit. For contractor positions, hours will vary widely based on project scope and contractor availability, with no fixed commitments required. On average, most projects may require at least 10 hours per week to deliver effectively, though this is not a fixed commitment and depends on the scope of work. Contractors have full flexibility to set their own hours and determine the exact amount of time needed to complete deliverables. Tutor roles may be performed remotely from any location worldwide, subject to legal eligibility, time-zone compatibility, and role-specific needs. For US-based candidates, please note that we are unable to hire in Wyoming and Illinois at this time. We are unable to provide visa sponsorship. For those who will be working from a personal device, your computer must be a Chromebook, a Mac with macOS 11.0 or later, or Windows 10 or later.</p>\n<p>Compensation and Benefits: US-based candidates: $35/hour - $45/hour depending on factors including relevant experience, skills, education, geographic location, and qualifications. International candidates: Information will be provided to you during the recruitment process. Benefits vary based on employment type, location, and jurisdiction. Benefits for eligible U.S.-based positions include health insurance, 401(k) plan, and paid sick leave. 
Specific details and role-specific information will be provided to you during the interview process.</p>\n<p style=\"margin-top:24px;font-size:13px;color:#666;\">XML job scraping automation by <a href=\"https://yubhub.co\">YubHub</a></p>","url":"https://yubhub.co/jobs/job_96e9554d-edc","directApply":true,"hiringOrganization":{"@type":"Organization","name":"xAI","sameAs":"https://www.xai.com/","logo":"https://logos.yubhub.co/xai.com.png"},"x-apply-url":"https://job-boards.greenhouse.io/xai/jobs/5090171007","x-work-arrangement":"remote","x-experience-level":"mid","x-job-type":"full-time|part-time|contract","x-salary-range":"$35/hour - $45/hour","x-skills-required":["Multilingual audio capabilities","Proprietary software","Audio data curation","Annotation tools","Speech recognition","Auditory experiences","Natural language processing","Accent variation","Noise reduction","Multilingual audio processing"],"x-skills-preferred":["Linguistic nuance","Auditory detail","Data quality","Advanced transcription and annotation practices","Disfluencies","Accents","Prosodic features","Linguistics","Speech sciences","Cognitive science","Voice work","Voice acting","Voice recording","Podcasting"],"datePosted":"2026-04-18T15:52:57.094Z","jobLocation":{"@type":"Place","address":{"@type":"PostalAddress","addressLocality":"Remote"}},"jobLocationType":"TELECOMMUTE","employmentType":"FULL_TIME","occupationalCategory":"Engineering","industry":"Technology","skills":"Multilingual audio capabilities, Proprietary software, Audio data curation, Annotation tools, Speech recognition, Auditory experiences, Natural language processing, Accent variation, Noise reduction, Multilingual audio processing, Linguistic nuance, Auditory detail, Data quality, Advanced transcription and annotation practices, Disfluencies, Accents, Prosodic features, Linguistics, Speech sciences, Cognitive science, Voice work, Voice acting, Voice recording, 
Podcasting"},{"@context":"https://schema.org","@type":"JobPosting","identifier":{"@type":"PropertyValue","name":"YubHub","value":"job_993beba7-87d"},"title":"AI Tutor - Vietnamese","description":"<p>As an AI Tutor specializing in multilingual audio capabilities, you will contribute to xAI&#39;s mission by training and refining Grok to excel in voice interactions, speech recognition, and auditory experiences across diverse languages, accents, and cultural contexts. Your work will focus on curating and annotating high-quality audio data to enhance Grok&#39;s global accessibility, enabling natural spoken interactions for users worldwide, bridging language barriers through accurate speech processing, and improving the AI&#39;s handling of multilingual audio nuances.</p>\n<p>Responsibilities:</p>\n<ul>\n<li>Use proprietary software to provide labels, annotations, recordings, and inputs on projects involving multilingual audio clips, voice recordings, speech samples, and auditory elements in various languages.</li>\n<li>Support the delivery of high-quality curated audio data that ensures clear, natural spoken output, accurate representation of linguistic and prosodic details (such as intonation, rhythm, and accent), and professional audio standards.</li>\n<li>Collaborate with technical staff to develop tasks that improve AI&#39;s ability to handle speech modulation, accent variation, noise in real-world recordings, and multilingual audio processing.</li>\n<li>Work with technical staff to improve annotation tools for efficient audio workflows.</li>\n</ul>\n<p>Basic Qualifications:</p>\n<ul>\n<li>Native proficiency in Vietnamese with exposure to diverse accents, dialects, or regional variations.</li>\n<li>Proficiency in English (minimum B2 level) with clear, natural vocal delivery and pronunciation suitable for audio recording purposes.</li>\n<li>Strong auditory perception to identify nuances in speech, accents, pronunciation, intonation, and audio quality across 
languages.</li>\n<li>Demonstrated ability to handle multilingual audio content, including evaluating speech accuracy, cultural vocal expressions, and contextual interpretation in spoken form.</li>\n<li>Demonstrated ability to transcribe audio with high accuracy across accents and varying audio quality.</li>\n<li>Comfort providing high-quality voice recordings and feedback on audio samples in multiple languages.</li>\n<li>Strong comprehension skills and the ability to make independent judgments on ambiguous or varied audio material, including noisy or accented speech.</li>\n<li>Strong communication, interpersonal, analytical, detail-oriented, and organizational skills, with the ability to articulate audio-related feedback effectively.</li>\n<li>Commitment to developing AI that masters sophisticated multilingual audio capabilities.</li>\n</ul>\n<p>Preferred Skills and Experience:</p>\n<ul>\n<li>Demonstration of exceptional attention to linguistic nuance, auditory detail, and data quality beyond standard transcription work.</li>\n<li>Deep understanding and taste of what good/useful Audio data is.</li>\n<li>Strong command of advanced transcription and annotation practices, including handling disfluencies, accents, and prosodic features (intonation, stress, rhythm, emotion, etc) with high consistency and accuracy.</li>\n<li>Background in linguistics (e.g., phonetics, phonology, sociolinguistics), speech sciences, cognitive science, or a related field, or equivalent practical experience, with demonstrated ability to analyze accent variation, pronunciation differences, and multilingual speech patterns.</li>\n<li>Experience working with speech/audio datasets, annotation workflows, or AI training data, including knowledge/experience with training voice models, and an understanding of how data quality impacts model performance.</li>\n<li>Professional experience in voice work, including voice acting, voice recording, podcasting with a measurable audience (e.g., X following), 
or similar audio production demonstrating attention to clarity and recording quality.</li>\n<li>Demonstrated ability to exercise independent judgment in ambiguous audio scenarios and make consistent, defensible annotation decisions.</li>\n<li>Portfolio (strongly preferred for advanced candidates): Voice samples, annotated transcripts, or audio-related work demonstrating quality, methodology, and attention to detail.</li>\n<li>Candidates with professional experience in voice, linguistics, speech data, or speech evaluation and research are especially encouraged to apply.</li>\n</ul>\n<p>Location and Other Expectations:</p>\n<ul>\n<li>Tutor roles may be offered as full-time, part-time, or contractor positions, depending on role needs and candidate fit.</li>\n<li>For contractor positions, hours will vary widely based on project scope and contractor availability, with no fixed commitments required. On average, most projects may require at least 10 hours per week to deliver effectively, though this is not a fixed commitment and depends on the scope of work. Contractors have full flexibility to set their own hours and determine the exact amount of time needed to complete deliverables.</li>\n<li>Tutor roles may be performed remotely from any location worldwide, subject to legal eligibility, time-zone compatibility, and role-specific needs.</li>\n<li>For US-based candidates, please note that we are unable to hire in Wyoming and Illinois at this time.</li>\n<li>We are unable to provide visa sponsorship.</li>\n<li>For those who will be working from a personal device, your computer must be a Chromebook, a Mac with macOS 11.0 or later, or Windows 10 or later.</li>\n</ul>\n<p>Compensation and Benefits: US-based candidates: $35/hour - $45/hour depending on factors including relevant experience, skills, education, geographic location, and qualifications. International candidates: Information will be provided to you during the recruitment process. 
Benefits vary based on employment type, location, and jurisdiction. Benefits for eligible U.S.-based positions include health insurance, 401(k) plan, and paid sick leave. Specific details and role-specific information will be provided to you during the interview process.</p>\n<p style=\"margin-top:24px;font-size:13px;color:#666;\">XML job scraping automation by <a href=\"https://yubhub.co\">YubHub</a></p>","url":"https://yubhub.co/jobs/job_993beba7-87d","directApply":true,"hiringOrganization":{"@type":"Organization","name":"xAI","sameAs":"https://www.xai.com/","logo":"https://logos.yubhub.co/xai.com.png"},"x-apply-url":"https://job-boards.greenhouse.io/xai/jobs/5090274007","x-work-arrangement":"remote","x-experience-level":"mid","x-job-type":"full-time|part-time|contract","x-salary-range":"$35/hour - $45/hour","x-skills-required":["Multilingual audio capabilities","Proprietary software","Audio data curation","Speech recognition","Auditory experiences","Accent variation","Noise in real-world recordings","Multilingual audio processing","Annotation tools","Efficient audio workflows","Native proficiency in Vietnamese","Proficiency in English","Strong auditory perception","Multilingual audio content","Speech accuracy","Cultural vocal expressions","Contextual interpretation","Transcription","High-quality voice recordings","Feedback on audio samples","Independent judgments","Ambiguous audio scenarios","Defensible annotation decisions","Portfolio","Voice samples","Annotated transcripts","Audio-related work","Quality","Methodology","Attention to detail"],"x-skills-preferred":["Exceptional attention to linguistic nuance","Auditory detail","Data quality","Advanced transcription and annotation practices","Disfluencies","Accents","Prosodic features","Linguistics","Phonetics","Phonology","Sociolinguistics","Speech sciences","Cognitive science","Pronunciation differences","Multilingual speech patterns","Speech/audio datasets","Annotation workflows","AI training data","Training 
voice models","Data quality impacts model performance","Professional experience in voice work","Voice acting","Voice recording","Podcasting","Measurable audience","Similar audio production","Clarity and recording quality"],"datePosted":"2026-04-18T15:22:57.303Z","jobLocation":{"@type":"Place","address":{"@type":"PostalAddress","addressLocality":"Remote"}},"jobLocationType":"TELECOMMUTE","employmentType":"FULL_TIME","occupationalCategory":"Engineering","industry":"Technology","skills":"Multilingual audio capabilities, Proprietary software, Audio data curation, Speech recognition, Auditory experiences, Accent variation, Noise in real-world recordings, Multilingual audio processing, Annotation tools, Efficient audio workflows, Native proficiency in Vietnamese, Proficiency in English, Strong auditory perception, Multilingual audio content, Speech accuracy, Cultural vocal expressions, Contextual interpretation, Transcription, High-quality voice recordings, Feedback on audio samples, Independent judgments, Ambiguous audio scenarios, Defensible annotation decisions, Portfolio, Voice samples, Annotated transcripts, Audio-related work, Quality, Methodology, Attention to detail, Exceptional attention to linguistic nuance, Auditory detail, Data quality, Advanced transcription and annotation practices, Disfluencies, Accents, Prosodic features, Linguistics, Phonetics, Phonology, Sociolinguistics, Speech sciences, Cognitive science, Pronunciation differences, Multilingual speech patterns, Speech/audio datasets, Annotation workflows, AI training data, Training voice models, Data quality impacts model performance, Professional experience in voice work, Voice acting, Voice recording, Podcasting, Measurable audience, Similar audio production, Clarity and recording quality"},{"@context":"https://schema.org","@type":"JobPosting","identifier":{"@type":"PropertyValue","name":"YubHub","value":"job_738a6055-653"},"title":"AI Tutor - Italian","description":"<p>As an AI Tutor specialized in 
multilingual audio capabilities, you will contribute to xAI&#39;s mission by training and refining Grok to excel in voice interactions, speech recognition, and auditory experiences across diverse languages, accents, and cultural contexts.</p>\n<p>Your work will focus on curating and annotating high-quality audio data to enhance Grok&#39;s global accessibility, enabling natural spoken interactions for users worldwide, bridging language barriers through accurate speech processing, and improving the AI&#39;s handling of multilingual audio nuances.</p>\n<p>Responsibilities:</p>\n<ul>\n<li>Use proprietary software to provide labels, annotations, recordings, and inputs on projects involving multilingual audio clips, voice recordings, speech samples, and auditory elements in various languages.</li>\n</ul>\n<ul>\n<li>Support the delivery of high-quality curated audio data that ensures clear, natural spoken output, accurate representation of linguistic and prosodic details (such as intonation, rhythm, and accent), and professional audio standards.</li>\n</ul>\n<ul>\n<li>Collaborate with technical staff to develop tasks that improve AI&#39;s ability to handle speech modulation, accent variation, noise in real-world recordings, and multilingual audio processing.</li>\n</ul>\n<ul>\n<li>Work with technical staff to improve annotation tools for efficient audio workflows.</li>\n</ul>\n<p>Basic Qualifications:</p>\n<ul>\n<li>Native proficiency in Italian with exposure to diverse accents, dialects, or regional variations.</li>\n</ul>\n<ul>\n<li>Proficiency in English (minimum B2 level) with clear, natural vocal delivery and pronunciation suitable for audio recording purposes.</li>\n</ul>\n<ul>\n<li>Strong auditory perception to identify nuances in speech, accents, pronunciation, intonation, and audio quality across languages.</li>\n</ul>\n<ul>\n<li>Demonstrated ability to handle multilingual audio content, including evaluating speech accuracy, cultural vocal expressions, and 
contextual interpretation in spoken form.</li>\n</ul>\n<ul>\n<li>Demonstrated ability to transcribe audio with high accuracy across accents and varying audio quality.</li>\n</ul>\n<ul>\n<li>Comfort providing high-quality voice recordings and feedback on audio samples in multiple languages.</li>\n</ul>\n<ul>\n<li>Strong comprehension skills and the ability to make independent judgments on ambiguous or varied audio material, including noisy or accented speech.</li>\n</ul>\n<ul>\n<li>Strong communication, interpersonal, analytical, detail-oriented, and organizational skills, with the ability to articulate audio-related feedback effectively.</li>\n</ul>\n<ul>\n<li>Commitment to developing AI that masters sophisticated multilingual audio capabilities.</li>\n</ul>\n<p>Preferred Skills and Experience:</p>\n<ul>\n<li>Demonstration of exceptional attention to linguistic nuance, auditory detail, and data quality beyond standard transcription work.</li>\n</ul>\n<ul>\n<li>Deep understanding and taste of what good/useful Audio data is.</li>\n</ul>\n<ul>\n<li>Strong command of advanced transcription and annotation practices, including handling disfluencies, accents, and prosodic features (intonation, stress, rhythm, emotion, etc) with high consistency and accuracy.</li>\n</ul>\n<ul>\n<li>Background in linguistics (e.g., phonetics, phonology, sociolinguistics), speech sciences, cognitive science, or a related field, or equivalent practical experience, with demonstrated ability to analyze accent variation, pronunciation differences, and multilingual speech patterns.</li>\n</ul>\n<ul>\n<li>Experience working with speech/audio datasets, annotation workflows, or AI training data, including knowledge/experience with training voice models, and an understanding of how data quality impacts model performance.</li>\n</ul>\n<ul>\n<li>Professional experience in voice work, including voice acting, voice recording, podcasting with a measurable audience (e.g., X following), or similar audio 
production demonstrating attention to clarity and recording quality.</li>\n</ul>\n<ul>\n<li>Demonstrated ability to exercise independent judgment in ambiguous audio scenarios and make consistent, defensible annotation decisions.</li>\n</ul>\n<ul>\n<li>Portfolio (strongly preferred for advanced candidates): Voice samples, annotated transcripts, or audio-related work demonstrating quality, methodology, and attention to detail.</li>\n</ul>\n<ul>\n<li>Candidates with professional experience in voice, linguistics, speech data, or speech evaluation and research are especially encouraged to apply.</li>\n</ul>\n<p>Location and Other Expectations:</p>\n<ul>\n<li>Tutor roles may be offered as full-time, part-time, or contractor positions, depending on role needs and candidate fit.</li>\n</ul>\n<ul>\n<li>For contractor positions, hours will vary widely based on project scope and contractor availability, with no fixed commitments required. On average, most projects may require at least 10 hours per week to deliver effectively, though this is not a fixed commitment and depends on the scope of work. Contractors have full flexibility to set their own hours and determine the exact amount of time needed to complete deliverables.</li>\n</ul>\n<ul>\n<li>Tutor roles may be performed remotely from any location worldwide, subject to legal eligibility, time-zone compatibility, and role-specific needs.</li>\n</ul>\n<ul>\n<li>For US-based candidates, please note that we are unable to hire in Wyoming and Illinois at this time.</li>\n</ul>\n<ul>\n<li>We are unable to provide visa sponsorship.</li>\n</ul>\n<ul>\n<li>For those who will be working from a personal device, your computer must be a Chromebook, a Mac with macOS 11.0 or later, or Windows 10 or later.</li>\n</ul>\n<p>Compensation and Benefits:</p>\n<p>US-based candidates: $35/hour - $45/hour depending on factors including relevant experience, skills, education, geographic location, and qualifications. 
International candidates: Information will be provided to you during the recruitment process.</p>\n<p>Benefits vary based on employment type, location, and jurisdiction. Benefits for eligible U.S.-based positions include health insurance, 401(k) plan, and paid sick leave. Specific details and role-specific information will be provided to you during the interview process.</p>\n<p style=\"margin-top:24px;font-size:13px;color:#666;\">XML job scraping automation by <a href=\"https://yubhub.co\">YubHub</a></p>","url":"https://yubhub.co/jobs/job_738a6055-653","directApply":true,"hiringOrganization":{"@type":"Organization","name":"xAI","sameAs":"https://www.xai.com/","logo":"https://logos.yubhub.co/xai.com.png"},"x-apply-url":"https://job-boards.greenhouse.io/xai/jobs/5090209007","x-work-arrangement":"remote","x-experience-level":"mid","x-job-type":"full-time|part-time|contract","x-salary-range":"$35/hour - $45/hour","x-skills-required":["multilingual audio capabilities","speech recognition","auditory experiences","linguistic and prosodic details","professional audio standards","accent variation","noise in real-world recordings","multilingual audio processing","annotation tools","efficient audio workflows","native proficiency in Italian","English (minimum B2 level)","strong auditory perception","nuances in speech","accents","pronunciation","audio quality","speech accuracy","cultural vocal expressions","contextual interpretation","transcription","voice recordings","feedback on audio samples","independent judgments","ambiguous audio material","communication","interpersonal","analytical","detail-oriented","organizational","audio-related feedback","commitment to developing AI"],"x-skills-preferred":["exceptional attention to linguistic nuance","auditory detail","data quality","deep understanding of good/useful Audio data","advanced transcription and annotation practices","handling disfluencies","prosodic features","background in 
linguistics","phonetics","phonology","sociolinguistics","speech sciences","cognitive science","equivalent practical experience","analysis of accent variation","pronunciation differences","multilingual speech patterns","experience working with speech/audio datasets","annotation workflows","AI training data","training voice models","understanding of data quality impacts model performance","professional experience in voice work","voice acting","voice recording","podcasting","similar audio production","exercise independent judgment","defensible annotation decisions","portfolio","voice samples","annotated transcripts","audio-related work"],"datePosted":"2026-04-18T15:21:33.699Z","jobLocation":{"@type":"Place","address":{"@type":"PostalAddress","addressLocality":"Remote"}},"jobLocationType":"TELECOMMUTE","employmentType":"FULL_TIME","occupationalCategory":"Engineering","industry":"Technology","skills":"multilingual audio capabilities, speech recognition, auditory experiences, linguistic and prosodic details, professional audio standards, accent variation, noise in real-world recordings, multilingual audio processing, annotation tools, efficient audio workflows, native proficiency in Italian, English (minimum B2 level), strong auditory perception, nuances in speech, accents, pronunciation, audio quality, speech accuracy, cultural vocal expressions, contextual interpretation, transcription, voice recordings, feedback on audio samples, independent judgments, ambiguous audio material, communication, interpersonal, analytical, detail-oriented, organizational, audio-related feedback, commitment to developing AI, exceptional attention to linguistic nuance, auditory detail, data quality, deep understanding of good/useful Audio data, advanced transcription and annotation practices, handling disfluencies, prosodic features, background in linguistics, phonetics, phonology, sociolinguistics, speech sciences, cognitive science, equivalent practical experience, analysis of accent 
variation, pronunciation differences, multilingual speech patterns, experience working with speech/audio datasets, annotation workflows, AI training data, training voice models, understanding of data quality impacts model performance, professional experience in voice work, voice acting, voice recording, podcasting, similar audio production, exercise independent judgment, defensible annotation decisions, portfolio, voice samples, annotated transcripts, audio-related work"},{"@context":"https://schema.org","@type":"JobPosting","identifier":{"@type":"PropertyValue","name":"YubHub","value":"job_5f816847-74d"},"title":"AI Tutor - Indonesian","description":"<p>As an AI Tutor specialized in multilingual audio capabilities, you will contribute to xAI&#39;s mission by training and refining Grok to excel in voice interactions, speech recognition, and auditory experiences across diverse languages, accents, and cultural contexts.</p>\n<p>Your work will focus on curating and annotating high-quality audio data to enhance Grok&#39;s global accessibility, enabling natural spoken interactions for users worldwide, bridging language barriers through accurate speech processing, and improving the AI&#39;s handling of multilingual audio nuances.</p>\n<p>Responsibilities:</p>\n<ul>\n<li>Use proprietary software to provide labels, annotations, recordings, and inputs on projects involving multilingual audio clips, voice recordings, speech samples, and auditory elements in various languages.</li>\n</ul>\n<ul>\n<li>Support the delivery of high-quality curated audio data that ensures clear, natural spoken output, accurate representation of linguistic and prosodic details (such as intonation, rhythm, and accent), and professional audio standards.</li>\n</ul>\n<ul>\n<li>Collaborate with technical staff to develop tasks that improve AI&#39;s ability to handle speech modulation, accent variation, noise in real-world recordings, and multilingual audio processing.</li>\n</ul>\n<ul>\n<li>Work with 
technical staff to improve annotation tools for efficient audio workflows.</li>\n</ul>\n<p>Basic Qualifications:</p>\n<ul>\n<li>Native proficiency in Indonesian with exposure to diverse accents, dialects, or regional variations.</li>\n</ul>\n<ul>\n<li>Proficiency in English (minimum B2 level) with clear, natural vocal delivery and pronunciation suitable for audio recording purposes.</li>\n</ul>\n<ul>\n<li>Strong auditory perception to identify nuances in speech, accents, pronunciation, intonation, and audio quality across languages.</li>\n</ul>\n<ul>\n<li>Demonstrated ability to handle multilingual audio content, including evaluating speech accuracy, cultural vocal expressions, and contextual interpretation in spoken form.</li>\n</ul>\n<ul>\n<li>Demonstrated ability to transcribe audio with high accuracy across accents and varying audio quality.</li>\n</ul>\n<ul>\n<li>Comfort providing high-quality voice recordings and feedback on audio samples in multiple languages.</li>\n</ul>\n<ul>\n<li>Strong comprehension skills and the ability to make independent judgments on ambiguous or varied audio material, including noisy or accented speech.</li>\n</ul>\n<ul>\n<li>Strong communication, interpersonal, analytical, detail-oriented, and organizational skills, with the ability to articulate audio-related feedback effectively.</li>\n</ul>\n<ul>\n<li>Commitment to developing AI that masters sophisticated multilingual audio capabilities.</li>\n</ul>\n<p>Preferred Skills and Experience:</p>\n<ul>\n<li>Demonstration of exceptional attention to linguistic nuance, auditory detail, and data quality beyond standard transcription work.</li>\n</ul>\n<ul>\n<li>Deep understanding and taste of what good/useful Audio data is.</li>\n</ul>\n<ul>\n<li>Strong command of advanced transcription and annotation practices, including handling disfluencies, accents, and prosodic features (intonation, stress, rhythm, emotion, etc) with high consistency and accuracy.</li>\n</ul>\n<ul>\n<li>Background in 
linguistics (e.g., phonetics, phonology, sociolinguistics), speech sciences, cognitive science, or a related field, or equivalent practical experience, with demonstrated ability to analyze accent variation, pronunciation differences, and multilingual speech patterns.</li>\n</ul>\n<ul>\n<li>Experience working with speech/audio datasets, annotation workflows, or AI training data, including knowledge/experience with training voice models, and an understanding of how data quality impacts model performance.</li>\n</ul>\n<ul>\n<li>Professional experience in voice work, including voice acting, voice recording, podcasting with a measurable audience (e.g., X following), or similar audio production demonstrating attention to clarity and recording quality.</li>\n</ul>\n<ul>\n<li>Demonstrated ability to exercise independent judgment in ambiguous audio scenarios and make consistent, defensible annotation decisions.</li>\n</ul>\n<ul>\n<li>Portfolio (strongly preferred for advanced candidates): Voice samples, annotated transcripts, or audio-related work demonstrating quality, methodology, and attention to detail.</li>\n</ul>\n<ul>\n<li>Candidates with professional experience in voice, linguistics, speech data, or speech evaluation and research are especially encouraged to apply.</li>\n</ul>\n<p>Location and Other Expectations:</p>\n<ul>\n<li>Tutor roles may be offered as full-time, part-time, or contractor positions, depending on role needs and candidate fit.</li>\n</ul>\n<ul>\n<li>For contractor positions, hours will vary widely based on project scope and contractor availability, with no fixed commitments required. On average, most projects may require at least 10 hours per week to deliver effectively, though this is not a fixed commitment and depends on the scope of work. 
Contractors have full flexibility to set their own hours and determine the exact amount of time needed to complete deliverables.</li>\n</ul>\n<ul>\n<li>Tutor roles may be performed remotely from any location worldwide, subject to legal eligibility, time-zone compatibility, and role-specific needs.</li>\n</ul>\n<ul>\n<li>For US-based candidates, please note that we are unable to hire in Wyoming and Illinois at this time.</li>\n</ul>\n<ul>\n<li>We are unable to provide visa sponsorship.</li>\n</ul>\n<ul>\n<li>For those who will be working from a personal device, your computer must be a Chromebook, a Mac with macOS 11.0 or later, or Windows 10 or later.</li>\n</ul>\n<p>Compensation and Benefits:</p>\n<p>US-based candidates: $35/hour - $45/hour depending on factors including relevant experience, skills, education, geographic location, and qualifications. International candidates: Information will be provided to you during the recruitment process.</p>\n<p>Benefits vary based on employment type, location, and jurisdiction. Benefits for eligible U.S.-based positions include health insurance, 401(k) plan, and paid sick leave. 
Specific details and role-specific information will be provided to you during the interview process.</p>\n<p style=\"margin-top:24px;font-size:13px;color:#666;\">XML job scraping automation by <a href=\"https://yubhub.co\">YubHub</a></p>","url":"https://yubhub.co/jobs/job_5f816847-74d","directApply":true,"hiringOrganization":{"@type":"Organization","name":"xAI","sameAs":"https://www.xai.com/","logo":"https://logos.yubhub.co/xai.com.png"},"x-apply-url":"https://job-boards.greenhouse.io/xai/jobs/5095657007","x-work-arrangement":"remote","x-experience-level":"mid","x-job-type":"full-time|part-time|contract","x-salary-range":"$35/hour - $45/hour","x-skills-required":["Multilingual audio capabilities","Proprietary software","Audio data curation","Annotation tools","Speech recognition","Auditory experiences","Linguistic and prosodic details","Professional audio standards","Accent variation","Noise in real-world recordings","Multilingual audio processing","Data quality","Independent judgment","Audio-related feedback","Multilingual speech patterns","Speech/audio datasets","Annotation workflows","AI training data","Voice models","Data quality impacts model performance","Voice work","Voice acting","Voice recording","Podcasting","Audio production","Clarity and recording quality","Defensible annotation decisions","Portfolio","Voice samples","Annotated transcripts","Audio-related work"],"x-skills-preferred":["Exceptional attention to linguistic nuance","Auditory detail","Deep understanding and taste of what good/useful Audio data is","Strong command of advanced transcription and annotation practices","Handling disfluencies","Accents","Prosodic features","Intonation","Stress","Rhythm","Emotion","Background in linguistics","Phonetics","Phonology","Sociolinguistics","Speech sciences","Cognitive science","Equivalent practical experience","Analyzing accent variation","Pronunciation differences","Experience working with speech/audio datasets","Training voice models","Understanding 
data quality impacts model performance","Professional experience in voice work","Similar audio production","Attention to clarity and recording quality","Independent judgment in ambiguous audio scenarios"],"datePosted":"2026-04-18T15:21:16.886Z","jobLocation":{"@type":"Place","address":{"@type":"PostalAddress","addressLocality":"Remote"}},"jobLocationType":"TELECOMMUTE","employmentType":"FULL_TIME","occupationalCategory":"Engineering","industry":"Technology","skills":"Multilingual audio capabilities, Proprietary software, Audio data curation, Annotation tools, Speech recognition, Auditory experiences, Linguistic and prosodic details, Professional audio standards, Accent variation, Noise in real-world recordings, Multilingual audio processing, Data quality, Independent judgment, Audio-related feedback, Multilingual speech patterns, Speech/audio datasets, Annotation workflows, AI training data, Voice models, Data quality impacts model performance, Voice work, Voice acting, Voice recording, Podcasting, Audio production, Clarity and recording quality, Defensible annotation decisions, Portfolio, Voice samples, Annotated transcripts, Audio-related work, Exceptional attention to linguistic nuance, Auditory detail, Deep understanding and taste of what good/useful Audio data is, Strong command of advanced transcription and annotation practices, Handling disfluencies, Accents, Prosodic features, Intonation, Stress, Rhythm, Emotion, Background in linguistics, Phonetics, Phonology, Sociolinguistics, Speech sciences, Cognitive science, Equivalent practical experience, Analyzing accent variation, Pronunciation differences, Experience working with speech/audio datasets, Training voice models, Understanding data quality impacts model performance, Professional experience in voice work, Similar audio production, Attention to clarity and recording quality, Independent judgment in ambiguous audio 
scenarios"},{"@context":"https://schema.org","@type":"JobPosting","identifier":{"@type":"PropertyValue","name":"YubHub","value":"job_047a7c93-c55"},"title":"AI Tutor - Hindi","description":"<p>As an AI Tutor specialized in multilingual audio capabilities, you will contribute to xAI&#39;s mission by training and refining Grok to excel in voice interactions, speech recognition, and auditory experiences across diverse languages, accents, and cultural contexts.</p>\n<p>Your work will focus on curating and annotating high-quality audio data to enhance Grok&#39;s global accessibility, enabling natural spoken interactions for users worldwide, bridging language barriers through accurate speech processing, and improving the AI&#39;s handling of multilingual audio nuances.</p>\n<p>Responsibilities: Use proprietary software to provide labels, annotations, recordings, and inputs on projects involving multilingual audio clips, voice recordings, speech samples, and auditory elements in various languages. Support the delivery of high-quality curated audio data that ensures clear, natural spoken output, accurate representation of linguistic and prosodic details (such as intonation, rhythm, and accent), and professional audio standards. Collaborate with technical staff to develop tasks that improve AI&#39;s ability to handle speech modulation, accent variation, noise in real-world recordings, and multilingual audio processing. Work with technical staff to improve annotation tools for efficient audio workflows.</p>\n<p>Basic Qualifications: Native proficiency in Hindi with exposure to diverse accents, dialects, or regional variations. Proficiency in English (minimum B2 level) with clear, natural vocal delivery and pronunciation suitable for audio recording purposes. Strong auditory perception to identify nuances in speech, accents, pronunciation, intonation, and audio quality across languages. 
Demonstrated ability to handle multilingual audio content, including evaluating speech accuracy, cultural vocal expressions, and contextual interpretation in spoken form. Demonstrated ability to transcribe audio with high accuracy across accents and varying audio quality. Comfort providing high-quality voice recordings and feedback on audio samples in multiple languages. Strong comprehension skills and the ability to make independent judgments on ambiguous or varied audio material, including noisy or accented speech. Strong communication, interpersonal, analytical, detail-oriented, and organizational skills, with the ability to articulate audio-related feedback effectively. Commitment to developing AI that masters sophisticated multilingual audio capabilities.</p>\n<p>Preferred Skills and Experience: Demonstration of exceptional attention to linguistic nuance, auditory detail, and data quality beyond standard transcription work. Deep understanding and taste of what good/useful Audio data is. Strong command of advanced transcription and annotation practices, including handling disfluencies, accents, and prosodic features (intonation, stress, rhythm, emotion, etc) with high consistency and accuracy. Background in linguistics (e.g., phonetics, phonology, sociolinguistics), speech sciences, cognitive science, or a related field, or equivalent practical experience, with demonstrated ability to analyze accent variation, pronunciation differences, and multilingual speech patterns. Experience working with speech/audio datasets, annotation workflows, or AI training data, including knowledge/experience with training voice models, and an understanding of how data quality impacts model performance. Professional experience in voice work, including voice acting, voice recording, podcasting with a measurable audience (e.g., X following), or similar audio production demonstrating attention to clarity and recording quality. 
Demonstrated ability to exercise independent judgment in ambiguous audio scenarios and make consistent, defensible annotation decisions. Portfolio (strongly preferred for advanced candidates): Voice samples, annotated transcripts, or audio-related work demonstrating quality, methodology, and attention to detail. Candidates with professional experience in voice, linguistics, speech data, or speech evaluation and research are especially encouraged to apply.</p>\n<p>Location and Other Expectations: Tutor roles may be offered as full-time, part-time, or contractor positions, depending on role needs and candidate fit. For contractor positions, hours will vary widely based on project scope and contractor availability, with no fixed commitments required. On average, most projects may require at least 10 hours per week to deliver effectively, though this is not a fixed commitment and depends on the scope of work. Contractors have full flexibility to set their own hours and determine the exact amount of time needed to complete deliverables. Tutor roles may be performed remotely from any location worldwide, subject to legal eligibility, time-zone compatibility, and role-specific needs. For US-based candidates, please note that we are unable to hire in Wyoming and Illinois at this time. We are unable to provide visa sponsorship. For those who will be working from a personal device, your computer must be a Chromebook, a Mac with macOS 11.0 or later, or Windows 10 or later.</p>\n<p>Compensation and Benefits: US-based candidates: $35/hour - $45/hour depending on factors including relevant experience, skills, education, geographic location, and qualifications. International candidates: Information will be provided to you during the recruitment process. Benefits vary based on employment type, location, and jurisdiction. Benefits for eligible U.S.-based positions include health insurance, 401(k) plan, and paid sick leave. 
Specific details and role-specific information will be provided to you during the interview process.</p>\n<p style=\"margin-top:24px;font-size:13px;color:#666;\">XML job scraping automation by <a href=\"https://yubhub.co\">YubHub</a></p>","url":"https://yubhub.co/jobs/job_047a7c93-c55","directApply":true,"hiringOrganization":{"@type":"Organization","name":"xAI","sameAs":"https://www.xai.com/","logo":"https://logos.yubhub.co/xai.com.png"},"x-apply-url":"https://job-boards.greenhouse.io/xai/jobs/5090207007","x-work-arrangement":"remote","x-experience-level":"mid","x-job-type":"full-time|part-time|contract","x-salary-range":"$35/hour - $45/hour","x-skills-required":["Multilingual audio capabilities","Proprietary software","Audio data curation","Annotation tools","Speech recognition","Auditory experiences","Diverse languages","Accents","Cultural contexts","High-quality audio data","Clear spoken output","Linguistic and prosodic details","Professional audio standards","Speech modulation","Accent variation","Noise in real-world recordings","Multilingual audio processing","Efficient audio workflows","Native proficiency in Hindi","English (minimum B2 level)","Strong auditory perception","Multilingual audio content","Speech accuracy","Cultural vocal expressions","Contextual interpretation","Transcription","Audio quality","Voice recordings","Feedback on audio samples","Independent judgments","Ambiguous audio material","Noisy or accented speech","Communication","Interpersonal","Analytical","Detail-oriented","Organizational","Independent judgment","Defensible annotation decisions","Voice samples","Annotated transcripts","Audio-related work","Quality","Methodology","Attention to detail"],"x-skills-preferred":["Exceptional attention to linguistic nuance","Auditory detail","Data quality","Advanced transcription and annotation practices","Disfluencies","Prosodic features","Intonation","Stress","Rhythm","Emotion","Linguistics","Phonetics","Phonology","Sociolinguistics","Speech 
sciences","Cognitive science","Pronunciation differences","Multilingual speech patterns","Speech/audio datasets","Annotation workflows","AI training data","Training voice models","Data quality impacts model performance","Voice work","Voice acting","Voice recording","Podcasting","Measurable audience","Clarity and recording quality"],"datePosted":"2026-04-18T15:21:00.615Z","jobLocation":{"@type":"Place","address":{"@type":"PostalAddress","addressLocality":"Remote"}},"jobLocationType":"TELECOMMUTE","employmentType":"FULL_TIME","occupationalCategory":"Engineering","industry":"Technology","skills":"Multilingual audio capabilities, Proprietary software, Audio data curation, Annotation tools, Speech recognition, Auditory experiences, Diverse languages, Accents, Cultural contexts, High-quality audio data, Clear spoken output, Linguistic and prosodic details, Professional audio standards, Speech modulation, Accent variation, Noise in real-world recordings, Multilingual audio processing, Efficient audio workflows, Native proficiency in Hindi, English (minimum B2 level), Strong auditory perception, Multilingual audio content, Speech accuracy, Cultural vocal expressions, Contextual interpretation, Transcription, Audio quality, Voice recordings, Feedback on audio samples, Independent judgments, Ambiguous audio material, Noisy or accented speech, Communication, Interpersonal, Analytical, Detail-oriented, Organizational, Independent judgment, Defensible annotation decisions, Voice samples, Annotated transcripts, Audio-related work, Quality, Methodology, Attention to detail, Exceptional attention to linguistic nuance, Auditory detail, Data quality, Advanced transcription and annotation practices, Disfluencies, Prosodic features, Intonation, Stress, Rhythm, Emotion, Linguistics, Phonetics, Phonology, Sociolinguistics, Speech sciences, Cognitive science, Pronunciation differences, Multilingual speech patterns, Speech/audio datasets, Annotation workflows, AI training data, Training 
voice models, Data quality impacts model performance, Voice work, Voice acting, Voice recording, Podcasting, Measurable audience, Clarity and recording quality"},{"@context":"https://schema.org","@type":"JobPosting","identifier":{"@type":"PropertyValue","name":"YubHub","value":"job_de2d9d14-939"},"title":"AI Tutor - Spanish","description":"<p>As an AI Tutor specialized in multilingual audio capabilities, you will contribute to xAI&#39;s mission by training and refining Grok to excel in voice interactions, speech recognition, and auditory experiences across diverse languages, accents, and cultural contexts.</p>\n<p>Your work will focus on curating and annotating high-quality audio data to enhance Grok&#39;s global accessibility, enabling natural spoken interactions for users worldwide, bridging language barriers through accurate speech processing, and improving the AI&#39;s handling of multilingual audio nuances.</p>\n<p><strong>Responsibilities:</strong> Use proprietary software to provide labels, annotations, recordings, and inputs on projects involving multilingual audio clips, voice recordings, speech samples, and auditory elements in various languages. Support the delivery of high-quality curated audio data that ensures clear, natural spoken output, accurate representation of linguistic and prosodic details (such as intonation, rhythm, and accent), and professional audio standards. Collaborate with technical staff to develop tasks that improve AI&#39;s ability to handle speech modulation, accent variation, noise in real-world recordings, and multilingual audio processing. Work with technical staff to improve annotation tools for efficient audio workflows.</p>\n<p><strong>Basic Qualifications:</strong> Native proficiency in Spanish with exposure to diverse accents, dialects, or regional variations. Proficiency in English (minimum B2 level) with clear, natural vocal delivery and pronunciation suitable for audio recording purposes. 
Strong auditory perception to identify nuances in speech, accents, pronunciation, intonation, and audio quality across languages. Demonstrated ability to handle multilingual audio content, including evaluating speech accuracy, cultural vocal expressions, and contextual interpretation in spoken form. Demonstrated ability to transcribe audio with high accuracy across accents and varying audio quality. Comfort providing high-quality voice recordings and feedback on audio samples in multiple languages. Strong comprehension skills and the ability to make independent judgments on ambiguous or varied audio material, including noisy or accented speech. Strong communication, interpersonal, analytical, detail-oriented, and organizational skills, with the ability to articulate audio-related feedback effectively. Commitment to developing AI that masters sophisticated multilingual audio capabilities.</p>\n<p><strong>Preferred Skills and Experience:</strong> Demonstration of exceptional attention to linguistic nuance, auditory detail, and data quality beyond standard transcription work. Deep understanding and taste of what good/useful Audio data is. Strong command of advanced transcription and annotation practices, including handling disfluencies, accents, and prosodic features (intonation, stress, rhythm, emotion, etc) with high consistency and accuracy. Background in linguistics (e.g., phonetics, phonology, sociolinguistics), speech sciences, cognitive science, or a related field, or equivalent practical experience, with demonstrated ability to analyze accent variation, pronunciation differences, and multilingual speech patterns. Experience working with speech/audio datasets, annotation workflows, or AI training data, including knowledge/experience with training voice models, and an understanding of how data quality impacts model performance. 
Professional experience in voice work, including voice acting, voice recording, podcasting with a measurable audience (e.g., X following), or similar audio production demonstrating attention to clarity and recording quality. Demonstrated ability to exercise independent judgment in ambiguous audio scenarios and make consistent, defensible annotation decisions. Portfolio (strongly preferred for advanced candidates): Voice samples, annotated transcripts, or audio-related work demonstrating quality, methodology, and attention to detail. Candidates with professional experience in voice, linguistics, speech data, or speech evaluation and research are especially encouraged to apply.</p>\n<p style=\"margin-top:24px;font-size:13px;color:#666;\">XML job scraping automation by <a href=\"https://yubhub.co\">YubHub</a></p>","url":"https://yubhub.co/jobs/job_de2d9d14-939","directApply":true,"hiringOrganization":{"@type":"Organization","name":"xAI","sameAs":"https://www.xai.com/","logo":"https://logos.yubhub.co/xai.com.png"},"x-apply-url":"https://job-boards.greenhouse.io/xai/jobs/5090264007","x-work-arrangement":"remote","x-experience-level":"mid","x-job-type":"full-time|part-time|contract","x-salary-range":"$35/hour - $45/hour","x-skills-required":["Spanish","English","Audio annotation","Speech recognition","Multilingual audio processing"],"x-skills-preferred":["Advanced transcription and annotation practices","Linguistics","Speech sciences","Cognitive science","Voice work"],"datePosted":"2026-04-18T15:20:57.454Z","jobLocation":{"@type":"Place","address":{"@type":"PostalAddress","addressLocality":"Remote"}},"jobLocationType":"TELECOMMUTE","employmentType":"FULL_TIME","occupationalCategory":"Engineering","industry":"Technology","skills":"Spanish, English, Audio annotation, Speech recognition, Multilingual audio processing, Advanced transcription and annotation practices, Linguistics, Speech sciences, Cognitive science, Voice 
work"},{"@context":"https://schema.org","@type":"JobPosting","identifier":{"@type":"PropertyValue","name":"YubHub","value":"job_a15f16b0-f9b"},"title":"AI Tutor - Hebrew","description":"<p>As an AI Tutor specialized in multilingual audio capabilities, you will contribute to xAI&#39;s mission by training and refining Grok to excel in voice interactions, speech recognition, and auditory experiences across diverse languages, accents, and cultural contexts.</p>\n<p>Your work will focus on curating and annotating high-quality audio data to enhance Grok&#39;s global accessibility, enabling natural spoken interactions for users worldwide, bridging language barriers through accurate speech processing, and improving the AI&#39;s handling of multilingual audio nuances.</p>\n<p>Responsibilities:</p>\n<ul>\n<li>Use proprietary software to provide labels, annotations, recordings, and inputs on projects involving multilingual audio clips, voice recordings, speech samples, and auditory elements in various languages.</li>\n</ul>\n<ul>\n<li>Support the delivery of high-quality curated audio data that ensures clear, natural spoken output, accurate representation of linguistic and prosodic details (such as intonation, rhythm, and accent), and professional audio standards.</li>\n</ul>\n<ul>\n<li>Collaborate with technical staff to develop tasks that improve AI&#39;s ability to handle speech modulation, accent variation, noise in real-world recordings, and multilingual audio processing.</li>\n</ul>\n<ul>\n<li>Work with technical staff to improve annotation tools for efficient audio workflows.</li>\n</ul>\n<p>Basic Qualifications:</p>\n<ul>\n<li>Native proficiency in Hebrew with exposure to diverse accents, dialects, or regional variations.</li>\n</ul>\n<ul>\n<li>Proficiency in English (minimum B2 level) with clear, natural vocal delivery and pronunciation suitable for audio recording purposes.</li>\n</ul>\n<ul>\n<li>Strong auditory perception to identify nuances in speech, accents, 
pronunciation, intonation, and audio quality across languages.</li>\n</ul>\n<ul>\n<li>Demonstrated ability to handle multilingual audio content, including evaluating speech accuracy, cultural vocal expressions, and contextual interpretation in spoken form.</li>\n</ul>\n<ul>\n<li>Demonstrated ability to transcribe audio with high accuracy across accents and varying audio quality.</li>\n</ul>\n<ul>\n<li>Comfort providing high-quality voice recordings and feedback on audio samples in multiple languages.</li>\n</ul>\n<ul>\n<li>Strong comprehension skills and the ability to make independent judgments on ambiguous or varied audio material, including noisy or accented speech.</li>\n</ul>\n<ul>\n<li>Strong communication, interpersonal, analytical, detail-oriented, and organizational skills, with the ability to articulate audio-related feedback effectively.</li>\n</ul>\n<ul>\n<li>Commitment to developing AI that masters sophisticated multilingual audio capabilities.</li>\n</ul>\n<p>Preferred Skills and Experience:</p>\n<ul>\n<li>Demonstration of exceptional attention to linguistic nuance, auditory detail, and data quality beyond standard transcription work.</li>\n</ul>\n<ul>\n<li>Deep understanding and taste of what good/useful Audio data is.</li>\n</ul>\n<ul>\n<li>Strong command of advanced transcription and annotation practices, including handling disfluencies, accents, and prosodic features (intonation, stress, rhythm, emotion, etc) with high consistency and accuracy.</li>\n</ul>\n<ul>\n<li>Background in linguistics (e.g., phonetics, phonology, sociolinguistics), speech sciences, cognitive science, or a related field, or equivalent practical experience, with demonstrated ability to analyze accent variation, pronunciation differences, and multilingual speech patterns.</li>\n</ul>\n<ul>\n<li>Experience working with speech/audio datasets, annotation workflows, or AI training data, including knowledge/experience with training voice models, and an understanding of how data 
quality impacts model performance.</li>\n</ul>\n<ul>\n<li>Professional experience in voice work, including voice acting, voice recording, podcasting with a measurable audience (e.g., X following), or similar audio production demonstrating attention to clarity and recording quality.</li>\n</ul>\n<ul>\n<li>Demonstrated ability to exercise independent judgment in ambiguous audio scenarios and make consistent, defensible annotation decisions.</li>\n</ul>\n<ul>\n<li>Portfolio (strongly preferred for advanced candidates): Voice samples, annotated transcripts, or audio-related work demonstrating quality, methodology, and attention to detail.</li>\n</ul>\n<ul>\n<li>Candidates with professional experience in voice, linguistics, speech data, or speech evaluation and research are especially encouraged to apply.</li>\n</ul>\n<p>Location and Other Expectations:</p>\n<ul>\n<li>Tutor roles may be offered as full-time, part-time, or contractor positions, depending on role needs and candidate fit.</li>\n</ul>\n<ul>\n<li>For contractor positions, hours will vary widely based on project scope and contractor availability, with no fixed commitments required. On average, most projects may require at least 10 hours per week to deliver effectively, though this is not a fixed commitment and depends on the scope of work. 
Contractors have full flexibility to set their own hours and determine the exact amount of time needed to complete deliverables.</li>\n</ul>\n<ul>\n<li>Tutor roles may be performed remotely from any location worldwide, subject to legal eligibility, time-zone compatibility, and role-specific needs.</li>\n</ul>\n<ul>\n<li>For US-based candidates, please note that we are unable to hire in Wyoming and Illinois at this time.</li>\n</ul>\n<ul>\n<li>We are unable to provide visa sponsorship.</li>\n</ul>\n<ul>\n<li>For those who will be working from a personal device, your computer must be a Chromebook, a Mac with macOS 11.0 or later, or Windows 10 or later.</li>\n</ul>\n<p>Compensation and Benefits:</p>\n<p>US-based candidates: $35/hour - $45/hour depending on factors including relevant experience, skills, education, geographic location, and qualifications. International candidates: Information will be provided to you during the recruitment process.</p>\n<p>Benefits vary based on employment type, location, and jurisdiction. Benefits for eligible U.S.-based positions include health insurance, 401(k) plan, and paid sick leave. 
Specific details and role-specific information will be provided to you during the interview process.</p>\n<p style=\"margin-top:24px;font-size:13px;color:#666;\">XML job scraping automation by <a href=\"https://yubhub.co\">YubHub</a></p>","url":"https://yubhub.co/jobs/job_a15f16b0-f9b","directApply":true,"hiringOrganization":{"@type":"Organization","name":"xAI","sameAs":"https://www.xai.com/","logo":"https://logos.yubhub.co/xai.com.png"},"x-apply-url":"https://job-boards.greenhouse.io/xai/jobs/5090206007","x-work-arrangement":"remote","x-experience-level":"mid","x-job-type":"full-time|part-time|contract","x-salary-range":"$35/hour - $45/hour","x-skills-required":["Multilingual audio capabilities","Proprietary software","High-quality audio data","Speech recognition","Auditory experiences","Diverse languages","Accents","Cultural contexts","Audio clips","Voice recordings","Speech samples","Auditory elements","Professional audio standards","Speech modulation","Accent variation","Noise in real-world recordings","Multilingual audio processing","Annotation tools","Efficient audio workflows","Native proficiency in Hebrew","Proficiency in English","Strong auditory perception","Multilingual audio content","Speech accuracy","Cultural vocal expressions","Contextual interpretation","Transcription","Audio quality","Comfort providing high-quality voice recordings","Feedback on audio samples","Strong comprehension skills","Independent judgments","Ambiguous audio material","Noisy or accented speech","Communication skills","Interpersonal skills","Analytical skills","Detail-oriented skills","Organizational skills","Commitment to developing AI","Sophisticated multilingual audio capabilities"],"x-skills-preferred":["Exceptional attention to linguistic nuance","Auditory detail","Data quality","Advanced transcription and annotation practices","Handling disfluencies","Prosodic features","Intonation","Stress","Rhythm","Emotion","Background in linguistics","Speech sciences","Cognitive 
science","Linguistics","Phonetics","Phonology","Sociolinguistics","Pronunciation differences","Multilingual speech patterns","Experience working with speech/audio datasets","Annotation workflows","AI training data","Training voice models","Data quality impacts model performance","Professional experience in voice work","Voice acting","Voice recording","Podcasting","Audio production","Attention to clarity and recording quality","Independent judgment in ambiguous audio scenarios","Defensible annotation decisions","Portfolio","Voice samples","Annotated transcripts","Audio-related work","Quality","Methodology","Attention to detail"],"datePosted":"2026-04-18T15:20:42.554Z","jobLocation":{"@type":"Place","address":{"@type":"PostalAddress","addressLocality":"Remote"}},"jobLocationType":"TELECOMMUTE","employmentType":"FULL_TIME","occupationalCategory":"Engineering","industry":"Technology","skills":"Multilingual audio capabilities, Proprietary software, High-quality audio data, Speech recognition, Auditory experiences, Diverse languages, Accents, Cultural contexts, Audio clips, Voice recordings, Speech samples, Auditory elements, Professional audio standards, Speech modulation, Accent variation, Noise in real-world recordings, Multilingual audio processing, Annotation tools, Efficient audio workflows, Native proficiency in Hebrew, Proficiency in English, Strong auditory perception, Multilingual audio content, Speech accuracy, Cultural vocal expressions, Contextual interpretation, Transcription, Audio quality, Comfort providing high-quality voice recordings, Feedback on audio samples, Strong comprehension skills, Independent judgments, Ambiguous audio material, Noisy or accented speech, Communication skills, Interpersonal skills, Analytical skills, Detail-oriented skills, Organizational skills, Commitment to developing AI, Sophisticated multilingual audio capabilities, Exceptional attention to linguistic nuance, Auditory detail, Data quality, Advanced transcription and 
annotation practices, Handling disfluencies, Prosodic features, Intonation, Stress, Rhythm, Emotion, Background in linguistics, Speech sciences, Cognitive science, Linguistics, Phonetics, Phonology, Sociolinguistics, Pronunciation differences, Multilingual speech patterns, Experience working with speech/audio datasets, Annotation workflows, AI training data, Training voice models, Data quality impacts model performance, Professional experience in voice work, Voice acting, Voice recording, Podcasting, Audio production, Attention to clarity and recording quality, Independent judgment in ambiguous audio scenarios, Defensible annotation decisions, Portfolio, Voice samples, Annotated transcripts, Audio-related work, Quality, Methodology, Attention to detail"},{"@context":"https://schema.org","@type":"JobPosting","identifier":{"@type":"PropertyValue","name":"YubHub","value":"job_01687c8a-4e6"},"title":"AI Tutor - French","description":"<p>As an AI Tutor specialized in multilingual audio capabilities, you will contribute to xAI&#39;s mission by training and refining Grok to excel in voice interactions, speech recognition, and auditory experiences across diverse languages, accents, and cultural contexts.</p>\n<p>Your work will focus on curating and annotating high-quality audio data to enhance Grok&#39;s global accessibility, enabling natural spoken interactions for users worldwide, bridging language barriers through accurate speech processing, and improving the AI&#39;s handling of multilingual audio nuances.</p>\n<p>Responsibilities:</p>\n<p>Use proprietary software to provide labels, annotations, recordings, and inputs on projects involving multilingual audio clips, voice recordings, speech samples, and auditory elements in various languages.</p>\n<p>Support the delivery of high-quality curated audio data that ensures clear, natural spoken output, accurate representation of linguistic and prosodic details (such as intonation, rhythm, and accent), and professional 
audio standards.</p>\n<p>Collaborate with technical staff to develop tasks that improve AI&#39;s ability to handle speech modulation, accent variation, noise in real-world recordings, and multilingual audio processing.</p>\n<p>Work with technical staff to improve annotation tools for efficient audio workflows.</p>\n<p>Basic Qualifications:</p>\n<p>Native proficiency in French with exposure to diverse accents, dialects, or regional variations.</p>\n<p>Proficiency in English (minimum B2 level) with clear, natural vocal delivery and pronunciation suitable for audio recording purposes.</p>\n<p>Strong auditory perception to identify nuances in speech, accents, pronunciation, intonation, and audio quality across languages.</p>\n<p>Demonstrated ability to handle multilingual audio content, including evaluating speech accuracy, cultural vocal expressions, and contextual interpretation in spoken form.</p>\n<p>Demonstrated ability to transcribe audio with high accuracy across accents and varying audio quality.</p>\n<p>Comfort providing high-quality voice recordings and feedback on audio samples in multiple languages.</p>\n<p>Strong comprehension skills and the ability to make independent judgments on ambiguous or varied audio material, including noisy or accented speech.</p>\n<p>Strong communication, interpersonal, analytical, detail-oriented, and organizational skills, with the ability to articulate audio-related feedback effectively.</p>\n<p>Commitment to developing AI that masters sophisticated multilingual audio capabilities.</p>\n<p>Preferred Skills and Experience:</p>\n<p>Demonstration of exceptional attention to linguistic nuance, auditory detail, and data quality beyond standard transcription work.</p>\n<p>Deep understanding and taste of what good/useful Audio data is.</p>\n<p>Strong command of advanced transcription and annotation practices, including handling disfluencies, accents, and prosodic features (intonation, stress, rhythm, emotion, etc) with high 
consistency and accuracy.</p>\n<p>Background in linguistics (e.g., phonetics, phonology, sociolinguistics), speech sciences, cognitive science, or a related field, or equivalent practical experience, with demonstrated ability to analyze accent variation, pronunciation differences, and multilingual speech patterns.</p>\n<p>Experience working with speech/audio datasets, annotation workflows, or AI training data, including knowledge/experience with training voice models, and an understanding of how data quality impacts model performance.</p>\n<p>Professional experience in voice work, including voice acting, voice recording, podcasting with a measurable audience (e.g., X following), or similar audio production demonstrating attention to clarity and recording quality.</p>\n<p>Demonstrated ability to exercise independent judgment in ambiguous audio scenarios and make consistent, defensible annotation decisions.</p>\n<p>Portfolio (strongly preferred for advanced candidates): Voice samples, annotated transcripts, or audio-related work demonstrating quality, methodology, and attention to detail.</p>\n<p>Candidates with professional experience in voice, linguistics, speech data, or speech evaluation and research are especially encouraged to apply.</p>\n<p>Location and Other Expectations:</p>\n<p>Tutor roles may be offered as full-time, part-time, or contractor positions, depending on role needs and candidate fit.</p>\n<p>For contractor positions, hours will vary widely based on project scope and contractor availability, with no fixed commitments required. On average, most projects may require at least 10 hours per week to deliver effectively, though this is not a fixed commitment and depends on the scope of work. 
Contractors have full flexibility to set their own hours and determine the exact amount of time needed to complete deliverables.</p>\n<p>Tutor roles may be performed remotely from any location worldwide, subject to legal eligibility, time-zone compatibility, and role-specific needs.</p>\n<p>For US-based candidates, please note that we are unable to hire in Wyoming and Illinois at this time.</p>\n<p>We are unable to provide visa sponsorship.</p>\n<p>For those who will be working from a personal device, your computer must be a Chromebook, a Mac with macOS 11.0 or later, or Windows 10 or later.</p>\n<p>Compensation and Benefits:</p>\n<p>US-based candidates: $35/hour - $45/hour depending on factors including relevant experience, skills, education, geographic location, and qualifications. International candidates: Information will be provided to you during the recruitment process.</p>\n<p>Benefits vary based on employment type, location, and jurisdiction. Benefits for eligible U.S.-based positions include health insurance, 401(k) plan, and paid sick leave. 
Specific details and role-specific information will be provided to you during the interview process.</p>\n<p style=\"margin-top:24px;font-size:13px;color:#666;\">XML job scraping automation by <a href=\"https://yubhub.co\">YubHub</a></p>","url":"https://yubhub.co/jobs/job_01687c8a-4e6","directApply":true,"hiringOrganization":{"@type":"Organization","name":"xAI","sameAs":"https://www.xai.com","logo":"https://logos.yubhub.co/xai.com.png"},"x-apply-url":"https://job-boards.greenhouse.io/xai/jobs/5090202007","x-work-arrangement":"remote","x-experience-level":"mid","x-job-type":"full-time|part-time|contract","x-salary-range":"$35/hour - $45/hour","x-skills-required":["French","English","Multilingual audio capabilities","Proprietary software","Audio data curation","Annotation tools","Speech modulation","Accent variation","Noise reduction","Multilingual audio processing"],"x-skills-preferred":["Linguistic nuance","Auditory detail","Data quality","Advanced transcription and annotation practices","Disfluencies","Accents","Prosodic features","Linguistics","Speech sciences","Cognitive science","Voice work","Voice acting","Voice recording","Podcasting"],"datePosted":"2026-04-18T15:19:48.609Z","jobLocation":{"@type":"Place","address":{"@type":"PostalAddress","addressLocality":"Remote"}},"jobLocationType":"TELECOMMUTE","employmentType":"FULL_TIME","occupationalCategory":"Engineering","industry":"Technology","skills":"French, English, Multilingual audio capabilities, Proprietary software, Audio data curation, Annotation tools, Speech modulation, Accent variation, Noise reduction, Multilingual audio processing, Linguistic nuance, Auditory detail, Data quality, Advanced transcription and annotation practices, Disfluencies, Accents, Prosodic features, Linguistics, Speech sciences, Cognitive science, Voice work, Voice acting, Voice recording, 
Podcasting"},{"@context":"https://schema.org","@type":"JobPosting","identifier":{"@type":"PropertyValue","name":"YubHub","value":"job_9c65d655-aff"},"title":"AI Tutor - Finnish","description":"<p>As an AI Tutor specialized in multilingual audio capabilities, you will contribute to xAI&#39;s mission by training and refining Grok to excel in voice interactions, speech recognition, and auditory experiences across diverse languages, accents, and cultural contexts.</p>\n<p>Your work will focus on curating and annotating high-quality audio data to enhance Grok&#39;s global accessibility, enabling natural spoken interactions for users worldwide, bridging language barriers through accurate speech processing, and improving the AI&#39;s handling of multilingual audio nuances.</p>\n<p>Responsibilities:</p>\n<ul>\n<li>Use proprietary software to provide labels, annotations, recordings, and inputs on projects involving multilingual audio clips, voice recordings, speech samples, and auditory elements in various languages.</li>\n</ul>\n<ul>\n<li>Support the delivery of high-quality curated audio data that ensures clear, natural spoken output, accurate representation of linguistic and prosodic details (such as intonation, rhythm, and accent), and professional audio standards.</li>\n</ul>\n<ul>\n<li>Collaborate with technical staff to develop tasks that improve AI&#39;s ability to handle speech modulation, accent variation, noise in real-world recordings, and multilingual audio processing.</li>\n</ul>\n<ul>\n<li>Work with technical staff to improve annotation tools for efficient audio workflows.</li>\n</ul>\n<p>Basic Qualifications:</p>\n<ul>\n<li>Native proficiency in Finnish with exposure to diverse accents, dialects, or regional variations.</li>\n</ul>\n<ul>\n<li>Proficiency in English (minimum B2 level) with clear, natural vocal delivery and pronunciation suitable for audio recording purposes.</li>\n</ul>\n<ul>\n<li>Strong auditory perception to identify nuances in speech, 
accents, pronunciation, intonation, and audio quality across languages.</li>\n</ul>\n<ul>\n<li>Demonstrated ability to handle multilingual audio content, including evaluating speech accuracy, cultural vocal expressions, and contextual interpretation in spoken form.</li>\n</ul>\n<ul>\n<li>Demonstrated ability to transcribe audio with high accuracy across accents and varying audio quality.</li>\n</ul>\n<ul>\n<li>Comfort providing high-quality voice recordings and feedback on audio samples in multiple languages.</li>\n</ul>\n<ul>\n<li>Strong comprehension skills and the ability to make independent judgments on ambiguous or varied audio material, including noisy or accented speech.</li>\n</ul>\n<ul>\n<li>Strong communication, interpersonal, analytical, detail-oriented, and organizational skills, with the ability to articulate audio-related feedback effectively.</li>\n</ul>\n<ul>\n<li>Commitment to developing AI that masters sophisticated multilingual audio capabilities.</li>\n</ul>\n<p>Preferred Skills and Experience:</p>\n<ul>\n<li>Demonstration of exceptional attention to linguistic nuance, auditory detail, and data quality beyond standard transcription work.</li>\n</ul>\n<ul>\n<li>Deep understanding and taste of what good/useful Audio data is.</li>\n</ul>\n<ul>\n<li>Strong command of advanced transcription and annotation practices, including handling disfluencies, accents, and prosodic features (intonation, stress, rhythm, emotion, etc) with high consistency and accuracy.</li>\n</ul>\n<ul>\n<li>Background in linguistics (e.g., phonetics, phonology, sociolinguistics), speech sciences, cognitive science, or a related field, or equivalent practical experience, with demonstrated ability to analyze accent variation, pronunciation differences, and multilingual speech patterns.</li>\n</ul>\n<ul>\n<li>Experience working with speech/audio datasets, annotation workflows, or AI training data, including knowledge/experience with training voice models, and an understanding of 
how data quality impacts model performance.</li>\n</ul>\n<ul>\n<li>Professional experience in voice work, including voice acting, voice recording, podcasting with a measurable audience (e.g., X following), or similar audio production demonstrating attention to clarity and recording quality.</li>\n</ul>\n<ul>\n<li>Demonstrated ability to exercise independent judgment in ambiguous audio scenarios and make consistent, defensible annotation decisions.</li>\n</ul>\n<ul>\n<li>Portfolio (strongly preferred for advanced candidates): Voice samples, annotated transcripts, or audio-related work demonstrating quality, methodology, and attention to detail.</li>\n</ul>\n<ul>\n<li>Candidates with professional experience in voice, linguistics, speech data, or speech evaluation and research are especially encouraged to apply.</li>\n</ul>\n<p>Location and Other Expectations:</p>\n<ul>\n<li>Tutor roles may be offered as full-time, part-time, or contractor positions, depending on role needs and candidate fit.</li>\n</ul>\n<ul>\n<li>For contractor positions, hours will vary widely based on project scope and contractor availability, with no fixed commitments required. On average, most projects may require at least 10 hours per week to deliver effectively, though this is not a fixed commitment and depends on the scope of work. 
Contractors have full flexibility to set their own hours and determine the exact amount of time needed to complete deliverables.</li>\n</ul>\n<ul>\n<li>Tutor roles may be performed remotely from any location worldwide, subject to legal eligibility, time-zone compatibility, and role-specific needs.</li>\n</ul>\n<ul>\n<li>For US-based candidates, please note that we are unable to hire in Wyoming and Illinois at this time.</li>\n</ul>\n<ul>\n<li>We are unable to provide visa sponsorship.</li>\n</ul>\n<ul>\n<li>For those who will be working from a personal device, your computer must be a Chromebook, a Mac with macOS 11.0 or later, or Windows 10 or later.</li>\n</ul>\n<p>Compensation and Benefits:</p>\n<p>US-based candidates: $35/hour - $45/hour depending on factors including relevant experience, skills, education, geographic location, and qualifications. International candidates: Information will be provided to you during the recruitment process.</p>\n<p>Benefits vary based on employment type, location, and jurisdiction. Benefits for eligible U.S.-based positions include health insurance, 401(k) plan, and paid sick leave. 
Specific details and role-specific information will be provided to you during the interview process.</p>\n<p style=\"margin-top:24px;font-size:13px;color:#666;\">XML job scraping automation by <a href=\"https://yubhub.co\">YubHub</a></p>","url":"https://yubhub.co/jobs/job_9c65d655-aff","directApply":true,"hiringOrganization":{"@type":"Organization","name":"xAI","sameAs":"https://www.xai.com/","logo":"https://logos.yubhub.co/xai.com.png"},"x-apply-url":"https://job-boards.greenhouse.io/xai/jobs/5090199007","x-work-arrangement":"remote","x-experience-level":"mid","x-job-type":"full-time|part-time|contract","x-salary-range":"$35/hour - $45/hour","x-skills-required":["multilingual audio capabilities","voice interactions","speech recognition","auditory experiences","linguistic and prosodic details","intonation","rhythm","accent","professional audio standards","speech modulation","accent variation","noise in real-world recordings","multilingual audio processing","annotation tools","efficient audio workflows","native proficiency in Finnish","English (minimum B2 level)","strong auditory perception","nuances in speech","accents","pronunciation","audio quality","multilingual audio content","speech accuracy","cultural vocal expressions","contextual interpretation","transcription","high accuracy","various audio quality","voice recordings","feedback on audio samples","independent judgments","ambiguous or varied audio material","noisy or accented speech","communication","interpersonal","analytical","detail-oriented","organizational","audio-related feedback","commitment to developing AI"],"x-skills-preferred":["exceptional attention to linguistic nuance","auditory detail","data quality","deep understanding and taste of what good/useful Audio data is","strong command of advanced transcription and annotation practices","handling disfluencies","prosodic features","background in linguistics","phonetics","phonology","sociolinguistics","speech sciences","cognitive science","equivalent 
practical experience","analysis of accent variation","pronunciation differences","multilingual speech patterns","experience working with speech/audio datasets","annotation workflows","AI training data","training voice models","understanding of how data quality impacts model performance","professional experience in voice work","voice acting","voice recording","podcasting","measurable audience","similar audio production","exercise independent judgment","defensible annotation decisions","portfolio","voice samples","annotated transcripts","audio-related work","quality","methodology","attention to detail"],"datePosted":"2026-04-18T15:19:34.287Z","jobLocation":{"@type":"Place","address":{"@type":"PostalAddress","addressLocality":"Remote"}},"jobLocationType":"TELECOMMUTE","employmentType":"FULL_TIME","occupationalCategory":"Engineering","industry":"Technology","skills":"multilingual audio capabilities, voice interactions, speech recognition, auditory experiences, linguistic and prosodic details, intonation, rhythm, accent, professional audio standards, speech modulation, accent variation, noise in real-world recordings, multilingual audio processing, annotation tools, efficient audio workflows, native proficiency in Finnish, English (minimum B2 level), strong auditory perception, nuances in speech, accents, pronunciation, audio quality, multilingual audio content, speech accuracy, cultural vocal expressions, contextual interpretation, transcription, high accuracy, various audio quality, voice recordings, feedback on audio samples, independent judgments, ambiguous or varied audio material, noisy or accented speech, communication, interpersonal, analytical, detail-oriented, organizational, audio-related feedback, commitment to developing AI, exceptional attention to linguistic nuance, auditory detail, data quality, deep understanding and taste of what good/useful Audio data is, strong command of advanced transcription and annotation practices, handling disfluencies, 
prosodic features, background in linguistics, phonetics, phonology, sociolinguistics, speech sciences, cognitive science, equivalent practical experience, analysis of accent variation, pronunciation differences, multilingual speech patterns, experience working with speech/audio datasets, annotation workflows, AI training data, training voice models, understanding of how data quality impacts model performance, professional experience in voice work, voice acting, voice recording, podcasting, measurable audience, similar audio production, exercise independent judgment, defensible annotation decisions, portfolio, voice samples, annotated transcripts, audio-related work, quality, methodology, attention to detail"},{"@context":"https://schema.org","@type":"JobPosting","identifier":{"@type":"PropertyValue","name":"YubHub","value":"job_02d3881d-a73"},"title":"AI Tutor - Dutch","description":"<p>As an AI Tutor specialized in multilingual audio capabilities, you will contribute to xAI&#39;s mission by training and refining Grok to excel in voice interactions, speech recognition, and auditory experiences across diverse languages, accents, and cultural contexts.</p>\n<p>Your work will focus on curating and annotating high-quality audio data to enhance Grok&#39;s global accessibility, enabling natural spoken interactions for users worldwide, bridging language barriers through accurate speech processing, and improving the AI&#39;s handling of multilingual audio nuances.</p>\n<p>Responsibilities:</p>\n<ul>\n<li>Use proprietary software to provide labels, annotations, recordings, and inputs on projects involving multilingual audio clips, voice recordings, speech samples, and auditory elements in various languages.</li>\n</ul>\n<ul>\n<li>Support the delivery of high-quality curated audio data that ensures clear, natural spoken output, accurate representation of linguistic and prosodic details (such as intonation, rhythm, and accent), and professional audio 
standards.</li>\n</ul>\n<ul>\n<li>Collaborate with technical staff to develop tasks that improve AI&#39;s ability to handle speech modulation, accent variation, noise in real-world recordings, and multilingual audio processing.</li>\n</ul>\n<ul>\n<li>Work with technical staff to improve annotation tools for efficient audio workflows.</li>\n</ul>\n<p>Basic Qualifications:</p>\n<ul>\n<li>Native proficiency in Dutch with exposure to diverse accents, dialects, or regional variations.</li>\n</ul>\n<ul>\n<li>Proficiency in English (minimum B2 level) with clear, natural vocal delivery and pronunciation suitable for audio recording purposes.</li>\n</ul>\n<ul>\n<li>Strong auditory perception to identify nuances in speech, accents, pronunciation, intonation, and audio quality across languages.</li>\n</ul>\n<ul>\n<li>Demonstrated ability to handle multilingual audio content, including evaluating speech accuracy, cultural vocal expressions, and contextual interpretation in spoken form.</li>\n</ul>\n<ul>\n<li>Demonstrated ability to transcribe audio with high accuracy across accents and varying audio quality.</li>\n</ul>\n<ul>\n<li>Comfort providing high-quality voice recordings and feedback on audio samples in multiple languages.</li>\n</ul>\n<ul>\n<li>Strong comprehension skills and the ability to make independent judgments on ambiguous or varied audio material, including noisy or accented speech.</li>\n</ul>\n<ul>\n<li>Strong communication, interpersonal, analytical, detail-oriented, and organizational skills, with the ability to articulate audio-related feedback effectively.</li>\n</ul>\n<ul>\n<li>Commitment to developing AI that masters sophisticated multilingual audio capabilities.</li>\n</ul>\n<p>Preferred Skills and Experience:</p>\n<ul>\n<li>Demonstration of exceptional attention to linguistic nuance, auditory detail, and data quality beyond standard transcription work.</li>\n</ul>\n<ul>\n<li>Deep understanding and taste of what good/useful Audio data 
is.</li>\n</ul>\n<ul>\n<li>Strong command of advanced transcription and annotation practices, including handling disfluencies, accents, and prosodic features (intonation, stress, rhythm, emotion, etc) with high consistency and accuracy.</li>\n</ul>\n<ul>\n<li>Background in linguistics (e.g., phonetics, phonology, sociolinguistics), speech sciences, cognitive science, or a related field, or equivalent practical experience, with demonstrated ability to analyze accent variation, pronunciation differences, and multilingual speech patterns.</li>\n</ul>\n<ul>\n<li>Experience working with speech/audio datasets, annotation workflows, or AI training data, including knowledge/experience with training voice models, and an understanding of how data quality impacts model performance.</li>\n</ul>\n<ul>\n<li>Professional experience in voice work, including voice acting, voice recording, podcasting with a measurable audience (e.g., X following), or similar audio production demonstrating attention to clarity and recording quality.</li>\n</ul>\n<ul>\n<li>Demonstrated ability to exercise independent judgment in ambiguous audio scenarios and make consistent, defensible annotation decisions.</li>\n</ul>\n<ul>\n<li>Portfolio (strongly preferred for advanced candidates): Voice samples, annotated transcripts, or audio-related work demonstrating quality, methodology, and attention to detail.</li>\n</ul>\n<ul>\n<li>Candidates with professional experience in voice, linguistics, speech data, or speech evaluation and research are especially encouraged to apply.</li>\n</ul>\n<p>Location and Other Expectations:</p>\n<ul>\n<li>Tutor roles may be offered as full-time, part-time, or contractor positions, depending on role needs and candidate fit.</li>\n</ul>\n<ul>\n<li>For contractor positions, hours will vary widely based on project scope and contractor availability, with no fixed commitments required. 
On average, most projects may require at least 10 hours per week to deliver effectively, though this is not a fixed commitment and depends on the scope of work. Contractors have full flexibility to set their own hours and determine the exact amount of time needed to complete deliverables.</li>\n</ul>\n<ul>\n<li>Tutor roles may be performed remotely from any location worldwide, subject to legal eligibility, time-zone compatibility, and role-specific needs.</li>\n</ul>\n<ul>\n<li>For US-based candidates, please note that we are unable to hire in Wyoming and Illinois at this time.</li>\n</ul>\n<ul>\n<li>We are unable to provide visa sponsorship.</li>\n</ul>\n<ul>\n<li>For those who will be working from a personal device, your computer must be a Chromebook, a Mac with macOS 11.0 or later, or Windows 10 or later.</li>\n</ul>\n<p>Compensation and Benefits:</p>\n<p>US-based candidates: $35/hour - $45/hour depending on factors including relevant experience, skills, education, geographic location, and qualifications. International candidates: Information will be provided to you during the recruitment process.</p>\n<p>Benefits vary based on employment type, location, and jurisdiction. Benefits for eligible U.S.-based positions include health insurance, 401(k) plan, and paid sick leave. 
Specific details and role-specific information will be provided to you during the interview process.</p>\n<p style=\"margin-top:24px;font-size:13px;color:#666;\">XML job scraping automation by <a href=\"https://yubhub.co\">YubHub</a></p>","url":"https://yubhub.co/jobs/job_02d3881d-a73","directApply":true,"hiringOrganization":{"@type":"Organization","name":"xAI","sameAs":"https://www.xai.com/","logo":"https://logos.yubhub.co/xai.com.png"},"x-apply-url":"https://job-boards.greenhouse.io/xai/jobs/5090197007","x-work-arrangement":"remote","x-experience-level":"mid","x-job-type":"full-time|part-time|contract","x-salary-range":"$35/hour - $45/hour","x-skills-required":["multilingual audio capabilities","voice interactions","speech recognition","auditory experiences","linguistic and prosodic details","intonation","rhythm","accent","professional audio standards","speech modulation","accent variation","noise in real-world recordings","multilingual audio processing","annotation tools","efficient audio workflows","native proficiency in Dutch","proficiency in English","auditory perception","nuances in speech","accents","pronunciation","audio quality","speech accuracy","cultural vocal expressions","contextual interpretation","transcription","high accuracy","vocal delivery","clarity","recording quality","independent judgments","ambiguous audio scenarios","defensible annotation decisions","portfolio","voice samples","annotated transcripts","audio-related work","quality","methodology","attention to detail","voice work","voice acting","voice recording","podcasting","measurable audience","speech sciences","cognitive science","linguistics","phonetics","phonology","sociolinguistics","pronunciation differences","multilingual speech patterns","speech/audio datasets","annotation workflows","AI training data","training voice models","data quality","model performance","independent judgment"],"x-skills-preferred":["exceptional attention to linguistic nuance","auditory 
detail","deep understanding","taste of what good/useful Audio data is","strong command of advanced transcription and annotation practices","handling disfluencies","prosodic features","high consistency and accuracy","background in linguistics","equivalent practical experience","analyzing accent variation","experience working with speech/audio datasets","knowledge/experience with training voice models","understanding how data quality impacts model performance","professional experience in voice work","similar audio production","attention to clarity"],"datePosted":"2026-04-18T15:19:04.366Z","jobLocation":{"@type":"Place","address":{"@type":"PostalAddress","addressLocality":"Remote"}},"jobLocationType":"TELECOMMUTE","employmentType":"FULL_TIME","occupationalCategory":"Engineering","industry":"Technology","skills":"multilingual audio capabilities, voice interactions, speech recognition, auditory experiences, linguistic and prosodic details, intonation, rhythm, accent, professional audio standards, speech modulation, accent variation, noise in real-world recordings, multilingual audio processing, annotation tools, efficient audio workflows, native proficiency in Dutch, proficiency in English, auditory perception, nuances in speech, accents, pronunciation, audio quality, speech accuracy, cultural vocal expressions, contextual interpretation, transcription, high accuracy, vocal delivery, clarity, recording quality, independent judgments, ambiguous audio scenarios, defensible annotation decisions, portfolio, voice samples, annotated transcripts, audio-related work, quality, methodology, attention to detail, voice work, voice acting, voice recording, podcasting, measurable audience, speech sciences, cognitive science, linguistics, phonetics, phonology, sociolinguistics, pronunciation differences, multilingual speech patterns, speech/audio datasets, annotation workflows, AI training data, training voice models, data quality, model performance, independent judgment, 
exceptional attention to linguistic nuance, auditory detail, deep understanding, taste of what good/useful Audio data is, strong command of advanced transcription and annotation practices, handling disfluencies, prosodic features, high consistency and accuracy, background in linguistics, equivalent practical experience, analyzing accent variation, experience working with speech/audio datasets, knowledge/experience with training voice models, understanding how data quality impacts model performance, professional experience in voice work, similar audio production, attention to clarity"},{"@context":"https://schema.org","@type":"JobPosting","identifier":{"@type":"PropertyValue","name":"YubHub","value":"job_5fc4a599-f52"},"title":"AI Tutor - Arabic","description":"<p>As an AI Tutor specialized in multilingual audio capabilities, you will contribute to xAI&#39;s mission by training and refining Grok to excel in voice interactions, speech recognition, and auditory experiences across diverse languages, accents, and cultural contexts.</p>\n<p>Your work will focus on curating and annotating high-quality audio data to enhance Grok&#39;s global accessibility, enabling natural spoken interactions for users worldwide, bridging language barriers through accurate speech processing, and improving the AI&#39;s handling of multilingual audio nuances.</p>\n<p>Responsibilities:</p>\n<ul>\n<li>Use proprietary software to provide labels, annotations, recordings, and inputs on projects involving multilingual audio clips, voice recordings, speech samples, and auditory elements in various languages.</li>\n</ul>\n<ul>\n<li>Support the delivery of high-quality curated audio data that ensures clear, natural spoken output, accurate representation of linguistic and prosodic details (such as intonation, rhythm, and accent), and professional audio standards.</li>\n</ul>\n<ul>\n<li>Collaborate with technical staff to develop tasks that improve AI&#39;s ability to handle speech modulation, accent 
variation, noise in real-world recordings, and multilingual audio processing.</li>\n</ul>\n<ul>\n<li>Work with technical staff to improve annotation tools for efficient audio workflows.</li>\n</ul>\n<p>Basic Qualifications:</p>\n<ul>\n<li>Native proficiency in Arabic with exposure to diverse accents, dialects, or regional variations.</li>\n</ul>\n<ul>\n<li>Proficiency in English (minimum B2 level) with clear, natural vocal delivery and pronunciation suitable for audio recording purposes.</li>\n</ul>\n<ul>\n<li>Strong auditory perception to identify nuances in speech, accents, pronunciation, intonation, and audio quality across languages.</li>\n</ul>\n<ul>\n<li>Demonstrated ability to handle multilingual audio content, including evaluating speech accuracy, cultural vocal expressions, and contextual interpretation in spoken form.</li>\n</ul>\n<ul>\n<li>Demonstrated ability to transcribe audio with high accuracy across accents and varying audio quality.</li>\n</ul>\n<ul>\n<li>Comfort providing high-quality voice recordings and feedback on audio samples in multiple languages.</li>\n</ul>\n<p>Preferred Skills and Experience:</p>\n<ul>\n<li>Demonstration of exceptional attention to linguistic nuance, auditory detail, and data quality beyond standard transcription work.</li>\n</ul>\n<ul>\n<li>Deep understanding and taste of what good/useful Audio data is.</li>\n</ul>\n<ul>\n<li>Strong command of advanced transcription and annotation practices, including handling disfluencies, accents, and prosodic features (intonation, stress, rhythm, emotion, etc) with high consistency and accuracy.</li>\n</ul>\n<ul>\n<li>Background in linguistics (e.g., phonetics, phonology, sociolinguistics), speech sciences, cognitive science, or a related field, or equivalent practical experience, with demonstrated ability to analyze accent variation, pronunciation differences, and multilingual speech patterns.</li>\n</ul>\n<ul>\n<li>Experience working with speech/audio datasets, annotation 
workflows, or AI training data, including knowledge/experience with training voice models, and an understanding of how data quality impacts model performance.</li>\n</ul>\n<ul>\n<li>Professional experience in voice work, including voice acting, voice recording, podcasting with a measurable audience (e.g., X following), or similar audio production demonstrating attention to clarity and recording quality.</li>\n</ul>\n<ul>\n<li>Demonstrated ability to exercise independent judgment in ambiguous audio scenarios and make consistent, defensible annotation decisions.</li>\n</ul>\n<p>Location and Other Expectations:</p>\n<ul>\n<li>Tutor roles may be offered as full-time, part-time, or contractor positions, depending on role needs and candidate fit.</li>\n</ul>\n<ul>\n<li>For contractor positions, hours will vary widely based on project scope and contractor availability, with no fixed commitments required. On average, most projects may require at least 10 hours per week to deliver effectively, though this is not a fixed commitment and depends on the scope of work. Contractors have full flexibility to set their own hours and determine the exact amount of time needed to complete deliverables.</li>\n</ul>\n<ul>\n<li>Tutor roles may be performed remotely from any location worldwide, subject to legal eligibility, time-zone compatibility, and role-specific needs.</li>\n</ul>\n<ul>\n<li>For US-based candidates, please note that we are unable to hire in Wyoming and Illinois at this time.</li>\n</ul>\n<ul>\n<li>We are unable to provide visa sponsorship.</li>\n</ul>\n<ul>\n<li>For those who will be working from a personal device, your computer must be a Chromebook, a Mac with macOS 11.0 or later, or Windows 10 or later.</li>\n</ul>\n<p>Compensation and Benefits:</p>\n<p>US-based candidates: $35/hour - $45/hour depending on factors including relevant experience, skills, education, geographic location, and qualifications. 
International candidates: Information will be provided to you during the recruitment process.</p>\n<p>Benefits vary based on employment type, location, and jurisdiction. Benefits for eligible U.S.-based positions include health insurance, 401(k) plan, and paid sick leave. Specific details and role-specific information will be provided to you during the interview process.</p>\n<p style=\"margin-top:24px;font-size:13px;color:#666;\">XML job scraping automation by <a href=\"https://yubhub.co\">YubHub</a></p>","url":"https://yubhub.co/jobs/job_5fc4a599-f52","directApply":true,"hiringOrganization":{"@type":"Organization","name":"xAI","sameAs":"https://www.xai.com/","logo":"https://logos.yubhub.co/xai.com.png"},"x-apply-url":"https://job-boards.greenhouse.io/xai/jobs/5090171007","x-work-arrangement":"remote","x-experience-level":"entry","x-job-type":"Full-time, Part-time, Contractor","x-salary-range":"$35/hour - $45/hour","x-skills-required":["Multilingual audio capabilities","Proprietary software","Audio data curation","Annotation tools","Speech recognition","Auditory experiences","Linguistic and prosodic details","Professional audio standards","Accent variation","Noise in real-world recordings","Multilingual audio processing","Independent judgment","Ambiguous audio scenarios","Defensible annotation decisions"],"x-skills-preferred":["Exceptional attention to linguistic nuance","Deep understanding of good/useful Audio data","Advanced transcription and annotation practices","Handling disfluencies, accents, and prosodic features","Background in linguistics, speech sciences, or cognitive science","Experience working with speech/audio datasets","Knowledge/experience with training voice models","Understanding of how data quality impacts model performance","Professional experience in voice work","Voice acting, voice recording, or 
podcasting"],"datePosted":"2026-04-18T15:17:39.431Z","jobLocation":{"@type":"Place","address":{"@type":"PostalAddress","addressLocality":"Remote"}},"jobLocationType":"TELECOMMUTE","employmentType":"FULL_TIME","occupationalCategory":"Engineering","industry":"Technology","skills":"Multilingual audio capabilities, Proprietary software, Audio data curation, Annotation tools, Speech recognition, Auditory experiences, Linguistic and prosodic details, Professional audio standards, Accent variation, Noise in real-world recordings, Multilingual audio processing, Independent judgment, Ambiguous audio scenarios, Defensible annotation decisions, Exceptional attention to linguistic nuance, Deep understanding of good/useful Audio data, Advanced transcription and annotation practices, Handling disfluencies, accents, and prosodic features, Background in linguistics, speech sciences, or cognitive science, Experience working with speech/audio datasets, Knowledge/experience with training voice models, Understanding of how data quality impacts model performance, Professional experience in voice work, Voice acting, voice recording, or podcasting"},{"@context":"https://schema.org","@type":"JobPosting","identifier":{"@type":"PropertyValue","name":"YubHub","value":"job_bfb338dc-601"},"title":"Research Scientist","description":"<p>We are seeking a Research Scientist to join our Artist-First AI Music lab. Our team pioneers and advances state-of-the-art generative technologies for music that create breakthrough experiences for fans and artists.</p>\n<p>Our products will put artists and songwriters first, through four key principles:</p>\n<ul>\n<li>Partnerships with record labels, distributors, and music publishers: We’ll develop new products for artists and fans through upfront agreements, not by asking for forgiveness later.</li>\n<li>Choice in participation: We recognize there’s a wide range of views on use of generative music tools within the artistic community. 
Therefore, artists and rightsholders will choose if and how to participate to ensure the use of AI tools aligns with the values of the people behind the music.</li>\n<li>Fair compensation and new revenue: We will build products that create wholly new revenue streams for rightsholders, artists, and songwriters, ensuring they are properly compensated for uses of their work and transparently credited for their contributions.</li>\n<li>Artist-fan connection: AI tools we develop will not replace human artistry. They will give artists new ways to be creative and connect with fans.</li>\n</ul>\n<p>As a Research Scientist, you will conduct groundbreaking research in music generation, run large-scale experiments, create practical applications, and collaborate with a cross-functional team to craft innovative solutions to complex challenges.</p>\n<p>You will have a direct impact on Spotify’s products, tools, and services, working on projects that influence the entire organization. You will also engage with the broader research community by publishing your findings, delivering talks, and attending top conferences.</p>\n<p>We offer a flexible work arrangement, allowing you to work where you work best, as long as you are within the EMEA region and operate within the Central European and GMT time zones for collaboration.</p>\n<p style=\"margin-top:24px;font-size:13px;color:#666;\">XML job scraping automation by <a href=\"https://yubhub.co\">YubHub</a></p>","url":"https://yubhub.co/jobs/job_bfb338dc-601","directApply":true,"hiringOrganization":{"@type":"Organization","name":"Spotify","sameAs":"https://www.spotify.com","logo":"https://logos.yubhub.co/spotify.com.png"},"x-apply-url":"https://jobs.lever.co/spotify/bd54129d-d6d6-4ffd-a25a-dd240374a0ce","x-work-arrangement":"remote","x-experience-level":"senior","x-job-type":"full-time","x-salary-range":null,"x-skills-required":["generative modeling","machine learning","music information retrieval","speech processing","audio 
processing","signal processing","probabilistic modeling","computer vision"],"x-skills-preferred":["Python","PyTorch","NumPy"],"datePosted":"2026-03-31T18:16:49.236Z","jobLocation":{"@type":"Place","address":{"@type":"PostalAddress","addressLocality":"EMEA region"}},"jobLocationType":"TELECOMMUTE","employmentType":"FULL_TIME","occupationalCategory":"Engineering","industry":"Technology","skills":"generative modeling, machine learning, music information retrieval, speech processing, audio processing, signal processing, probabilistic modeling, computer vision, Python, PyTorch, NumPy"}]}