{"version":"0.1","company":{"name":"YubHub","url":"https://yubhub.co","jobsUrl":"https://yubhub.co/jobs/skill/research-and-engineering-development"},"x-facet":{"type":"skill","slug":"research-and-engineering-development","display":"Research And Engineering Development","count":1},"x-feed-size-limit":100,"x-feed-sort":"enriched_at desc","x-feed-notice":"This feed contains at most 100 jobs (the most recently enriched). For the full corpus, use the paginated /stats/by-facet endpoint or /search.","x-generator":"yubhub-xml-generator","x-rights":"Free to redistribute with attribution: \"Data by YubHub (https://yubhub.co)\"","x-schema":"Each entry in `jobs` follows https://schema.org/JobPosting. YubHub-native raw fields carry `x-` prefix.","jobs":[{"@context":"https://schema.org","@type":"JobPosting","identifier":{"@type":"PropertyValue","name":"YubHub","value":"job_60a7e1e6-b51"},"title":"Tech Lead/Manager, Machine Learning Research Scientist- LLM Evals","description":"<p>As the leading data and evaluation partner for frontier AI companies, we&#39;re dedicated to advancing the evaluation and benchmarking of large language models (LLMs). Our Research teams work with the industry&#39;s leading AI labs to provide high-quality data and accelerate progress in GenAI research.</p>\n<p>We&#39;re seeking a Tech Lead Manager to lead a talented team of research scientists and research engineers focused on developing and implementing novel evaluation methodologies, metrics, and benchmarks to assess the capabilities and limitations of our cutting-edge LLMs.</p>\n<p>Key responsibilities:</p>\n<ul>\n<li>Lead a team of highly effective research scientists and research engineers on LLM evals.</li>\n<li>Conduct research on the effectiveness and limitations of existing LLM evaluation techniques.</li>\n<li>Design and develop novel evaluation benchmarks for large language models, covering areas such as instruction following, factuality, robustness, and fairness.</li>\n<li>Communicate, collaborate, and build relationships with clients and peer teams to facilitate cross-functional projects.</li>\n<li>Collaborate with internal teams and external partners to refine metrics and create standardized evaluation protocols.</li>\n<li>Implement scalable and reproducible evaluation pipelines using modern ML frameworks.</li>\n<li>Publish research findings in top-tier AI conferences and contribute to open-source benchmarking initiatives.</li>\n</ul>\n<p>Ideal candidate has 5+ years of hands-on experience in large language model, NLP, and Transformer modeling, in the setting of both research and engineering development. Experience supporting and leading a team of research scientists and research engineers is also required.</p>\n<p style=\"margin-top:24px;font-size:13px;color:#666;\">XML job scraping automation by <a href=\"https://yubhub.co\">YubHub</a></p>","url":"https://yubhub.co/jobs/job_60a7e1e6-b51","directApply":true,"hiringOrganization":{"@type":"Organization","name":"Scale","sameAs":"https://scale.com/","logo":"https://logos.yubhub.co/scale.com.png"},"x-apply-url":"https://job-boards.greenhouse.io/scaleai/jobs/4304790005","x-work-arrangement":"onsite","x-experience-level":"senior","x-job-type":"full-time","x-salary-range":"$264,800-$331,000 USD","x-skills-required":["large language model","NLP","Transformer modeling","research and engineering development","team leadership","cross-functional collaboration","evaluation methodologies","metrics and benchmarks","scalable and reproducible evaluation pipelines","modern ML frameworks"],"x-skills-preferred":["published research in top-tier AI conferences","open-source benchmarking initiatives","customer-facing role"],"datePosted":"2026-04-18T15:59:10.794Z","jobLocation":{"@type":"Place","address":{"@type":"PostalAddress","addressLocality":"San Francisco, CA; Seattle, WA; New York, NY"}},"employmentType":"FULL_TIME","occupationalCategory":"Engineering","industry":"Technology","skills":"large language model, NLP, Transformer modeling, research and engineering development, team leadership, cross-functional collaboration, evaluation methodologies, metrics and benchmarks, scalable and reproducible evaluation pipelines, modern ML frameworks, published research in top-tier AI conferences, open-source benchmarking initiatives, customer-facing role","baseSalary":{"@type":"MonetaryAmount","currency":"USD","value":{"@type":"QuantitativeValue","minValue":264800,"maxValue":331000,"unitText":"YEAR"}}}]}