<?xml version="1.0" encoding="UTF-8"?>
<source>
  <jobs>
    <job>
      <externalid>60a7e1e6-b51</externalid>
      <Title>Tech Lead/Manager, Machine Learning Research Scientist - LLM Evals</Title>
      <Description><![CDATA[<p>As the leading data and evaluation partner for frontier AI companies, we&#39;re dedicated to advancing the evaluation and benchmarking of large language models (LLMs). Our Research teams work with the industry&#39;s leading AI labs to provide high-quality data and accelerate progress in GenAI research.</p>
<p>We&#39;re seeking a Tech Lead Manager to lead a talented team of research scientists and research engineers focused on developing and implementing novel evaluation methodologies, metrics, and benchmarks to assess the capabilities and limitations of our cutting-edge LLMs.</p>
<p>Key responsibilities:</p>
<ul>
<li>Lead a team of highly effective research scientists and research engineers on LLM evals.</li>
<li>Conduct research on the effectiveness and limitations of existing LLM evaluation techniques.</li>
<li>Design and develop novel evaluation benchmarks for large language models, covering areas such as instruction following, factuality, robustness, and fairness.</li>
<li>Communicate, collaborate, and build relationships with clients and peer teams to facilitate cross-functional projects.</li>
<li>Collaborate with internal teams and external partners to refine metrics and create standardized evaluation protocols.</li>
<li>Implement scalable and reproducible evaluation pipelines using modern ML frameworks.</li>
<li>Publish research findings in top-tier AI conferences and contribute to open-source benchmarking initiatives.</li>
</ul>
<p>The ideal candidate has 5+ years of hands-on experience with large language models, NLP, and Transformer modeling, in both research and engineering development settings. Experience supporting and leading a team of research scientists and research engineers is also required.</p>
<p style="margin-top:24px;font-size:13px;color:#666;">XML job scraping automation by <a href="https://yubhub.co">YubHub</a></p>]]></Description>
      <Jobtype>full-time</Jobtype>
      <Experiencelevel>senior</Experiencelevel>
      <Workarrangement>onsite</Workarrangement>
      <Salaryrange>$264,800-$331,000 USD</Salaryrange>
      <Skills>large language model, NLP, Transformer modeling, research and engineering development, team leadership, cross-functional collaboration, evaluation methodologies, metrics and benchmarks, scalable and reproducible evaluation pipelines, modern ML frameworks, published research in top-tier AI conferences, open-source benchmarking initiatives, customer-facing role</Skills>
      <Category>Engineering</Category>
      <Industry>Technology</Industry>
      <Employername>Scale</Employername>
      <Employerlogo>https://logos.yubhub.co/scale.com.png</Employerlogo>
      <Employerdescription>Scale develops reliable AI systems for the world&apos;s most important decisions, providing high-quality data and full-stack technologies.</Employerdescription>
      <Employerwebsite>https://scale.com/</Employerwebsite>
      <Compensationcurrency>USD</Compensationcurrency>
      <Compensationmin>264800</Compensationmin>
      <Compensationmax>331000</Compensationmax>
      <Applyto>https://job-boards.greenhouse.io/scaleai/jobs/4304790005</Applyto>
      <Location>San Francisco, CA; Seattle, WA; New York, NY</Location>
      <Country></Country>
      <Postedate>2026-04-18</Postedate>
    </job>
  </jobs>
</source>