<?xml version="1.0" encoding="UTF-8"?>
<source>
  <jobs>
    <job>
      <externalid>4054dca1-a4f</externalid>
      <Title>AI Inference Engineer</Title>
      <Description><![CDATA[<p>We are looking for an AI Inference engineer to join our growing team. Our current stack is Python, Rust, C++, PyTorch, Triton, CUDA, Kubernetes. You will have the opportunity to work on large-scale deployment of machine learning models for real-time inference.</p>
<p><strong>What you&#39;ll do</strong></p>
<ul>
<li>Develop APIs for AI inference that will be used by both internal and external customers</li>
<li>Benchmark and address bottlenecks throughout our inference stack</li>
<li>Improve the reliability and observability of our systems and respond to system outages</li>
<li>Explore novel research and implement LLM inference optimizations</li>
</ul>
<p><strong>What you need</strong></p>
<ul>
<li>Experience with ML systems and deep learning frameworks (e.g. PyTorch, TensorFlow, ONNX)</li>
<li>Familiarity with common LLM architectures and inference optimization techniques (e.g. continuous batching, quantization, etc.)</li>
<li>Understanding of GPU architectures or experience with GPU kernel programming using CUDA</li>
</ul>
<p><strong>Why this matters</strong></p>
<p>As an AI Inference engineer, you will play a critical role in the development and deployment of our machine learning models. Your work will have a direct impact on the performance and reliability of our systems, and will help us to continue to innovate and improve our products.</p>
<p style="margin-top:24px;font-size:13px;color:#666;">XML job scraping automation by <a href="https://yubhub.co">YubHub</a></p>]]></Description>
      <Jobtype>full-time</Jobtype>
      <Experiencelevel>mid</Experiencelevel>
      <Workarrangement>onsite</Workarrangement>
      <Salaryrange>Final offer amounts are determined by multiple factors, including experience and expertise.</Salaryrange>
      <Skills>ML systems, deep learning frameworks, GPU architectures, LLM architectures, inference optimization techniques</Skills>
      <Category>Engineering</Category>
      <Industry>Technology</Industry>
      <Employername>Perplexity</Employername>
      <Employerlogo>https://logos.yubhub.co/perplexity.com.png</Employerlogo>
      <Employerdescription>Perplexity is a technology company working on the large-scale deployment of machine learning models for real-time inference.</Employerdescription>
      <Employerwebsite>https://jobs.ashbyhq.com</Employerwebsite>
      <Compensationcurrency></Compensationcurrency>
      <Compensationmin></Compensationmin>
      <Compensationmax></Compensationmax>
      <Applyto>https://jobs.ashbyhq.com/perplexity/e4777627-ff8f-4257-8612-3a016bb58592</Applyto>
      <Location>London</Location>
      <Country></Country>
      <Postedate>2026-03-04</Postedate>
    </job>
  </jobs>
</source>