{"version":"0.1","company":{"name":"YubHub","url":"https://yubhub.co","jobsUrl":"https://yubhub.co/jobs/title/senior-staff-machine-learning-engineer-genai-platform"},"x-facet":{"type":"title","slug":"senior-staff-machine-learning-engineer-genai-platform","display":"Senior Staff Machine Learning Engineer, GenAI Platform","count":1},"x-feed-size-limit":100,"x-feed-sort":"enriched_at desc","x-feed-notice":"This feed contains at most 100 jobs (the most recently enriched). For the full corpus, use the paginated /stats/by-facet endpoint or /search.","x-generator":"yubhub-xml-generator","x-rights":"Free to redistribute with attribution: \"Data by YubHub (https://yubhub.co)\"","x-schema":"Each entry in `jobs` follows https://schema.org/JobPosting. YubHub-native raw fields carry `x-` prefix.","jobs":[{"@context":"https://schema.org","@type":"JobPosting","identifier":{"@type":"PropertyValue","name":"YubHub","value":"job_711f5c89-ed8"},"title":"Senior Staff Machine Learning Engineer, GenAI Platform","description":"<p>As a Senior Staff Machine Learning Engineer, you will help define and lead the vision for Reddit&#39;s large-scale GenAI Platform, shaping the strategy, architecture, and operating model that enable teams across the company to build, deploy, and scale generative AI products with confidence.</p>\n<p>Contribute to the design, implementation, and maintenance of the LLM Gateway, focusing on features like unified API endpoints for internal/externally hosted LLM, rate/token limit management, and intelligent failover mechanisms to boost uptime and reliability.</p>\n<p>Lead and execute the vision, strategy, and roadmap for Reddit&#39;s large-scale GenAI Platform.</p>\n<p>Define the platform architecture and operating model that enable teams to build, deploy, and scale GenAI products reliably.</p>\n<p>Drive the strategy for a unified LAG Gateway supporting internally and externally hosted LLMs through consistent APIs and abstractions.</p>\n<p>Set the direction for core platform capabilities such as rate and token limit management, intelligent failover, and production resilience.</p>\n<p>Shape Reddit&#39;s approach to an enterprise-grade RAG system.</p>\n<p>Establish the strategic direction for agentic AI workflows and tool-use patterns across the platform.</p>\n<p>Own the end-to-end platform strategy from concept through production adoption and long-term evolution.</p>\n<p>Drive MLOps and LLMOps standards across CI/CD, testing, versioning, evaluation, and lifecycle management.</p>\n<p>Define best practices for observability, monitoring, governance, and operational excellence across GenAI systems.</p>\n<p>Partner across engineering, product, and leadership to align platform investments with company priorities and user needs.</p>\n<p>Champion platform thinking with a strong focus on scalability, reliability, performance, and developer experience.</p>\n<p>Influence technical direction across teams by turning emerging AI capabilities into a scalable platform strategy.</p>\n<p style=\"margin-top:24px;font-size:13px;color:#666;\">XML job scraping automation by <a href=\"https://yubhub.co\">YubHub</a></p>","url":"https://yubhub.co/jobs/job_711f5c89-ed8","directApply":true,"hiringOrganization":{"@type":"Organization","name":"Reddit","sameAs":"https://www.redditinc.com","logo":"https://logos.yubhub.co/redditinc.com.png"},"x-apply-url":"https://job-boards.greenhouse.io/reddit/jobs/7772274?utm_source=yubhub.co&utm_medium=jobs_feed&utm_campaign=apply","x-work-arrangement":"remote","x-experience-level":"senior","x-job-type":"full-time","x-salary-range":"$292,500-$409,500 USD","x-skills-required":["Machine Learning","GenAI Platform","LLM Gateway","API Endpoints","Rate/Token Limit Management","Intelligent Failover","Kubernetes","Cloud-Based Technologies","AWS","Google Cloud Storage","Infrastructure-as-Code","Terraform","Go","Python","CI/CD","Testing","Versioning","Evaluation","Lifecycle Management","Observability","Monitoring","Governance","Operational Excellence"],"x-skills-preferred":[],"datePosted":"2026-04-18T15:46:48.652Z","jobLocation":{"@type":"Place","address":{"@type":"PostalAddress","addressLocality":"Remote - United States"}},"jobLocationType":"TELECOMMUTE","employmentType":"FULL_TIME","occupationalCategory":"Engineering","industry":"Technology","skills":"Machine Learning, GenAI Platform, LLM Gateway, API Endpoints, Rate/Token Limit Management, Intelligent Failover, Kubernetes, Cloud-Based Technologies, AWS, Google Cloud Storage, Infrastructure-as-Code, Terraform, Go, Python, CI/CD, Testing, Versioning, Evaluation, Lifecycle Management, Observability, Monitoring, Governance, Operational Excellence","baseSalary":{"@type":"MonetaryAmount","currency":"USD","value":{"@type":"QuantitativeValue","minValue":292500,"maxValue":409500,"unitText":"YEAR"}}}]}