{"version":"0.1","company":{"name":"YubHub","url":"https://yubhub.co","jobsUrl":"https://yubhub.co/jobs/skill/rl-environments"},"x-facet":{"type":"skill","slug":"rl-environments","display":"Rl Environments","count":5},"x-feed-size-limit":100,"x-feed-sort":"enriched_at desc","x-feed-notice":"This feed contains at most 100 jobs (the most recently enriched). For the full corpus, use the paginated /stats/by-facet endpoint or /search.","x-generator":"yubhub-xml-generator","x-rights":"Free to redistribute with attribution: \"Data by YubHub (https://yubhub.co)\"","x-schema":"Each entry in `jobs` follows https://schema.org/JobPosting. YubHub-native raw fields carry `x-` prefix.","jobs":[{"@context":"https://schema.org","@type":"JobPosting","identifier":{"@type":"PropertyValue","name":"YubHub","value":"job_5920f836-9df"},"title":"Manager, Machine Learning Research Scientist, GenAI","description":"<p>Scale AI accelerates the development of AI systems by providing data, infrastructure, and tooling that power advanced models. As AI evolves from static models to dynamic, agentic systems, Scale builds foundational research, evaluation methodologies, and agent/RL infrastructure.</p>\n<p>As a Research Scientist Manager, you will lead a world-class team of research scientists and engineers, defining the research roadmap and driving execution from early prototyping to deployment. 
You&#39;ll thrive in a fast-moving environment, balancing deep technical leadership with people management, vision setting, and delivery.</p>\n<p>Key responsibilities include:</p>\n<ul>\n<li>Leading, mentoring, and growing a team of research scientists and engineers working on GenAI research initiatives</li>\n<li>Defining and driving a multi-year research roadmap, identifying key scientific questions, setting milestones, allocating resources, and ensuring rigorous execution</li>\n<li>Collaborating cross-functionally with engineering, product, client-facing teams, and external academic or industry partners to translate research into components, insights, and actionable outcomes</li>\n<li>Communicating compellingly, publishing research, presenting at conferences, engaging in open-source contributions, and representing the team externally</li>\n<li>Driving an inclusive, high-performing culture, helping your team through technical challenges, providing growth opportunities, and attracting top talent</li>\n</ul>\n<p>Ideal candidates will have:</p>\n<ul>\n<li>5+ years of hands-on research experience in machine learning, deep learning, generative models, agent/RL systems, or related domains</li>\n<li>A strong track record of research excellence, including publications in top-tier ML/AI venues</li>\n<li>Experience leading or managing research teams, mentoring, coaching, and developing talent</li>\n<li>Excellent written and verbal communication skills, articulating research ideas and outcomes to technical and non-technical stakeholders</li>\n</ul>\n<p style=\"margin-top:24px;font-size:13px;color:#666;\">XML job scraping automation by <a href=\"https://yubhub.co\">YubHub</a></p>","url":"https://yubhub.co/jobs/job_5920f836-9df","directApply":true,"hiringOrganization":{"@type":"Organization","name":"Scale 
AI","sameAs":"https://scale.com/","logo":"https://logos.yubhub.co/scale.com.png"},"x-apply-url":"https://job-boards.greenhouse.io/scaleai/jobs/4631811005","x-work-arrangement":"onsite","x-experience-level":"senior","x-job-type":"full-time","x-salary-range":"$273,000-$393,000 USD","x-skills-required":["machine learning","deep learning","generative models","agent/RL systems","research leadership","team management","communication","publication","open-source contribution"],"x-skills-preferred":["PhD in machine learning or related domain","experience with large language models","post-training evaluation","agentic/RL environments"],"datePosted":"2026-04-18T16:00:09.239Z","jobLocation":{"@type":"Place","address":{"@type":"PostalAddress","addressLocality":"San Francisco, CA; Seattle, WA; New York, NY"}},"employmentType":"FULL_TIME","occupationalCategory":"Engineering","industry":"Technology","skills":"machine learning, deep learning, generative models, agent/RL systems, research leadership, team management, communication, publication, open-source contribution, PhD in machine learning or related domain, experience with large language models, post-training evaluation, agentic/RL environments","baseSalary":{"@type":"MonetaryAmount","currency":"USD","value":{"@type":"QuantitativeValue","minValue":273000,"maxValue":393000,"unitText":"YEAR"}}},{"@context":"https://schema.org","@type":"JobPosting","identifier":{"@type":"PropertyValue","name":"YubHub","value":"job_fe04c8cc-782"},"title":"Forward Deployed Engineering Manager","description":"<p>Shape the Future of AI</p>\n<p>At Labelbox, we&#39;re building the critical infrastructure that powers breakthrough AI models at leading research labs and enterprises. 
Since 2018, we&#39;ve been pioneering data-centric approaches that are fundamental to AI development, and our work becomes even more essential as AI capabilities expand exponentially.</p>\n<p>We&#39;re the only company offering three integrated solutions for frontier AI development:</p>\n<p>Enterprise Platform &amp; Tools: Advanced annotation tools, workflow automation, and quality control systems that enable teams to produce high-quality training data at scale</p>\n<p>Frontier Data Labeling Service: Specialized data labeling through Alignerr, leveraging subject matter experts for next-generation AI models</p>\n<p>Expert Marketplace: Connecting AI teams with highly skilled annotators and domain experts for flexible scaling</p>\n<p>Why Join Us</p>\n<p>High-Impact Environment: We operate like an early-stage startup, focusing on impact over process. You&#39;ll take on expanded responsibilities quickly, with career growth directly tied to your contributions.</p>\n<p>Technical Excellence: Work at the cutting edge of AI development, collaborating with industry leaders and shaping the future of artificial intelligence.</p>\n<p>Innovation at Speed: We celebrate those who take ownership, move fast, and deliver impact. Our environment rewards high agency and rapid execution.</p>\n<p>Continuous Growth: Every role requires continuous learning and evolution. You&#39;ll be surrounded by curious minds solving complex problems at the frontier of AI.</p>\n<p>Clear Ownership: You&#39;ll know exactly what you&#39;re responsible for and have the autonomy to execute. 
We empower people to drive results through clear ownership and metrics.</p>\n<p>The role</p>\n<p>We’re hiring a Forward Deployed Engineering Manager to lead the design, development, and delivery of reinforcement learning environments for agentic AI systems.</p>\n<p>You’ll manage a team responsible for building sandboxed, reproducible environments—terminal-based workflows, browser automation, and computer-use simulations—that power both model training and human-in-the-loop evaluation. This is a hands-on leadership role where you’ll set technical direction, guide execution, and stay close to architecture and critical systems.</p>\n<p>What You’ll Do</p>\n<p>Lead, hire, and develop a high-performing team of Forward Deployed Engineers, setting a high bar for ownership, velocity, and technical quality</p>\n<p>Own the RL environment roadmap, aligning team execution with customer needs and evolving model capabilities</p>\n<p>Oversee development of sandboxed environments (terminal, browser, tool-augmented workspaces) that support deterministic execution and multi-step agent interaction</p>\n<p>Ensure reliability, observability, and data integrity through strong instrumentation (logging, trajectory capture, state snapshotting)</p>\n<p>Drive infrastructure excellence across containerization, sandboxing, CI/CD, automated testing, and monitoring</p>\n<p>Partner cross-functionally with data operations, product, and leading AI labs to define task design, evaluation protocols, and environment requirements</p>\n<p>Enable rapid prototyping and iteration, helping the team move from ambiguous requirements to production-ready systems quickly</p>\n<p>Stay close to the technical details—reviewing architecture, unblocking complex issues, and guiding design decisions</p>\n<p>What We’re Looking For</p>\n<p>5+ years of software engineering experience (Python)</p>\n<p>2+ years of experience managing or leading engineers in fast-paced environments</p>\n<p>Strong experience with 
containerization and sandboxing (Docker, Firecracker, or similar)</p>\n<p>Solid understanding of reinforcement learning fundamentals (MDPs, reward design, episode structure, observation/action spaces)</p>\n<p>Background in infrastructure, developer tooling, or distributed systems</p>\n<p>Strong debugging skills and systems thinking across layered, containerized environments</p>\n<p>Ability to operate in ambiguity and translate loosely defined problems into clear execution plans</p>\n<p>Excellent communication and stakeholder management skills</p>\n<p>Preferred</p>\n<p>Experience building or working with RL environments (Gym, PettingZoo) or agent benchmarks (SWE-bench, WebArena, OSWorld, TerminalBench)</p>\n<p>Familiarity with cloud infrastructure (GCP or AWS)</p>\n<p>Prior experience in AI/ML platforms, data companies, or research environments</p>\n<p>Contributions to open-source projects in RL, agents, or developer tooling</p>\n<p>Why This Role Matters</p>\n<p>RL environment quality is a critical bottleneck in advancing agentic AI. Poorly designed or unreliable environments introduce noise into training loops and directly impact model performance.</p>\n<p>In this role, you’ll lead the team building the environments that define how models learn—working across a range of cutting-edge projects with leading AI labs. Alignerr offers the speed and ownership of a startup with the scale and resources of Labelbox, giving you the opportunity to have outsized impact on the future of AI.</p>\n<p>About Alignerr</p>\n<p>Alignerr is Labelbox’s human data organization, powering next-generation AI through high-quality training data, reinforcement learning environments, and evaluation systems. 
We partner directly with leading AI labs to build the data and infrastructure that push model capabilities forward.</p>\n<p>Life at Labelbox</p>\n<p>Location: Join our dedicated tech hubs in San Francisco or Wrocław, Poland</p>\n<p>Work Style: Hybrid model with 2 days per week in office, combining collaboration and flexibility</p>\n<p>Environment: Fast-paced and high-intensity, perfect for ambitious individuals who thrive on ownership and quick decision-making</p>\n<p>Growth: Career advancement opportunities directly tied to your impact</p>\n<p>Vision: Be part of building the foundation for humanity&#39;s most transformative technology</p>\n<p>Our Vision</p>\n<p>We believe data will remain crucial in achieving artificial general intelligence. As AI models become more sophisticated, the need for high-quality, specialized training data will only grow. Join us in developing new products and services that enable the next generation of AI breakthroughs.</p>\n<p>Labelbox is backed by leading investors including SoftBank, Andreessen Horowitz, B Capital, Gradient Ventures, Databricks Ventures, and Kleiner Perkins. Our customers include Fortune 500 enterprises and leading AI labs.</p>\n<p>Any emails from Labelbox team members will originate from a @labelbox.com email address. 
If you encounter anything that raises suspicions during your interactions, we encourage you to exercise caution and suspend or discontinue communications.</p>\n<p style=\"margin-top:24px;font-size:13px;color:#666;\">XML job scraping automation by <a href=\"https://yubhub.co\">YubHub</a></p>","url":"https://yubhub.co/jobs/job_fe04c8cc-782","directApply":true,"hiringOrganization":{"@type":"Organization","name":"Labelbox","sameAs":"https://www.labelbox.com/","logo":"https://logos.yubhub.co/labelbox.com.png"},"x-apply-url":"https://job-boards.greenhouse.io/labelbox/jobs/5101195007","x-work-arrangement":"hybrid","x-experience-level":"senior","x-job-type":"full-time","x-salary-range":"$180,000-$220,000 USD","x-skills-required":["Software engineering experience (Python)","Containerization and sandboxing (Docker, Firecracker, or similar)","Reinforcement learning fundamentals (MDPs, reward design, episode structure, observation/action spaces)","Infrastructure, developer tooling, or distributed systems","Debugging skills and systems thinking"],"x-skills-preferred":["Experience building or working with RL environments (Gym, PettingZoo) or agent benchmarks (SWE-bench, WebArena, OSWorld, TerminalBench)","Familiarity with cloud infrastructure (GCP or AWS)","Prior experience in AI/ML platforms, data companies, or research environments","Contributions to open-source projects in RL, agents, or developer tooling"],"datePosted":"2026-04-18T15:56:05.491Z","jobLocation":{"@type":"Place","address":{"@type":"PostalAddress","addressLocality":"San Francisco Bay Area"}},"employmentType":"FULL_TIME","occupationalCategory":"Engineering","industry":"Technology","skills":"Software engineering experience (Python), Containerization and sandboxing (Docker, Firecracker, or similar), Reinforcement learning fundamentals (MDPs, reward design, episode structure, observation/action spaces), Infrastructure, developer tooling, or distributed systems, Debugging skills and systems thinking, Experience 
building or working with RL environments (Gym, PettingZoo) or agent benchmarks (SWE-bench, WebArena, OSWorld, TerminalBench), Familiarity with cloud infrastructure (GCP or AWS), Prior experience in AI/ML platforms, data companies, or research environments, Contributions to open-source projects in RL, agents, or developer tooling","baseSalary":{"@type":"MonetaryAmount","currency":"USD","value":{"@type":"QuantitativeValue","minValue":180000,"maxValue":220000,"unitText":"YEAR"}}},{"@context":"https://schema.org","@type":"JobPosting","identifier":{"@type":"PropertyValue","name":"YubHub","value":"job_c9ab5cbc-dd6"},"title":"Research Engineer, Performance RL","description":"<p>We&#39;re hiring a Research Engineer to join our Code RL team within the RL organization. As a Research Engineer, you&#39;ll advance our models&#39; ability to safely write correct, fast code for accelerators.</p>\n<p>You&#39;ll need to know accelerator performance well to turn it into tasks and signals models can learn from. Specifically, you will:</p>\n<ul>\n<li>Invent, design and implement RL environments and evaluations.</li>\n<li>Conduct experiments and shape our research roadmap.</li>\n<li>Deliver your work into training runs.</li>\n<li>Collaborate with other researchers, engineers, and performance engineering specialists across and outside Anthropic.</li>\n</ul>\n<p>We&#39;re looking for someone with expertise in accelerators (CUDA, ROCm, Triton, Pallas), ML framework programming (JAX or PyTorch), and experience with balancing research exploration with engineering implementation.</p>\n<p>Strong candidates may also have experience with reinforcement learning, porting ML workloads between different types of accelerators, and familiarity with LLM training methodologies.</p>\n<p>The annual compensation range for this role is $350,000-$850,000 USD.</p>\n<p>Please note that we&#39;re an extremely collaborative group, and we value communication skills. 
The easiest way to understand our research directions is to read our recent research.</p>\n<p>We offer competitive compensation and benefits, optional equity donation matching, generous vacation and parental leave, flexible working hours, and a lovely office space in which to collaborate with colleagues.</p>\n<p style=\"margin-top:24px;font-size:13px;color:#666;\">XML job scraping automation by <a href=\"https://yubhub.co\">YubHub</a></p>","url":"https://yubhub.co/jobs/job_c9ab5cbc-dd6","directApply":true,"hiringOrganization":{"@type":"Organization","name":"Anthropic","sameAs":"https://www.anthropic.com/","logo":"https://logos.yubhub.co/anthropic.com.png"},"x-apply-url":"https://job-boards.greenhouse.io/anthropic/jobs/5160330008","x-work-arrangement":"hybrid","x-experience-level":"senior","x-job-type":"full-time","x-salary-range":"$350,000-$850,000 USD","x-skills-required":["accelerator performance","ML framework programming","reinforcement learning","RL environments and evaluations","experiments and research roadmap","training runs","collaboration with researchers and engineers"],"x-skills-preferred":["CUDA","ROCm","Triton","Pallas","JAX","PyTorch","LLM training methodologies"],"datePosted":"2026-04-18T15:54:02.762Z","jobLocation":{"@type":"Place","address":{"@type":"PostalAddress","addressLocality":"San Francisco, CA"}},"employmentType":"FULL_TIME","occupationalCategory":"Engineering","industry":"Technology","skills":"accelerator performance, ML framework programming, reinforcement learning, RL environments and evaluations, experiments and research roadmap, training runs, collaboration with researchers and engineers, CUDA, ROCm, Triton, Pallas, JAX, PyTorch, LLM training 
methodologies","baseSalary":{"@type":"MonetaryAmount","currency":"USD","value":{"@type":"QuantitativeValue","minValue":350000,"maxValue":850000,"unitText":"YEAR"}}},{"@context":"https://schema.org","@type":"JobPosting","identifier":{"@type":"PropertyValue","name":"YubHub","value":"job_26b9d76f-c85"},"title":"Research Engineer, Universes","description":"<p>We&#39;re looking for Research Engineers to help us build the next generation of training environments for capable and safe agentic AI.</p>\n<p>This role blends research and engineering responsibilities, requiring you to both implement novel approaches and contribute to research direction. You&#39;ll work on fundamental research in reinforcement learning, designing training environments and methodologies that push the state of the art, and building evaluations that measure genuine capability.</p>\n<p>Responsibilities:</p>\n<ul>\n<li>Build the next generation of agentic environments</li>\n<li>Build rigorous evaluations that measure real capability</li>\n<li>Collaborate across research and infrastructure teams to ship environments into production training</li>\n<li>Debug and iterate rapidly across research and production ML stacks</li>\n<li>Contribute to research culture through technical discussions and collaborative problem-solving</li>\n</ul>\n<p>You may be a good fit if you:</p>\n<ul>\n<li>Are highly impact-driven — you care about outcomes, not activity</li>\n<li>Operate with high agency</li>\n<li>Have good research taste or senior technical experience, demonstrating good judgment in identifying what actually matters in complex problem spaces</li>\n<li>Can balance research exploration with engineering implementation</li>\n<li>Are passionate about the potential impact of AI and are committed to developing safe and beneficial systems</li>\n<li>Are comfortable with uncertainty and adapt quickly as the landscape shifts</li>\n<li>Have strong software engineering skills and can build robust 
infrastructure</li>\n<li>Enjoy pair programming (we love to pair!)</li>\n</ul>\n<p>Strong candidates may also have one or more of the following:</p>\n<ul>\n<li>Have industry experience with large language model training, fine-tuning or evaluation</li>\n<li>Have industry experience building RL environments, simulation systems, or large-scale ML infrastructure</li>\n<li>Senior experience in a relevant technical field even if transitioning domains</li>\n<li>Deep expertise in sandboxing, containerization, VM infrastructure, or distributed systems</li>\n<li>Published influential work in relevant ML areas</li>\n</ul>\n<p>The annual compensation range for this role is $500,000-$850,000 USD.</p>\n<p style=\"margin-top:24px;font-size:13px;color:#666;\">XML job scraping automation by <a href=\"https://yubhub.co\">YubHub</a></p>","url":"https://yubhub.co/jobs/job_26b9d76f-c85","directApply":true,"hiringOrganization":{"@type":"Organization","name":"Anthropic","sameAs":"https://www.anthropic.com/","logo":"https://logos.yubhub.co/anthropic.com.png"},"x-apply-url":"https://job-boards.greenhouse.io/anthropic/jobs/5061517008","x-work-arrangement":"hybrid","x-experience-level":"senior","x-job-type":"full-time","x-salary-range":"$500,000-$850,000 USD","x-skills-required":["Reinforcement learning","Training environments","ML stacks","Software engineering","Pair programming"],"x-skills-preferred":["Large language model training","RL environments","Simulation systems","Distributed systems"],"datePosted":"2026-04-18T15:46:02.776Z","jobLocation":{"@type":"Place","address":{"@type":"PostalAddress","addressLocality":"Remote-Friendly (Travel-Required) | San Francisco, CA | Seattle, WA | New York City, NY"}},"jobLocationType":"TELECOMMUTE","employmentType":"FULL_TIME","occupationalCategory":"Engineering","industry":"Technology","skills":"Reinforcement learning, Training environments, ML stacks, Software engineering, Pair programming, Large language model training, RL environments, Simulation 
systems, Distributed systems","baseSalary":{"@type":"MonetaryAmount","currency":"USD","value":{"@type":"QuantitativeValue","minValue":500000,"maxValue":850000,"unitText":"YEAR"}}},{"@context":"https://schema.org","@type":"JobPosting","identifier":{"@type":"PropertyValue","name":"YubHub","value":"job_447c26bd-a83"},"title":"Research Engineer, Universes","description":"<p><strong>About the Role</strong></p>\n<p>We&#39;re looking for Research Engineers to help us build the next generation of training environments for capable and safe agentic AI. This role blends research and engineering responsibilities, requiring you to both implement novel approaches and contribute to research direction.</p>\n<p><strong>Responsibilities:</strong></p>\n<ul>\n<li>Build the next generation of agentic environments</li>\n<li>Build rigorous evaluations that measure real capability</li>\n<li>Collaborate across research and infrastructure teams to ship environments into production training</li>\n<li>Debug and iterate rapidly across research and production ML stacks</li>\n<li>Contribute to research culture through technical discussions and collaborative problem-solving</li>\n</ul>\n<p><strong>You may be a good fit if you:</strong></p>\n<ul>\n<li>Are highly impact-driven — you care about outcomes, not activity</li>\n<li>Operate with high agency</li>\n<li>Have good research taste or senior technical experience, demonstrating good judgment in identifying what actually matters in complex problem spaces</li>\n<li>Can balance research exploration with engineering implementation</li>\n<li>Are passionate about the potential impact of AI and are committed to developing safe and beneficial systems</li>\n<li>Are comfortable with uncertainty and adapt quickly as the landscape shifts</li>\n<li>Have strong software engineering skills and can build robust infrastructure</li>\n<li>Enjoy pair programming (we love to pair!)</li>\n</ul>\n<p><strong>Strong candidates may also have one or more of the 
following:</strong></p>\n<ul>\n<li>Have industry experience with large language model training, fine-tuning or evaluation</li>\n<li>Have industry experience building RL environments, simulation systems, or large-scale ML infrastructure</li>\n<li>Senior experience in a relevant technical field even if transitioning domains</li>\n<li>Deep expertise in sandboxing, containerization, VM infrastructure, or distributed systems</li>\n<li>Published influential work in relevant ML areas</li>\n</ul>\n<p><strong>Logistics</strong></p>\n<ul>\n<li>Education requirements: We require at least a Bachelor&#39;s degree in a related field or equivalent experience.</li>\n<li>Location-based hybrid policy: Currently, we expect all staff to be in one of our offices at least 25% of the time. However, some roles may require more time in our offices.</li>\n<li>Visa sponsorship: We do sponsor visas! However, we aren&#39;t able to successfully sponsor visas for every role and every candidate. But if we make you an offer, we will make every reasonable effort to get you a visa, and we retain an immigration lawyer to help with this.</li>\n</ul>\n<p><strong>How we&#39;re different</strong></p>\n<p>We believe that the highest-impact AI research will be big science. At Anthropic we work as a single cohesive team on just a few large-scale research efforts. And we value impact — advancing our long-term goals of steerable, trustworthy AI — rather than work on smaller and more specific puzzles. We view AI research as an empirical science, which has as much in common with physics and biology as with traditional efforts in computer science. We&#39;re an extremely collaborative group, and we host frequent research discussions to ensure that we are pursuing the highest-impact work at any given time. 
As such, we greatly value communication skills.</p>\n<p style=\"margin-top:24px;font-size:13px;color:#666;\">XML job scraping automation by <a href=\"https://yubhub.co\">YubHub</a></p>","url":"https://yubhub.co/jobs/job_447c26bd-a83","directApply":true,"hiringOrganization":{"@type":"Organization","name":"Anthropic","sameAs":"https://www.anthropic.com/","logo":"https://logos.yubhub.co/anthropic.com.png"},"x-apply-url":"https://job-boards.greenhouse.io/anthropic/jobs/5061517008","x-work-arrangement":"hybrid","x-experience-level":"senior","x-job-type":"full-time","x-salary-range":"$500,000 - $850,000 USD","x-skills-required":["reinforcement learning","training environments","evaluation methodologies","software engineering","pair programming"],"x-skills-preferred":["large language model training","RL environments","simulation systems","distributed systems","influential work in ML areas"],"datePosted":"2026-03-08T13:49:07.277Z","jobLocation":{"@type":"Place","address":{"@type":"PostalAddress","addressLocality":"San Francisco, CA, Seattle, WA, New York City, NY"}},"employmentType":"FULL_TIME","occupationalCategory":"Engineering","industry":"Technology","skills":"reinforcement learning, training environments, evaluation methodologies, software engineering, pair programming, large language model training, RL environments, simulation systems, distributed systems, influential work in ML areas","baseSalary":{"@type":"MonetaryAmount","currency":"USD","value":{"@type":"QuantitativeValue","minValue":500000,"maxValue":850000,"unitText":"YEAR"}}}]}