{"version":"0.1","company":{"name":"YubHub","url":"https://yubhub.co","jobsUrl":"https://yubhub.co/jobs/skill/training-methodologies"},"x-facet":{"type":"skill","slug":"training-methodologies","display":"Training Methodologies","count":8},"x-feed-size-limit":100,"x-feed-sort":"enriched_at desc","x-feed-notice":"This feed contains at most 100 jobs (the most recently enriched). For the full corpus, use the paginated /stats/by-facet endpoint or /search.","x-generator":"yubhub-xml-generator","x-rights":"Free to redistribute with attribution: \"Data by YubHub (https://yubhub.co)\"","x-schema":"Each entry in `jobs` follows https://schema.org/JobPosting. YubHub-native raw fields carry `x-` prefix.","jobs":[{"@context":"https://schema.org","@type":"JobPosting","identifier":{"@type":"PropertyValue","name":"YubHub","value":"job_c9ab5cbc-dd6"},"title":"Research Engineer, Performance RL","description":"<p>We&#39;re hiring a Research Engineer to join our Code RL team within the RL organization. As a Research Engineer, you&#39;ll advance our models&#39; ability to safely write correct, fast code for accelerators.</p>\n<p>You&#39;ll need to know accelerator performance well to turn it into tasks and signals models can learn from. 
Specifically, you will:</p>\n<ul>\n<li>Invent, design and implement RL environments and evaluations.</li>\n<li>Conduct experiments and shape our research roadmap.</li>\n<li>Deliver your work into training runs.</li>\n<li>Collaborate with other researchers, engineers, and performance engineering specialists across and outside Anthropic.</li>\n</ul>\n<p>We&#39;re looking for someone with expertise in accelerators (CUDA, ROCm, Triton, Pallas), ML framework programming (JAX or PyTorch), and experience with balancing research exploration with engineering implementation.</p>\n<p>Strong candidates may also have experience with reinforcement learning, porting ML workloads between different types of accelerators, and familiarity with LLM training methodologies.</p>\n<p>The annual compensation range for this role is $350,000-$850,000 USD.</p>\n<p>Please note that we&#39;re an extremely collaborative group, and we value communication skills. The easiest way to understand our research directions is to read our recent research.</p>\n<p>We offer competitive compensation and benefits, optional equity donation matching, generous vacation and parental leave, flexible working hours, and a lovely office space in which to collaborate with colleagues.</p>\n<p style=\"margin-top:24px;font-size:13px;color:#666;\">XML job scraping automation by <a href=\"https://yubhub.co\">YubHub</a></p>","url":"https://yubhub.co/jobs/job_c9ab5cbc-dd6","directApply":true,"hiringOrganization":{"@type":"Organization","name":"Anthropic","sameAs":"https://www.anthropic.com/","logo":"https://logos.yubhub.co/anthropic.com.png"},"x-apply-url":"https://job-boards.greenhouse.io/anthropic/jobs/5160330008","x-work-arrangement":"hybrid","x-experience-level":"senior","x-job-type":"full-time","x-salary-range":"$350,000-$850,000 USD","x-skills-required":["accelerator performance","ML framework programming","reinforcement learning","RL environments and evaluations","experiments and research roadmap","training 
runs","collaboration with researchers and engineers"],"x-skills-preferred":["CUDA","ROCm","Triton","Pallas","JAX","PyTorch","LLM training methodologies"],"datePosted":"2026-04-18T15:54:02.762Z","jobLocation":{"@type":"Place","address":{"@type":"PostalAddress","addressLocality":"San Francisco, CA"}},"employmentType":"FULL_TIME","occupationalCategory":"Engineering","industry":"Technology","skills":"accelerator performance, ML framework programming, reinforcement learning, RL environments and evaluations, experiments and research roadmap, training runs, collaboration with researchers and engineers, CUDA, ROCm, Triton, Pallas, JAX, PyTorch, LLM training methodologies","baseSalary":{"@type":"MonetaryAmount","currency":"USD","value":{"@type":"QuantitativeValue","minValue":350000,"maxValue":850000,"unitText":"YEAR"}}},{"@context":"https://schema.org","@type":"JobPosting","identifier":{"@type":"PropertyValue","name":"YubHub","value":"job_c306b85d-1ff"},"title":"Sales Enablement & Training Specialist","description":"<p>As a Sales Enablement &amp; Training Specialist at Stripe, you will play a vital role in the delivery of our Sales Spin Up onboarding program and targeted skills workshops.</p>\n<p>You will engage with new hires, current team members, and sales leaders to enhance their understanding of Stripe&#39;s products and sales strategies. 
You&#39;ll collaborate closely with cross-functional teams to ensure that training content is not only informative but also engaging and relevant.</p>\n<p>Responsibilities:</p>\n<ul>\n<li>Lead onboarding and 101/201 live training sessions, ensuring an engaging and effective training experience for new hires</li>\n<li>Organize and facilitate sales skills workshops to uplevel seller performance in our SF hub</li>\n<li>Be prepared to travel to other areas of your region and support virtual delivery for the APAC team</li>\n<li>Build strong relationships with stakeholders to identify their unique enablement needs and sales skills gaps</li>\n<li>Collaborate with subject matter experts to develop engaging training materials, including presentations, role-plays, and interactive exercises that enhance efficiency and drive business outcomes</li>\n<li>Tailor content and information to various roles, segments, and sales motions, demonstrating empathy for sellers</li>\n<li>Assess training effectiveness through participant feedback and performance metrics, iterating on content and delivery methods as needed</li>\n<li>Provide ongoing coaching and support to sales team members to reinforce learning and drive continuous improvement</li>\n<li>Represent a macro-optimistic exothermic team and be the first face many see as an onboarding trainer</li>\n<li>Help drive a learning culture at Stripe by building strong relationships with stakeholders in your region</li>\n</ul>\n<p>Requirements:</p>\n<ul>\n<li>8+ years of experience in sales, sales enablement, or a related field, with a focus on onboarding or skills development</li>\n<li>A passion for teaching, a strong grasp of adult learning principles, and the ability to deliver engaging, hands-on programs while representing a positive and collaborative team</li>\n<li>Proven ability to design and deliver engaging training sessions, both virtual and in-person</li>\n<li>Experience coaching and delivering value selling or solution selling 
programs</li>\n<li>Excellent communication and presentation skills, with the ability to work cross-functionally with senior leaders and subject matter experts and to quickly convey complex concepts in simple terms</li>\n<li>Strong organizational skills and attention to detail, with the ability to manage multiple training initiatives simultaneously</li>\n<li>Experience in sales environments, ideally with an understanding of SaaS or financial technology solutions</li>\n<li>A proactive and adaptable mindset, capable of navigating ambiguity and changing priorities</li>\n</ul>\n<p>Preferred Qualifications:</p>\n<ul>\n<li>Experience in a fast-paced, dynamic sales environment with a focus on quota attainment</li>\n<li>Familiarity with learning management systems (LMS) and e-learning tools</li>\n<li>Strong facilitation skills with experience in coaching and mentoring</li>\n<li>Certification in training methodologies or instructional design (e.g., ATD, CPLP, etc.) is a plus</li>\n</ul>\n<p>At Stripe, we believe in nurturing talent and providing opportunities for professional growth. 
If you’re an enthusiastic and experienced training facilitator with a passion for empowering sales teams, we would love to hear from you!</p>\n<p style=\"margin-top:24px;font-size:13px;color:#666;\">XML job scraping automation by <a href=\"https://yubhub.co\">YubHub</a></p>","url":"https://yubhub.co/jobs/job_c306b85d-1ff","directApply":true,"hiringOrganization":{"@type":"Organization","name":"Stripe","sameAs":"https://stripe.com/","logo":"https://logos.yubhub.co/stripe.com.png"},"x-apply-url":"https://job-boards.greenhouse.io/stripe/jobs/7359215","x-work-arrangement":"hybrid","x-experience-level":"senior","x-job-type":"full-time","x-salary-range":null,"x-skills-required":["sales enablement","training facilitation","adult learning principles","communication and presentation skills","organizational skills","attention to detail","experience in sales environments","SaaS or financial technology solutions"],"x-skills-preferred":["learning management systems (LMS)","e-learning tools","facilitation skills","coaching and mentoring","certification in training methodologies or instructional design"],"datePosted":"2026-04-18T15:53:30.991Z","jobLocation":{"@type":"Place","address":{"@type":"PostalAddress","addressLocality":"SF"}},"employmentType":"FULL_TIME","occupationalCategory":"Sales","industry":"Technology","skills":"sales enablement, training facilitation, adult learning principles, communication and presentation skills, organizational skills, attention to detail, experience in sales environments, SaaS or financial technology solutions, learning management systems (LMS), e-learning tools, facilitation skills, coaching and mentoring, certification in training methodologies or instructional design"},{"@context":"https://schema.org","@type":"JobPosting","identifier":{"@type":"PropertyValue","name":"YubHub","value":"job_8549c317-12f"},"title":"Senior Research Scientist, Reward Models","description":"<p>As a Senior Research Scientist on our Reward Models team, you&#39;ll lead 
research efforts to improve how we specify and learn human preferences at scale.</p>\n<p>Your work will directly shape how our models understand and optimize for what humans actually want , enabling Claude to be more useful, more reliable, and better aligned with human values.</p>\n<p>This role focuses on pushing the frontier of reward modeling for large language models. You&#39;ll develop novel architectures and training methodologies for RLHF, research new approaches to LLM-based evaluation and grading (including rubric-based methods), and investigate techniques to identify and mitigate reward hacking.</p>\n<p>You&#39;ll collaborate closely with teams across Anthropic, including Finetuning, Alignment Science, and our broader research organization, to ensure your work translates into concrete improvements in both model capabilities and safety.</p>\n<p>We&#39;re looking for someone who can drive ambitious research agendas while also shipping practical improvements to production systems. 
You&#39;ll have the opportunity to work on some of the most important open problems in AI alignment, with access to frontier models and significant computational resources.</p>\n<p>Your work will directly advance the science of how we train AI systems to be both highly capable and safe.</p>\n<p>Responsibilities:</p>\n<ul>\n<li>Lead research on novel reward model architectures and training approaches for RLHF</li>\n</ul>\n<ul>\n<li>Develop and evaluate LLM-based grading and evaluation methods, including rubric-driven approaches that improve consistency and interpretability</li>\n</ul>\n<ul>\n<li>Research techniques to detect, characterize, and mitigate reward hacking and specification gaming</li>\n</ul>\n<ul>\n<li>Design experiments to understand reward model generalization, robustness, and failure modes</li>\n</ul>\n<ul>\n<li>Collaborate with the Finetuning team to translate research insights into improvements for production training pipelines</li>\n</ul>\n<ul>\n<li>Contribute to research publications, blog posts, and internal documentation</li>\n</ul>\n<ul>\n<li>Mentor other researchers and help build institutional knowledge around reward modeling</li>\n</ul>\n<p>You may be a good fit if you:</p>\n<ul>\n<li>Have a track record of research contributions in reward modeling, RLHF, or closely related areas of machine learning</li>\n</ul>\n<ul>\n<li>Have experience training and evaluating reward models for large language models</li>\n</ul>\n<ul>\n<li>Are comfortable designing and running large-scale experiments with significant computational resources</li>\n</ul>\n<ul>\n<li>Can work effectively across research and engineering, iterating quickly while maintaining scientific rigor</li>\n</ul>\n<ul>\n<li>Enjoy collaborative research and can communicate complex ideas clearly to diverse audiences</li>\n</ul>\n<ul>\n<li>Care deeply about building AI systems that are both highly capable and safe</li>\n</ul>\n<p>Strong candidates may also:</p>\n<ul>\n<li>Have published 
research on reward modeling, preference learning, or RLHF</li>\n</ul>\n<ul>\n<li>Have experience with LLM-as-judge approaches, including calibration and reliability challenges</li>\n</ul>\n<ul>\n<li>Have worked on reward hacking, specification gaming, or related robustness problems</li>\n</ul>\n<ul>\n<li>Have experience with constitutional AI, debate, or other scalable oversight approaches</li>\n</ul>\n<ul>\n<li>Have contributed to production ML systems at scale</li>\n</ul>\n<ul>\n<li>Have familiarity with interpretability techniques as applied to understanding reward model behavior</li>\n</ul>\n<p>The annual compensation range for this role is $350,000-$500,000 USD.</p>\n<p>Logistics:</p>\n<ul>\n<li>Minimum education: Bachelor’s degree or an equivalent combination of education, training, and/or experience</li>\n</ul>\n<ul>\n<li>Required field of study: A field relevant to the role as demonstrated through coursework, training, or professional experience</li>\n</ul>\n<ul>\n<li>Minimum years of experience: Years of experience required will correlate with the internal job level requirements for the position</li>\n</ul>\n<p>Location-based hybrid policy: Currently, we expect all staff to be in one of our offices at least 25% of the time. However, some roles may require more time in our offices.</p>\n<p>Visa sponsorship: We do sponsor visas! However, we aren&#39;t able to successfully sponsor visas for every role and every candidate. But if we make you an offer, we will make every reasonable effort to get you a visa, and we retain an immigration lawyer to help with this.</p>\n<p>We encourage you to apply even if you do not believe you meet every single qualification. Not all strong candidates will meet every single qualification as listed. 
Research shows that people who identify as being from underrepresented groups are more prone to experiencing imposter syndrome and doubting the strength of their candidacy, so we urge you not to exclude yourself prematurely and to submit an application if you&#39;re interested in this work.</p>\n<p>Your safety matters to us. To protect yourself from potential scams, remember that Anthropic recruiters only contact you from @anthropic.com email addresses. In some cases, we may partner with vetted recruiting agencies who will identify themselves as working on behalf of Anthropic. Be cautious of emails from other domains. Legitimate Anthropic recruiters will never ask for money, fees, or banking information before your first day. If you&#39;re ever unsure about a communication, don&#39;t click any links,visit anthropic.com/careers directly for confirmed position openings.</p>\n<p>How we&#39;re different:</p>\n<p>We believe that the highest-impact AI research will be big science. At Anthropic we work as a single cohesive team on just a few large-scale research efforts. And we value impact , advancing our long-term goals of steerable, trustworthy AI , rather than work on smaller and more specific puzzles. We view AI research as an empirical science, which has as much in common with physics and biology as with traditional efforts in computer science. We&#39;re an extremely collaborative group, and we host frequent research discussions to ensure that we are pursuing the highest-impact work at any given time. As such, we greatly value communication skills.</p>\n<p>The easiest way to understand our research directions is to read our recent research. 
This research continues many of the directions our team worked on prior to Anthropic, including: GPT-3, Circuit-Based Interpretability, Multimodal Neurons, Scaling Laws, AI &amp; Compute, Concrete Problems in AI Safety, and Learning from Human Preferences.</p>\n<p>Come work with us!</p>\n<p>Anthropic is a public benefit corporation headquartered in San Francisco. We offer competitive compensation and benefits, optional equity donation matching, generous vacation and parental leave, flexible working hours, and a lovely office space in which to collaborate with colleagues.</p>\n<p style=\"margin-top:24px;font-size:13px;color:#666;\">XML job scraping automation by <a href=\"https://yubhub.co\">YubHub</a></p>","url":"https://yubhub.co/jobs/job_8549c317-12f","directApply":true,"hiringOrganization":{"@type":"Organization","name":"Anthropic","sameAs":"https://www.anthropic.com/","logo":"https://logos.yubhub.co/anthropic.com.png"},"x-apply-url":"https://job-boards.greenhouse.io/anthropic/jobs/5024835008","x-work-arrangement":"hybrid","x-experience-level":"senior","x-job-type":"full-time","x-salary-range":"$350,000-$500,000 USD","x-skills-required":["reward modeling","RLHF","large language models","novel architectures","training methodologies","evaluation and grading","rubric-based methods","reward hacking","specification gaming","generalization","robustness","failure modes","computational resources","scientific rigor","communication skills","interpretability techniques"],"x-skills-preferred":[],"datePosted":"2026-04-18T15:47:13.514Z","jobLocation":{"@type":"Place","address":{"@type":"PostalAddress","addressLocality":"Remote-Friendly (Travel Required) | San Francisco, CA"}},"jobLocationType":"TELECOMMUTE","employmentType":"FULL_TIME","occupationalCategory":"Engineering","industry":"Technology","skills":"reward modeling, RLHF, large language models, novel architectures, training methodologies, evaluation and grading, rubric-based methods, reward hacking, specification 
gaming, generalization, robustness, failure modes, computational resources, scientific rigor, communication skills, interpretability techniques","baseSalary":{"@type":"MonetaryAmount","currency":"USD","value":{"@type":"QuantitativeValue","minValue":350000,"maxValue":500000,"unitText":"YEAR"}}},{"@context":"https://schema.org","@type":"JobPosting","identifier":{"@type":"PropertyValue","name":"YubHub","value":"job_faffcca4-e94"},"title":"Research Engineer, Cybersecurity Reinforcement Learning","description":"<p>About the role</p>\n<p>We&#39;re hiring for the Cybersecurity RL team within Horizons. As a Research Engineer, you&#39;ll help to safely advance the capabilities of our models in secure coding, vulnerability remediation, and other areas of defensive cybersecurity.</p>\n<p>This role blends research and engineering, requiring you to both develop novel approaches and realize them in code. Your work will include designing and implementing RL environments, conducting experiments and evaluations, delivering your work into production training runs, and collaborating with other researchers, engineers, and cybersecurity specialists across and outside Anthropic.</p>\n<p>The role requires domain expertise in cybersecurity paired with interest or experience in training safe AI models. 
For example, you might be a white hat hacker who&#39;s curious about how LLMs could augment or transform your work, a security engineer interested in how AI could help harden systems at scale, or a detection and response professional wondering how models could enhance defensive workflows.</p>\n<p>Responsibilities</p>\n<ul>\n<li>Design and implement RL environments for secure coding and vulnerability remediation</li>\n<li>Conduct experiments and evaluations to assess the effectiveness of our models</li>\n<li>Deliver your work into production training runs to advance the capabilities of our models</li>\n<li>Collaborate with other researchers, engineers, and cybersecurity specialists across and outside Anthropic</li>\n</ul>\n<p>Requirements</p>\n<ul>\n<li>Experience in cybersecurity research</li>\n<li>Experience with machine learning</li>\n<li>Strong software engineering skills</li>\n<li>Ability to balance research exploration with engineering implementation</li>\n<li>Passion for AI&#39;s potential and commitment to developing safe and beneficial systems</li>\n</ul>\n<p>Strong candidates may also have:</p>\n<ul>\n<li>Professional experience in security engineering, fuzzing, detection and response, or other applied defensive work</li>\n<li>Experience participating in or building CTF competitions and cyber ranges</li>\n<li>Academic research experience in cybersecurity</li>\n<li>Familiarity with RL techniques and environments</li>\n<li>Familiarity with LLM training methodologies</li>\n</ul>\n<p>Logistics</p>\n<ul>\n<li>Minimum education: Bachelor’s degree or an equivalent combination of education, training, and/or experience</li>\n<li>Required field of study: A field relevant to the role as demonstrated through coursework, training, or professional experience</li>\n<li>Minimum years of experience: Years of experience required will correlate with the internal job level requirements for the position</li>\n<li>Location-based hybrid policy: Currently, we expect all staff to 
be in one of our offices at least 25% of the time. However, some roles may require more time in our offices.</li>\n<li>Visa sponsorship: We do sponsor visas! However, we aren&#39;t able to successfully sponsor visas for every role and every candidate. But if we make you an offer, we will make every reasonable effort to get you a visa, and we retain an immigration lawyer to help with this.</li>\n</ul>\n<p>We encourage you to apply even if you do not believe you meet every single qualification. Not all strong candidates will meet every single qualification as listed. Research shows that people who identify as being from underrepresented groups are more prone to experiencing imposter syndrome and doubting the strength of their candidacy, so we urge you not to exclude yourself prematurely and to submit an application if you&#39;re interested in this work.</p>\n<p>Your safety matters to us. To protect yourself from potential scams, remember that Anthropic recruiters only contact you from @anthropic.com email addresses. In some cases, we may partner with vetted recruiting agencies who will identify themselves as working on behalf of Anthropic. Be cautious of emails from other domains. Legitimate Anthropic recruiters will never ask for money, fees, or banking information before your first day. If you&#39;re ever unsure about a communication, don&#39;t click any links,visit anthropic.com/careers directly for confirmed position openings.</p>\n<p>How we&#39;re different</p>\n<p>We believe that the highest-impact AI research will be big science. At Anthropic we work as a single cohesive team on just a few large-scale research efforts. And we value impact , advancing our long-term goals of steerable, trustworthy AI , rather than work on smaller and more specific puzzles. We view AI research as an empirical science, which has as much in common with physics and biology as with traditional efforts in computer science. 
We&#39;re an extremely collaborative group, and we host frequent research discussions to ensure that we are pursuing the highest-impact work at any given time. As such, we greatly value communication skills.</p>\n<p>The easiest way to understand our research directions is to read our recent research. This research continues many of the directions our team worked on prior to Anthropic, including: GPT-3, Circuit-Based Interpretability, Multimodal Neurons, Scaling Laws, AI &amp; Compute, Concrete Problems in AI Safety, and Learning from Human Preferences.</p>\n<p>Come work with us!</p>\n<p>Anthropic is a public benefit corporation headquartered in San Francisco. We offer competitive compensation and benefits, optional equity donation matching, generous vacation and parental leave, flexible working hours, and a lovely office space in which to collaborate with colleagues.</p>\n<p style=\"margin-top:24px;font-size:13px;color:#666;\">XML job scraping automation by <a href=\"https://yubhub.co\">YubHub</a></p>","url":"https://yubhub.co/jobs/job_faffcca4-e94","directApply":true,"hiringOrganization":{"@type":"Organization","name":"Anthropic","sameAs":"https://www.anthropic.com/","logo":"https://logos.yubhub.co/anthropic.com.png"},"x-apply-url":"https://job-boards.greenhouse.io/anthropic/jobs/5025624008","x-work-arrangement":"hybrid","x-experience-level":"mid","x-job-type":"full-time","x-salary-range":"$300,000-$405,000 USD","x-skills-required":["cybersecurity research","machine learning","software engineering","research exploration","engineering implementation"],"x-skills-preferred":["security engineering","fuzzing","detection and response","RL techniques","LLM training methodologies"],"datePosted":"2026-04-18T15:43:50.288Z","jobLocation":{"@type":"Place","address":{"@type":"PostalAddress","addressLocality":"San Francisco, CA | New York City, NY"}},"employmentType":"FULL_TIME","occupationalCategory":"Engineering","industry":"Technology","skills":"cybersecurity research, 
machine learning, software engineering, research exploration, engineering implementation, security engineering, fuzzing, detection and response, RL techniques, LLM training methodologies","baseSalary":{"@type":"MonetaryAmount","currency":"USD","value":{"@type":"QuantitativeValue","minValue":300000,"maxValue":405000,"unitText":"YEAR"}}},{"@context":"https://schema.org","@type":"JobPosting","identifier":{"@type":"PropertyValue","name":"YubHub","value":"job_1507524b-770"},"title":"Research Engineer, Performance RL","description":"<p>We&#39;re hiring a Research Engineer to join our Code RL team within the RL organization. As a Research Engineer, you&#39;ll advance our models&#39; ability to safely write correct, fast code for accelerators.</p>\n<p>You&#39;ll need to know accelerator performance well to turn it into tasks and signals models can learn from. Specifically, you will:</p>\n<ul>\n<li>Invent, design and implement RL environments and evaluations.</li>\n<li>Conduct experiments and shape our research roadmap.</li>\n<li>Deliver your work into training runs.</li>\n<li>Collaborate with other researchers, engineers, and performance engineering specialists across and outside Anthropic.</li>\n</ul>\n<p>You may be a good fit if you:</p>\n<ul>\n<li>Have expertise with accelerators (CUDA, ROCm, Triton, Pallas), ML framework programming (JAX or PyTorch).</li>\n<li>Have worked across the stack – kernels, model code, distributed systems.</li>\n<li>Know how to balance research exploration with engineering implementation.</li>\n<li>Are passionate about AI&#39;s potential and committed to developing safe and beneficial systems.</li>\n</ul>\n<p>Strong candidates may also have:</p>\n<ul>\n<li>Experience with reinforcement learning.</li>\n<li>Experience porting ML workloads between different types of accelerators.</li>\n<li>Familiarity with LLM training methodologies.</li>\n</ul>\n<p>The annual compensation range for this role is $350,000-$850,000 USD.</p>\n<p>We&#39;re an 
extremely collaborative group, and we host frequent research discussions to ensure that we are pursuing the highest-impact work at any given time. As such, we greatly value communication skills.</p>\n<p>We believe that the highest-impact AI research will be big science. At Anthropic we work as a single cohesive team on just a few large-scale research efforts. And we value impact , advancing our long-term goals of steerable, trustworthy AI , rather than work on smaller and more specific puzzles. We view AI research as an empirical science, which has as much in common with physics and biology as with traditional efforts in computer science.</p>\n<p>Anthropic is a public benefit corporation headquartered in San Francisco. We offer competitive compensation and benefits, optional equity donation matching, generous vacation and parental leave, flexible working hours, and a lovely office space in which to collaborate with colleagues.</p>\n<p style=\"margin-top:24px;font-size:13px;color:#666;\">XML job scraping automation by <a href=\"https://yubhub.co\">YubHub</a></p>","url":"https://yubhub.co/jobs/job_1507524b-770","directApply":true,"hiringOrganization":{"@type":"Organization","name":"Anthropic","sameAs":"https://www.anthropic.com/","logo":"https://logos.yubhub.co/anthropic.com.png"},"x-apply-url":"https://job-boards.greenhouse.io/anthropic/jobs/5160330008","x-work-arrangement":"hybrid","x-experience-level":"senior","x-job-type":"full-time","x-salary-range":"$350,000-$850,000 USD","x-skills-required":["accelerators","ML framework programming","distributed systems","reinforcement learning","LLM training methodologies"],"x-skills-preferred":["CUDA","ROCm","Triton","Pallas","JAX","PyTorch"],"datePosted":"2026-04-18T15:42:09.925Z","jobLocation":{"@type":"Place","address":{"@type":"PostalAddress","addressLocality":"San Francisco, CA"}},"employmentType":"FULL_TIME","occupationalCategory":"Engineering","industry":"Technology","skills":"accelerators, ML framework programming, 
distributed systems, reinforcement learning, LLM training methodologies, CUDA, ROCm, Triton, Pallas, JAX, PyTorch","baseSalary":{"@type":"MonetaryAmount","currency":"USD","value":{"@type":"QuantitativeValue","minValue":350000,"maxValue":850000,"unitText":"YEAR"}}},{"@context":"https://schema.org","@type":"JobPosting","identifier":{"@type":"PropertyValue","name":"YubHub","value":"job_601a3593-052"},"title":"Research Engineer, Machine Learning (Reinforcement Learning)","description":"<p><strong>About Anthropic</strong></p>\n<p>Anthropic&#39;s mission is to create reliable, interpretable, and steerable AI systems. We want AI to be safe and beneficial for our users and for society as a whole. Our team is a quickly growing group of committed researchers, engineers, policy experts, and business leaders working together to build beneficial AI systems.</p>\n<p><strong>About the Role</strong></p>\n<p>As a Research Engineer within Reinforcement Learning, you will collaborate with a diverse group of researchers and engineers to advance the capabilities and safety of large language models. This role blends research and engineering responsibilities, requiring you to both implement novel approaches and contribute to the research direction. You&#39;ll work on fundamental research in reinforcement learning, creating &#39;agentic&#39; models via tool use for open-ended tasks such as computer use and autonomous software generation, improving reasoning abilities in areas such as mathematics, and developing prototypes for internal use, productivity, and evaluation.</p>\n<p><strong>Representative projects:</strong></p>\n<ul>\n<li>Architect and optimize core reinforcement learning infrastructure, from clean training abstractions to distributed experiment management across GPU clusters. 
Help scale our systems to handle increasingly complex research workflows.</li>\n<li>Design, implement, and test novel training environments, evaluations, and methodologies for reinforcement learning agents which push the state of the art for the next generation of models.</li>\n<li>Drive performance improvements across our stack through profiling, optimization, and benchmarking. Implement efficient caching solutions and debug distributed systems to accelerate both training and evaluation workflows.</li>\n<li>Collaborate across research and engineering teams to develop automated testing frameworks, design clean APIs, and build scalable infrastructure that accelerates AI research.</li>\n</ul>\n<p><strong>You may be a good fit if you:</strong></p>\n<ul>\n<li>Are proficient in Python and async/concurrent programming with frameworks like Trio</li>\n<li>Have experience with machine learning frameworks (PyTorch, TensorFlow, JAX)</li>\n<li>Have industry experience in machine learning research</li>\n<li>Can balance research exploration with engineering implementation</li>\n<li>Enjoy pair programming (we love to pair!)</li>\n<li>Care about code quality, testing, and performance</li>\n<li>Have strong systems design and communication skills</li>\n<li>Are passionate about the potential impact of AI and are committed to developing safe and beneficial systems</li>\n</ul>\n<p><strong>Strong candidates may have:</strong></p>\n<ul>\n<li>Familiarity with LLM architectures and training methodologies</li>\n<li>Experience with reinforcement learning techniques and environments</li>\n<li>Experience with virtualization and sandboxed code execution environments</li>\n<li>Experience with Kubernetes</li>\n<li>Experience with distributed systems or high-performance computing</li>\n<li>Experience with Rust and/or C++</li>\n</ul>\n<p><strong>Strong candidates need not have:</strong></p>\n<ul>\n<li>Formal certifications or education credentials</li>\n<li>Academic research experience or 
publication history</li>\n</ul>\n<p><strong>Logistics</strong></p>\n<p><strong>Education requirements:</strong> We require at least a Bachelor&#39;s degree in a related field or equivalent experience. <strong>Location-based hybrid policy:</strong> Currently, we expect all staff to be in one of our offices at least 25% of the time. However, some roles may require more time in our offices.</p>\n<p><strong>Visa sponsorship:</strong> We do sponsor visas! However, we aren&#39;t able to successfully sponsor visas for every role and every candidate. But if we make you an offer, we will make every reasonable effort to get you a visa, and we retain an immigration lawyer to help with this.</p>\n<p><strong>We encourage you to apply even if you do not believe you meet every single qualification.</strong> Not all strong candidates will meet every single qualification as listed. Research shows that people who identify as being from underrepresented groups are more prone to experiencing imposter syndrome and doubting the strength of their candidacy, so we urge you not to exclude yourself prematurely and to submit an application if you&#39;re interested in this work. We think AI systems like the ones we&#39;re building have enormous social and ethical implications. 
We think this makes representation even more important, and we strive to include a range of diverse perspectives on our team.</p>\n<p><strong>Your safety matters to us.</strong> To protect yourself from potential scams, remember that Anthropic is a legitimate company and we will never ask you to pay any fees or provide sensitive information via email or phone.</p>\n<p style=\"margin-top:24px;font-size:13px;color:#666;\">XML job scraping automation by <a href=\"https://yubhub.co\">YubHub</a></p>","url":"https://yubhub.co/jobs/job_601a3593-052","directApply":true,"hiringOrganization":{"@type":"Organization","name":"Anthropic","sameAs":"https://www.anthropic.com","logo":"https://logos.yubhub.co/anthropic.com.png"},"x-apply-url":"https://job-boards.greenhouse.io/anthropic/jobs/4613568008","x-work-arrangement":"hybrid","x-experience-level":"mid","x-job-type":"full-time","x-salary-range":"$500,000 - $850,000USD","x-skills-required":["Python","async/concurrent programming","Trio","PyTorch","TensorFlow","JAX","machine learning frameworks","reinforcement learning techniques","environments","virtualization","sandboxed code execution environments","Kubernetes","distributed systems","high-performance computing","Rust","C++"],"x-skills-preferred":["LLM architectures","training methodologies","reinforcement learning","distributed systems","high-performance computing"],"datePosted":"2026-03-08T13:49:41.142Z","jobLocation":{"@type":"Place","address":{"@type":"PostalAddress","addressLocality":"San Francisco, CA | New York City, NY"}},"employmentType":"FULL_TIME","occupationalCategory":"Engineering","industry":"Technology","skills":"Python, async/concurrent programming, Trio, PyTorch, TensorFlow, JAX, machine learning frameworks, reinforcement learning techniques, environments, virtualization, sandboxed code execution environments, Kubernetes, distributed systems, high-performance computing, Rust, C++, LLM architectures, training methodologies, reinforcement learning, distributed systems, high-performance 
computing","baseSalary":{"@type":"MonetaryAmount","currency":"USD","value":{"@type":"QuantitativeValue","minValue":500000,"maxValue":850000,"unitText":"YEAR"}}},{"@context":"https://schema.org","@type":"JobPosting","identifier":{"@type":"PropertyValue","name":"YubHub","value":"job_b0188062-45f"},"title":"Research Engineer, Cybersecurity Reinforcement Learning","description":"<p><strong>About the role</strong></p>\n<p>We&#39;re hiring for the Cybersecurity RL team within Horizons. As a Research Engineer, you&#39;ll help to safely advance the capabilities of our models in secure coding, vulnerability remediation, and other areas of defensive cybersecurity.</p>\n<p>This role blends research and engineering, requiring you to both develop novel approaches and realise them in code. Your work will include designing and implementing RL environments, conducting experiments and evaluations, delivering your work into production training runs, and collaborating with other researchers, engineers, and cybersecurity specialists across and outside Anthropic.</p>\n<p><strong>You may be a good fit if you:</strong></p>\n<ul>\n<li>Have experience in cybersecurity research.</li>\n<li>Have experience with machine learning.</li>\n<li>Have strong software engineering skills.</li>\n<li>Can balance research exploration with engineering implementation.</li>\n<li>Are passionate about AI&#39;s potential and committed to developing safe and beneficial systems.</li>\n</ul>\n<p><strong>Strong candidates may also have:</strong></p>\n<ul>\n<li>Professional experience in security engineering, fuzzing, detection and response, or other applied defensive work.</li>\n<li>Experience participating in or building CTF competitions and cyber ranges.</li>\n<li>Academic research experience in cybersecurity.</li>\n<li>Familiarity with RL techniques and environments.</li>\n<li>Familiarity with LLM training methodologies.</li>\n</ul>\n<p><strong>Logistics</strong></p>\n<p><strong>Education requirements:</strong> 
We require at least a Bachelor&#39;s degree in a related field or equivalent experience. <strong>Location-based hybrid policy:</strong> Currently, we expect all staff to be in one of our offices at least 25% of the time. However, some roles may require more time in our offices.</p>\n<p><strong>Visa sponsorship:</strong> We do sponsor visas! However, we aren&#39;t able to successfully sponsor visas for every role and every candidate. But if we make you an offer, we will make every reasonable effort to get you a visa, and we retain an immigration lawyer to help with this.</p>\n<p><strong>How we&#39;re different</strong></p>\n<p>We believe that the highest-impact AI research will be big science. At Anthropic we work as a single cohesive team on just a few large-scale research efforts. And we value impact — advancing our long-term goals of steerable, trustworthy AI — rather than work on smaller and more specific puzzles. We view AI research as an empirical science, which has as much in common with physics and biology as with traditional efforts in computer science. We&#39;re an extremely collaborative group, and we host frequent research discussions to ensure that we are pursuing the highest-impact work at any given time.</p>\n<p><strong>Come work with us!</strong></p>\n<p>Anthropic is a public benefit corporation headquartered in San Francisco. 
We offer competitive compensation and benefits, optional equity donation matching, generous vacation and parental leave, flexible working hours, and a lot more.</p>\n<p style=\"margin-top:24px;font-size:13px;color:#666;\">XML job scraping automation by <a href=\"https://yubhub.co\">YubHub</a></p>","url":"https://yubhub.co/jobs/job_b0188062-45f","directApply":true,"hiringOrganization":{"@type":"Organization","name":"Anthropic","sameAs":"https://www.anthropic.com","logo":"https://logos.yubhub.co/anthropic.com.png"},"x-apply-url":"https://job-boards.greenhouse.io/anthropic/jobs/5025624008","x-work-arrangement":"hybrid","x-experience-level":"senior","x-job-type":"full-time","x-salary-range":"$300,000 - $405,000 USD","x-skills-required":["cybersecurity research","machine learning","software engineering","RL techniques and environments","LLM training methodologies"],"x-skills-preferred":["security engineering","fuzzing","detection and response","CTF competitions and cyber ranges","academic research in cybersecurity"],"datePosted":"2026-03-08T13:44:27.551Z","jobLocation":{"@type":"Place","address":{"@type":"PostalAddress","addressLocality":"San Francisco, CA, New York City, NY"}},"employmentType":"FULL_TIME","occupationalCategory":"Engineering","industry":"Technology","skills":"cybersecurity research, machine learning, software engineering, RL techniques and environments, LLM training methodologies, security engineering, fuzzing, detection and response, CTF competitions and cyber ranges, academic research in cybersecurity","baseSalary":{"@type":"MonetaryAmount","currency":"USD","value":{"@type":"QuantitativeValue","minValue":300000,"maxValue":405000,"unitText":"YEAR"}}},{"@context":"https://schema.org","@type":"JobPosting","identifier":{"@type":"PropertyValue","name":"YubHub","value":"job_221e855f-2b9"},"title":"Research Engineer, Machine Learning (Reinforcement Learning)","description":"<p><strong>About the Role</strong></p>\n<p>As a Research Engineer within 
Reinforcement Learning, you will collaborate with a diverse group of researchers and engineers to advance the capabilities and safety of large language models. This role blends research and engineering responsibilities, requiring you to both implement novel approaches and contribute to the research direction. You&#39;ll work on fundamental research in reinforcement learning, creating &#39;agentic&#39; models via tool use for open-ended tasks such as computer use and autonomous software generation, improving reasoning abilities in areas such as mathematics, and developing prototypes for internal use, productivity, and evaluation.</p>\n<p><strong>Representative projects:</strong></p>\n<ul>\n<li>Architect and optimize core reinforcement learning infrastructure, from clean training abstractions to distributed experiment management across GPU clusters. Help scale our systems to handle increasingly complex research workflows.</li>\n</ul>\n<ul>\n<li>Design, implement, and test novel training environments, evaluations, and methodologies for reinforcement learning agents which push the state of the art for the next generation of models.</li>\n</ul>\n<ul>\n<li>Drive performance improvements across our stack through profiling, optimization, and benchmarking. 
Implement efficient caching solutions and debug distributed systems to accelerate both training and evaluation workflows.</li>\n</ul>\n<ul>\n<li>Collaborate across research and engineering teams to develop automated testing frameworks, design clean APIs, and build scalable infrastructure that accelerates AI research.</li>\n</ul>\n<p><strong>You may be a good fit if you:</strong></p>\n<ul>\n<li>Are proficient in Python and async/concurrent programming with frameworks like Trio</li>\n</ul>\n<ul>\n<li>Have experience with machine learning frameworks (PyTorch, TensorFlow, JAX)</li>\n</ul>\n<ul>\n<li>Have industry experience in machine learning research</li>\n</ul>\n<ul>\n<li>Can balance research exploration with engineering implementation</li>\n</ul>\n<ul>\n<li>Enjoy pair programming (we love to pair!)</li>\n</ul>\n<ul>\n<li>Care about code quality, testing, and performance</li>\n</ul>\n<ul>\n<li>Have strong systems design and communication skills</li>\n</ul>\n<ul>\n<li>Are passionate about the potential impact of AI and are committed to developing safe and beneficial systems</li>\n</ul>\n<p><strong>Strong candidates may have:</strong></p>\n<ul>\n<li>Familiarity with LLM architectures and training methodologies</li>\n</ul>\n<ul>\n<li>Experience with reinforcement learning techniques and environments</li>\n</ul>\n<ul>\n<li>Experience with virtualization and sandboxed code execution environments</li>\n</ul>\n<ul>\n<li>Experience with Kubernetes</li>\n</ul>\n<ul>\n<li>Experience with distributed systems or high-performance computing</li>\n</ul>\n<ul>\n<li>Experience with Rust and/or C++</li>\n</ul>\n<p><strong>Strong candidates need not have:</strong></p>\n<ul>\n<li>Formal certifications or education credentials</li>\n</ul>\n<ul>\n<li>Academic research experience or publication history</li>\n</ul>\n<p><strong>Deadline to apply:</strong> None. 
Applications will be reviewed on a rolling basis.</p>\n<p>The annual compensation range for this role is listed below.</p>\n<p>For sales roles, the range provided is the role’s On Target Earnings (&quot;OTE&quot;) range, meaning that the range includes both the sales commissions/sales bonuses target and annual base salary for the role.</p>\n<p>Annual Salary:</p>\n<p>£260,000 - £630,000GBP</p>\n<p><strong>Logistics</strong></p>\n<p><strong>Education requirements:</strong> We require at least a Bachelor&#39;s degree in a related field or equivalent experience.</p>\n<p><strong>Location-based hybrid policy:</strong> Currently, we expect all staff to be in one of our offices at least 25% of the time. However, some roles may require more time in our offices.</p>\n<p><strong>Visa sponsorship:</strong> We do sponsor visas! However, we aren&#39;t able to successfully sponsor visas for every role and every candidate. But if we make you an offer, we will make every reasonable effort to get you a visa, and we retain an immigration lawyer to help with this.</p>\n<p><strong>We encourage you to apply even if you do not believe you meet every single qualification.</strong> Not all strong candidates will meet every single qualification as listed. 
Research shows that people who identify as being from underrepresented groups are more prone to experiencing imposter syndrome and doubting the strength of their candidacy, so we urge you not to exclude yourself prematurely and to submit an application if you&#39;re interested in this work.</p>\n<p><strong>Your safety matters to us.</strong> To protect yourself from potential scams, remember that Anthropic is a legitimate company and we will never ask you to pay any fees or provide sensitive information via email or phone.</p>\n<p style=\"margin-top:24px;font-size:13px;color:#666;\">XML job scraping automation by <a href=\"https://yubhub.co\">YubHub</a></p>","url":"https://yubhub.co/jobs/job_221e855f-2b9","directApply":true,"hiringOrganization":{"@type":"Organization","name":"Anthropic","sameAs":"https://www.anthropic.com","logo":"https://logos.yubhub.co/anthropic.com.png"},"x-apply-url":"https://job-boards.greenhouse.io/anthropic/jobs/5115935008","x-work-arrangement":"hybrid","x-experience-level":"senior","x-job-type":"full-time","x-salary-range":"£260,000 - £630,000GBP","x-skills-required":["Python","async/concurrent programming","Trio","machine learning frameworks","PyTorch","TensorFlow","JAX","reinforcement learning techniques","environments","virtualization","sandboxed code execution environments","Kubernetes","distributed systems","high-performance computing","Rust","C++"],"x-skills-preferred":["LLM architectures","training methodologies","reinforcement learning techniques","environments","virtualization","sandboxed code execution environments","Kubernetes","distributed systems","high-performance computing","Rust","C++"],"datePosted":"2026-03-08T13:44:26.776Z","jobLocation":{"@type":"Place","address":{"@type":"PostalAddress","addressLocality":"London, UK"}},"employmentType":"FULL_TIME","occupationalCategory":"Engineering","industry":"Technology","skills":"Python, async/concurrent programming, Trio, machine learning frameworks, PyTorch, TensorFlow, JAX, 
reinforcement learning techniques, environments, virtualization, sandboxed code execution environments, Kubernetes, distributed systems, high-performance computing, Rust, C++, LLM architectures, training methodologies, reinforcement learning techniques, environments, virtualization, sandboxed code execution environments, Kubernetes, distributed systems, high-performance computing, Rust, C++","baseSalary":{"@type":"MonetaryAmount","currency":"GBP","value":{"@type":"QuantitativeValue","minValue":260000,"maxValue":630000,"unitText":"YEAR"}}}]}