{"version":"0.1","company":{"name":"YubHub","url":"https://yubhub.co","jobsUrl":"https://yubhub.co/jobs/skill/big-data-pipelines"},"x-facet":{"type":"skill","slug":"big-data-pipelines","display":"Big Data Pipelines","count":1},"x-feed-size-limit":100,"x-feed-sort":"enriched_at desc","x-feed-notice":"This feed contains at most 100 jobs (the most recently enriched). For the full corpus, use the paginated /stats/by-facet endpoint or /search.","x-generator":"yubhub-xml-generator","x-rights":"Free to redistribute with attribution: \"Data by YubHub (https://yubhub.co)\"","x-schema":"Each entry in `jobs` follows https://schema.org/JobPosting. YubHub-native raw fields carry `x-` prefix.","jobs":[{"@context":"https://schema.org","@type":"JobPosting","identifier":{"@type":"PropertyValue","name":"YubHub","value":"job_6aab7ed8-23a"},"title":"Senior Software Engineer - Data","description":"<p>We are seeking an experienced Senior Software Engineer (Data) to join our fast-paced, collaborative data team. In this role, you will have broad authority to drive the direction of our technographic data services, building world-class data pipelines and systems to process billions of signals and data points.</p>\n<p>This is an exciting opportunity to solve challenging problems and make a big impact as we invest in making technographics a first-class offering.</p>\n<p>Key Responsibilities:</p>\n<ul>\n<li>Build and optimize big data pipelines to extract and process signals from the web, job postings, and other sources</li>\n<li>Design and implement data architectures and storage solutions to efficiently handle massive data volumes</li>\n<li>Collaborate closely with data scientists to support and integrate ML models into data workflows</li>\n<li>Continuously improve data quality, performance, and scalability of our technographic data platform</li>\n<li>Drive technical strategy and roadmap for the data processing infrastructure</li>\n</ul>\n<p>Requirements:</p>\n<ul>\n<li>Extensive experience building and scaling big data pipelines and architectures from scratch</li>\n<li>Deep expertise in big data frameworks (Hadoop, Spark) and the JVM stack (Java, Scala)</li>\n<li>Strong software engineering fundamentals and ability to write efficient, high-quality code</li>\n<li>Experience with entity recognition and NLP techniques a plus</li>\n<li>Proven track record delivering results and driving projects in a fast-paced environment</li>\n<li>Excellent collaboration and communication skills to work with data scientists, analysts and product teams</li>\n<li>Passion for leveraging huge datasets to power valuable insights</li>\n</ul>\n<p>Ideal Background:</p>\n<ul>\n<li>8+ years of experience in software engineering roles</li>\n<li>Experience working with very large datasets and distributed systems</li>\n<li>Familiarity building data pipelines at large tech companies or data-driven organisations</li>\n<li>Bachelor&#39;s or advanced degree in Computer Science, Engineering or related technical field</li>\n</ul>\n<p style=\"margin-top:24px;font-size:13px;color:#666;\">XML job scraping automation by <a href=\"https://yubhub.co\">YubHub</a></p>","url":"https://yubhub.co/jobs/job_6aab7ed8-23a","directApply":true,"hiringOrganization":{"@type":"Organization","name":"ZoomInfo","sameAs":"https://www.zoominfo.com/","logo":"https://logos.yubhub.co/zoominfo.com.png"},"x-apply-url":"https://job-boards.greenhouse.io/zoominfo/jobs/8486808002","x-work-arrangement":"hybrid","x-experience-level":"senior","x-job-type":"full-time","x-salary-range":"$140,000-$220,000 USD","x-skills-required":["big data pipelines","data architectures","storage solutions","ML models","data quality","performance","scalability","data processing infrastructure","Hadoop","Spark","Java","Scala","entity recognition","NLP techniques"],"x-skills-preferred":[],"datePosted":"2026-04-18T15:49:24.766Z","jobLocation":{"@type":"Place","address":{"@type":"PostalAddress","addressLocality":"Bethesda, Maryland, United States; Waltham, Massachusetts, United States"}},"employmentType":"FULL_TIME","occupationalCategory":"Engineering","industry":"Technology","skills":"big data pipelines, data architectures, storage solutions, ML models, data quality, performance, scalability, data processing infrastructure, Hadoop, Spark, Java, Scala, entity recognition, NLP techniques","baseSalary":{"@type":"MonetaryAmount","currency":"USD","value":{"@type":"QuantitativeValue","minValue":140000,"maxValue":220000,"unitText":"YEAR"}}}]}