{"version":"0.1","company":{"name":"YubHub","url":"https://yubhub.co","jobsUrl":"https://yubhub.co/jobs/title/staff-site-reliability-engineer-splunk-expert"},"x-facet":{"type":"title","slug":"staff-site-reliability-engineer-splunk-expert","display":"Staff Site Reliability Engineer- Splunk Expert","count":1},"x-feed-size-limit":100,"x-feed-sort":"enriched_at desc","x-feed-notice":"This feed contains at most 100 jobs (the most recently enriched). For the full corpus, use the paginated /stats/by-facet endpoint or /search.","x-generator":"yubhub-xml-generator","x-rights":"Free to redistribute with attribution: \"Data by YubHub (https://yubhub.co)\"","x-schema":"Each entry in `jobs` follows https://schema.org/JobPosting. YubHub-native raw fields carry `x-` prefix.","jobs":[{"@context":"https://schema.org","@type":"JobPosting","identifier":{"@type":"PropertyValue","name":"YubHub","value":"job_491db8e9-776"},"title":"Staff Site Reliability Engineer- Splunk Expert","description":"<p>We are seeking a highly technical Staff Site Reliability Engineer with deep expertise in Splunk and Grafana to own and evolve our observability ecosystem.</p>\n<p>As a Staff Site Reliability Engineer, you will move beyond simple monitoring to architect a comprehensive, scalable telemetry platform. You will be our subject-matter expert in Splunk optimisation, ensuring our logging architecture is performant, cost-effective, and deeply integrated with our automated workflows.</p>\n<p>Key responsibilities include:</p>\n<ul>\n<li>Splunk Architecture &amp; Optimisation: Lead the design and tuning of Splunk environments. Optimise indexer performance, search efficiency, and data models to ensure rapid troubleshooting and cost-efficiency.</li>\n</ul>\n<ul>\n<li>Advanced Visualisation: Architect and maintain sophisticated Grafana dashboards that correlate disparate data sources into a single pane of glass for real-time system health.</li>\n</ul>\n<ul>\n<li>Automated Infrastructure: Design, build, and maintain scalable observability infrastructure using tools like Terraform.</li>\n</ul>\n<ul>\n<li>Pipeline Engineering: Optimise the collection, processing, and storage of telemetry data (Metrics, Logs, Traces) to ensure high reliability and low latency.</li>\n</ul>\n<ul>\n<li>Workflow Automation: Develop custom Splunk workflows and integrations that trigger automated responses to system events, reducing Mean Time to Resolution (MTTR).</li>\n</ul>\n<ul>\n<li>Incident Response: Participate in on-call rotations and lead post-incident reviews to drive systemic improvements through &#39;observability-driven development.&#39;</li>\n</ul>\n<p>Required skills and experience include:</p>\n<ul>\n<li>Splunk Mastery: Deep, hands-on experience with Splunk administration, search optimisation (SPL), and architecting complex data pipelines.</li>\n</ul>\n<ul>\n<li>Grafana Expertise: Proven ability to build actionable, intuitive dashboards in Grafana that go beyond simple charts to provide deep operational insights.</li>\n</ul>\n<ul>\n<li>SRE Mindset: Minimum 8+ years of experience in an SRE, DevOps, or Systems Engineering role with a focus on high-availability systems.</li>\n</ul>\n<ul>\n<li>Programming Proficiency: Strong coding skills in Go, Python, or Ruby for building internal tools and automating observability workflows.</li>\n</ul>\n<ul>\n<li>Telemetry Standards: Hands-on experience with OpenTelemetry (OTel), Prometheus, or similar frameworks for instrumenting applications.</li>\n</ul>\n<ul>\n<li>Distributed Systems: Deep understanding of Linux internals, networking (TCP/IP, DNS, Load Balancing), and container orchestration (Kubernetes/EKS).</li>\n</ul>\n<p>Bonus skills include:</p>\n<ul>\n<li>Tracing: Implementation of distributed tracing (Jaeger, Tempo, or Honeycomb) to visualise request flow across microservices.</li>\n</ul>\n<ul>\n<li>Security Observability: Experience using Splunk for security orchestration (SOAR) or SIEM-related workflows.</li>\n</ul>\n<ul>\n<li>Cloud Platforms: Experience managing observability native tools within AWS, Azure, or GCP.</li>\n</ul>\n<p style=\"margin-top:24px;font-size:13px;color:#666;\">XML job scraping automation by <a href=\"https://yubhub.co\">YubHub</a></p>","url":"https://yubhub.co/jobs/job_491db8e9-776","directApply":true,"hiringOrganization":{"@type":"Organization","name":"Okta","sameAs":"https://www.okta.com/","logo":"https://logos.yubhub.co/okta.com.png"},"x-apply-url":"https://job-boards.greenhouse.io/okta/jobs/6874616?utm_source=yubhub.co&utm_medium=jobs_feed&utm_campaign=apply","x-work-arrangement":"hybrid","x-experience-level":"staff","x-job-type":"full-time","x-salary-range":null,"x-skills-required":["Splunk","Grafana","SRE","Go","Python","Ruby","OpenTelemetry","Prometheus","Linux","Networking","Container Orchestration"],"x-skills-preferred":["Tracing","Security Observability","Cloud Platforms"],"datePosted":"2026-04-18T15:54:34.221Z","jobLocation":{"@type":"Place","address":{"@type":"PostalAddress","addressLocality":"Bengaluru, India"}},"employmentType":"FULL_TIME","occupationalCategory":"Engineering","industry":"Technology","skills":"Splunk, Grafana, SRE, Go, Python, Ruby, OpenTelemetry, Prometheus, Linux, Networking, Container Orchestration, Tracing, Security Observability, Cloud Platforms"}]}