[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"sanity-pyMUById5SJRJDxwJeIiJ1Kf7Jve4jIbqiSRgEx6_d8":3,"sanity-HrmtkgqjXCVlb3wBci-_pKKahsFRcFLGkeye3ivyMok":495},{"data":4,"sourceMap":-1},{"latestPodcast":5,"latestReleases":14,"post":39,"recent":470},[6],{"_id":7,"publishedAt":8,"slug":9,"sponsored":12,"title":13},"4d0175f4-40a8-47eb-9bb3-a453b326aa7d","2026-07-03T07:40:00.000Z",{"_type":10,"current":11},"slug","the-good-the-bad-and-the-ai-apps",null,"The good, the bad, and the AI apps",[15,21,27,33],{"_id":16,"publishedAt":17,"slug":18,"title":20},"eb5b66eb-9410-4329-83bb-22bbff39402a","2026-04-28T13:00:00.000Z",{"_type":10,"current":19},"turn-scattered-knowledge-into-trusted-intelligence","Turning scattered knowledge into trusted intelligence: Stack Internal 2026.3",{"_id":22,"publishedAt":23,"slug":24,"title":26},"369c2401-b62e-4a37-8ff8-bf603023ecad","2026-03-02T15:03:00.988Z",{"_type":10,"current":25},"what-s-new-at-stack-overflow-march-2026","What’s new at Stack Overflow: March 2026",{"_id":28,"publishedAt":29,"slug":30,"title":32},"5e9053a4-07ea-447c-91ea-29e0b6228537","2026-02-02T15:00:00.000Z",{"_type":10,"current":31},"what-s-new-at-stack-overflow-february-2026","What’s new at Stack Overflow: February 2026",{"_id":34,"publishedAt":35,"slug":36,"title":38},"a1b538eb-a8a6-46d0-80a1-ac70ec9bb935","2026-01-05T10:00:00.000-05:00",{"_type":10,"current":37},"what-s-new-at-stack-overflow-january-2026","What’s new at Stack Overflow: January 
2026",{"_createdAt":40,"_id":41,"_rev":42,"_type":43,"_updatedAt":44,"author":45,"body":71,"comments":418,"dateUrl":419,"excerpt":420,"image":421,"legacyBody":424,"product":12,"publishedAt":427,"slug":428,"sponsored":12,"tags":430,"title":469,"visible":418},"2023-05-24T12:51:02Z","wp-post-20628","07ZbrKPSUrjrV4wQ6fJAb4","blogPost","2023-07-13T14:56:27Z",[46,62],{"_createdAt":47,"_id":48,"_rev":49,"_type":50,"_updatedAt":51,"avatar":52,"bio":57,"employee":58,"name":59,"slug":60},"2023-05-23T16:27:18Z","wp-author-cap-20631","07ZbrKPSUrjrV4wQ6fDtDP","blogAuthor","2023-06-20T15:05:14Z",{"_type":53,"asset":54},"image",{"_ref":55,"_type":56},"image-2fa9fbd036a6e538f331c11680fa90475944c0dd-1068x1068-jpg","reference","Shrikant Desai is a Director of Machine Learning at Warner Bros. Discovery. He is heading recommendation, personalization and ML platform teams. His teams focus on powering personalized experiences and surface relevant content on streaming apps, using state-of-the-art ML techniques. \n\n\nShrikant has more than 15 years of experience, starting his career in the fintech industry, and the majority of his experience comes from e-commerce companies eBay and Zulily, where he built and led teams in cloud engineering and AI\u002F ML services including personalization and recommendation systems. ","none","Shrikant Desai",{"current":61},"shrikant-desai",{"_createdAt":47,"_id":63,"_rev":49,"_type":50,"_updatedAt":51,"avatar":64,"bio":67,"employee":58,"name":68,"slug":69},"wp-author-cap-20632",{"_type":53,"asset":65},{"_ref":66,"_type":56},"image-d4f2038b6043411580bdd6d74e3985bf32b8228e-150x150-png","Sowmya Subramanian serves as Executive Vice President of Engineering at Discovery Inc. In the role,\nSowmya is accelerating the technology transformation for Discovery, building the next generation of\nmedia streaming experiences and scaling the core platform to delight users globally with their rich and\ngrowing content. 
Sowmya is also establishing Discovery's San Francisco Bay Area presence, specializing\nin machine learning and content understanding, while continuing to expand their global footprint.\nSowmya is a technology executive with 25+ years of experience in the consumer, media, database, and\nsearch industries, known for driving growth through innovative product and technology experiences,\nscaling globally distributed organizations and fostering a fast-paced and inclusive culture. She has won\nseveral leadership and technology awards, including for her efforts in empowering women in technology.\nPrior to Discovery, Sowmya spent 15 years at Google. While there, she led the charge in defining Google\nSearch's path to modernize and transform their ecosystem strategy, launching a suite of products ranging\nfrom Google Web Stories to Journalist Studio to activating Health authorities for timely COVID-19\nresponse. She also founded and grew YouTube Kids, YouTube Music, YouTube Live and was critical to\nmaking paid subscriptions happen on YouTube. Prior to YouTube, Sowmya led engineering in Google\nMaps where she defined and executed on their UGC and local business efforts. She also co-led\nWomen@ ERG for Google North America for several years, served as a Google Tech Advisor for\ndeveloping Google executive leadership talent, pioneered Inclusive Design (to remove biases in products)\nacross Google and the tech industry, and forged university collaborations to increase women in\nengineering.\nSowmya serves on the Leukemia and Lymphoma Society Student of the Year board, the advisory board\nfor PBS Kids and iTVS Women &amp; Girls Lead Global, and recently joined the Jara Worldwide Board of\nDirectors. Prior to Google, Sowmya worked at Oracle and Microsoft. 
She earned a Bachelors in Computer\nScience at Mount Holyoke College, Massachusetts, on a full scholarship, and a Masters in Computer\nScience at University of Wisconsin, Madison on a fellowship.","Sowmya Subramanian",{"current":70},"sowmya-subramanian",[72,83,91,99,107,115,146,155,174,182,190,209,217,225,233,241,249,279,287,295,303,311,319,327,335,343,360,368,376,384,392,410],{"_key":73,"_type":74,"children":75,"markDefs":81,"style":82},"ade51d1a9ea7","block",[76],{"_key":77,"_type":78,"marks":79,"text":80},"ade51d1a9ea70","span",[],"You sit down in front of your television or flip to the streaming app on your smartphone. What do you choose to watch?",[],"normal",{"_key":84,"_type":74,"children":85,"markDefs":90,"style":82},"091dc0586afc",[86],{"_key":87,"_type":78,"marks":88,"text":89},"091dc0586afc0",[],"Determining what shows and movies ended up in front of a viewer used to be a very manual, human-led process. An individual would see what content was available, figure out what demographics watched when, and schedule shows and movies in time slots likely to have the right viewers.",[],{"_key":92,"_type":74,"children":93,"markDefs":98,"style":82},"0a4c70048230",[94],{"_key":95,"_type":78,"marks":96,"text":97},"0a4c700482300",[],"With a streaming service, however, there are no schedules. Everything is available anytime. Getting the right shows in front of the viewer when they’re ready to watch becomes the central problem.",[],{"_key":100,"_type":74,"children":101,"markDefs":106,"style":82},"39743591660e",[102],{"_key":103,"_type":78,"marks":104,"text":105},"39743591660e0",[],"What was once a purely human process has now evolved thanks to advancements in Machine learning technology. At Warner Bros. Discovery, we’ve been using machine learning to surface the movies and shows that will most resonate with our viewers. 
Our editorial teams have long picked what they thought were the best programs among our libraries, but one person’s favorite won’t always appeal to another person. So, like a lot of industries, we’ve turned to machine learning and user data to make our digital experiences better.",[],{"_key":108,"_type":74,"children":109,"markDefs":114,"style":82},"2b7db0dfc6bd",[110],{"_key":111,"_type":78,"marks":112,"text":113},"2b7db0dfc6bd0",[],"Our goal is always to make our viewers’ experiences easier and simpler so they find the content that they want to watch quickly. No one in the industry has fully cracked this problem, which is what makes it so exciting.",[],{"_key":116,"_type":74,"children":117,"markDefs":140,"style":82},"e6189268f75c",[118,122,127,131,136],{"_key":119,"_type":78,"marks":120,"text":121},"e6189268f75c0",[],"In this article, we’ll talk about what we’re doing with ML to ensure that your new favorite show is waiting for you when you start up ",{"_key":123,"_type":78,"marks":124,"text":126},"e6189268f75c1",[125],"d6755cb99948","Discovery+",{"_key":128,"_type":78,"marks":129,"text":130},"e6189268f75c2",[]," or ",{"_key":132,"_type":78,"marks":133,"text":135},"e6189268f75c3",[134],"2997ef24ebde","HBO Max",{"_key":137,"_type":78,"marks":138,"text":139},"e6189268f75c4",[],".",[141,144],{"_key":125,"_type":142,"href":143,"reference":12},"link","https:\u002F\u002Fwww.discoveryplus.com\u002F",{"_key":134,"_type":142,"href":145,"reference":12},"https:\u002F\u002Fwww.hbomax.com\u002F",{"_key":147,"_type":74,"children":148,"markDefs":153,"style":154},"36e9c6dd0608",[149],{"_key":150,"_type":78,"marks":151,"text":152},"36e9c6dd06080",[],"Moving from a human process to a machine process",[],"h2",{"_key":156,"_type":74,"children":157,"markDefs":171,"style":82},"cf7fc133c16f",[158,162,167],{"_key":159,"_type":78,"marks":160,"text":161},"cf7fc133c16f0",[],"At its most simple, recommendation is based on patterns. 
If you like science fiction, you’re likely to watch more science fiction movies. Based on our studies, we found that the average viewer sticks to five or six genres. They aren’t the same genres for every viewer, so coming up with a generic navigation sort—even an alphabetical one—can be difficult. You could just surface the most popular programs, but then you’d neglect your ",{"_key":163,"_type":78,"marks":164,"text":166},"cf7fc133c16f1",[165],"6c1b044989aa","long-tail",{"_key":168,"_type":78,"marks":169,"text":170},"cf7fc133c16f2",[]," content.",[172],{"_key":165,"_type":142,"href":173,"reference":12},"https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FLong_tail",{"_key":175,"_type":74,"children":176,"markDefs":181,"style":82},"6dd2c8dadf30",[177],{"_key":178,"_type":78,"marks":179,"text":180},"6dd2c8dadf300",[],"The simplest automation we can do is ensure that a user’s favorite genres are easiest to access. We did this both on the browse page, when a user clicks into a list of TV shows and movies and sees the genres available, but also on the user’s home page. The construction of that home page needs to be personalized so that the user isn’t scrolling and scrolling to get to the genre of shows that they watch all the time.",[],{"_key":183,"_type":74,"children":184,"markDefs":189,"style":82},"d6bda881c0d3",[185],{"_key":186,"_type":78,"marks":187,"text":188},"d6bda881c0d30",[],"A human editor would go through those genres and pick the movies or shows they think are the best: the gems. But a single editor, no matter how great their taste, won’t be able to pick winners for everyone. We capture data on users’ histories, the interactions that they make on the site, and various other signals that tell us what they are interested in. We use deep learning algorithms that run these histories through sequence-based models to determine the probability of this viewer wanting to watch any given show. 
We then rank the content by how likely it is to appeal to the customer and send that ranking to them—that’s what their gems are, based on the data they are supplying us.",[],{"_key":191,"_type":74,"children":192,"markDefs":206,"style":82},"37ce0ce90387",[193,197,202],{"_key":194,"_type":78,"marks":195,"text":196},"37ce0ce903870",[],"Of course, we don’t just want to serve you the content that you already like. Human editors are very good at finding a wider group of connections between media. They’ll recommend something not because the metadata says there’s an action sequence here and a romantic sequence here, but because the editor is connecting dots that may not be easily translatable into labels. You enjoyed a film from this director; maybe you’ll like their work in a genre you don’t typically explore. Pandora tried this model for music by having ",{"_key":198,"_type":78,"marks":199,"text":201},"37ce0ce903871",[200],"42e6f280806d","human editors explicitly build in links",{"_key":203,"_type":78,"marks":204,"text":205},"37ce0ce903872",[]," between songs.",[207],{"_key":200,"_type":142,"href":208,"reference":12},"https:\u002F\u002Fwww.forbes.com\u002Fsites\u002Finsights-teradata\u002F2019\u002F10\u002F01\u002Fhow-pandora-knows-what-you-want-to-hear-next\u002F?sh=3a7560293902",{"_key":210,"_type":74,"children":211,"markDefs":216,"style":82},"7431c4009ccb",[212],{"_key":213,"_type":78,"marks":214,"text":215},"7431c4009ccb0",[],"This web of connections that creates diverse content and delightful experiences is what we’re actively exploring now, except that we are trying to use our ML program to infer those connections. 
Whether that’s connecting watch patterns, looking at metadata, or extracting cues from the content itself, we want to create a richer pool of content than what would be available just from genre signals.",[],{"_key":218,"_type":74,"children":219,"markDefs":224,"style":82},"ceb36a6e78fb",[220],{"_key":221,"_type":78,"marks":222,"text":223},"ceb36a6e78fb0",[],"Warner Bros. Discovery is shifting overall from being very heavily editorial and human-driven to more ML-heavy. One of the places where we recently made in-roads into the editorial culture is in what we call the hero panel, the big panel at the top that shows a single preview for a featured show. Our editors have traditionally picked what goes there—no machine, just a constantly rotating set of picks. Right now, we’re turning this into a machine learning problem, trying to figure out how to personalize that space with a constantly rotating set of programs relevant to the person viewing it.",[],{"_key":226,"_type":74,"children":227,"markDefs":232,"style":154},"f670d571ce30",[228],{"_key":229,"_type":78,"marks":230,"text":231},"f670d571ce300",[],"The machines that recommend you movies",[],{"_key":234,"_type":74,"children":235,"markDefs":240,"style":82},"7e6fd43cdbb3",[236],{"_key":237,"_type":78,"marks":238,"text":239},"7e6fd43cdbb30",[],"There are a lot of options and tooling to create ML solutions today. We’re mostly an AWS shop, and we started our ML journey using a lot of their services, including SageMaker for model training and deployment pipeline. We used AWS Personalize for our initial recommendation engines; it let us get started quickly and worked very well on most problems.",[],{"_key":242,"_type":74,"children":243,"markDefs":248,"style":82},"0fe6cdfad77e",[244],{"_key":245,"_type":78,"marks":246,"text":247},"0fe6cdfad77e0",[],"Now we’re building our own models in TensorFlow. 
If you want richer evaluation frameworks, faster turnaround times, and more control over the learning techniques and algorithms used, that’s the next step. Our custom models perform as well as, if not better than, what the industry and AWS provided. And we’re looking to build ML pipelines that serve our specific use cases without relying on these generic frameworks.",[],{"_key":250,"_type":74,"children":251,"markDefs":274,"style":82},"cdcce1cbf249",[252,256,261,265,270],{"_key":253,"_type":78,"marks":254,"text":255},"cdcce1cbf2490",[],"We’re not looking to reinvent the wheel; there are a lot of open-source technologies and enterprise solutions that we’re considering adding to our stack. We’re looking at technologies like ",{"_key":257,"_type":78,"marks":258,"text":260},"cdcce1cbf2491",[259],"711a49f64e06","Feast",{"_key":262,"_type":78,"marks":263,"text":264},"cdcce1cbf2492",[]," for the ",{"_key":266,"_type":78,"marks":267,"text":269},"cdcce1cbf2493",[268],"e4b37b00ff68","feature store",{"_key":271,"_type":78,"marks":272,"text":273},"cdcce1cbf2494",[]," and inference engines like KServe and MLflow to manage our experiments and deployment pipeline. With our custom tooling and the excellent open-source technologies on the market, we can design ML solutions that handle our particular use cases.",[275,277],{"_key":259,"_type":142,"href":276,"reference":12},"https:\u002F\u002Ffeast.dev\u002F",{"_key":268,"_type":142,"href":278,"reference":12},"https:\u002F\u002Fwww.featurestore.org\u002F",{"_key":280,"_type":74,"children":281,"markDefs":286,"style":82},"eefd10d8d4de",[282],{"_key":283,"_type":78,"marks":284,"text":285},"eefd10d8d4de0",[],"In fact, ML tooling in general has come a very long way. The bar for getting started has been lowered so much over the last decade that you can build a very effective ML pipeline just using out-of-the-box tools. 
With hardware advances and the algorithms you can leverage, you can bootstrap a very effective solution that will make inferences in sub-milliseconds.",[],{"_key":288,"_type":74,"children":289,"markDefs":294,"style":82},"ba3ac0ae63d3",[290],{"_key":291,"_type":78,"marks":292,"text":293},"ba3ac0ae63d30",[],"If you want to develop a richer evaluation framework and go deeper into your training data sets, that’s when you can start diving into customization. We’ve been developing our own models and pipelines to give us more control over the learning techniques and enable faster turnaround times on our datasets. Then we can build on the solutions we’ve bootstrapped.",[],{"_key":296,"_type":74,"children":297,"markDefs":302,"style":82},"de3aa76ad9c3",[298],{"_key":299,"_type":78,"marks":300,"text":301},"de3aa76ad9c30",[],"Of course, the tooling, algorithms, and models aren’t the hardest parts about machine learning. It’s the data.",[],{"_key":304,"_type":74,"children":305,"markDefs":310,"style":154},"cffffc527151",[306],{"_key":307,"_type":78,"marks":308,"text":309},"cffffc5271510",[],"The real issue is the data",[],{"_key":312,"_type":74,"children":313,"markDefs":318,"style":82},"ca0ead5871a4",[314],{"_key":315,"_type":78,"marks":316,"text":317},"ca0ead5871a40",[],"ML code is a small part of a larger puzzle: the data. Combing through a massive pile of data and metadata to determine features and decide how to apply semantics is both difficult and essential. If you’ve ever gone through an ML tutorial, the data is provided to you. But in real applications, the data is never as high-quality as you’d like. You end up fighting around the data for your models and then training your models. 
But the data management part is where much of our time is spent.",[],{"_key":320,"_type":74,"children":321,"markDefs":326,"style":82},"882b916b243c",[322],{"_key":323,"_type":78,"marks":324,"text":325},"882b916b243c0",[],"Some of the open-source tools are so good that you could write two lines of code in TensorFlow and have yourself an ML application. But then you need to deploy it, and when you deploy in a real business scenario, you need to run through a series of checklists. The pipeline needs to operate in real time, scale quickly, be maintainable, and remain transparent enough for us to assess whether we’re following the right signals and encouraging users in a healthy direction.",[],{"_key":328,"_type":74,"children":329,"markDefs":334,"style":82},"70f5aa9832e9",[330],{"_key":331,"_type":78,"marks":332,"text":333},"70f5aa9832e90",[],"Take a simple signal: watch time. If a viewer watches more of a program, they probably like it, and we can use that to infer other programs that they might like. Pretty straightforward. But that data needs to flow back from the viewer to our systems. The content streams to the client, often buffering more than needed to prevent interruptions. For our recommendations to serve accurate content, this data needs to flow back in nearly real time. If the viewer hates a show and clicks back to the home page, that page needs to be ready to refresh with new recommendations.",[],{"_key":336,"_type":74,"children":337,"markDefs":342,"style":82},"d3bebc251d87",[338],{"_key":339,"_type":78,"marks":340,"text":341},"d3bebc251d870",[],"This ends up being petabytes of data on a daily basis, and this data needs to be aggregated and passed to our backend systems. 
That data coming from the client does not come in an easily consumable format, so massaging it into a format that could be aggregated and fed into our models was one of the most challenging tasks we faced.",[],{"_key":344,"_type":74,"children":345,"markDefs":359,"style":82},"b3ccabd5ab62",[346,350,355],{"_key":347,"_type":78,"marks":348,"text":349},"b3ccabd5ab620",[],"But percentage watched is a pretty basic metric, and it doesn’t tell us a whole lot about ",{"_key":351,"_type":78,"marks":352,"text":354},"b3ccabd5ab621",[353],"em","what ",{"_key":356,"_type":78,"marks":357,"text":358},"b3ccabd5ab622",[],"the viewer liked about the program. One of our big metrics is content return on investment: how much viewership a program is getting based on our investment in it. Part of what we want from the signals that viewers send back to us is the ability to better understand the content of the videos themselves without relying on a human curator. We’re only scratching the surface of extracting metadata and features from videos, and are actively trying to determine if there is more we can learn about our content from ML.",[],{"_key":361,"_type":74,"children":362,"markDefs":367,"style":82},"57eb1476ec24",[363],{"_key":364,"_type":78,"marks":365,"text":366},"57eb1476ec240",[],"Machine learning is always changing, as are our algorithms, so as we update models and iterate based on our data, we need a good way to evaluate whether the models and your changes are getting you the results that you want. We run a lot of experiments: side-by-side evaluations of models against various target metrics. As users interact with shows, genres, or sections of the app, we want to feed that information back into our models.",[],{"_key":369,"_type":74,"children":370,"markDefs":375,"style":82},"dfe424ae1324",[371],{"_key":372,"_type":78,"marks":373,"text":374},"dfe424ae13240",[],"The risk is always that we’re biasing too heavily on one metric or another. 
If our sole metric was watch time, then the algorithms would optimize for that, and those numbers would go up. But are the viewers picking content that is meaningful to them? Are we directing them to videos that they like, or are we just throwing a bunch of content at them until something sticks? Leaning too heavily on a single metric can cause you to neglect your overall macro health, which may have unintended second-order consequences for the rest of your content.",[],{"_key":377,"_type":74,"children":378,"markDefs":383,"style":154},"b2f8c2a6de96",[379],{"_key":380,"_type":78,"marks":381,"text":382},"b2f8c2a6de960",[],"Watching what you watch",[],{"_key":385,"_type":74,"children":386,"markDefs":391,"style":82},"fd151a3e403b",[387],{"_key":388,"_type":78,"marks":389,"text":390},"fd151a3e403b0",[],"Warner Bros Discovery has a content library that spans almost a hundred years, and we want to get our programs in front of people who will love them. Our ML program is trying to use the signals that viewers give us in order to give them their next favorite show.",[],{"_key":393,"_type":74,"children":394,"markDefs":407,"style":82},"29824b0f7859",[395,399,404],{"_key":396,"_type":78,"marks":397,"text":398},"29824b0f78590",[],"If you’re interested in being part of the next generation of ML-powered recommendation engines, ",{"_key":400,"_type":78,"marks":401,"text":403},"29824b0f78591",[402],"e1d7bf78efe8","we’re hiring",{"_key":405,"_type":78,"marks":406,"text":139},"29824b0f78592",[],[408],{"_key":402,"_type":142,"href":409,"reference":12},"https:\u002F\u002Fwbd.com\u002Fcareers\u002F",{"_key":411,"_type":74,"children":412,"markDefs":417,"style":82},"86cb51947da2",[413],{"_key":414,"_type":78,"marks":415,"text":416},"86cb51947da20",[353],"The Stack Overflow blog is committed to publishing interesting articles by developers, for developers. 
From time to time that means working with companies that are also clients of Stack Overflow’s through our advertising, talent, or teams business. When we publish work from clients, we’ll identify it as Partner Content with tags and by including this disclaimer at the bottom.",[],true,"2022\u002F09\u002F07","Curation at scale needs to process a lot of data with a good algorithm. ",{"_type":53,"asset":422},{"_ref":423,"_type":56},"image-f45fabba521c2cc9b945839862424927511d27bd-2560x1344-jpg",{"code":425,"language":426},"\u003C!-- wp:paragraph -->\n\u003Cp>You sit down in front of your television or flip to the streaming app on your smartphone. What do you choose to watch?&nbsp;\u003C\u002Fp>\n\u003C!-- \u002Fwp:paragraph -->\n\n\u003C!-- wp:paragraph -->\n\u003Cp>Determining what shows and movies ended up in front of a viewer used to be a very manual, human-led process. An individual would see what content was available, figure out what demographics watched when, and schedule shows and movies in time slots likely to have the right viewers.&nbsp;\u003C\u002Fp>\n\u003C!-- \u002Fwp:paragraph -->\n\n\u003C!-- wp:paragraph -->\n\u003Cp>With a streaming service, however, there are no schedules. Everything is available anytime. Getting the right shows in front of the viewer when they’re ready to watch becomes the central problem.&nbsp;\u003C\u002Fp>\n\u003C!-- \u002Fwp:paragraph -->\n\n\u003C!-- wp:paragraph -->\n\u003Cp>What was once a purely human process has now evolved thanks to advancements in Machine learning technology. At Warner Bros. Discovery, we’ve been using machine learning to surface the movies and shows that will most resonate with our viewers. Our editorial teams have long picked what they thought were the best programs among our libraries, but one person’s favorite won’t always appeal to another person. 
So, like a lot of industries, we’ve turned to machine learning and user data to make our digital experiences better.&nbsp;\u003C\u002Fp>\n\u003C!-- \u002Fwp:paragraph -->\n\n\u003C!-- wp:paragraph -->\n\u003Cp>Our goal is always to make our viewers’ experiences easier and simpler so they find the content that they want to watch quickly. No one in the industry has fully cracked this problem, which is what makes it so exciting.&nbsp;\u003C\u002Fp>\n\u003C!-- \u002Fwp:paragraph -->\n\n\u003C!-- wp:paragraph -->\n\u003Cp>In this article, we’ll talk about what we’re doing with ML to ensure that your new favorite show is waiting for you when you start up \u003Ca href=\"https:\u002F\u002Fwww.discoveryplus.com\u002F\">Discovery+\u003C\u002Fa> or \u003Ca href=\"https:\u002F\u002Fwww.hbomax.com\u002F\">HBO Max\u003C\u002Fa>.\u003C\u002Fp>\n\u003C!-- \u002Fwp:paragraph -->\n\n\u003C!-- wp:heading -->\n\u003Ch2 id=\"h-moving-from-a-human-process-to-a-machine-process\">Moving from a human process to a machine process\u003C\u002Fh2>\n\u003C!-- \u002Fwp:heading -->\n\n\u003C!-- wp:paragraph -->\n\u003Cp>At its most simple, recommendation is based on patterns. If you like science fiction, you’re likely to watch more science fiction movies. Based on our studies, we found that the average viewer sticks to five or six genres. They aren’t the same genres for every viewer, so coming up with a generic navigation sort—even an alphabetical one—can be difficult. You could just surface the most popular programs, but then you’d neglect your \u003Ca href=\"https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FLong_tail\">long-tail\u003C\u002Fa> content.&nbsp;\u003C\u002Fp>\n\u003C!-- \u002Fwp:paragraph -->\n\n\u003C!-- wp:paragraph -->\n\u003Cp>The simplest automation we can do is ensure that a user’s favorite genres are easiest to access. We did this both on the browse page, when a user clicks into a list of TV shows and movies and sees the genres available, but also on the user’s home page. 
The construction of that home page needs to be personalized so that the user isn’t scrolling and scrolling to get to the genre of shows that they watch all the time.&nbsp;\u003C\u002Fp>\n\u003C!-- \u002Fwp:paragraph -->\n\n\u003C!-- wp:paragraph -->\n\u003Cp>A human editor would go through those genres and pick the movies or shows they think are the best: the gems. But a single editor, no matter how great their taste, won’t be able to pick winners for everyone. We capture data on user’s histories, the interactions that they make on the site, and various other signals that tell us what they are interested in. We use deep learning algorithms that run these histories through sequence-based models to determine the probability of this viewer wanting to watch any given show. We then rank the content by how likely it is to appeal to the customer and send that ranking to them—that’s what their gems are, based on the data they are supplying us.&nbsp;\u003C\u002Fp>\n\u003C!-- \u002Fwp:paragraph -->\n\n\u003C!-- wp:paragraph -->\n\u003Cp>Of course, we don’t just want to serve you the content that you already like. Human editors are very good at finding a wider group of connections between media. They’ll recommend something not because the metadata says there’s an action sequence here and a romantic sequence here, but because the editor is connecting dots that may not be easily translatable into labels. You enjoyed a film from this director; maybe you’ll like their work in a genre you don’t typically explore. 
Pandora tried this model for music by having \u003Ca href=\"https:\u002F\u002Fwww.forbes.com\u002Fsites\u002Finsights-teradata\u002F2019\u002F10\u002F01\u002Fhow-pandora-knows-what-you-want-to-hear-next\u002F?sh=3a7560293902\">human editors explicitly build in links\u003C\u002Fa> between songs.&nbsp;\u003C\u002Fp>\n\u003C!-- \u002Fwp:paragraph -->\n\n\u003C!-- wp:paragraph -->\n\u003Cp>This web of connections that creates diverse content and delightful experiences is what we’re actively exploring now, except that we are trying to use our ML program to infer those connections. Whether that’s connecting watch patterns, looking at metadata, or extracting cues from the content itself, we want to create a richer pool of content than what would be available just from genre signals.&nbsp;\u003C\u002Fp>\n\u003C!-- \u002Fwp:paragraph -->\n\n\u003C!-- wp:paragraph -->\n\u003Cp>Warner Brothers Discovery is shifting overall from being very heavily editorial and human-driven to more ML-heavy. One of the places where we recently made in-roads into the editorial culture is in what we call the hero panel, the big panel at the top that shows a single preview for a featured show. Our editors have traditionally picked what goes there—no machine, just a constantly rotating set of picks. Right now, we’re turning this into a machine learning problem, trying to figure out how to personalize that space with a constantly rotating set of programs relevant to the person viewing it.&nbsp;\u003C\u002Fp>\n\u003C!-- \u002Fwp:paragraph -->\n\n\u003C!-- wp:heading -->\n\u003Ch2 id=\"h-the-machines-that-recommend-you-movies\">The machines that recommend you movies\u003C\u002Fh2>\n\u003C!-- \u002Fwp:heading -->\n\n\u003C!-- wp:paragraph -->\n\u003Cp>There’s a lot of options and tooling to create ML solutions today. We’re mostly an AWS shop, and we started our ML journey using a lot of their services, including SageMaker for model training and deployment pipeline. 
We used AWS Personalize for our initial recommendation engines; it let us get started quickly and worked very well on most problems.&nbsp;\u003C\u002Fp>\n\u003C!-- \u002Fwp:paragraph -->\n\n\u003C!-- wp:paragraph -->\n\u003Cp>Now we’re building our own models in TensorFlow. If you want richer evaluation frameworks, faster turnaround times, and more control over the learning techniques and algorithms used, that’s the next step. Our custom models perform as well if not better with what the industry and AWS provided. And we’re looking to build ML pipelines that serve our specific use cases without relying on these generic frameworks.&nbsp;\u003C\u002Fp>\n\u003C!-- \u002Fwp:paragraph -->\n\n\u003C!-- wp:paragraph -->\n\u003Cp>We’re not looking to reinvent the wheel; there are a lot of open-source technologies and enterprise solutions that we’re considering adding to our stack. We’re looking at technologies like \u003Ca href=\"https:\u002F\u002Ffeast.dev\u002F\">Feast\u003C\u002Fa> for the \u003Ca href=\"https:\u002F\u002Fwww.featurestore.org\u002F\">feature store\u003C\u002Fa> and inference engines like KServeand MLflow to manage our experiments and deployment pipeline. With our custom tooling and the excellent open-source technologies on the market, we can design ML solutions that handle our particular use cases.&nbsp;\u003C\u002Fp>\n\u003C!-- \u002Fwp:paragraph -->\n\n\u003C!-- wp:paragraph -->\n\u003Cp>In fact, ML tooling in general has come a very long way. The bar for getting started has been lowered so much over the last decade that you can build a very effective ML pipeline just using out-of-the-box tools. 
With hardware advances and the algorithms you can leverage, you can bootstrap a very effective solution that will make inferences in under a millisecond.&nbsp;\u003C\u002Fp>\n\u003C!-- \u002Fwp:paragraph -->\n\n\u003C!-- wp:paragraph -->\n\u003Cp>If you want to develop a richer evaluation framework and go deeper into your training data sets, that’s when you can start diving into customization. We’ve been developing our own models and pipelines to give us more control over the learning techniques and enable faster turnaround times on our datasets. Then we can build on the solutions we’ve bootstrapped.\u003C\u002Fp>\n\u003C!-- \u002Fwp:paragraph -->\n\n\u003C!-- wp:paragraph -->\n\u003Cp>Of course, the tooling, algorithms, and models aren’t the hardest parts about machine learning. It’s the data.&nbsp;\u003C\u002Fp>\n\u003C!-- \u002Fwp:paragraph -->\n\n\u003C!-- wp:heading -->\n\u003Ch2 id=\"h-the-real-issue-is-the-data\">The real issue is the data\u003C\u002Fh2>\n\u003C!-- \u002Fwp:heading -->\n\n\u003C!-- wp:paragraph -->\n\u003Cp>ML code is a small part of a larger puzzle: the data. Combing through a massive pile of data and metadata to determine features and decide how to apply semantics is both difficult and essential. If you’ve ever gone through an ML tutorial, the data is provided to you. But in real applications, the data is never as high-quality as you’d like. You end up fighting around the data for your models and then training your models. But the data management part is where much of our time is spent.&nbsp;\u003C\u002Fp>\n\u003C!-- \u002Fwp:paragraph -->\n\n\u003C!-- wp:paragraph -->\n\u003Cp>Some of the open-source tools are so good that you could write two lines of code in TensorFlow and have yourself an ML application. But then you need to deploy it, and when you deploy in a real business scenario, you need to run through a series of checklists. 
The pipeline needs to operate in real time, scale quickly, be maintainable, and remain transparent enough for us to assess whether we’re following the right signals and encouraging users in a healthy direction.&nbsp;\u003C\u002Fp>\n\u003C!-- \u002Fwp:paragraph -->\n\n\u003C!-- wp:paragraph -->\n\u003Cp>Take a simple signal: watch time. If a viewer watches more of a program, they probably like it, and we can use that to infer other programs that they might like. Pretty straightforward. But that data needs to flow back from the viewer to our systems. The content streams to the client, often buffering more than needed to prevent interruptions. For our recommendations to serve accurate content, this data needs to flow back in nearly real time. If the viewer hates a show and clicks back to the home page, that page needs to be ready to refresh with new recommendations.&nbsp;\u003C\u002Fp>\n\u003C!-- \u002Fwp:paragraph -->\n\n\u003C!-- wp:paragraph -->\n\u003Cp>This ends up being petabytes of data on a daily basis, and this data needs to be aggregated and passed to our backend systems. That data coming from the client does not come in an easily consumable format, so massaging it into a format that could be aggregated and fed into our models was one of the most challenging tasks we faced.&nbsp;\u003C\u002Fp>\n\u003C!-- \u002Fwp:paragraph -->\n\n\u003C!-- wp:paragraph -->\n\u003Cp>But percentage watched is a pretty basic metric, and it doesn’t tell us a whole lot about \u003Cem>what \u003C\u002Fem>the viewer liked about the program. One of our big metrics is content return on investment: how much viewership a program is getting based on our investment in it. Part of what we want from the signals that viewers send back to us is the ability to better understand the content of the videos themselves without relying on a human curator. 
We’re only scratching the surface of extracting metadata and features from videos, and are actively trying to determine if there is more we can learn about our content from ML.&nbsp;\u003C\u002Fp>\n\u003C!-- \u002Fwp:paragraph -->\n\n\u003C!-- wp:paragraph -->\n\u003Cp>Machine learning is always changing, as are our algorithms, so as we update models and iterate based on our data, we need a good way to evaluate whether the models and our changes are getting us the results that we want. We run a lot of experiments: side-by-side evaluations of models against various target metrics. As users interact with shows, genres, or sections of the app, we want to feed that information back into our models.&nbsp;\u003C\u002Fp>\n\u003C!-- \u002Fwp:paragraph -->\n\n\u003C!-- wp:paragraph -->\n\u003Cp>The risk is always that we’re biasing too heavily on one metric or another. If our sole metric were watch time, then the algorithms would optimize for that, and those numbers would go up. But are the viewers picking content that is meaningful to them? Are we directing them to videos that they like, or are we just throwing a bunch of content at them until something sticks? Leaning too heavily on a single metric can cause you to neglect your overall macro health, which may have unintended second-order consequences for the rest of your content.&nbsp;&nbsp;\u003C\u002Fp>\n\u003C!-- \u002Fwp:paragraph -->\n\n\u003C!-- wp:heading -->\n\u003Ch2 id=\"h-watching-what-you-watch\">Watching what you watch\u003C\u002Fh2>\n\u003C!-- \u002Fwp:heading -->\n\n\u003C!-- wp:paragraph -->\n\u003Cp>Warner Bros. Discovery has a content library that spans almost a hundred years, and we want to get our programs in front of people who will love them. 
Our ML program is trying to use the signals that viewers give us in order to give them their next favorite show.&nbsp;\u003C\u002Fp>\n\u003C!-- \u002Fwp:paragraph -->\n\n\u003C!-- wp:paragraph -->\n\u003Cp>If you’re interested in being part of the next generation of ML-powered recommendation engines, \u003Ca href=\"https:\u002F\u002Fwbd.com\u002Fcareers\u002F\">we’re hiring\u003C\u002Fa>.&nbsp;\u003C\u002Fp>\n\u003C!-- \u002Fwp:paragraph -->\n\n\u003C!-- wp:separator -->\n\u003Chr class=\"wp-block-separator has-alpha-channel-opacity\"\u002F>\n\u003C!-- \u002Fwp:separator -->\n\n\u003C!-- wp:paragraph -->\n\u003Cp>\u003Cem>The Stack Overflow blog is committed to publishing interesting articles by developers, for developers. From time to time that means working with companies that are also clients of Stack Overflow’s through our advertising, talent, or teams business. When we publish work from clients, we’ll identify it as Partner Content with tags and by including this disclaimer at the bottom.\u003C\u002Fem>\u003C\u002Fp>\n\u003C!-- \u002Fwp:paragraph -->","html","2022-09-07T14:00:00.000Z",{"current":429},"how-machine-learning-algorithms-figure-out-what-you-should-watch-next",[431,440,446,451,456,460,465],{"_createdAt":432,"_id":433,"_rev":434,"_type":435,"_updatedAt":436,"slug":437,"title":439},"2023-05-23T16:43:21Z","wp-tagcat-ai","fpDTFQqIDjNJIbHDKPBGpV","blogTag","2025-01-30T16:19:01Z",{"current":438},"ai","AI",{"_createdAt":432,"_id":441,"_rev":442,"_type":435,"_updatedAt":432,"slug":443,"title":445},"wp-tagcat-code-for-a-living","9HpbCsT2tq0xwozQfkc4ih",{"current":444},"code-for-a-living","Code for a Living",{"_createdAt":432,"_id":447,"_rev":442,"_type":435,"_updatedAt":432,"slug":448,"title":450},"wp-tagcat-machine-learning",{"current":449},"machine-learning","machine learning",{"_createdAt":432,"_id":452,"_rev":442,"_type":435,"_updatedAt":432,"slug":453,"title":455},"wp-tagcat-partner-content",{"current":454},"partner-content","Partner 
Content",{"_createdAt":432,"_id":457,"_rev":442,"_type":435,"_updatedAt":432,"slug":458,"title":459},"wp-tagcat-partnercontent",{"current":459},"partnercontent",{"_createdAt":432,"_id":461,"_rev":442,"_type":435,"_updatedAt":432,"slug":462,"title":464},"wp-tagcat-recommendation-engines",{"current":463},"recommendation-engines","recommendation engines",{"_createdAt":432,"_id":466,"_rev":442,"_type":435,"_updatedAt":432,"slug":467,"title":468},"wp-tagcat-streaming",{"current":468},"streaming","How machine learning algorithms figure out what you should watch next",[471,477,483,489],{"_id":472,"publishedAt":473,"slug":474,"sponsored":12,"title":476},"28e560af-f0aa-4d46-bd90-f435ad604aa7","2026-06-26T14:00:27.102Z",{"_type":10,"current":475},"paging-charity-how-can-engineering-leaders-avoid-becoming-bond-villains","Paging Charity! How can engineering leaders avoid becoming Bond villains?",{"_id":478,"publishedAt":479,"slug":480,"sponsored":12,"title":482},"4b22c2a3-3779-4966-93eb-5230391dbdce","2026-06-23T14:08:58.595Z",{"_type":10,"current":481},"your-ai-shipped-a-backend-that-boots-that-is-the-whole-problem","Your AI shipped a backend that boots. That is the whole problem.",{"_id":484,"publishedAt":485,"slug":486,"sponsored":12,"title":488},"5cf362e1-fe7b-45af-b69c-914731c6a052","2026-06-23T14:00:00.000Z",{"_type":10,"current":487},"the-2026-developer-survey-is-now-open-for-human-developers-only","The 2026 Developer Survey is now open (for human developers only)!",{"_id":490,"publishedAt":491,"slug":492,"sponsored":12,"title":494},"30b995f7-7cb9-4dd8-bf71-d0685940a32b","2026-06-19T14:00:00.000Z",{"_type":10,"current":493},"dispatches-from-o-reilly-from-capabilities-to-responsibilities","Dispatches from O'Reilly: From capabilities to responsibilities",{"data":496,"sourceMap":-1},{"count":497,"lastTimestamp":498},5,"2023-05-25T09:47:55Z"]