diff --git a/docs/docs/integrations/document_loaders/web_base.ipynb b/docs/docs/integrations/document_loaders/web_base.ipynb index 52589cf4b2a6f..01bd1d72a23a6 100644 --- a/docs/docs/integrations/document_loaders/web_base.ipynb +++ b/docs/docs/integrations/document_loaders/web_base.ipynb @@ -59,14 +59,14 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "00b6de21", "metadata": {}, "outputs": [], "source": [ "from langchain_community.document_loaders import WebBaseLoader\n", "\n", - "loader = WebBaseLoader(\"https://www.espn.com/\")" + "loader = WebBaseLoader(\"https://www.example.com/\")" ] }, { @@ -85,12 +85,14 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "e86c5d40", "metadata": {}, "outputs": [], "source": [ - "loader_multiple_pages = WebBaseLoader([\"https://www.espn.com/\", \"https://google.com\"])" + "loader_multiple_pages = WebBaseLoader(\n", + " [\"https://www.example.com/\", \"https://google.com\"]\n", + ")" ] }, { @@ -103,17 +105,17 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "id": "f06bdc4e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "Document(metadata={'source': 'https://www.espn.com/', 'title': 'ESPN - Serving Sports Fans. Anytime. Anywhere.', 'description': 'Visit ESPN for live scores, highlights and sports news. Stream exclusive games on ESPN+ and play fantasy sports.', 'language': 'en'}, page_content=\"\\n\\n\\n\\n\\n\\n\\n\\n\\nESPN - Serving Sports Fans. Anytime. Anywhere.\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n Skip to main content\\n \\n\\n Skip to navigation\\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n<\\n\\n>\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nMenuESPN\\n\\n\\n\\n\\n\\nscores\\n\\n\\n\\nNFLNBAMLBOlympicsSoccerWNBA…BoxingCFLNCAACricketF1GolfHorseLLWSMMANASCARNBA G LeagueNBA Summer LeagueNCAAFNCAAMNCAAWNHLNWSLPLLProfessional WrestlingRacingRN BBRN FBRugbySports BettingTennisX GamesUFLMore ESPNFantasyWatchESPN BETESPN+\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\nSubscribe Now\\n\\n\\n\\n\\n\\nBoxing: Crawford vs. Madrimov (ESPN+ PPV)\\n\\n\\n\\n\\n\\n\\n\\nPFL Playoffs: Heavyweights & Women's Flyweights\\n\\n\\n\\n\\n\\n\\n\\nMLB\\n\\n\\n\\n\\n\\n\\n\\nLittle League Baseball: Regionals\\n\\n\\n\\n\\n\\n\\n\\nIn The Arena: Serena Williams\\n\\n\\n\\n\\n\\n\\n\\nThe 30 College Football Playoff Contenders\\n\\n\\nQuick Links\\n\\n\\n\\n\\n2024 Paris Olympics\\n\\n\\n\\n\\n\\n\\n\\nOlympics: Everything to Know\\n\\n\\n\\n\\n\\n\\n\\nMLB Standings\\n\\n\\n\\n\\n\\n\\n\\nSign up: Fantasy Football\\n\\n\\n\\n\\n\\n\\n\\nWNBA Rookie Tracker\\n\\n\\n\\n\\n\\n\\n\\nNBA Free Agency Buzz\\n\\n\\n\\n\\n\\n\\n\\nLittle League Baseball, Softball\\n\\n\\n\\n\\n\\n\\n\\nESPN Radio: Listen Live\\n\\n\\n\\n\\n\\n\\n\\nWatch Golf on ESPN\\n\\n\\n\\n\\n\\n\\nFavorites\\n\\n\\n\\n\\n\\n\\n Manage Favorites\\n \\n\\n\\n\\nCustomize ESPNCreate AccountLog InFantasy\\n\\n\\n\\n\\nFootball\\n\\n\\n\\n\\n\\n\\n\\nBaseball\\n\\n\\n\\n\\n\\n\\n\\nBasketball\\n\\n\\n\\n\\n\\n\\n\\nHockey\\n\\n\\nESPN Sites\\n\\n\\n\\n\\nESPN Deportes\\n\\n\\n\\n\\n\\n\\n\\nAndscape\\n\\n\\n\\n\\n\\n\\n\\nespnW\\n\\n\\n\\n\\n\\n\\n\\nESPNFC\\n\\n\\n\\n\\n\\n\\n\\nX Games\\n\\n\\n\\n\\n\\n\\n\\nSEC Network\\n\\n\\nESPN Apps\\n\\n\\n\\n\\nESPN\\n\\n\\n\\n\\n\\n\\n\\nESPN Fantasy\\n\\n\\n\\n\\n\\n\\n\\nTournament Challenge\\n\\n\\nFollow ESPN\\n\\n\\n\\n\\nFacebook\\n\\n\\n\\n\\n\\n\\n\\nX/Twitter\\n\\n\\n\\n\\n\\n\\n\\nInstagram\\n\\n\\n\\n\\n\\n\\n\\nSnapchat\\n\\n\\n\\n\\n\\n\\n\\nTikTok\\n\\n\\n\\n\\n\\n\\n\\nYouTube\\n\\n\\nCollege football's most entertaining conference? Why the 16-team Big 12 is wiiiiiide open this seasonLong known as one of the sport's most unpredictable conferences, the new-look Big 12 promises another dose of chaos.11hBill ConnellyScott Winters/Icon SportswireUSC, Oregon and the quest to bulk up for the Big TenTo improve on D, the Trojans wanted to bulk up for a new league. They're not the only team trying to do that.10hAdam RittenbergThe 30 teams that can reach the CFPConnelly's best games of the 2024 seasonTOP HEADLINESTeam USA sets world record in 4x400 mixed relayGermany beats France, undefeated in men's hoopsU.S. men's soccer exits Games after Morocco routHungary to protest Khelif's Olympic participationKobe's Staples Center locker sells for record $2.9MDjokovic, Alcaraz to meet again, this time for goldKerr: Team USA lineups based on players' rolesMarchand wins 200m IM; McEvoy takes 50 freeScouting Shedeur Sanders' NFL futureLATEST FROM PARISBreakout stars, best moments and what comes next: Breaking down the Games halfway throughAt the midpoint of the Olympics, we look back at some of the best moments and forward at what's still to come in the second week.35mESPNMustafa Yalcin/Anadolu via Getty ImagesThe numbers behind USA's world record in 4x400 mixed relay4h0:46Medal trackerFull resultsFull coverage of the OlympicsPRESEASON HAS BEGUN!New kickoff rules on display to start Hall of Fame Game19h0:41McAfee on NFL's new kickoff: It looks like a practice drill6h1:11NFL's new kickoff rules debut to mixed reviewsTexans-Bears attracts more bets than MLBTOP RANK BOXINGSATURDAY ON ESPN+ PPVWhy Terence Crawford is playing the long game for a chance to face CaneloCrawford is approaching Saturday's marquee matchup against Israil Madrimov with his sights set on landing a bigger one next: against Canelo Alvarez.10hMike CoppingerMark Robinson/Matchroom BoxingBradley's take: Crawford's power vs. Israil Madrimov's disciplined styleTimothy Bradley Jr. breaks down the junior middleweight title fight.2dTimothy Bradley Jr.Buy Crawford vs. Madrimov on ESPN+ PPVChance to impress for Madrimov -- and UzbekistanHOW FRIDAY WENTMORE FROM THE PARIS OLYMPICSGrant Fisher makes U.S. track history, Marchand wins 4th gold and more Friday at the Paris GamesSha'Carri Richardson made her long-awaited Olympic debut during the women's 100m preliminary round on Friday. Here's everything else you might have missed from Paris.27mESPNGetty ImagesU.S. men's loss to Morocco is a wake-up call before World CupOutclassed by Morocco, the U.S. men's Olympic team can take plenty of lessons with the World Cup on the horizon.4hSam BordenFull coverage of the OlympicsOLYMPIC MEN'S HOOPS SCOREBOARDFRIDAY'S GAMESOLYMPIC STANDOUTSWhy Simone Biles is now Stephen A.'s No. 1 Olympian7h0:58Alcaraz on the cusp of history after securing spot in gold medal match8h0:59Simone Biles' gymnastics titles: Olympics, Worlds, more statsOLYMPIC MEN'S SOCCER SCOREBOARDFRIDAY'S GAMESTRADE DEADLINE FALLOUTOlney: Eight big questions for traded MLB playersCan Jazz Chisholm Jr. handle New York? Is Jack Flaherty healthy? Will Jorge Soler's defense play? Key questions for players in new places.10hBuster OlneyWinslow Townson/Getty ImagesRanking the top prospects who changed teams at the MLB trade deadlineYou know the major leaguers who moved by now -- but what about the potential stars of tomorrow?1dKiley McDanielMLB Power RankingsSeries odds: Dodgers still on top; Phillies, Yanks behind them Top HeadlinesTeam USA sets world record in 4x400 mixed relayGermany beats France, undefeated in men's hoopsU.S. men's soccer exits Games after Morocco routHungary to protest Khelif's Olympic participationKobe's Staples Center locker sells for record $2.9MDjokovic, Alcaraz to meet again, this time for goldKerr: Team USA lineups based on players' rolesMarchand wins 200m IM; McEvoy takes 50 freeScouting Shedeur Sanders' NFL futureFavorites FantasyManage FavoritesFantasy HomeCustomize ESPNCreate AccountLog InICYMI0:47Nelson Palacio rips an incredible goal from outside the boxNelson Palacio scores an outside-of-the-box goal for Real Salt Lake in the 79th minute. \\n\\n\\nMedal Tracker\\n\\n\\n\\nCountries\\nAthletes\\n\\nOverall Medal Leaders43USA36FRA31CHNIndividual Medal LeadersGoldCHN 13FRA 11AUS 11SilverUSA 18FRA 12GBR 10BronzeUSA 16FRA 13CHN 9Overall Medal Leaders4MarchandMarchand3O'CallaghanO'Callaghan3McIntoshMcIntoshIndividual Medal LeadersGoldMarchand 4O'Callaghan 3McIntosh 2SilverSmith 3Huske 2Walsh 2BronzeYufei 3Bhaker 2Haughey 2\\n\\n\\nFull Medal Tracker\\n\\n\\nBest of ESPN+ESPNCollege Football Playoff 2024: 30 teams can reach postseasonHeather Dinich analyzes the teams with at least a 10% chance to make the CFP.AP Photo/Ross D. FranklinNFL Hall of Fame predictions: Who will make the next 10 classes?When will Richard Sherman and Marshawn Lynch make it? Who could join Aaron Donald in 2029? Let's map out each Hall of Fame class until 2034.Thearon W. Henderson/Getty ImagesMLB trade deadline 2024: Ranking prospects who changed teamsYou know the major leaguers who moved by now -- but what about the potential stars of tomorrow who went the other way in those deals? Trending NowIllustration by ESPNRanking the top 100 professional athletes since 2000Who tops our list of the top athletes since 2000? We're unveiling the top 25, including our voters' pick for the No. 1 spot.Photo by Kevin C. Cox/Getty Images2024 NFL offseason recap: Signings, coach moves, new rulesThink you missed something in the NFL offseason? We've got you covered with everything important that has happened since February.Stacy Revere/Getty ImagesTop 25 college football stadiums: Rose Bowl, Michigan and moreFourteen of ESPN's college football writers rank the 25 best stadiums in the sport. Who's No. 1, who missed the cut and what makes these stadiums so special?ESPNInside Nate Robinson's silent battle -- and his fight to liveFor nearly 20 years Nate Robinson has been fighting a silent battle -- one he didn't realize until recently could end his life. Sign up to play the #1 Fantasy game!Create A LeagueJoin Public LeagueReactivate A LeagueMock Draft NowSign up for FREE!Create A LeagueJoin a Public LeagueReactivate a LeaguePractice With a Mock DraftSign up for FREE!Create A LeagueJoin a Public LeagueReactivate a LeaguePractice with a Mock DraftGet a custom ESPN experienceEnjoy the benefits of a personalized accountSelect your favorite leagues, teams and players and get the latest scores, news and updates that matter most to you. \\n\\nESPN+\\n\\n\\n\\n\\nBoxing: Crawford vs. Madrimov (ESPN+ PPV)\\n\\n\\n\\n\\n\\n\\n\\nPFL Playoffs: Heavyweights & Women's Flyweights\\n\\n\\n\\n\\n\\n\\n\\nMLB\\n\\n\\n\\n\\n\\n\\n\\nLittle League Baseball: Regionals\\n\\n\\n\\n\\n\\n\\n\\nIn The Arena: Serena Williams\\n\\n\\n\\n\\n\\n\\n\\nThe 30 College Football Playoff Contenders\\n\\n\\nQuick Links\\n\\n\\n\\n\\n2024 Paris Olympics\\n\\n\\n\\n\\n\\n\\n\\nOlympics: Everything to Know\\n\\n\\n\\n\\n\\n\\n\\nMLB Standings\\n\\n\\n\\n\\n\\n\\n\\nSign up: Fantasy Football\\n\\n\\n\\n\\n\\n\\n\\nWNBA Rookie Tracker\\n\\n\\n\\n\\n\\n\\n\\nNBA Free Agency Buzz\\n\\n\\n\\n\\n\\n\\n\\nLittle League Baseball, Softball\\n\\n\\n\\n\\n\\n\\n\\nESPN Radio: Listen Live\\n\\n\\n\\n\\n\\n\\n\\nWatch Golf on ESPN\\n\\n\\nFantasy\\n\\n\\n\\n\\nFootball\\n\\n\\n\\n\\n\\n\\n\\nBaseball\\n\\n\\n\\n\\n\\n\\n\\nBasketball\\n\\n\\n\\n\\n\\n\\n\\nHockey\\n\\n\\nESPN Sites\\n\\n\\n\\n\\nESPN Deportes\\n\\n\\n\\n\\n\\n\\n\\nAndscape\\n\\n\\n\\n\\n\\n\\n\\nespnW\\n\\n\\n\\n\\n\\n\\n\\nESPNFC\\n\\n\\n\\n\\n\\n\\n\\nX Games\\n\\n\\n\\n\\n\\n\\n\\nSEC Network\\n\\n\\nESPN Apps\\n\\n\\n\\n\\nESPN\\n\\n\\n\\n\\n\\n\\n\\nESPN Fantasy\\n\\n\\n\\n\\n\\n\\n\\nTournament Challenge\\n\\n\\nFollow ESPN\\n\\n\\n\\n\\nFacebook\\n\\n\\n\\n\\n\\n\\n\\nX/Twitter\\n\\n\\n\\n\\n\\n\\n\\nInstagram\\n\\n\\n\\n\\n\\n\\n\\nSnapchat\\n\\n\\n\\n\\n\\n\\n\\nTikTok\\n\\n\\n\\n\\n\\n\\n\\nYouTube\\n\\n\\nTerms of UsePrivacy PolicyYour US State Privacy RightsChildren's Online Privacy PolicyInterest-Based AdsAbout Nielsen MeasurementDo Not Sell or Share My Personal InformationContact UsDisney Ad Sales SiteWork for ESPNCorrectionsESPN BET is owned and operated by PENN Entertainment, Inc. and its subsidiaries ('PENN'). ESPN BET is available in states where PENN is licensed to offer sports wagering. Must be 21+ to wager. If you or someone you know has a gambling problem and wants help, call 1-800-GAMBLER.Copyright: © 2024 ESPN Enterprises, Inc. All rights reserved.\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\")" + "Document(metadata={'source': 'https://www.example.com/', 'title': 'Example Domain', 'language': 'No language found.'}, page_content='\\n\\n\\nExample Domain\\n\\n\\n\\n\\n\\n\\n\\nExample Domain\\nThis domain is for use in illustrative examples in documents. You may use this\\n domain in literature without prior coordination or asking for permission.\\nMore information...\\n\\n\\n\\n')" ] }, - "execution_count": 3, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -126,7 +128,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "id": "a390d79f", "metadata": {}, "outputs": [ @@ -134,7 +136,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'source': 'https://www.espn.com/', 'title': 'ESPN - Serving Sports Fans. Anytime. Anywhere.', 'description': 'Visit ESPN for live scores, highlights and sports news. Stream exclusive games on ESPN+ and play fantasy sports.', 'language': 'en'}\n" + "{'source': 'https://www.example.com/', 'title': 'Example Domain', 'language': 'No language found.'}\n" ] } ], @@ -156,7 +158,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 6, "id": "9f9cf30f", "metadata": {}, "outputs": [ @@ -164,7 +166,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Requirement already satisfied: nest_asyncio in /Users/harrisonchase/.pyenv/versions/3.9.1/envs/langchain/lib/python3.9/site-packages (1.5.6)\n" + "Note: you may need to restart the kernel to use updated packages.\n" ] } ], @@ -179,24 +181,31 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 8, "id": "49586eac", "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Fetching pages: 100%|###########################################################################| 2/2 [00:00<00:00, 8.28it/s]\n" + ] + }, { "data": { "text/plain": [ - "[Document(page_content=\"\\n\\n\\n\\n\\n\\n\\n\\n\\nESPN - Serving Sports Fans. Anytime. Anywhere.\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n Skip to main content\\n \\n\\n Skip to navigation\\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n<\\n\\n>\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nMenuESPN\\n\\n\\nSearch\\n\\n\\n\\nscores\\n\\n\\n\\nNFLNBANCAAMNCAAWNHLSoccer…MLBNCAAFGolfTennisSports BettingBoxingCFLNCAACricketF1HorseLLWSMMANASCARNBA G LeagueOlympic SportsRacingRN BBRN FBRugbyWNBAWorld Baseball ClassicWWEX GamesXFLMore ESPNFantasyListenWatchESPN+\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\nSUBSCRIBE NOW\\n\\n\\n\\n\\n\\nNHL: Select Games\\n\\n\\n\\n\\n\\n\\n\\nXFL\\n\\n\\n\\n\\n\\n\\n\\nMLB: Select Games\\n\\n\\n\\n\\n\\n\\n\\nNCAA Baseball\\n\\n\\n\\n\\n\\n\\n\\nNCAA Softball\\n\\n\\n\\n\\n\\n\\n\\nCricket: Select Matches\\n\\n\\n\\n\\n\\n\\n\\nMel Kiper's NFL Mock Draft 3.0\\n\\n\\nQuick Links\\n\\n\\n\\n\\nMen's Tournament Challenge\\n\\n\\n\\n\\n\\n\\n\\nWomen's Tournament Challenge\\n\\n\\n\\n\\n\\n\\n\\nNFL Draft Order\\n\\n\\n\\n\\n\\n\\n\\nHow To Watch NHL Games\\n\\n\\n\\n\\n\\n\\n\\nFantasy Baseball: Sign Up\\n\\n\\n\\n\\n\\n\\n\\nHow To Watch PGA TOUR\\n\\n\\n\\n\\n\\n\\nFavorites\\n\\n\\n\\n\\n\\n\\n Manage Favorites\\n \\n\\n\\n\\nCustomize ESPNSign UpLog InESPN Sites\\n\\n\\n\\n\\nESPN Deportes\\n\\n\\n\\n\\n\\n\\n\\nAndscape\\n\\n\\n\\n\\n\\n\\n\\nespnW\\n\\n\\n\\n\\n\\n\\n\\nESPNFC\\n\\n\\n\\n\\n\\n\\n\\nX Games\\n\\n\\n\\n\\n\\n\\n\\nSEC Network\\n\\n\\nESPN Apps\\n\\n\\n\\n\\nESPN\\n\\n\\n\\n\\n\\n\\n\\nESPN Fantasy\\n\\n\\nFollow ESPN\\n\\n\\n\\n\\nFacebook\\n\\n\\n\\n\\n\\n\\n\\nTwitter\\n\\n\\n\\n\\n\\n\\n\\nInstagram\\n\\n\\n\\n\\n\\n\\n\\nSnapchat\\n\\n\\n\\n\\n\\n\\n\\nYouTube\\n\\n\\n\\n\\n\\n\\n\\nThe ESPN Daily Podcast\\n\\n\\nAre you ready for Opening Day? Here's your guide to MLB's offseason chaosWait, Jacob deGrom is on the Rangers now? Xander Bogaerts and Trea Turner signed where? And what about Carlos Correa? Yeah, you're going to need to read up before Opening Day.12hESPNIllustration by ESPNEverything you missed in the MLB offseason3h2:33World Series odds, win totals, props for every teamPlay fantasy baseball for free!TOP HEADLINESQB Jackson has requested trade from RavensSources: Texas hiring Terry as full-time coachJets GM: No rush on Rodgers; Lamar not optionLove to leave North Carolina, enter transfer portalBelichick to angsty Pats fans: See last 25 yearsEmbiid out, Harden due back vs. Jokic, NuggetsLynch: Purdy 'earned the right' to start for NinersMan Utd, Wrexham plan July friendly in San DiegoOn paper, Padres overtake DodgersLAMAR WANTS OUT OF BALTIMOREMarcus Spears identifies the two teams that need Lamar Jackson the most7h2:00Would Lamar sit out? Will Ravens draft a QB? Jackson trade request insightsLamar Jackson has asked Baltimore to trade him, but Ravens coach John Harbaugh hopes the QB will be back.3hJamison HensleyBallard, Colts will consider trading for QB JacksonJackson to Indy? Washington? Barnwell ranks the QB's trade fitsSNYDER'S TUMULTUOUS 24-YEAR RUNHow Washington’s NFL franchise sank on and off the field under owner Dan SnyderSnyder purchased one of the NFL's marquee franchises in 1999. Twenty-four years later, and with the team up for sale, he leaves a legacy of on-field futility and off-field scandal.13hJohn KeimESPNIOWA STAR STEPS UP AGAINJ-Will: Caitlin Clark is the biggest brand in college sports right now8h0:47'The better the opponent, the better she plays': Clark draws comparisons to TaurasiCaitlin Clark's performance on Sunday had longtime observers going back decades to find comparisons.16hKevin PeltonWOMEN'S ELITE EIGHT SCOREBOARDMONDAY'S GAMESCheck your bracket!NBA DRAFTHow top prospects fared on the road to the Final FourThe 2023 NCAA tournament is down to four teams, and ESPN's Jonathan Givony recaps the players who saw their NBA draft stock change.11hJonathan GivonyAndy Lyons/Getty ImagesTALKING BASKETBALLWhy AD needs to be more assertive with LeBron on the court9h1:33Why Perk won't blame Kyrie for Mavs' woes8h1:48WHERE EVERY TEAM STANDSNew NFL Power Rankings: Post-free-agency 1-32 poll, plus underrated offseason movesThe free agent frenzy has come and gone. Which teams have improved their 2023 outlook, and which teams have taken a hit?12hNFL Nation reportersIllustration by ESPNTHE BUCK STOPS WITH BELICHICKBruschi: Fair to criticize Bill Belichick for Patriots' struggles10h1:27 Top HeadlinesQB Jackson has requested trade from RavensSources: Texas hiring Terry as full-time coachJets GM: No rush on Rodgers; Lamar not optionLove to leave North Carolina, enter transfer portalBelichick to angsty Pats fans: See last 25 yearsEmbiid out, Harden due back vs. Jokic, NuggetsLynch: Purdy 'earned the right' to start for NinersMan Utd, Wrexham plan July friendly in San DiegoOn paper, Padres overtake DodgersFavorites FantasyManage FavoritesFantasy HomeCustomize ESPNSign UpLog InMarch Madness LiveESPNMarch Madness LiveWatch every men's NCAA tournament game live! ICYMI1:42Austin Peay's coach, pitcher and catcher all ejected after retaliation pitchAustin Peay's pitcher, catcher and coach were all ejected after a pitch was thrown at Liberty's Nathan Keeter, who earlier in the game hit a home run and celebrated while running down the third-base line. Men's Tournament ChallengeIllustration by ESPNMen's Tournament ChallengeCheck your bracket(s) in the 2023 Men's Tournament Challenge, which you can follow throughout the Big Dance. Women's Tournament ChallengeIllustration by ESPNWomen's Tournament ChallengeCheck your bracket(s) in the 2023 Women's Tournament Challenge, which you can follow throughout the Big Dance. Best of ESPN+AP Photo/Lynne SladkyFantasy Baseball ESPN+ Cheat Sheet: Sleepers, busts, rookies and closersYou've read their names all preseason long, it'd be a shame to forget them on draft day. The ESPN+ Cheat Sheet is one way to make sure that doesn't happen.Steph Chambers/Getty ImagesPassan's 2023 MLB season preview: Bold predictions and moreOpening Day is just over a week away -- and Jeff Passan has everything you need to know covered from every possible angle.Photo by Bob Kupbens/Icon Sportswire2023 NFL free agency: Best team fits for unsigned playersWhere could Ezekiel Elliott land? Let's match remaining free agents to teams and find fits for two trade candidates.Illustration by ESPN2023 NFL mock draft: Mel Kiper's first-round pick predictionsMel Kiper Jr. makes his predictions for Round 1 of the NFL draft, including projecting a trade in the top five. Trending NowAnne-Marie Sorvin-USA TODAY SBoston Bruins record tracker: Wins, points, milestonesThe B's are on pace for NHL records in wins and points, along with some individual superlatives as well. Follow along here with our updated tracker.Mandatory Credit: William Purnell-USA TODAY Sports2023 NFL full draft order: AFC, NFC team picks for all roundsStarting with the Carolina Panthers at No. 1 overall, here's the entire 2023 NFL draft broken down round by round. How to Watch on ESPN+Gregory Fisher/Icon Sportswire2023 NCAA men's hockey: Results, bracket, how to watchThe matchups in Tampa promise to be thrillers, featuring plenty of star power, high-octane offense and stellar defense.(AP Photo/Koji Sasahara, File)How to watch the PGA Tour, Masters, PGA Championship and FedEx Cup playoffs on ESPN, ESPN+Here's everything you need to know about how to watch the PGA Tour, Masters, PGA Championship and FedEx Cup playoffs on ESPN and ESPN+.Hailie Lynch/XFLHow to watch the XFL: 2023 schedule, teams, players, news, moreEvery XFL game will be streamed on ESPN+. Find out when and where else you can watch the eight teams compete. Sign up to play the #1 Fantasy Baseball GameReactivate A LeagueCreate A LeagueJoin a Public LeaguePractice With a Mock DraftSports BettingAP Photo/Mike KropfMarch Madness betting 2023: Bracket odds, lines, tips, moreThe 2023 NCAA tournament brackets have finally been released, and we have everything you need to know to make a bet on all of the March Madness games. Sign up to play the #1 Fantasy game!Create A LeagueJoin Public LeagueReactivateMock Draft Now\\n\\nESPN+\\n\\n\\n\\n\\nNHL: Select Games\\n\\n\\n\\n\\n\\n\\n\\nXFL\\n\\n\\n\\n\\n\\n\\n\\nMLB: Select Games\\n\\n\\n\\n\\n\\n\\n\\nNCAA Baseball\\n\\n\\n\\n\\n\\n\\n\\nNCAA Softball\\n\\n\\n\\n\\n\\n\\n\\nCricket: Select Matches\\n\\n\\n\\n\\n\\n\\n\\nMel Kiper's NFL Mock Draft 3.0\\n\\n\\nQuick Links\\n\\n\\n\\n\\nMen's Tournament Challenge\\n\\n\\n\\n\\n\\n\\n\\nWomen's Tournament Challenge\\n\\n\\n\\n\\n\\n\\n\\nNFL Draft Order\\n\\n\\n\\n\\n\\n\\n\\nHow To Watch NHL Games\\n\\n\\n\\n\\n\\n\\n\\nFantasy Baseball: Sign Up\\n\\n\\n\\n\\n\\n\\n\\nHow To Watch PGA TOUR\\n\\n\\nESPN Sites\\n\\n\\n\\n\\nESPN Deportes\\n\\n\\n\\n\\n\\n\\n\\nAndscape\\n\\n\\n\\n\\n\\n\\n\\nespnW\\n\\n\\n\\n\\n\\n\\n\\nESPNFC\\n\\n\\n\\n\\n\\n\\n\\nX Games\\n\\n\\n\\n\\n\\n\\n\\nSEC Network\\n\\n\\nESPN Apps\\n\\n\\n\\n\\nESPN\\n\\n\\n\\n\\n\\n\\n\\nESPN Fantasy\\n\\n\\nFollow ESPN\\n\\n\\n\\n\\nFacebook\\n\\n\\n\\n\\n\\n\\n\\nTwitter\\n\\n\\n\\n\\n\\n\\n\\nInstagram\\n\\n\\n\\n\\n\\n\\n\\nSnapchat\\n\\n\\n\\n\\n\\n\\n\\nYouTube\\n\\n\\n\\n\\n\\n\\n\\nThe ESPN Daily Podcast\\n\\n\\nTerms of UsePrivacy PolicyYour US State Privacy RightsChildren's Online Privacy PolicyInterest-Based AdsAbout Nielsen MeasurementDo Not Sell or Share My Personal InformationContact UsDisney Ad Sales SiteWork for ESPNCopyright: © ESPN Enterprises, Inc. All rights reserved.\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\", lookup_str='', metadata={'source': 'https://www.espn.com/'}, lookup_index=0),\n", - " Document(page_content='GoogleSearch Images Maps Play YouTube News Gmail Drive More »Web History | Settings | Sign in\\xa0Advanced searchAdvertisingBusiness SolutionsAbout Google© 2023 - Privacy - Terms ', lookup_str='', metadata={'source': 'https://google.com'}, lookup_index=0)]" + "[Document(metadata={'source': 'https://www.example.com/', 'title': 'Example Domain', 'language': 'No language found.'}, page_content='\\n\\n\\nExample Domain\\n\\n\\n\\n\\n\\n\\n\\nExample Domain\\nThis domain is for use in illustrative examples in documents. You may use this\\n domain in literature without prior coordination or asking for permission.\\nMore information...\\n\\n\\n\\n'),\n", + " Document(metadata={'source': 'https://google.com', 'title': 'Google', 'description': \"Search the world's information, including webpages, images, videos and more. Google has many special features to help you find exactly what you're looking for.\", 'language': 'en'}, page_content='GoogleSearch Images Maps Play YouTube News Gmail Drive More »Web History | Settings | Sign in\\xa0Advanced search5 ways Gemini can help during the HolidaysAdvertisingBusiness SolutionsAbout Google© 2024 - Privacy - Terms ')]" ] }, - "execution_count": 10, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "loader = WebBaseLoader([\"https://www.espn.com/\", \"https://google.com\"])\n", + "loader = WebBaseLoader([\"https://www.example.com/\", \"https://google.com\"])\n", "loader.requests_per_second = 1\n", "docs = loader.aload()\n", "docs" @@ -214,17 +223,17 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 9, "id": "16530c50", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[Document(page_content='\\n\\n10\\nEnergy\\n3\\n2018-01-01\\n2018-01-01\\nfalse\\nUniform test method for the measurement of energy efficiency of commercial packaged boilers.\\n§ 431.86\\nSection § 431.86\\n\\nEnergy\\nDEPARTMENT OF ENERGY\\nENERGY CONSERVATION\\nENERGY EFFICIENCY PROGRAM FOR CERTAIN COMMERCIAL AND INDUSTRIAL EQUIPMENT\\nCommercial Packaged Boilers\\nTest Procedures\\n\\n\\n\\n\\n§\\u2009431.86\\nUniform test method for the measurement of energy efficiency of commercial packaged boilers.\\n(a) Scope. This section provides test procedures, pursuant to the Energy Policy and Conservation Act (EPCA), as amended, which must be followed for measuring the combustion efficiency and/or thermal efficiency of a gas- or oil-fired commercial packaged boiler.\\n(b) Testing and Calculations. Determine the thermal efficiency or combustion efficiency of commercial packaged boilers by conducting the appropriate test procedure(s) indicated in Table 1 of this section.\\n\\nTable 1—Test Requirements for Commercial Packaged Boiler Equipment Classes\\n\\nEquipment category\\nSubcategory\\nCertified rated inputBtu/h\\n\\nStandards efficiency metric(§\\u2009431.87)\\n\\nTest procedure(corresponding to\\nstandards efficiency\\nmetric required\\nby §\\u2009431.87)\\n\\n\\n\\nHot Water\\nGas-fired\\n≥300,000 and ≤2,500,000\\nThermal Efficiency\\nAppendix A, Section 2.\\n\\n\\nHot Water\\nGas-fired\\n>2,500,000\\nCombustion Efficiency\\nAppendix A, Section 3.\\n\\n\\nHot Water\\nOil-fired\\n≥300,000 and ≤2,500,000\\nThermal Efficiency\\nAppendix A, Section 2.\\n\\n\\nHot Water\\nOil-fired\\n>2,500,000\\nCombustion Efficiency\\nAppendix A, Section 3.\\n\\n\\nSteam\\nGas-fired (all*)\\n≥300,000 and ≤2,500,000\\nThermal Efficiency\\nAppendix A, Section 2.\\n\\n\\nSteam\\nGas-fired (all*)\\n>2,500,000 and ≤5,000,000\\nThermal Efficiency\\nAppendix A, Section 2.\\n\\n\\n\\u2003\\n\\n>5,000,000\\nThermal Efficiency\\nAppendix A, Section 2.OR\\nAppendix A, Section 3 with Section 2.4.3.2.\\n\\n\\n\\nSteam\\nOil-fired\\n≥300,000 and ≤2,500,000\\nThermal Efficiency\\nAppendix A, Section 2.\\n\\n\\nSteam\\nOil-fired\\n>2,500,000 and ≤5,000,000\\nThermal Efficiency\\nAppendix A, Section 2.\\n\\n\\n\\u2003\\n\\n>5,000,000\\nThermal Efficiency\\nAppendix A, Section 2.OR\\nAppendix A, Section 3. with Section 2.4.3.2.\\n\\n\\n\\n*\\u2009Equipment classes for commercial packaged boilers as of July 22, 2009 (74 FR 36355) distinguish between gas-fired natural draft and all other gas-fired (except natural draft).\\n\\n(c) Field Tests. The field test provisions of appendix A may be used only to test a unit of commercial packaged boiler with rated input greater than 5,000,000 Btu/h.\\n[81 FR 89305, Dec. 9, 2016]\\n\\n\\nEnergy Efficiency Standards\\n\\n', lookup_str='', metadata={'source': 'https://www.govinfo.gov/content/pkg/CFR-2018-title10-vol3/xml/CFR-2018-title10-vol3-sec431-86.xml'}, lookup_index=0)]" + "[Document(metadata={'source': 'https://www.govinfo.gov/content/pkg/CFR-2018-title10-vol3/xml/CFR-2018-title10-vol3-sec431-86.xml'}, page_content='\\n\\n10\\nEnergy\\n3\\n2018-01-01\\n2018-01-01\\nfalse\\nUniform test method for the measurement of energy efficiency of commercial packaged boilers.\\n§ 431.86\\nSection § 431.86\\n\\nEnergy\\nDEPARTMENT OF ENERGY\\nENERGY CONSERVATION\\nENERGY EFFICIENCY PROGRAM FOR CERTAIN COMMERCIAL AND INDUSTRIAL EQUIPMENT\\nCommercial Packaged Boilers\\nTest Procedures\\n\\n\\n\\n\\n§\\u2009431.86\\nUniform test method for the measurement of energy efficiency of commercial packaged boilers.\\n(a) Scope. This section provides test procedures, pursuant to the Energy Policy and Conservation Act (EPCA), as amended, which must be followed for measuring the combustion efficiency and/or thermal efficiency of a gas- or oil-fired commercial packaged boiler.\\n(b) Testing and Calculations. Determine the thermal efficiency or combustion efficiency of commercial packaged boilers by conducting the appropriate test procedure(s) indicated in Table 1 of this section.\\n\\nTable 1—Test Requirements for Commercial Packaged Boiler Equipment Classes\\n\\nEquipment category\\nSubcategory\\nCertified rated inputBtu/h\\n\\nStandards efficiency metric(§\\u2009431.87)\\n\\nTest procedure(corresponding to\\nstandards efficiency\\nmetric required\\nby §\\u2009431.87)\\n\\n\\n\\nHot Water\\nGas-fired\\n≥300,000 and ≤2,500,000\\nThermal Efficiency\\nAppendix A, Section 2.\\n\\n\\nHot Water\\nGas-fired\\n>2,500,000\\nCombustion Efficiency\\nAppendix A, Section 3.\\n\\n\\nHot Water\\nOil-fired\\n≥300,000 and ≤2,500,000\\nThermal Efficiency\\nAppendix A, Section 2.\\n\\n\\nHot Water\\nOil-fired\\n>2,500,000\\nCombustion Efficiency\\nAppendix A, Section 3.\\n\\n\\nSteam\\nGas-fired (all*)\\n≥300,000 and ≤2,500,000\\nThermal Efficiency\\nAppendix A, Section 2.\\n\\n\\nSteam\\nGas-fired (all*)\\n>2,500,000 and ≤5,000,000\\nThermal Efficiency\\nAppendix A, Section 2.\\n\\n\\n\\u2003\\n\\n>5,000,000\\nThermal Efficiency\\nAppendix A, Section 2.OR\\nAppendix A, Section 3 with Section 2.4.3.2.\\n\\n\\n\\nSteam\\nOil-fired\\n≥300,000 and ≤2,500,000\\nThermal Efficiency\\nAppendix A, Section 2.\\n\\n\\nSteam\\nOil-fired\\n>2,500,000 and ≤5,000,000\\nThermal Efficiency\\nAppendix A, Section 2.\\n\\n\\n\\u2003\\n\\n>5,000,000\\nThermal Efficiency\\nAppendix A, Section 2.OR\\nAppendix A, Section 3. with Section 2.4.3.2.\\n\\n\\n\\n*\\u2009Equipment classes for commercial packaged boilers as of July 22, 2009 (74 FR 36355) distinguish between gas-fired natural draft and all other gas-fired (except natural draft).\\n\\n(c) Field Tests. The field test provisions of appendix A may be used only to test a unit of commercial packaged boiler with rated input greater than 5,000,000 Btu/h.\\n[81 FR 89305, Dec. 9, 2016]\\n\\n\\nEnergy Efficiency Standards\\n\\n')]" ] }, - "execution_count": 2, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -250,7 +259,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 10, "id": "303d2f4e", "metadata": {}, "outputs": [ @@ -260,66 +269,74 @@ "text": [ "\n", "\n", + "10\n", + "Energy\n", + "3\n", + "2018-01-01\n", + "2018-01-01\n", + "false\n", + "Uniform test method for the measurement of energy efficien\n", + "{'source': 'https://www.govinfo.gov/content/pkg/CFR-2018-title10-vol3/xml/CFR-2018-title10-vol3-sec431-86.xml'}\n" + ] + } + ], + "source": [ + "pages = []\n", + "for doc in loader.lazy_load():\n", + " pages.append(doc)\n", + "\n", + "print(pages[0].page_content[:100])\n", + "print(pages[0].metadata)" + ] + }, + { + "cell_type": "markdown", + "id": "20e4cf2a-2e5d-4a1f-8c99-526d36e4b873", + "metadata": {}, + "source": [ + "### Async" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "0ab4286f-7b3c-4e4b-890d-7b39082b61b7", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Fetching pages: 100%|###########################################################################| 1/1 [00:00<00:00, 10.51it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ "\n", "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "ESPN - Serving Sports Fans. Anytime. Anywhere.\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "{'source': 'https://www.espn.com/', 'title': 'ESPN - Serving Sports Fans. Anytime. Anywhere.', 'description': 'Visit ESPN for live scores, highlights and sports news. Stream exclusive games on ESPN+ and play fantasy sports.', 'language': 'en'}\n" + "10\n", + "Energy\n", + "3\n", + "2018-01-01\n", + "2018-01-01\n", + "false\n", + "Uniform test method for the measurement of energy efficien\n", + "{'source': 'https://www.govinfo.gov/content/pkg/CFR-2018-title10-vol3/xml/CFR-2018-title10-vol3-sec431-86.xml'}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n" ] } ], "source": [ "pages = []\n", - "for doc in loader.lazy_load():\n", + "async for doc in loader.alazy_load():\n", " pages.append(doc)\n", "\n", "print(pages[0].page_content[:100])\n", @@ -390,7 +407,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.1" + "version": "3.10.4" } }, "nbformat": 4, diff --git a/libs/community/langchain_community/document_loaders/web_base.py b/libs/community/langchain_community/document_loaders/web_base.py index 94d46c55ea7f5..d84f585b2709a 100644 --- a/libs/community/langchain_community/document_loaders/web_base.py +++ b/libs/community/langchain_community/document_loaders/web_base.py @@ -3,10 +3,11 @@ import asyncio import logging import warnings -from typing import Any, Dict, Iterator, List, Optional, Sequence, Union +from typing import Any, AsyncIterator, Dict, Iterator, List, Optional, Sequence, Union import aiohttp import requests +from langchain_core._api import deprecated from langchain_core.documents import Document from langchain_community.document_loaders.base import BaseLoader @@ -78,12 +79,7 @@ class WebBaseLoader(BaseLoader): .. code-block:: python docs = [] - docs_lazy = loader.lazy_load() - - # async variant: - # docs_lazy = await loader.alazy_load() - - for doc in docs_lazy: + for doc in loader.lazy_load(): docs.append(doc) print(docs[0].page_content[:100]) print(docs[0].metadata) @@ -98,7 +94,9 @@ class WebBaseLoader(BaseLoader): Async load: .. code-block:: python - docs = await loader.aload() + docs = [] + async for doc in loader.alazy_load(): + docs.append(doc) print(docs[0].page_content[:100]) print(docs[0].metadata) @@ -108,6 +106,37 @@ class WebBaseLoader(BaseLoader): {'source': 'https://www.espn.com/', 'title': 'ESPN - Serving Sports Fans. Anytime. Anywhere.', 'description': 'Visit ESPN for live scores, highlights and sports news. Stream exclusive games on ESPN+ and play fantasy sports.', 'language': 'en'} + .. versionchanged:: 0.3.14 + + Deprecated ``aload`` (which was not async) and implemented a native async + ``alazy_load``. Expand below for more details. + + .. dropdown:: How to update ``aload`` + + Instead of using ``aload``, you can use ``load`` for synchronous loading or + ``alazy_load`` for asynchronous lazy loading. + + Example using ``load`` (synchronous): + + .. code-block:: python + + docs: List[Document] = loader.load() + + Example using ``alazy_load`` (asynchronous): + + .. code-block:: python + + docs: List[Document] = [] + async for doc in loader.alazy_load(): + docs.append(doc) + + This is in preparation for accommodating an asynchronous ``aload`` in the + future: + + .. code-block:: python + + docs: List[Document] = await loader.aload() + """ # noqa: E501 def __init__( @@ -279,11 +308,12 @@ def _check_parser(parser: str) -> None: "`parser` must be one of " + ", ".join(valid_parsers) + "." ) - def scrape_all(self, urls: List[str], parser: Union[str, None] = None) -> List[Any]: - """Fetch all urls, then return soups for all results.""" + def _unpack_fetch_results( + self, results: Any, urls: List[str], parser: Union[str, None] = None + ) -> List[Any]: + """Unpack fetch results into BeautifulSoup objects.""" from bs4 import BeautifulSoup - results = asyncio.run(self.fetch_all(urls)) final_results = [] for i, result in enumerate(results): url = urls[i] @@ -294,9 +324,20 @@ def scrape_all(self, urls: List[str], parser: Union[str, None] = None) -> List[A parser = self.default_parser self._check_parser(parser) final_results.append(BeautifulSoup(result, parser, **self.bs_kwargs)) - return final_results + def scrape_all(self, urls: List[str], parser: Union[str, None] = None) -> List[Any]: + """Fetch all urls, then return soups for all results.""" + results = asyncio.run(self.fetch_all(urls)) + return self._unpack_fetch_results(results, urls, parser=parser) + + async def ascrape_all( + self, urls: List[str], parser: Union[str, None] = None + ) -> List[Any]: + """Async fetch all urls, then return soups for all results.""" + results = await self.fetch_all(urls) + return self._unpack_fetch_results(results, urls, parser=parser) + def _scrape( self, url: str, @@ -339,6 +380,22 @@ def lazy_load(self) -> Iterator[Document]: metadata = _build_metadata(soup, path) yield Document(page_content=text, metadata=metadata) + async def alazy_load(self) -> AsyncIterator[Document]: + """Async lazy load text from the url(s) in web_path.""" + results = await self.ascrape_all(self.web_paths) + for path, soup in zip(self.web_paths, results): + text = soup.get_text(**self.bs_get_text_kwargs) + metadata = _build_metadata(soup, path) + yield Document(page_content=text, metadata=metadata) + + @deprecated( + since="0.3.14", + removal="1.0", + message=( + "See API reference for updated usage: " + "https://python.langchain.com/api_reference/community/document_loaders/langchain_community.document_loaders.web_base.WebBaseLoader.html" # noqa: E501 + ), + ) def aload(self) -> List[Document]: # type: ignore """Load text from the urls in web_path async into Documents.""" diff --git a/libs/community/tests/integration_tests/vectorstores/test_falkordb_vector_integration.py b/libs/community/tests/integration_tests/vectorstores/test_falkordb_vector_integration.py index 9220033f01b35..66f908f29a008 100644 --- a/libs/community/tests/integration_tests/vectorstores/test_falkordb_vector_integration.py +++ b/libs/community/tests/integration_tests/vectorstores/test_falkordb_vector_integration.py @@ -102,8 +102,8 @@ def test_falkordbvector() -> None: pre_delete_collection=True, ) output = docsearch.similarity_search("foo", k=1) - assert type(output) is list - assert type(output[0]) is Document + assert isinstance(output, list) + assert isinstance(output[0], Document) assert output[0].page_content == "foo" drop_vector_indexes(docsearch) @@ -121,8 +121,8 @@ def test_falkordbvector_embeddings() -> None: pre_delete_collection=True, ) output = docsearch.similarity_search("foo", k=1) - assert type(output) is list - assert type(output[0]) is Document + assert isinstance(output, list) + assert isinstance(output[0], Document) assert output[0].page_content == "foo" drop_vector_indexes(docsearch) @@ -168,8 +168,8 @@ def test_falkordbvector_with_metadatas() -> None: pre_delete_collection=True, ) output = docsearch.similarity_search("foo", k=1) - assert type(output) is list - assert type(output[0]) is Document + assert isinstance(output, list) + assert isinstance(output[0], Document) assert output[0].metadata.get("page") == "0" drop_vector_indexes(docsearch) diff --git a/libs/community/tests/unit_tests/document_loaders/test_web_base.py b/libs/community/tests/unit_tests/document_loaders/test_web_base.py index 529c19b4c1f4b..950542303543d 100644 --- a/libs/community/tests/unit_tests/document_loaders/test_web_base.py +++ b/libs/community/tests/unit_tests/document_loaders/test_web_base.py @@ -62,6 +62,52 @@ def test_lazy_load(mock_get: Any) -> None: assert results[0].page_content == "This is a div with a special class" +@pytest.mark.requires("bs4") +@patch("aiohttp.ClientSession.get") +async def test_alazy_load(mock_get: Any) -> None: + async def mock_text() -> str: + return "
Test content
" + + import bs4 + + mock_response = MagicMock() + mock_response.text = mock_text + mock_get.return_value.__aenter__.return_value = mock_response + + loader = WebBaseLoader(web_paths=["https://www.example.com"]) + results = [] + async for result in loader.alazy_load(): + results.append(result) + # mock_get.assert_called_with("https://www.example.com") + assert len(results) == 1 + assert results[0].page_content == "Test content" + + # Test bs4 kwargs + async def mock_text_bs4() -> str: + return dedent(""" + + +Test content
+