Skip to content

Commit

Permalink
Merge branch 'run-llama:main' into main
Browse files (browse the repository at this point in the history)
  • Loading branch information
xjtushilei authored Dec 25, 2024
2 parents bd93582 + 33bfc37 commit bd268ce
Show file tree
Hide file tree
Showing 10 changed files with 1,059 additions and 1,012 deletions.
1,399 changes: 708 additions & 691 deletions docs/poetry.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ idna = "3.7"
ipykernel = "6.29.3"
ipython = "8.22.2"
jedi = "0.19.1"
jinja2 = "3.1.4"
jinja2 = "^3.1.5"
jsonschema = "4.21.1"
jsonschema-specifications = "2023.12.1"
jupyter-client = "8.6.1"
Expand Down
575 changes: 304 additions & 271 deletions llama-index-core/poetry.lock

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -337,12 +337,7 @@ def converse_with_retry(
{
k: v
for k, v in kwargs.items()
if (
k != "tools"
or k != "guardrail_identifier"
or k != "guardrail_version"
or k != "trace"
)
if k not in ["tools", "guardrail_identifier", "guardrail_version", "trace"]
},
)

Expand Down Expand Up @@ -395,12 +390,7 @@ async def converse_with_retry_async(
{
k: v
for k, v in kwargs.items()
if (
k != "tools"
or k != "guardrail_identifier"
or k != "guardrail_version"
or k != "trace"
)
if k not in ["tools", "guardrail_identifier", "guardrail_version", "trace"]
},
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ exclude = ["**/BUILD"]
license = "MIT"
name = "llama-index-llms-bedrock-converse"
readme = "README.md"
version = "0.4.2"
version = "0.4.3"

[tool.poetry.dependencies]
python = ">=3.9,<4.0"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@


class KnowledgeBaseWebReader(BaseReader):
"""Knowledge base reader.
"""
Knowledge base reader.
Crawls and reads articles from a knowledge base/help center with Playwright.
Tested on Zendesk and Intercom CMS, may work on others.
Expand Down Expand Up @@ -36,6 +37,7 @@ def __init__(
title_selector: Optional[str] = None,
subtitle_selector: Optional[str] = None,
body_selector: Optional[str] = None,
max_depth: int = 100,
) -> None:
"""Initialize with parameters."""
self.root_url = root_url
Expand All @@ -44,6 +46,7 @@ def __init__(
self.title_selector = title_selector
self.subtitle_selector = subtitle_selector
self.body_selector = body_selector
self.max_depth = max_depth

def load_data(self) -> List[Document]:
"""Load data from the knowledge base."""
Expand All @@ -54,9 +57,7 @@ def load_data(self) -> List[Document]:

# Crawl
article_urls = self.get_article_urls(
browser,
self.root_url,
self.root_url,
browser, self.root_url, self.root_url, self.max_depth
)

# Scrape
Expand All @@ -82,7 +83,8 @@ def scrape_article(
browser: Any,
url: str,
) -> Dict[str, str]:
"""Scrape a single article url.
"""
Scrape a single article url.
Args:
browser (Any): a Playwright Chromium browser.
Expand Down Expand Up @@ -125,9 +127,10 @@ def scrape_article(
return {"title": title, "subtitle": subtitle, "body": body, "url": url}

def get_article_urls(
self, browser: Any, root_url: str, current_url: str
self, browser: Any, root_url: str, current_url: str, max_depth: int = 100
) -> List[str]:
"""Recursively crawl through the knowledge base to find a list of articles.
"""
Recursively crawl through the knowledge base to find a list of articles.
Args:
browser (Any): a Playwright Chromium browser.
Expand Down Expand Up @@ -158,7 +161,9 @@ def get_article_urls(

for link in links:
url = root_url + page.evaluate("(node) => node.getAttribute('href')", link)
article_urls.extend(self.get_article_urls(browser, root_url, url))
article_urls.extend(
self.get_article_urls(browser, root_url, url, max_depth)
)

page.close()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ license = "MIT"
maintainers = ["HawkClaws", "Hironsan", "NA", "an-bluecat", "bborn", "jasonwcfan", "kravetsmic", "pandazki", "ruze00", "selamanse", "thejessezhang"]
name = "llama-index-readers-web"
readme = "README.md"
version = "0.3.2"
version = "0.3.3"

[tool.poetry.dependencies]
python = ">=3.9,<4.0"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -634,7 +634,7 @@ def _is_efficient_filtering_enabled(self) -> bool:
ef_enabled = False
else:
self._os_version = self._get_opensearch_version()
major, minor, patch = self.os_version.split(".")
major, minor, patch = self._os_version.split(".")
ef_enabled = int(major) >= 2 and int(minor) >= 9
return ef_enabled

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ exclude = ["**/BUILD"]
license = "MIT"
name = "llama-index-vector-stores-opensearch"
readme = "README.md"
version = "0.5.1"
version = "0.5.2"

[tool.poetry.dependencies]
python = ">=3.9,<4.0"
Expand Down
52 changes: 27 additions & 25 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit bd268ce

Please sign in to comment.