Skip to content

Commit

Permalink
move hf logic to python script
Browse files Browse the repository at this point in the history
  • Loading branch information
wjayesh committed Jan 9, 2025
1 parent 2980046 commit 5da0a0e
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 31 deletions.
31 changes: 1 addition & 30 deletions .github/workflows/docs_summarization_check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,33 +57,4 @@ jobs:
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
HF_TOKEN: ${{ secrets.HF_TOKEN }}
run: |
# Process OpenAI batch results
python scripts/check_batch_output.py
# Upload all files to HuggingFace
python -c '
from huggingface_hub import HfApi
import os
api = HfApi()
# Upload OpenAI summary
api.upload_file(
token=os.environ["HF_TOKEN"],
repo_id="zenml/llms.txt",
repo_type="dataset",
path_in_repo="how-to-guides.txt",
path_or_fileobj="zenml_docs.txt",
)
# Upload repomix outputs
for filename in ["component-guide.txt", "basics.txt", "llms-full.txt"]:
api.upload_file(
token=os.environ["HF_TOKEN"],
repo_id="zenml/llms.txt",
repo_type="dataset",
path_in_repo=filename,
path_or_fileobj=f"repomix-outputs/{filename}",
)
'
run: python scripts/check_batch_output.py
30 changes: 29 additions & 1 deletion scripts/check_batch_output.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import json
from openai import OpenAI
from huggingface_hub import HfApi
import os

# Module-level OpenAI client, constructed with no explicit arguments when this module is loaded.
client = OpenAI()

def main():
def process_batch_output():
# Read the batch ID from file
with open("batch_id.txt", "r") as f:
batch_id = f.read().strip()
Expand All @@ -30,5 +32,31 @@ def main():
f.write(json_line["response"]["body"]["choices"][0]["message"]["content"])
f.write("\n\n" + "="*80 + "\n\n")

def upload_to_huggingface():
    """Upload the generated text files to the ``zenml/llms.txt`` dataset repo.

    Sends ``zenml_docs.txt`` as ``how-to-guides.txt`` first, then each file
    under ``repomix-outputs/`` under its own filename. The access token is
    read from the ``HF_TOKEN`` environment variable; if the variable is
    missing, ``KeyError`` is raised before any upload is attempted.
    """
    api = HfApi()
    hf_token = os.environ["HF_TOKEN"]

    # (name in the repo, local file) pairs in upload order: the OpenAI
    # summary first, then the repomix outputs.
    uploads = [("how-to-guides.txt", "zenml_docs.txt")]
    uploads += [
        (name, f"repomix-outputs/{name}")
        for name in ("component-guide.txt", "basics.txt", "llms-full.txt")
    ]

    for remote_name, local_path in uploads:
        api.upload_file(
            token=hf_token,
            repo_id="zenml/llms.txt",
            repo_type="dataset",
            path_in_repo=remote_name,
            path_or_fileobj=local_path,
        )

def main():
    """Call process_batch_output() and then upload_to_huggingface(), in that order."""
    for step in (process_batch_output, upload_to_huggingface):
        step()


if __name__ == "__main__":
    main()

0 comments on commit 5da0a0e

Please sign in to comment.