-
Notifications
You must be signed in to change notification settings - Fork 23
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
5 changed files
with
179 additions
and
23 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
apiVersion: batch/v1
kind: Job
metadata:
  name: disk-usage-report-job
spec:
  template:
    metadata:
      labels:
        app: disk-usage-report
    spec:
      containers:
        - name: disk-usage-report
          # CI replaces IMAGE_PLACEHOLDER with the pushed report image tag.
          image: IMAGE_PLACEHOLDER
      # Job pods must not be restarted in place; the Job controller handles retries.
      restartPolicy: Never
      # Pin the pod onto the JupyterHub user node group.
      nodeSelector:
        NodeGroupType: default
        NodePool: default
        hub.jupyter.org/node-purpose: user
      # Tolerate the dedicated-user taint so the pod may schedule on those nodes.
      tolerations:
        - key: "hub.jupyter.org/dedicated"
          operator: "Equal"
          value: "user"
          effect: "NoSchedule"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
#!/usr/bin/env python3
"""Generate a per-user disk-usage report for a directory of home directories."""

import json
import os
import subprocess
import sys

# Report file is written into the current working directory.
OUTPUT_FILE = "du_report.json"

# Directories larger than this threshold get flagged for further action.
SIZE_THRESHOLD_GB = 1
SIZE_THRESHOLD_BYTES = SIZE_THRESHOLD_GB * 1024 * 1024 * 1024
|
||
# Function to calculate disk usage of a directory in bytes
def get_disk_usage_bytes(path):
    """Return the total size of *path* in bytes, as reported by ``du -sb``.

    Raises:
        RuntimeError: if ``du`` exits non-zero or produces no output
            (e.g. the path does not exist or is unreadable).  The original
            code ignored failures and crashed with an opaque IndexError.
    """
    result = subprocess.run(['du', '-sb', path], capture_output=True, text=True)
    if result.returncode != 0 or not result.stdout.strip():
        raise RuntimeError(f"du failed for {path!r}: {result.stderr.strip()}")
    # `du -sb` prints "<bytes>\t<path>"; the first field is the byte count.
    return int(result.stdout.split()[0])
|
||
# Function to convert bytes to a human-readable format (e.g., KB, MB, GB)
def bytes_to_human_readable(size_in_bytes):
    """Format a byte count as a string with a binary-scaled unit suffix.

    Fix: the original loop fell off the end and returned ``None`` for
    values of one petabyte or more; the scale now clamps at PB so a
    string is always returned.
    """
    size = float(size_in_bytes)
    for unit in ['B', 'KB', 'MB', 'GB', 'TB', 'PB']:
        # Stop scaling once the value fits the unit, or at the largest unit.
        if size < 1024 or unit == 'PB':
            return f"{size:.2f} {unit}"
        size /= 1024
|
||
def prepare_report(directory):
    """Build a per-user disk-usage report for *directory* and write it as JSON.

    Each immediate subdirectory is treated as one user's home.  The report
    maps user name -> {disk_usage_bytes, action, disk_usage_human_readable}
    and is written to OUTPUT_FILE in the current working directory.
    """
    report = {}

    # One pass: measure each user dir, decide the action, and attach the
    # human-readable size (the original did this in a second loop).
    for entry in os.listdir(directory):
        entry_path = os.path.join(directory, entry)
        if not os.path.isdir(entry_path):
            continue
        usage = get_disk_usage_bytes(entry_path)
        record = {"disk_usage_bytes": usage}
        if usage > SIZE_THRESHOLD_BYTES:
            # TODO: Placeholder for other actions
            record["action"] = f"Directory size exceeds {SIZE_THRESHOLD_BYTES / (1024**3):.2f}GB, further action taken."
        else:
            record["action"] = "No action required."
        record["disk_usage_human_readable"] = bytes_to_human_readable(usage)
        report[entry] = record

    with open(OUTPUT_FILE, 'w') as f:
        json.dump(report, f, indent=4)

    print(f"Disk usage report generated at {OUTPUT_FILE}")
|
||
|
||
if __name__ == "__main__":
    # Expect exactly one argument: the directory containing user home dirs.
    if len(sys.argv) == 2:
        prepare_report(sys.argv[1])
    else:
        print("Usage: du.py <directory_to_check>")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,52 +13,83 @@ jobs: | |
- name: Checkout code | ||
uses: actions/checkout@v3 | ||
|
||
- name: Log in to DockerHub | ||
uses: docker/login-action@v2 | ||
with: | ||
username: ${{ secrets.DOCKERHUB_USERNAME }} | ||
password: ${{ secrets.DOCKERHUB_TOKEN }} | ||
|
||
- name: Build and push Docker image | ||
uses: docker/build-push-action@v3 | ||
with: | ||
context: . | ||
file: images/Dockerfile.dandihub_report_generator | ||
push: true | ||
tags: ${{ secrets.DOCKERHUB_USERNAME }}/dandihub_report_generator:latest | ||
|
||
- name: Configure AWS credentials | ||
uses: aws-actions/configure-aws-credentials@v3 | ||
with: | ||
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} | ||
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} | ||
# TODO param region | ||
aws-region: us-east-2 | ||
|
||
- name: Assume JupyterhubProvisioningRole | ||
# TODO param ProvisioningRoleARN and name ^ | ||
- name: Assume ProvisioningRole | ||
run: | | ||
ROLE_ARN="arn:aws:iam::278212569472:role/JupyterhubProvisioningRole" | ||
CREDS=$(aws sts assume-role --role-arn $ROLE_ARN --role-session-name "GitHubActionsSession") | ||
CREDS=$(aws sts assume-role --role-arn ${{ secrets.AWS_PROVISIONING_ROLE_ARN }} --role-session-name "GitHubActionsSession") | ||
export AWS_ACCESS_KEY_ID=$(echo $CREDS | jq -r '.Credentials.AccessKeyId') | ||
export AWS_SECRET_ACCESS_KEY=$(echo $CREDS | jq -r '.Credentials.SecretAccessKey') | ||
export AWS_SESSION_TOKEN=$(echo $CREDS | jq -r '.Credentials.SessionToken') | ||
- name: Configure kubectl with AWS EKS | ||
# TODO param name, region role-arn | ||
run: | | ||
aws eks update-kubeconfig --name eks-dandihub --region us-east-2 --role-arn arn:aws:iam::278212569472:role/JupyterhubProvisioningRole | ||
aws eks update-kubeconfig --name eks-dandihub --region us-east-2 --role-arn ${{ secrets.AWS_PROVISIONING_ROLE_ARN }} | ||
# TODO remove | ||
- name: Sanity check | ||
run: | | ||
kubectl get pods -n jupyterhub | ||
# Step 4: Deploy Hello World Pod from manifest | ||
- name: Deploy Hello World Pod | ||
- name: Replace image placeholder in manifest | ||
run: | | ||
sed -i 's|IMAGE_PLACEHOLDER|'"${{ secrets.DOCKERHUB_USERNAME }}/disk_usage_report:latest"'|' .github/manifests/disk-usage-report-job.yaml | ||
- name: Deploy Disk Usage Report Job | ||
run: | | ||
kubectl apply -f .github/manifests/disk-usage-report-job.yaml | ||
# TODO should timeout be longer? | ||
- name: Wait for Disk Usage Report Job to complete | ||
run: | | ||
kubectl wait --for=condition=complete job/disk-usage-report-job --timeout=300s | ||
continue-on-error: true | ||
|
||
- name: Save Pod logs to file | ||
run: | | ||
POD_NAME=$(kubectl get pods --selector=job-name=disk-usage-report-job -o jsonpath='{.items[0].metadata.name}') | ||
kubectl logs $POD_NAME > disk_usage_report.log | ||
continue-on-error: true | ||
|
||
# continue-on-error for previous steps so we delete the job | ||
- name: Delete Disk Usage Report Job | ||
run: | | ||
kubectl apply -f .github/manifests/hello-world-pod.yaml | ||
kubectl delete job disk-usage-report-job | ||
# Step 5: Wait for Pod to Complete | ||
- name: Wait for Hello World Pod to complete | ||
- name: Clone dandi-hub-usage-reports repository | ||
run: | | ||
kubectl wait --for=condition=Ready pod/hello-world-pod --timeout=300s # 5 minutes | ||
continue-on-error: true # Allow the workflow to continue even if this step fails | ||
git clone https://github.com/dandi/dandi-hub-usage-reports.git | ||
cd dandi-hub-usage-reports | ||
# Step 6: Get Pod Logs to verify it ran successfully, only if Step 5 succeeds | ||
- name: Get Hello World Pod logs | ||
- name: Copy log file to repository | ||
run: | | ||
kubectl logs hello-world-pod | ||
if: ${{ success() }} # Only run this step if the previous step was successful | ||
DATE=$(date +'%Y-%m-%d')
mv ../disk_usage_report.log "${DATE}_disk_usage_report.log"
# Step 7: Cleanup - Always run this step, even if previous steps fail | ||
- name: Delete Hello World Pod | ||
# Step 13: Commit and push logs to the repository | ||
- name: Commit and push logs | ||
run: | | ||
kubectl delete pod hello-world-pod | ||
if: ${{ always() }} # Always run this step, even if other steps fail | ||
git config --global user.name "GitHub Actions" | ||
git config --global user.email "[email protected]" | ||
git add *_disk_usage_report.log
git commit -m "Add disk usage report log" | ||
git push https://${{ secrets.GITHUB_TOKEN }}@github.com/dandi/dandi-hub-usage-reports.git |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
DONE | ||
- Set AWS_ROLE ARN secret | ||
- AWS_ACCESS_KEY_ID | ||
- AWS_SECRET_ACCESS_KEY | ||
|
||
TODO: | ||
- Create Dockerhub Service account | ||
- set username & token as secrets | ||
- Create GitHub CI account
- Docker Image Tagging: | ||
- The Docker image is tagged with latest. For better version control, consider using commit SHA or version numbers. | ||
- Log Retrieval: | ||
- The logs from the pod are retrieved to help you verify the script's output. | ||
- Cleanup: | ||
- Deleting the Job ensures that no resources are left running after the workflow completes. | ||
|
||
By making these updates, your workflow will now: | ||
|
||
Include your du.py script in a Docker image. | ||
Build and push this image to DockerHub. | ||
Deploy a Kubernetes Job to your EKS cluster that runs the script. | ||
Wait for the Job to complete and retrieve logs. | ||
Clean up resources after execution. | ||
|
||
Feel free to ask if you need further assistance or clarification on any of these steps! | ||
|
||
|
||
- Get image pushing | ||
- create private gh repository under dandi org for reports | ||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
# Image for the dandihub disk-usage report job.
FROM python:3.9-slim

# All work happens in /app; the report JSON lands in the CWD at runtime.
WORKDIR /app

# The report script is the only application artifact this image needs.
COPY .github/scripts/du.py /app/du.py

# `du` is provided by coreutils; prune apt lists to keep the image small.
RUN apt-get update \
    && apt-get install -y coreutils \
    && rm -rf /var/lib/apt/lists/*

# Run the report script directly; container args select the directory to scan.
ENTRYPOINT ["python3", "/app/du.py"]