From c948193a57c2e68626ac7f219c1b035fa5351026 Mon Sep 17 00:00:00 2001 From: Quanyi Ma Date: Wed, 16 Oct 2024 11:02:44 +0800 Subject: [PATCH] Upgrade README and random for crates-sync script Signed-off-by: Quanyi Ma --- README.md | 39 +++++++++-------- scripts/crates-sync/crates-sync.py | 67 +++++++++++++++++------------- 2 files changed, 60 insertions(+), 46 deletions(-) diff --git a/README.md b/README.md index 9466c5ee..19dee003 100644 --- a/README.md +++ b/README.md @@ -37,40 +37,45 @@ For now, the entire open source community base on Git and GitHub. It's centraliz For now, the monorepo engine could be deployed on your host machine or insulated into containers. For deploying through docker, follow the steps below: 1. Clone the project and build the docker images + ```bash -git clone https://github.com/web3infra-foundation/mega.git -cd mega -docker buildx build -t mono-pg:0.1-pre-release -f ./docker/mono-pg-dockerfile . -docker buildx build -t mono-engine:0.1-pre-release -f ./docker/mono-engine-dockerfile . -docker buildx build -t mono-ui:0.1-pre-release -f ./docker/mono-ui-dockerfile . +$ git clone https://github.com/web3infra-foundation/mega.git +$ cd mega +$ git submodule update --init --recursive +$ docker buildx build -t mono-pg:0.1-pre-release -f ./docker/mono-pg-dockerfile . +$ docker buildx build -t mono-engine:0.1-pre-release -f ./docker/mono-engine-dockerfile . +$ docker buildx build -t mono-ui:0.1-pre-release -f ./docker/mono-ui-dockerfile . ``` 2. Initialize for mono-engine and PostgreSQL + ```bash # Linux or MacOS -./docker/init-volume.sh /mnt/data ./docker/config.toml +$ ./docker/init-volume.sh /mnt/data ./docker/config.toml ``` 3. Run the mono-engine and PostgreSQL with docker, and open the mono-ui in your browser with `http://localhost:3000`. + ```bash # create network -docker network create mono-network +$ docker network create mono-network # run postgres -docker run --rm -it -d --name mono-pg --network mono-network -v /tmp/data/mono/pg-data:/var/lib/postgresql/data -p 5432:5432 mono-pg:0.1-pre-release -docker run --rm -it -d --name mono-engine --network mono-network -v /tmp/data/mono/mono-data:/opt/mega -p 8000:8000 mono-engine:0.1-pre-release -docker run --rm -it -d --name mono-ui --network mono-network -e MEGA_INTERNAL_HOST=http://mono-engine:8000 -e MEGA_HOST=http://localhost:8000 -p 3000:3000 mono-ui:0.1-pre-release +$ docker run --rm -it -d --name mono-pg --network mono-network -v /tmp/data/mono/pg-data:/var/lib/postgresql/data -p 5432:5432 mono-pg:0.1-pre-release +$ docker run --rm -it -d --name mono-engine --network mono-network -v /tmp/data/mono/mono-data:/opt/mega -p 8000:8000 mono-engine:0.1-pre-release +$ docker run --rm -it -d --name mono-ui --network mono-network -e MEGA_INTERNAL_HOST=http://mono-engine:8000 -e MEGA_HOST=http://localhost:8000 -p 3000:3000 mono-ui:0.1-pre-release ``` 4. Try to upload a repository to mono-engine + ```bash -git clone http://localhost:8000/project.git -cd project -git clone https://github.com/dagrs-dev/dagrs.git -sudo rm -r dagrs/.git -git add . -git commit -a -m"Initial the dagrs project" -git push +$ git clone http://localhost:8000/project.git +$ cd project +$ git clone https://github.com/dagrs-dev/dagrs.git +$ sudo rm -r dagrs/.git +$ git add . +$ git commit -a -m"Initial the dagrs project" +$ git push ``` 5. Check the repository in UI diff --git a/scripts/crates-sync/crates-sync.py b/scripts/crates-sync/crates-sync.py index c9daad57..11095b21 100755 --- a/scripts/crates-sync/crates-sync.py +++ b/scripts/crates-sync/crates-sync.py @@ -5,6 +5,7 @@ import tarfile import subprocess import shutil +import random from collections import defaultdict from datetime import datetime, timedelta @@ -184,28 +185,14 @@ def process_crate(num, crate_name, versions, crates_dir, git_repos_dir, dl_base_ print_blue(f"Finished processing {crate_name}") -def scan_and_process_crates(index_path, crates_dir, git_repos_dir, git_base_url, lfs_url): - # Scan the crates.io index and process all crates - crates = defaultdict(set) - dl_base_url = None + return num - # Check if the directories exist - for path in [index_path, crates_dir, git_repos_dir]: - if not os.path.isdir(path): - print_red(f"Error: The directory {path} does not exist.") - sys.exit(1) +def scan_crates_index(index_path): + crates = defaultdict(set) - # Read the config.json to get the dl base URL - config_path = os.path.join(index_path, 'config.json') - try: - with open(config_path, 'r') as config_file: - config = json.load(config_file) - dl_base_url = config.get('dl') - if not dl_base_url: - print_red("Error: 'dl' key not found in config.json") - sys.exit(1) - except Exception as e: - print_red(f"Error reading config.json: {str(e)}") + # Check if the directory exists + if not os.path.isdir(index_path): + print_red(f"Error: The directory {index_path} does not exist.") sys.exit(1) # Walk through the index directory @@ -224,20 +211,42 @@ def scan_and_process_crates(index_path, crates_dir, git_repos_dir, git_base_url, line = line.strip() if line: crate_info = json.loads(line) - crate_name = crate_info['name'] - crate_version = crate_info['vers'] - crates[crate_name].add(crate_version) + crates[crate_info['name']].add(crate_info['vers']) except Exception as e: print_red(f"Error processing file {full_path}: {str(e)}") - num = 0 + return crates + +def scan_and_process_crates(index_path, crates_dir, git_repos_dir, git_base_url, lfs_url): + # Scan the crates.io index + print_blue("Scanning crates.io index...") + crates = scan_crates_index(index_path) + print_blue(f"Found {len(crates)} crates.") + + # Shuffle the crates items + print_blue("Shuffling crates list...") + crates_items = list(crates.items()) + random.shuffle(crates_items) + + # Read the config.json to get the dl base URL + config_path = os.path.join(index_path, 'config.json') + try: + with open(config_path, 'r') as config_file: + config = json.load(config_file) + dl_base_url = config.get('dl') + if not dl_base_url: + print_red("Error: 'dl' key not found in config.json") + sys.exit(1) + except Exception as e: + print_red(f"Error reading config.json: {str(e)}") + sys.exit(1) - # Process each crate - for crate_name, versions in crates.items(): - process_crate(num, crate_name, versions, crates_dir, git_repos_dir, dl_base_url, git_base_url, lfs_url) - num += len(versions) + # Process crates + print_blue("Starting to process crates...") + num = 0 + for crate_name, versions in crates_items: + num = process_crate(num, crate_name, versions, crates_dir, git_repos_dir, dl_base_url, git_base_url, lfs_url) - return len(crates) def main(): # Record start time for the entire process