Merge pull request #14 from script-money/fix/check
change upload method and fix bugs
script-money authored Sep 14, 2022
2 parents bf16f08 + 502e5bd commit 5d0eadb
Showing 38 changed files with 180 additions and 70 deletions.
32 files renamed without changes
28 changes: 14 additions & 14 deletions ratio.csv
@@ -4,6 +4,16 @@ parts,Background,green,1
parts,Background,purple,1
parts,Background,red,1
parts,Background,white,1
parts,First Letter,B,1
parts,First Letter,C,1
parts,First Letter,D,1
parts,First Letter,E,1
parts,First Letter,F,1
parts,First Letter,G,1
parts,First Letter,H,1
parts,First Letter,I,1
parts,First Letter,J,1
parts,First Letter,K,1
parts,Second Letter,A,1
parts,Second Letter,Q,1
parts,Second Letter,R,1
@@ -14,20 +24,10 @@ parts,Second Letter,X,1
parts,Second Letter,Y,1
parts,Second Letter,Z,1
parts,Second Letter,p,1
parts,First Latter,B,1
parts,First Latter,C,1
parts,First Latter,D,1
parts,First Latter,E,1
parts,First Latter,F,1
parts,First Latter,G,1
parts,First Latter,H,1
parts,First Latter,I,1
parts,First Latter,J,1
parts,First Latter,K,1
parts2,Background,black,1
parts2,First Letter,1,1
parts2,First Letter,2,1
parts2,First Letter,3,1
parts2,First Letter,empty,1
parts2,Second Letter,empty,1
parts2,Second Letter,¥,1
parts2,First Latter,1,1
parts2,First Latter,2,1
parts2,First Latter,3,1
parts2,First Latter,empty,1
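This hunk fixes the misspelled trait name "First Latter" to "First Letter" in both the parts and parts2 folders. The four CSV columns appear to be folder, trait, value, and weight; no header row is visible in this diff, so the names in the sketch below are assumptions. A minimal pandas check of the table under those assumptions:

import pandas as pd

# column names are assumed; adjust if ratio.csv carries a header row
df = pd.read_csv("ratio.csv", names=["folder", "trait", "value", "weight"])
assert (df["weight"] > 0).all(), "weights must be positive"
print(df.groupby(["folder", "trait"])["weight"].sum())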
1 change: 0 additions & 1 deletion src/config.py
@@ -60,7 +60,6 @@ class Quality(Enum):


# upload----------------------------------------------------------------------------------------------
READ_IMAGES = False # if read images from image_ipfs_data.backup, set True
UPLOAD_METADATA = False # set False if you don't want to upload metadata
PIN_FILES = False # set True if you want to pin files permanently
PROXIES = {
3 changes: 1 addition & 2 deletions src/fresh_metadata.py
@@ -1,4 +1,3 @@
from typing import List
from httpx import Timeout, Client, AsyncClient
import time
import asyncio
@@ -145,7 +144,7 @@ def opensea_refresh(client: Client, id: int) -> bool:
return False


async def ipfs_query_tasks(request_urls: List[str]):
async def ipfs_query_tasks(request_urls: list[str]):
"""
create asyncio tasks for ipfs query
3 changes: 1 addition & 2 deletions src/generate.py
@@ -1,5 +1,4 @@
import os
from typing import List
from PIL import Image
import pandas as pd
import numpy as np
@@ -194,7 +193,7 @@ def generate_images(
return df_attr


def check_values_valid(df: pd.DataFrame, select_columns: List, all_values: List):
def check_values_valid(df: pd.DataFrame, select_columns: list, all_values: list):
"""
Check that the selected columns' values are in the list
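Both this file and src/fresh_metadata.py swap typing.List for the built-in generic (PEP 585), which requires Python 3.9 or newer. A minimal illustration of the annotation change:

# Python 3.9+: built-in types are subscriptable, so typing.List is unnecessary
def demo(urls: list[str]) -> list[dict]:  # hypothetical function, for illustration
    return [{"url": u} for u in urls]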
5 changes: 3 additions & 2 deletions src/get_table.py
@@ -3,6 +3,7 @@
import pandas as pd
import shutil
from config import FOLDERS, EXTENSION, W, H, WEIGHTS
from math import fsum


def get_files_path(folders=FOLDERS):
@@ -46,7 +47,7 @@ def get_files_path(folders=FOLDERS):

# Validate weights
assert (
sum(WEIGHTS) == 1
fsum(WEIGHTS) == 1
), f"sum of PARTS_DICT's value in config.py should be 1, now is {sum(WEIGHTS)}"

# Validate image format and size
@@ -70,7 +71,7 @@ def get_files_path(folders=FOLDERS):
# Validate that file names do not contain '-'
for path in files_path:
assert (
"-" not in path.split("/")[-1]
"-" not in path.split(os.sep)[-1]
), f"{path} is invalid, files should not have '-' symbol"

folder_set = set()
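Two fixes here are easy to miss: splitting paths on os.sep instead of a hard-coded "/" keeps the dash check working on Windows, and math.fsum uses compensated summation, so fractional weights that should total 1 actually compare equal to 1. A quick illustration of the fsum point:

from math import fsum

weights = [0.1] * 10
print(sum(weights))   # 0.9999999999999999 — plain sum accumulates rounding error
print(fsum(weights))  # 1.0 — fsum tracks the intermediate error exactly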
210 changes: 161 additions & 49 deletions src/upload.py
@@ -1,6 +1,9 @@
import asyncio
import os
import httpx
from httpx import AsyncClient, Limits, ReadTimeout, Client
import json
import pandas as pd
import random
from config import (
IMAGES,
METADATA,
@@ -9,13 +12,65 @@
PROJECT_ID,
PROJECT_SECRET,
PROXIES,
READ_IMAGES,
UPLOAD_METADATA,
PIN_FILES,
)
import pandas as pd
from final_check import RENAME_DF, START_ID
import random


async def upload_task(files_path_chunk: list[str], wait_seconds: int) -> list[dict]:
"""
upload task for asyncio; each task processes 10 files
Args:
files_path_chunk (list[str]): a list containing 10 file paths
wait_seconds (int): Infura accepts roughly 10 posts per second, so each task sleeps wait_seconds before starting
Returns:
list[dict]: ipfs info for the 10 files
"""
await asyncio.sleep(wait_seconds)
async with AsyncClient(
proxies=PROXIES, limits=Limits(max_connections=10), timeout=60
) as client:
loop = asyncio.get_running_loop()
tasks = [
loop.create_task(upload_single_async(client, file_path))
for file_path in files_path_chunk
]
result = await asyncio.gather(*tasks)
return result


async def upload_single_async(client: AsyncClient, file_path: str) -> dict:
"""
upload a single file to ipfs
Args:
client (AsyncClient): httpx.AsyncClient instance
file_path (str): path of the file to upload
Returns:
dict: ipfs info json
"""
retry = 0
while retry < 5:
try:
response = await client.post(
"https://ipfs.infura.io:5001/api/v0/add",
params={
"pin": "true" if PIN_FILES else "false"
}, # pin=true to keep the file stored permanently
auth=(PROJECT_ID, PROJECT_SECRET),
files={"file": open(file_path, "rb")},
)
res_json = response.json()
if res_json["Name"] != "":
return res_json
except Exception as e:
if isinstance(e, ReadTimeout):
print(f"upload {file_path.split('-')[0]} timeout, retry {retry}")
retry += 1 # count every failed attempt, including empty responses, so the loop cannot spin forever

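A standalone way to exercise one chunk, assuming valid Infura credentials in config.py (the file paths here are hypothetical):

import asyncio
from upload import upload_task  # when run outside src/upload.py

infos = asyncio.run(upload_task(["images/0.png", "images/1.png"], 0))
print(infos)  # e.g. [{'Name': ..., 'Hash': ..., 'Size': ...}, ...]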

def upload_folder(
@@ -32,39 +87,24 @@ def upload_folder(
tuple[str, list[dict]]: (folder_hash, images_dict_list)
"""
files = []
if content_type == "image/png":
files = [
(
folder_name.split(os.sep)[-1],
(file, open(os.path.join(folder_name, file), "rb"), content_type),
)
for file in list(
filter(lambda i: i.split(".")[-1] == "png", os.listdir(folder_name))
)
]
elif content_type == "image/gif":
files = [
(
folder_name.split(os.sep)[-1],
(file, open(os.path.join(folder_name, file), "rb"), content_type),
)
for file in list(
filter(lambda i: i.split(".")[-1] == "gif", os.listdir(folder_name))
)
]
elif content_type == "application/json":
files = [
(
folder_name.split(os.sep)[-1],
(file, open(os.path.join(folder_name, file), "rb"), content_type),
)
for file in list(filter(lambda i: "." not in i, os.listdir(folder_name)))
]
extension = content_type.split("/")[-1] # MIME types always use "/", independent of os.sep

with httpx.Client(proxies=PROXIES) as client:
files = [
(file, open(os.path.join(folder_name, file), "rb"))
for file in list(
filter(lambda i: i.split(".")[-1] == extension, os.listdir(folder_name))
)
]

with Client(proxies=PROXIES, timeout=None) as client:
response = client.post(
f"https://ipfs.infura.io:5001/api/v0/add?pin={'true' if PIN_FILES else 'false'}&recursive=true&wrap-with-directory=true", # pin=true if want to pin files
files=files,
f"https://ipfs.infura.io:5001/api/v0/add",
params={
"pin": "true" if PIN_FILES else "false",
"recursive": "true",
"wrap-with-directory": "true",
},
files=files, # a list of (filename, file object) tuples
auth=(PROJECT_ID, PROJECT_SECRET),
)
upload_folder_res_list = response.text.strip().split("\n")
@@ -85,9 +125,49 @@
return (folder_hash, images_dict_list)

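The add endpoint streams newline-delimited JSON, one object per uploaded file, and with wrap-with-directory=true a final object whose Name is empty for the wrapping folder — which is why the code above splits response.text on newlines. A sketch with a fabricated response body:

import json

# fabricated response body, for illustration only
response_text = (
    '{"Name":"0.png","Hash":"QmAaa","Size":"1234"}\n'
    '{"Name":"1.png","Hash":"QmBbb","Size":"1290"}\n'
    '{"Name":"","Hash":"QmRoot","Size":"2650"}'
)
entries = [json.loads(line) for line in response_text.strip().split("\n")]
folder_hash = next(e["Hash"] for e in entries if e["Name"] == "")
print(folder_hash)  # QmRoot — the directory that wraps the files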

def upload_files(folder_name: str, content_type: str = "image/png") -> list[dict]:
"""
upload files in a folder to ipfs
Args:
folder_name (str): folder containing the files to upload
content_type (str, optional): mime file type. Defaults to "image/png".
Returns:
list[dict]: ipfs info list, example: [{ 'Name': str, 'Hash': str, 'Size': str }]
"""
extension = content_type.split("/")[-1] # MIME types always use "/", independent of os.sep
file_paths = [
os.path.join(folder_name, file_path)
for file_path in list(
filter(lambda i: i.split(".")[-1] == extension, os.listdir(folder_name))
)
]
file_count = len(file_paths)
chunk_size = 10 # 10 per second for infura
chunks = [file_paths[i : i + chunk_size] for i in range(0, file_count, chunk_size)]
tasks = []
results = []

def complete_batch_callback(images_ipfs_data):
results.append(images_ipfs_data.result())
print(f"complete {len(results)/len(chunks):.2%}")

loop = asyncio.get_event_loop()
print(f"Total {len(file_count)} files to upload, estimate time: {len(chunks)+10}s")
for epoch, path_chunk in enumerate(chunks):
task = loop.create_task(upload_task(path_chunk, epoch))
tasks.append(task)
task.add_done_callback(complete_batch_callback)

loop.run_until_complete(asyncio.wait(tasks))
print(f"upload {len(results)} files complete.")
return results

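Because chunk k sleeps k seconds before posting its 10 files, the overall rate stays near Infura's 10-posts-per-second limit, which is where the len(chunks)+10 seconds estimate comes from. A hypothetical invocation:

batches = upload_files(IMAGES, "image/png")  # IMAGES comes from config.py
flat = [info for batch in batches for info in batch]
print(f"{len(flat)} files uploaded")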

def generate_metadata(
df: pd.DataFrame,
image_ipfs_data: dict,
image_ipfs_data: list[dict],
start_id: int = 0,
image_folder: str = IMAGES,
metadata_folder: str = METADATA,
@@ -133,26 +213,58 @@ def generate_metadata(
"attributes": attributes,
}
info_json = json.dumps(info_dict)
with open(os.path.join(metadata_folder, str(index)), "w") as f:
with open(os.path.join(metadata_folder, str(index) + ".json"), "w") as f:
f.write(info_json)

return (start_id, start_id + len(df) - 1)

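Metadata files are now written as <id>.json to match the URL printed at the end of the script. The exact fields depend on code elided from this diff, but a single output file plausibly looks like the dict below (values fabricated from the traits in ratio.csv):

info_dict = {
    "name": "Token #0",  # hypothetical naming scheme
    "description": "a hypothetical description",
    "image": "ipfs://QmAaa/0.png",  # fabricated image hash
    "attributes": [
        {"trait_type": "Background", "value": "green"},
        {"trait_type": "First Letter", "value": "B"},
    ],
}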

def read_images_from_local() -> list[dict]:
"""
read image ipfs info from the local JSON backup
Returns:
list[dict]: images ipfs info
"""
with open("image_ipfs_data.backup", "r") as f:
result = json.loads(f.read())
print(f"read {len(result)} ipfs data from local")
return result


def download_and_save():
"""
upload images, save their ipfs info to a local backup, and return it
Returns:
list[dict]: images ipfs info
"""
all_ipfs_info_batch = upload_files(IMAGES)
image_ipfs_data = []
for batch_info in all_ipfs_info_batch:
for single_info in batch_info:
image_ipfs_data.append(single_info)
with open("image_ipfs_data.backup", "w") as f:
f.write(json.dumps(image_ipfs_data))
print("save image_ipfs_data to image_ipfs_data.backup")
return image_ipfs_data


if __name__ == "__main__":
df = RENAME_DF
if not READ_IMAGES:
image_ipfs_root, image_ipfs_data = upload_folder(IMAGES)
print(f"image_ipfs_root: {image_ipfs_root}")
# backup file use for debug upload images data
with open("image_ipfs_data.backup", "w") as f:
f.write(json.dumps(image_ipfs_data))
print("save image_ipfs_data to image_ipfs_data.backup")
if not PIN_FILES:
print(
f"Pin file is {PIN_FILES}, set PIN_FILES=True in config.py if want to pin files"
)
if os.path.exists("image_ipfs_data.backup"):
use_local = input("image_ipfs_data.backup exists, load from local? (y/n)")
if use_local == "y":
image_ipfs_data = read_images_from_local()
else:
image_ipfs_data = download_and_save()
else:
# if read images hashes from backup
with open("image_ipfs_data.backup", "r") as j:
image_ipfs_data = json.loads(j.read())
image_ipfs_data = download_and_save()

df = RENAME_DF
start, end = generate_metadata(df, image_ipfs_data, START_ID)
print(f"Generate metadata complete, Index from {start} to {end}")

@@ -161,5 +273,5 @@ def generate_metadata(
metadata_root, _ = upload_folder(METADATA, "application/json")
print(f"upload metadatas complete")
print(
f"Source url is {metadata_root}, you can visit ipfs://{metadata_root}/{start} to check"
f"Source url is {metadata_root}, you can visit ipfs://{metadata_root}/{start}.json to check"
)
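To spot-check the result without a local IPFS node, the same path can be fetched through a public HTTP gateway; the gateway, hash, and id below are placeholders:

import httpx

metadata_root = "QmRoot"  # placeholder: use the hash printed above
start = 0  # placeholder: the first token id
url = f"https://ipfs.io/ipfs/{metadata_root}/{start}.json"
print(httpx.get(url).json())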
