Skip to content

Commit

Permalink
[DEVX:179]: Reuse Dataset Upload examples (#146)
Browse files Browse the repository at this point in the history
* reuse dataset upload examples
  • Loading branch information
sainivedh authored Aug 21, 2023
1 parent 177113e commit 61c41b3
Show file tree
Hide file tree
Showing 43 changed files with 571 additions and 0 deletions.
31 changes: 31 additions & 0 deletions clarifai/datasets/upload/examples/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
## Dataset upload from local directory

Examples of how to upload datasets from a local directory into a Clarifai app using the `module_dir` feature of `Dataset`.

**Note:**

- Ensure that the `CLARIFAI_PAT` environment variable is set.
- Ensure that the appropriate base workflow is set for indexing the respective input type.


## Image Classification - Cifar10
```python
from clarifai.client.dataset import Dataset
dataset = Dataset(user_id="user_id", app_id="app_id", dataset_id="dataset_id")
dataset.upload_dataset(task="visual_classification", split="train", module_dir="path_to_cifar10_module")
```

## Image Classification - [Food-101](https://data.vision.ee.ethz.ch/cvl/datasets_extra/food-101/)
```python
from clarifai.client.dataset import Dataset
dataset = Dataset(user_id="user_id", app_id="app_id", dataset_id="dataset_id")
dataset.upload_dataset(task="visual_classification", split="train", module_dir="path_to_food-101_module")
```

## Text Classification - IMDB Reviews
```python
from clarifai.client.dataset import Dataset
dataset = Dataset(user_id="user_id", app_id="app_id", dataset_id="dataset_id")
dataset.upload_dataset(task="text_clf", split="train", module_dir="path_to_imdb_reviews_module")
```
Empty file.
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
image_path,label
images/test_batch_700.jpg,horse
images/test_batch_701.jpg,bird
images/test_batch_702.jpg,deer
images/test_batch_703.jpg,ship
images/test_batch_704.jpg,horse
images/test_batch_705.jpg,deer
images/test_batch_706.jpg,bird
images/test_batch_707.jpg,truck
images/test_batch_709.jpg,ship
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
image_path,label
images/test_batch_700.jpg,horse
images/test_batch_701.jpg,bird
images/test_batch_702.jpg,deer
images/test_batch_703.jpg,ship
images/test_batch_704.jpg,horse
images/test_batch_705.jpg,deer
images/test_batch_706.jpg,bird
images/test_batch_707.jpg,truck
images/test_batch_709.jpg,ship
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Cifar10 Dataset

import csv
import os

from clarifai.datasets.upload.base import ClarifaiDataLoader
from clarifai.datasets.upload.features import VisualClassificationFeatures


class Cifar10DataLoader(ClarifaiDataLoader):
    """Cifar10 Dataset.

    Reads ``image_path,label`` rows from a per-split CSV file that lives in
    the same directory as this module and serves each row as a
    ``VisualClassificationFeatures`` item.
    """

    def __init__(self, split: str = "train"):
        """Initialize dataset params.

        Args:
            split: "train" or "test"

        Raises:
            KeyError: if ``split`` is not one of the registered splits
                (raised while loading the CSV).
        """
        self.split = split
        module_dir = os.path.dirname(__file__)
        self.data_dirs = {
            "train": os.path.join(module_dir, "cifar_small_train.csv"),
            "test": os.path.join(module_dir, "cifar_small_test.csv"),
        }
        self.data = self.load_data()

    def load_data(self):
        """Read the CSV of the selected split.

        Returns:
            A list of ``(image_path, label)`` tuples, one per non-empty data
            row, in file order. The first line is treated as a header and
            skipped.
        """
        # newline="" hands line-ending handling to the csv module, as its
        # documentation requires; the explicit encoding keeps decoding
        # independent of the platform's locale.
        with open(self.data_dirs[self.split], newline="", encoding="utf-8") as _file:
            reader = csv.reader(_file)
            next(reader, None)  # skip header
            # csv.reader yields [] for blank lines (e.g. a trailing empty
            # line); skip those instead of failing on row[0].
            return [(row[0], row[1]) for row in reader if row]

    def __getitem__(self, index):
        """Return the item at ``index`` as ``VisualClassificationFeatures``.

        The image path stored in the CSV is resolved against this module's
        directory; the id is the file's base name up to its first dot.
        """
        relative_path, label = self.data[index]
        return VisualClassificationFeatures(
            image_path=os.path.join(os.path.dirname(__file__), relative_path),
            label=label,
            id=os.path.basename(relative_path).split(".")[0],
        )

    def __len__(self):
        """Return the number of loaded rows."""
        return len(self.data)
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import os

from clarifai.datasets.upload.base import ClarifaiDataLoader
from clarifai.datasets.upload.features import VisualClassificationFeatures


class Food101DataLoader(ClarifaiDataLoader):
    """Food-101 Image Classification Dataset.

    Expects an ``images/<class_name>/<image file>`` layout next to this
    module; the sub-directory name is used as the label of every image it
    contains.
    """

    def __init__(self, split: str = "train"):
        """Initialize dataset params.

        Args:
            split: "train" or "test"

        Raises:
            KeyError: if ``split`` has no registered image directory. Only
                "train" is registered here, so any other value fails while
                loading.
        """
        self.split = split
        self.image_dir = {"train": os.path.join(os.path.dirname(__file__), "images")}
        self.load_data()

    def load_data(self):
        """Load data for the food-101 dataset.

        Populates ``self.data`` with one ``{"image_path", "class_name"}``
        dict per image file found under the split's image directory.
        """
        self.data = []
        root = self.image_dir[self.split]
        # os.listdir returns entries in arbitrary order; sorting makes the
        # index -> item mapping reproducible across runs and machines.
        for class_name in sorted(os.listdir(root)):
            class_dir = os.path.join(root, class_name)
            # Stray files at the class level (e.g. .DS_Store) are not class
            # folders; listing them would raise NotADirectoryError.
            if not os.path.isdir(class_dir):
                continue
            for image in sorted(os.listdir(class_dir)):
                image_path = os.path.join(class_dir, image)
                # Only regular files can be images; skip nested directories.
                if not os.path.isfile(image_path):
                    continue
                self.data.append({
                    "image_path": image_path,
                    "class_name": class_name,
                })

    def __getitem__(self, idx):
        """Return the item at ``idx`` as ``VisualClassificationFeatures``.

        The id is the image file's base name up to its first dot.
        """
        data_item = self.data[idx]
        image_path = data_item["image_path"]
        return VisualClassificationFeatures(
            image_path=image_path,
            label=data_item["class_name"],
            id=os.path.basename(image_path).split(".")[0],
        )

    def __len__(self):
        """Return the number of discovered images."""
        return len(self.data)
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Empty file.
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import csv
import os

from clarifai.datasets.upload.base import ClarifaiDataLoader
from clarifai.datasets.upload.features import TextFeatures


class IMDBMovieReviewsDataLoader(ClarifaiDataLoader):
    """IMDB 50K Movie Reviews Dataset.

    Reads rows from a per-split CSV file (``train.csv`` / ``test.csv``) in
    the same directory as this module; the first column is used as the text
    and the second as its label.
    """

    def __init__(self, split: str = "train"):
        """Initialize dataset params.

        Args:
            split: "train" or "test"

        Raises:
            KeyError: if ``split`` is not one of the registered splits
                (raised while loading the CSV).
        """
        self.split = split
        module_dir = os.path.dirname(__file__)
        self.data_dirs = {
            "train": os.path.join(module_dir, "train.csv"),
            "test": os.path.join(module_dir, "test.csv"),
        }
        self.data = []

        self.load_data()

    def load_data(self):
        """Append every non-empty CSV row of the selected split to ``self.data``.

        Each entry is a dict with keys ``"text"``, ``"labels"`` and ``"id"``
        (the id is always ``None``). The first line of the file is treated
        as a header and skipped.
        """
        # newline="" is required by the csv module so that quoted fields
        # containing line breaks are parsed as a single field; the explicit
        # encoding keeps decoding of review text independent of the
        # platform's locale.
        with open(self.data_dirs[self.split], newline="", encoding="utf-8") as _file:
            reader = csv.reader(_file)
            next(reader, None)  # skip header
            # csv.reader yields [] for blank lines; skip them instead of
            # failing on row[0].
            self.data.extend(
                {"text": row[0], "labels": row[1], "id": None} for row in reader if row)

    def __getitem__(self, idx):
        """Return the item at ``idx`` as ``TextFeatures``."""
        item = self.data[idx]
        return TextFeatures(text=item["text"], labels=item["labels"], id=item["id"])

    def __len__(self):
        """Return the number of loaded reviews."""
        return len(self.data)
Loading

0 comments on commit 61c41b3

Please sign in to comment.