-
Notifications
You must be signed in to change notification settings - Fork 0
/
demo_pytorch.py
82 lines (61 loc) · 2.59 KB
/
demo_pytorch.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
##
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##
"""
Install: PyCylon (Follow: https://cylondata.org/docs/)
Run Program: python demo_pytorch.py
"""
import os
import numpy as np
import pandas as pd
from pycylon import CylonContext
from pycylon import Table
from pycylon.csv import csv_reader
# Bring up the Cylon context with the 'mpi' configuration and look up the
# rank of this process.
ctx: CylonContext = CylonContext(config='mpi')
rank = ctx.get_rank()

# Each process works on its own pair of CSV files under base_path; the number
# embedded in the file name is the rank plus one.
base_path = "/tmp"
user_devices_file = os.path.join(base_path, f'user_device_tm_{rank+1}.csv')
user_usage_file = os.path.join(base_path, f'user_usage_tm_{rank+1}.csv')

# Load both comma-delimited files as Cylon tables.
user_devices_data: Table = csv_reader.read(ctx, user_devices_file, ',')
user_usage_data: Table = csv_reader.read(ctx, user_usage_file, ',')

# Also convert each table to a pandas DataFrame.
user_devices_df: pd.DataFrame = user_devices_data.to_pandas()
user_usage_df: pd.DataFrame = user_usage_data.to_pandas()
# Report the row/column counts of both input tables.
print(f"User Devices Data Rows:{user_devices_data.rows}, Columns: {user_devices_data.columns}")
print(f"User Usage Data Rows:{user_usage_data.rows}, Columns: {user_usage_data.columns}")

# Banner announcing the pre-join preview (three lines, one print call).
print(
    "--------------------------------",
    "Before Join",
    "--------------------------------",
    sep="\n",
)

# Preview the same row/column window of each table, separated by a rule.
user_devices_data.show_by_range(1, 5, 0, 4)
print("-------------------------------------")
user_usage_data.show_by_range(1, 5, 0, 4)
# Distributed inner join (sort algorithm) of the devices table with the usage
# table, matching column 0 on the left against column 3 on the right.
new_tb: Table = user_devices_data.distributed_join(
    ctx,
    table=user_usage_data,
    join_type='inner',
    algorithm='sort',
    left_col=0,
    right_col=3,
)

# Preview the joined table between two rules.
print("----------------------", "New Table After Join (5 Records)", sep="\n")
new_tb.show_by_range(0, 5, 0, 8)
print("----------------------")
# Export the joined table to NumPy and carve out the model inputs/targets:
# columns 2-5 become data_features, column 6 (kept 2-D by the 6:7 slice)
# becomes data_learner.
data_ar: np.ndarray = new_tb.to_numpy()
data_features: np.ndarray = data_ar[:, 2:6]
data_learner: np.ndarray = data_ar[:, 6:7]

# The first 100 rows form the training split and the remainder the test
# split; every piece is cast to float32 as it is sliced.
x_train = np.asarray(data_features[:100], dtype=np.float32)
y_train = np.asarray(data_learner[:100], dtype=np.float32)
x_test = np.asarray(data_features[100:], dtype=np.float32)
y_test = np.asarray(data_learner[100:], dtype=np.float32)
import torch

# Rebind each split to a torch tensor built from its NumPy array, converting
# in the order x_train, y_train, x_test, y_test.
x_train, y_train, x_test, y_test = map(
    torch.from_numpy,
    (x_train, y_train, x_test, y_test),
)

# Finalize the Cylon context now that the script is done with it.
ctx.finalize()