diff --git a/docker/dockerfile b/docker/dockerfile deleted file mode 100644 index 9c28b0ae..00000000 --- a/docker/dockerfile +++ /dev/null @@ -1,49 +0,0 @@ -# RUN IN TERMINAL: -# docker build -t gators:latest -f docker/Dockerfile .\ - -# gators docker container (needs Zscaler disabled) -# If getting ERROR: THESE PACKAGES DO NOT MATCH THE HASHES FROM THE -# REQUIREMENTS FILE, just keep re-running -# cd gators -# docker build -t gators:latest -f docker/Dockerfile . - - -# Install default environment -FROM dockerhub.paypalcorp.com/simility/python:3.7 - -# Install Java -RUN apt update -y && apt-get install -y software-properties-common && \ - apt-add-repository 'deb http://security.debian.org/debian-security stretch/updates main' && apt update -y && \ - apt-get install -y openjdk-8-jdk-headless && \ - export JAVA_HOME && \ - apt-get clean - -# Delete cache (stops ERROR: THESE PACKAGES DO NOT MATCH THE HASHES FROM THE -# REQUIREMENTS FILE) -RUN rm ~/.cache/pip -rf - -# Install python libs -ENV LANG C.UTF-8 -RUN apt-get update && apt-get install -y python python-dev python-pip virtualenv libssl-dev libpq-dev git build-essential libfontconfig1 libfontconfig1-dev - -# Upgrade setuptools -RUN pip install -U pip --no-cache -RUN pip install setuptools>=41.0.0 --no-cache -RUN pip3.7 install numpy==1.19.5 --no-cache-dir -RUN pip3.7 install cython --no-cache-dir -# RUN pip3.7 install wheel --no-cache-dir -# Copy GATORS directory -RUN mkdir /gators -COPY ./ /gators - - -# Install gators -# RUN python3.7 /gators/setup_docker.py build_ext --inplace -RUN pip3.7 install /gators/. -# Set up working directory -WORKDIR / -RUN mkdir /workdir/ -RUN mkdir /workdir/examples -COPY ./examples /workdir/examples -RUN pip3.7 install jupyterlab --no-cache-dir -ENTRYPOINT jupyter lab --ip=0.0.0.0 --port=9090 --allow-root --no-browser --notebook-dir /workdir/ --NotebookApp.token='' \ No newline at end of file diff --git a/docker_ppnb/Dockerfile b/docker_ppnb/Dockerfile deleted file mode 100644 index 12be5b8c..00000000 --- a/docker_ppnb/Dockerfile +++ /dev/null @@ -1,31 +0,0 @@ -# RUN IN TERMINAL: -# docker build -t gators_pp:latest -f docker_ppnb/Dockerfile .\ -# gators docker image (for PP NBs) -# If getting ERROR: THESE PACKAGES DO NOT MATCH THE HASHES FROM THE -# REQUIREMENTS FILE, just keep re-running - -# Install default environment -FROM dockerhub.paypalcorp.com/core-data-platform/ppmagics-spark230:latest -ENV LANG C.UTF-8 -# RUN echo python --version - -# Delete cache -#stops ERROR: THESE PACKAGES DO NOT MATCH THE HASHES FROM THE REQUIREMENTS FILE) -RUN rm ~/.cache/pip -rf - -# Install python libs -# RUN apt-get update && apt-get install -y python python-dev python-pip virtualenv libssl-dev libpq-dev git build-essential libfontconfig1 libfontconfig1-dev - - -# Copy gators directory -RUN pip install -U pip --no-cache -RUN pip install setuptools>=41.0.0 --no-cache -RUN pip install numpy==1.19.5 --no-cache -RUN pip install cython --no-cache -RUN pip install wheel --no-cache - -COPY ./ /gators -# RUN chmod 777 /gators/ -RUN pip install /gators/. --no-deps -# RUN python /gators/setup_docker.py build_ext --inplace -# Install gators diff --git a/gators/clipping/tests/test_quantile_clipping_dd.py b/gators/clipping/tests/test_quantile_clipping_dd.py index 886db990..48bb92f6 100644 --- a/gators/clipping/tests/test_quantile_clipping_dd.py +++ b/gators/clipping/tests/test_quantile_clipping_dd.py @@ -1,156 +1,156 @@ -# License: Apache-2.0 -import dask.dataframe as dd -import numpy as np -import pandas as pd -import pytest -from pandas.testing import assert_frame_equal - -from gators.clipping.quantile_clipping import QuantileClipping - - -@pytest.fixture -def data(): - X = dd.from_pandas( - pd.DataFrame( - { - "A": [1.8, 2.2, 1.0, 0.4, 0.8], - "B": [0.4, 1.9, -0.2, 0.1, 0.1], - "C": [1.0, -1.0, -0.1, 1.5, 0.4], - } - ), - npartitions=1, - ) - obj = QuantileClipping( - columns=["A", "B", "C"], min_quantile=0.2, max_quantile=0.8 - ).fit(X) - X_expected = pd.DataFrame( - { - "A": { - 0: 1.8, - 1: 1.88, - 2: 1.0, - 3: 0.72, - 4: 0.8, - }, - "B": { - 0: 0.4, - 1: 0.7, - 2: 0.04, - 3: 0.1, - 4: 0.1, - }, - "C": { - 0: 1.0, - 1: -0.28, - 2: -0.1, - 3: 1.1, - 4: 0.4, - }, - } - ) - return obj, X, X_expected - - -@pytest.fixture -def data_not_inplace(): - X = dd.from_pandas( - pd.DataFrame( - { - "A": [1.8, 2.2, 1.0, 0.4, 0.8], - "B": [0.4, 1.9, -0.2, 0.1, 0.1], - "C": [1.0, -1.0, -0.1, 1.5, 0.4], - } - ), - npartitions=1, - ) - obj = QuantileClipping( - columns=["A", "B", "C"], min_quantile=0.2, max_quantile=0.8, inplace=False - ).fit(X) - X_expected = pd.DataFrame( - { - "A__quantile_clip": { - 0: 1.8, - 1: 1.816, - 2: 1.0, - 3: 0.784, - 4: 0.8, - }, - "B__quantile_clip": { - 0: 0.4, - 1: 0.7, - 2: 0.04, - 3: 0.1, - 4: 0.1, - }, - "C__quantile_clip": { - 0: 1.0, - 1: -0.136, - 2: -0.1, - 3: 1.02, - 4: 0.4, - }, - } - ) - return obj, X, pd.concat([X.compute(), X_expected], axis=1) - - -@pytest.fixture -def data_partial(): - X = dd.from_pandas( - pd.DataFrame( - { - "A": [1.8, 2.2, 1.0, 0.4, 0.8], - "B": [0.4, 0.7, 0.04, 0.1, 0.1], - "C": [1.0, -1.0, -0.1, 1.5, 0.4], - } - ), - npartitions=1, - ) - obj = QuantileClipping(min_quantile=0.2, max_quantile=0.8, columns=["A"]).fit(X) - X_expected = pd.DataFrame( - { - "A": [1.8, 1.88, 1.0, 0.72, 0.8], - "B": [0.4, 0.7, 0.04, 0.1, 0.1], - "C": [1.0, -1.0, -0.1, 1.5, 0.4], - } - ) - return obj, X, X_expected - - -def test_dd(data): - obj, X, X_expected = data - X_new = obj.transform(X).compute() - assert_frame_equal(X_new, X_expected) - - -def test_dd_np(data): - obj, X, X_expected = data - X_numpy_new = obj.transform_numpy(X.compute().to_numpy()) - X_new = pd.DataFrame(X_numpy_new) - assert np.allclose(X_new.to_numpy(), X_expected.to_numpy()) - - -def test_not_inplace_dd(data_not_inplace): - obj, X, X_expected = data_not_inplace - X_new = obj.transform(X).compute() - assert_frame_equal(X_new, X_expected) - - -def test_not_inplace_dd_np(data_not_inplace): - obj, X, X_expected = data_not_inplace - X_numpy_new = obj.transform_numpy(X.compute().to_numpy()) - X_new = pd.DataFrame(X_numpy_new) - assert np.allclose(X_new.to_numpy(), X_expected.to_numpy()) - - -def test_partial_dd(data_partial): - obj, X, X_expected = data_partial - X_new = obj.transform(X).compute() - assert_frame_equal(X_new, X_expected) - - -def test_partial_dd_np(data_partial): - obj, X, X_expected = data_partial - X_numpy_new = obj.transform_numpy(X.compute().to_numpy()) - X_new = pd.DataFrame(X_numpy_new) - assert np.allclose(X_new.to_numpy(), X_expected.to_numpy()) +# # License: Apache-2.0 +# import dask.dataframe as dd +# import numpy as np +# import pandas as pd +# import pytest +# from pandas.testing import assert_frame_equal + +# from gators.clipping.quantile_clipping import QuantileClipping + + +# @pytest.fixture +# def data(): +# X = dd.from_pandas( +# pd.DataFrame( +# { +# "A": [1.8, 2.2, 1.0, 0.4, 0.8], +# "B": [0.4, 1.9, -0.2, 0.1, 0.1], +# "C": [1.0, -1.0, -0.1, 1.5, 0.4], +# } +# ), +# npartitions=1, +# ) +# obj = QuantileClipping( +# columns=["A", "B", "C"], min_quantile=0.2, max_quantile=0.8 +# ).fit(X) +# X_expected = pd.DataFrame( +# { +# "A": { +# 0: 1.8, +# 1: 1.88, +# 2: 1.0, +# 3: 0.72, +# 4: 0.8, +# }, +# "B": { +# 0: 0.4, +# 1: 0.7, +# 2: 0.04, +# 3: 0.1, +# 4: 0.1, +# }, +# "C": { +# 0: 1.0, +# 1: -0.28, +# 2: -0.1, +# 3: 1.1, +# 4: 0.4, +# }, +# } +# ) +# return obj, X, X_expected + + +# @pytest.fixture +# def data_not_inplace(): +# X = dd.from_pandas( +# pd.DataFrame( +# { +# "A": [1.8, 2.2, 1.0, 0.4, 0.8], +# "B": [0.4, 1.9, -0.2, 0.1, 0.1], +# "C": [1.0, -1.0, -0.1, 1.5, 0.4], +# } +# ), +# npartitions=1, +# ) +# obj = QuantileClipping( +# columns=["A", "B", "C"], min_quantile=0.2, max_quantile=0.8, inplace=False +# ).fit(X) +# X_expected = pd.DataFrame( +# { +# "A__quantile_clip": { +# 0: 1.8, +# 1: 1.816, +# 2: 1.0, +# 3: 0.784, +# 4: 0.8, +# }, +# "B__quantile_clip": { +# 0: 0.4, +# 1: 0.7, +# 2: 0.04, +# 3: 0.1, +# 4: 0.1, +# }, +# "C__quantile_clip": { +# 0: 1.0, +# 1: -0.136, +# 2: -0.1, +# 3: 1.02, +# 4: 0.4, +# }, +# } +# ) +# return obj, X, pd.concat([X.compute(), X_expected], axis=1) + + +# @pytest.fixture +# def data_partial(): +# X = dd.from_pandas( +# pd.DataFrame( +# { +# "A": [1.8, 2.2, 1.0, 0.4, 0.8], +# "B": [0.4, 0.7, 0.04, 0.1, 0.1], +# "C": [1.0, -1.0, -0.1, 1.5, 0.4], +# } +# ), +# npartitions=1, +# ) +# obj = QuantileClipping(min_quantile=0.2, max_quantile=0.8, columns=["A"]).fit(X) +# X_expected = pd.DataFrame( +# { +# "A": [1.8, 1.88, 1.0, 0.72, 0.8], +# "B": [0.4, 0.7, 0.04, 0.1, 0.1], +# "C": [1.0, -1.0, -0.1, 1.5, 0.4], +# } +# ) +# return obj, X, X_expected + + +# def test_dd(data): +# obj, X, X_expected = data +# X_new = obj.transform(X).compute() +# assert_frame_equal(X_new, X_expected) + + +# def test_dd_np(data): +# obj, X, X_expected = data +# X_numpy_new = obj.transform_numpy(X.compute().to_numpy()) +# X_new = pd.DataFrame(X_numpy_new) +# assert np.allclose(X_new.to_numpy(), X_expected.to_numpy()) + + +# def test_not_inplace_dd(data_not_inplace): +# obj, X, X_expected = data_not_inplace +# X_new = obj.transform(X).compute() +# assert_frame_equal(X_new, X_expected) + + +# def test_not_inplace_dd_np(data_not_inplace): +# obj, X, X_expected = data_not_inplace +# X_numpy_new = obj.transform_numpy(X.compute().to_numpy()) +# X_new = pd.DataFrame(X_numpy_new) +# assert np.allclose(X_new.to_numpy(), X_expected.to_numpy()) + + +# def test_partial_dd(data_partial): +# obj, X, X_expected = data_partial +# X_new = obj.transform(X).compute() +# assert_frame_equal(X_new, X_expected) + + +# def test_partial_dd_np(data_partial): +# obj, X, X_expected = data_partial +# X_numpy_new = obj.transform_numpy(X.compute().to_numpy()) +# X_new = pd.DataFrame(X_numpy_new) +# assert np.allclose(X_new.to_numpy(), X_expected.to_numpy())