diff --git a/.gitignore b/.gitignore index 482942b..31ea801 100644 --- a/.gitignore +++ b/.gitignore @@ -43,8 +43,11 @@ pip-delete-this-directory.txt htmlcov/ .tox/ .coverage +.coveragerc +build .coverage.* .cache +.settings nosetests.xml coverage.xml *,cover diff --git a/README.rst b/README.rst index 3c6f113..dee5b94 100644 --- a/README.rst +++ b/README.rst @@ -1,7 +1,7 @@ scikit-neuralnetwork ==================== -Deep neural network implementation without the learning cliff! This library implements multi-layer perceptrons as a wrapper for the powerful ``pylearn2`` library that's compatible with ``scikit-learn`` for a more user-friendly and Pythonic interface. Oh, and it runs on your GPU by default. +Deep neural network implementation without the learning cliff! This library implements multi-layer perceptrons as a wrapper for the powerful ``pylearn2`` library that's compatible with ``scikit-learn`` for a more user-friendly and Pythonic interface. **NOTE**: This project is possible thanks to the `nucl.ai Conference `_ on **July 20-22**. Join us in **Vienna**! @@ -19,7 +19,7 @@ Thanks to the underlying ``pylearn2`` implementation, this library supports the * Linear: ``Linear``, ``Gaussian``, ``Softmax``. * **Layer Types —** ``Convolution`` (greyscale and color, 2D), ``Dense`` (standard, 1D). * **Learning Rules —** ``sgd``, ``momentum``, ``nesterov``, ``adadelta``, ``rmsprop``. -* **Dataset Types —** ``numpy.ndarray``, coming soon ``scipy.sparse``. +* **Dataset Types —** ``numpy.ndarray``, ``scipy.sparse``, coming soon: iterators. If a feature you need is missing, consider opening a `GitHub Issue `_ with a detailed explanation about the use case and we'll see what we can do. 
diff --git a/docs/guide.rst b/docs/guide.rst index 40e0e84..ea57163 100644 --- a/docs/guide.rst +++ b/docs/guide.rst @@ -128,3 +128,20 @@ Here's how to setup such a pipeline with a multi-layer perceptron as a classifie pipeline.fit(X_train, y_train) You can thes use the pipeline as you would the neural network, or any other standard API from scikit-learn. + + +GPU Backend +----------- + +To set up the library to use your GPU or CPU explicitly in 32-bit or 64-bit mode, you can use the ``backend`` pseudo-module. It's a syntactic helper to set up ``THEANO_FLAGS`` in a Pythonic way, for example: + +.. code:: python + + # Use the GPU in 32-bit mode, falling back otherwise. + from sknn.backend import gpu32 + + # Use the CPU in 64-bit mode. + from sknn.backend import cpu64 + + +WARNING: This will only work if your program has not yet imported the ``theano`` module, due to the way the library is designed. If ``THEANO_FLAGS`` are set on the command-line, they are not overridden. diff --git a/examples/plot_mlp.py b/examples/plot_mlp.py index 4ddd3f1..4e9ee09 100644 --- a/examples/plot_mlp.py +++ b/examples/plot_mlp.py @@ -11,8 +11,8 @@ import logging import argparse import itertools -import numpy as np +import numpy from matplotlib import pyplot as plt from matplotlib.colors import ListedColormap @@ -24,8 +24,10 @@ import logging logging.basicConfig(format="%(message)s", level=logging.WARNING, stream=sys.stdout) +from sknn.backend import gpu32 from sknn import mlp + # All possible parameter options that can be plotted, separately or combined. PARAMETERS = { 'activation': ['Rectifier', 'Tanh', 'Sigmoid', 'Maxout'], @@ -74,7 +76,7 @@ seed = int(time.time()) X, y = make_classification(n_features=2, n_redundant=0, n_informative=2, random_state=0, n_clusters_per_class=1) -rng = np.random.RandomState(seed+1) +rng = numpy.random.RandomState(seed+1) X += 2 * rng.uniform(size=X.shape) linearly_separable = (X, y) @@ -94,8 +96,8 @@ # Prepare coordinates of 2D grid to be visualized. 
x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5 y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5 - xx, yy = np.meshgrid(np.arange(x_min, x_max, GRID_RESOLUTION), - np.arange(y_min, y_max, GRID_RESOLUTION)) + xx, yy = numpy.meshgrid(numpy.arange(x_min, x_max, GRID_RESOLUTION), + numpy.arange(y_min, y_max, GRID_RESOLUTION)) # Plot the dataset on its own first. cm = plt.cm.get_cmap("PRGn") @@ -118,7 +120,7 @@ # Plot the decision boundary. For that, we will assign a color to each # point in the mesh [x_min, m_max]x[y_min, y_max]. - Z = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1] + Z = clf.predict_proba(numpy.c_[xx.ravel(), yy.ravel()])[:, 1] # Put the result into a color plot Z = Z.reshape(xx.shape) diff --git a/sknn/__init__.py b/sknn/__init__.py index 0307f4e..a31ad2d 100644 --- a/sknn/__init__.py +++ b/sknn/__init__.py @@ -1,4 +1,55 @@ -from __future__ import (absolute_import, unicode_literals) +# -*- coding: utf-8 -*- +from __future__ import (absolute_import, unicode_literals, print_function) __author__ = 'ssamot, alexjc' __version__ = '0.1' + + +import os +import sys +import logging + + +class TheanoConfigurator(object): + + def __init__(self): + self.configured = False + self.log = logging.getLogger('sknn') + + def configure(self, flags): + if self.configured is True: + return + self.configured = True + + if 'theano' in sys.modules: + self.log.warning('Theano was already imported and cannot be reconfigured.') + return + + os.environ.setdefault('THEANO_FLAGS', flags+',print_active_device=False') + cuda = logging.getLogger('theano.sandbox.cuda') + cuda.setLevel(logging.CRITICAL) + import theano + cuda.setLevel(logging.WARNING) + + try: + import theano.sandbox.cuda as cd + self.log.info('Using device gpu%i: %s', cd.active_device_number(), cd.active_device_name()) + except AttributeError: + self.log.info('Using device cpu0, with %r.', theano.config.floatX) + + def __getattr__(self, name): + flags = '' + if name.endswith('32'): + flags = 
',floatX=float32' + if name.endswith('64'): + flags = ',floatX=float64' + + if name.startswith('cpu'): + return self.configure('device=cpu'+flags) + if name.startswith('gpu'): + return self.configure('device=gpu'+flags) + + return getattr(sys.modules['sknn'], name) + + +sys.modules['sknn.backend'] = TheanoConfigurator() diff --git a/sknn/dataset.py b/sknn/dataset.py index d4954cf..7e1894a 100644 --- a/sknn/dataset.py +++ b/sknn/dataset.py @@ -7,9 +7,7 @@ from pylearn2.utils.iteration import (FiniteDatasetIterator, resolve_iterator_class) import functools - import theano -floatX = theano.config.floatX class SparseDesignMatrix(Dataset): @@ -83,7 +81,7 @@ def iterator(self, mode=None, batch_size=None, num_batches=None, sub_spaces = space.components sub_sources = source - conv_fn = lambda x: x.todense().astype(floatX) + conv_fn = lambda x: x.todense().astype(theano.config.floatX) convert = [] for sp, src in safe_zip(sub_spaces, sub_sources): convert.append(conv_fn if src in ('features', 'targets') else None) diff --git a/sknn/mlp.py b/sknn/mlp.py index 174a692..e87648a 100644 --- a/sknn/mlp.py +++ b/sknn/mlp.py @@ -11,17 +11,8 @@ log = logging.getLogger('sknn') -# By default, we force Theano to use a GPU and fallback to CPU, using 32-bits. -# This must be done in the code before Theano is imported for the first time. 
class TestBackendPseudoModule(unittest.TestCase):
    """Checks that importing names from ``sknn.backend`` sets ``THEANO_FLAGS``."""

    def setUp(self):
        # Clear any pre-set flags so the backend can install its own, and
        # register a cleanup that puts the previous value back so the change
        # does not leak into tests that run afterwards.
        previous = os.environ.pop('THEANO_FLAGS', None)
        self.addCleanup(self._restore_flags, previous)

        import theano

        # Hide every loaded theano module so the backend sees Theano as
        # "not yet imported"; the originals are restored in tearDown.
        self.removed = {}
        for name in list(sys.modules.keys()):
            if name.startswith('theano'):
                self.removed[name] = sys.modules.pop(name)
        sys.modules['sknn.backend'].configured = False

        # Capture what is logged on the 'sknn' logger for inspection.
        self.buf = io.StringIO()
        self.hnd = logging.StreamHandler(self.buf)
        logging.getLogger('sknn').addHandler(self.hnd)
        logging.getLogger().setLevel(logging.WARNING)

    def tearDown(self):
        for name, module in self.removed.items():
            sys.modules[name] = module
        logging.getLogger('sknn').removeHandler(self.hnd)

    @staticmethod
    def _restore_flags(previous):
        """Reset ``THEANO_FLAGS`` to the value it had before the test."""
        if previous is None:
            os.environ.pop('THEANO_FLAGS', None)
        else:
            os.environ['THEANO_FLAGS'] = previous

    def test_TheanoWarning(self):
        # With Theano back in sys.modules the backend can only warn.
        import theano
        from sknn.backend import cpu
        assert_equal('Theano was already imported and cannot be reconfigured.\n',
                     self.buf.getvalue())

    def _check(self, flags):
        """Assert that each entry of `flags` occurs in ``THEANO_FLAGS``."""
        assert_in('THEANO_FLAGS', os.environ)
        variable = os.environ['THEANO_FLAGS']
        for f in flags:
            assert_in(f, variable)

    def test_FlagsGPU32(self):
        from sknn.backend import gpu32
        self._check(['floatX=float32', 'device=gpu'])

    def test_FlagsCPU32(self):
        from sknn.backend import cpu32
        self._check(['floatX=float32', 'device=cpu'])

    def test_FlagsGPU64(self):
        from sknn.backend import gpu64
        self._check(['floatX=float64', 'device=gpu'])

    def test_FlagsCPU64(self):
        from sknn.backend import cpu64
        self._check(['floatX=float64', 'device=cpu'])