diff --git a/.gitignore b/.gitignore
index 482942b..31ea801 100644
--- a/.gitignore
+++ b/.gitignore
@@ -43,8 +43,11 @@ pip-delete-this-directory.txt
 htmlcov/
 .tox/
 .coverage
+.coveragerc
+build
 .coverage.*
 .cache
+.settings
 nosetests.xml
 coverage.xml
 *,cover
diff --git a/README.rst b/README.rst
index 3c6f113..dee5b94 100644
--- a/README.rst
+++ b/README.rst
@@ -1,7 +1,7 @@
 scikit-neuralnetwork
 ====================
 
-Deep neural network implementation without the learning cliff!  This library implements multi-layer perceptrons as a wrapper for the powerful ``pylearn2`` library that's compatible with ``scikit-learn`` for a more user-friendly and Pythonic interface. Oh, and it runs on your GPU by default.
+Deep neural network implementation without the learning cliff!  This library implements multi-layer perceptrons as a wrapper for the powerful ``pylearn2`` library that's compatible with ``scikit-learn`` for a more user-friendly and Pythonic interface.
 
 **NOTE**: This project is possible thanks to the `nucl.ai Conference <http://nucl.ai/>`_ on **July 20-22**. Join us in **Vienna**!
 
@@ -19,7 +19,7 @@ Thanks to the underlying ``pylearn2`` implementation, this library supports the
     * Linear: ``Linear``, ``Gaussian``, ``Softmax``.
 * **Layer Types —** ``Convolution`` (greyscale and color, 2D), ``Dense`` (standard, 1D).
 * **Learning Rules —** ``sgd``, ``momentum``, ``nesterov``, ``adadelta``, ``rmsprop``.
-* **Dataset Types —** ``numpy.ndarray``, coming soon ``scipy.sparse``.
+* **Dataset Types —** ``numpy.ndarray``, ``scipy.sparse``, coming soon: iterators.
 
 If a feature you need is missing, consider opening a `GitHub Issue <https://github.com/aigamedev/scikit-neuralnetwork/issues>`_ with a detailed explanation about the use case and we'll see what we can do.
 
diff --git a/docs/guide.rst b/docs/guide.rst
index 40e0e84..ea57163 100644
--- a/docs/guide.rst
+++ b/docs/guide.rst
@@ -128,3 +128,20 @@ Here's how to setup such a pipeline with a multi-layer perceptron as a classifie
     pipeline.fit(X_train, y_train)
 
 You can thes use the pipeline as you would the neural network, or any other standard API from scikit-learn.
+
+
+GPU Backend
+-----------
+
+To set up the library to use your GPU or CPU explicitly in 32-bit or 64-bit mode, you can use the ``backend`` pseudo-module.  It's a syntactic helper to set up ``THEANO_FLAGS`` in a Pythonic way, for example:
+
+.. code:: python
+
+    # Use the GPU in 32-bit mode, falling back otherwise.
+    from sknn.backend import gpu32
+    
+    # Use the CPU in 64-bit mode.
+    from sknn.backend import cpu64
+    
+
+WARNING: This will only work if your program has not yet imported the ``theano`` module, due to the way the library is designed.  If ``THEANO_FLAGS`` is already set in the environment (for example on the command line), it is not overridden.
diff --git a/examples/plot_mlp.py b/examples/plot_mlp.py
index 4ddd3f1..4e9ee09 100644
--- a/examples/plot_mlp.py
+++ b/examples/plot_mlp.py
@@ -11,8 +11,8 @@
 import logging
 import argparse
 import itertools
-import numpy as np
 
+import numpy
 from matplotlib import pyplot as plt
 from matplotlib.colors import ListedColormap
 
@@ -24,8 +24,10 @@
 import logging
 logging.basicConfig(format="%(message)s", level=logging.WARNING, stream=sys.stdout)
 
+from sknn.backend import gpu32
 from sknn import mlp
 
+
 # All possible parameter options that can be plotted, separately or combined.
 PARAMETERS = {
     'activation': ['Rectifier', 'Tanh', 'Sigmoid', 'Maxout'],
@@ -74,7 +76,7 @@
 seed = int(time.time())
 X, y = make_classification(n_features=2, n_redundant=0, n_informative=2,
                            random_state=0, n_clusters_per_class=1)
-rng = np.random.RandomState(seed+1)
+rng = numpy.random.RandomState(seed+1)
 X += 2 * rng.uniform(size=X.shape)
 linearly_separable = (X, y)
 
@@ -94,8 +96,8 @@
     # Prepare coordinates of 2D grid to be visualized.
     x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
     y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
-    xx, yy = np.meshgrid(np.arange(x_min, x_max, GRID_RESOLUTION),
-                         np.arange(y_min, y_max, GRID_RESOLUTION))
+    xx, yy = numpy.meshgrid(numpy.arange(x_min, x_max, GRID_RESOLUTION),
+                            numpy.arange(y_min, y_max, GRID_RESOLUTION))
 
     # Plot the dataset on its own first.
     cm = plt.cm.get_cmap("PRGn")
@@ -118,7 +120,7 @@
         # Plot the decision boundary. For that, we will assign a color to each
         # point in the mesh [x_min, m_max]x[y_min, y_max].
 
-        Z = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]
+        Z = clf.predict_proba(numpy.c_[xx.ravel(), yy.ravel()])[:, 1]
 
         # Put the result into a color plot
         Z = Z.reshape(xx.shape)
diff --git a/sknn/__init__.py b/sknn/__init__.py
index 0307f4e..a31ad2d 100644
--- a/sknn/__init__.py
+++ b/sknn/__init__.py
@@ -1,4 +1,55 @@
-from __future__ import (absolute_import, unicode_literals)
+# -*- coding: utf-8 -*-
+from __future__ import (absolute_import, unicode_literals, print_function)
 
 __author__ = 'ssamot, alexjc'
 __version__ = '0.1'
+
+
+import os
+import sys
+import logging
+
+
+class TheanoConfigurator(object):  # Instance is registered in sys.modules as the 'sknn.backend' pseudo-module.
+
+    def __init__(self):
+        self.configured = False  # Flipped to True by the first configure() call.
+        self.log = logging.getLogger('sknn')
+
+    def configure(self, flags):  # flags: comma-separated THEANO_FLAGS string, e.g. 'device=gpu,floatX=float32'.
+        if self.configured is True:  # Only the first request is acted on; later ones are ignored.
+            return
+        self.configured = True
+        
+        if 'theano' in sys.modules:  # Too late: the flags must be in place before Theano's first import.
+            self.log.warning('Theano was already imported and cannot be reconfigured.')
+            return
+
+        os.environ.setdefault('THEANO_FLAGS', flags+',print_active_device=False')  # setdefault: a value already in the environment wins.
+        cuda = logging.getLogger('theano.sandbox.cuda')
+        cuda.setLevel(logging.CRITICAL)  # Silence this logger below CRITICAL while Theano is imported.
+        import theano
+        cuda.setLevel(logging.WARNING)  # Let warnings from this logger through again.
+
+        try:
+            import theano.sandbox.cuda as cd
+            self.log.info('Using device gpu%i: %s', cd.active_device_number(), cd.active_device_name())
+        except AttributeError:  # NOTE(review): presumably raised when no GPU device is active -- confirm.
+            self.log.info('Using device cpu0, with %r.', theano.config.floatX)
+
+    def __getattr__(self, name):  # Reached by e.g. `from sknn.backend import gpu32`; the name selects the flags.
+        flags = ''
+        if name.endswith('32'):  # Optional '32' / '64' suffix picks floatX; no suffix adds no floatX flag.
+            flags = ',floatX=float32'
+        if name.endswith('64'):
+            flags = ',floatX=float64'
+
+        if name.startswith('cpu'):
+            return self.configure('device=cpu'+flags)  # configure() has no return value, so the imported name is None.
+        if name.startswith('gpu'):
+            return self.configure('device=gpu'+flags)
+
+        return getattr(sys.modules['sknn'], name)  # Any other name is looked up on the 'sknn' package itself.
+
+
+sys.modules['sknn.backend'] = TheanoConfigurator()
diff --git a/sknn/dataset.py b/sknn/dataset.py
index d4954cf..7e1894a 100644
--- a/sknn/dataset.py
+++ b/sknn/dataset.py
@@ -7,9 +7,7 @@
 from pylearn2.utils.iteration import (FiniteDatasetIterator, resolve_iterator_class)
 
 import functools
-
 import theano
-floatX = theano.config.floatX
 
 
 class SparseDesignMatrix(Dataset):
@@ -83,7 +81,7 @@ def iterator(self, mode=None, batch_size=None, num_batches=None,
         sub_spaces = space.components
         sub_sources = source
 
-        conv_fn = lambda x: x.todense().astype(floatX)
+        conv_fn = lambda x: x.todense().astype(theano.config.floatX)
         convert = []
         for sp, src in safe_zip(sub_spaces, sub_sources):
             convert.append(conv_fn if src in ('features', 'targets') else None)
diff --git a/sknn/mlp.py b/sknn/mlp.py
index 174a692..e87648a 100644
--- a/sknn/mlp.py
+++ b/sknn/mlp.py
@@ -11,17 +11,8 @@
 log = logging.getLogger('sknn')
 
 
-# By default, we force Theano to use a GPU and fallback to CPU, using 32-bits.
-# This must be done in the code before Theano is imported for the first time.
-os.environ['THEANO_FLAGS'] = "device=gpu,floatX=float32"
-
-cuda = logging.getLogger('theano.sandbox.cuda')
-cuda.setLevel(logging.CRITICAL)
-import theano
-cuda.setLevel(logging.WARNING)
-
-
 import numpy
+import theano
 import sklearn.base
 import sklearn.pipeline
 import sklearn.preprocessing
diff --git a/sknn/tests/test_backend.py b/sknn/tests/test_backend.py
new file mode 100644
index 0000000..63cf846
--- /dev/null
+++ b/sknn/tests/test_backend.py
@@ -0,0 +1,63 @@
+import unittest
+from nose.tools import (assert_in, assert_equal)
+
+import io
+import os
+import sys
+import logging
+
+import sknn
+
+
+class TestBackendPseudoModule(unittest.TestCase):  # Exercises the 'sknn.backend' pseudo-module via import statements.
+
+    def setUp(self):  # Start each test with no THEANO_FLAGS variable and no 'theano*' entries in sys.modules.
+        if 'THEANO_FLAGS' in os.environ:
+            del os.environ['THEANO_FLAGS']
+        
+        import theano
+
+        self.removed = {}
+        for name in list(sys.modules.keys()):  # Copy the keys: sys.modules is mutated inside the loop.
+            if name.startswith('theano'):
+                self.removed[name] = sys.modules[name]  # Stash so tearDown can put the module back.
+                del sys.modules[name]
+        sys.modules['sknn.backend'].configured = False  # Allow configure() to run again in this test.
+
+        self.buf = io.StringIO()  # Captures what the 'sknn' logger emits.
+        self.hnd = logging.StreamHandler(self.buf)
+        logging.getLogger('sknn').addHandler(self.hnd)
+        logging.getLogger().setLevel(logging.WARNING)
+
+    def tearDown(self):  # Put back the stashed modules and detach the capture handler.
+        for name, module in self.removed.items():
+            sys.modules[name] = module
+        logging.getLogger('sknn').removeHandler(self.hnd)
+
+    def test_TheanoWarning(self):
+        import theano  # Puts 'theano' back into sys.modules so the warning branch is taken.
+        from sknn.backend import cpu  # Attribute lookup on the pseudo-module triggers configure().
+        assert_equal('Theano was already imported and cannot be reconfigured.\n',
+                     self.buf.getvalue())
+
+    def _check(self, flags):  # Assert that every expected fragment appears in THEANO_FLAGS.
+        assert_in('THEANO_FLAGS', os.environ)
+        variable = os.environ['THEANO_FLAGS']
+        for f in flags:
+            assert_in(f, variable)
+
+    def test_FlagsGPU32(self):
+        from sknn.backend import gpu32
+        self._check(['floatX=float32','device=gpu'])
+
+    def test_FlagsCPU32(self):
+        from sknn.backend import cpu32
+        self._check(['floatX=float32','device=cpu'])
+
+    def test_FlagsGPU64(self):
+        from sknn.backend import gpu64
+        self._check(['floatX=float64','device=gpu'])
+
+    def test_FlagsCPU64(self):
+        from sknn.backend import cpu64
+        self._check(['floatX=float64','device=cpu'])