Squashed 'libs/blocks/' changes from 72bb20b..38535d8
38535d8 Merge remote-tracking branch 'blocks-origin/master' into new_blocks
ed481ef Merge pull request #953 from dwf/shut_up_sqlite
417dbdf Skip test_save_the_best with sqlite.
f5ee622 Merge pull request #946 from vdumoulin/abstract_conv
648646d Add test for ConvolutionalTransposeActivation
87d143f Reorder constructor args to help remove duplicate code
843d3ff Replace deprecated `image_shape` kwarg with `input_shape`
65e9435 Clarify parameter description
6712e44 Make ConvolutionalTranspose inherit from Convolutional
ff08eb8 Reduce repeated code in constructors.
abf41f8 Make imshp not depend on input_
7c895aa Construct grad op using constant values rather than depend on a shared variable
2a68d89 Fix superclass call
836267d Revert to not forcing compilation mode
c237b51 Comment on the use of FAST_RUN mode in blocks.bricks.conv tests
fa4f314 Fix conv tests failing to compile Theano functions
c985b30 Fix import error
ee5efc6 Fix oversights
071ad2b Add ConvolutionTranspose and ConvolutionTransposeActivation
9255647 Use Theano's new abstract_conv interface
348de31 Adapt Pooling to Theano's downsample -> pool renaming
98797a1 Merge pull request #941 from dwf/batch_norm_bricks
90cc8bb Fix StepRules to honour broadcastable.
d9171ff Move __init__ logic to _allocate in SBN.
a88b193 batch_normalization_updates -> get_batch_normalization_updates
725ec01 Idiomatize test.
4b39c93 Remove from 'with' for clarity.
e714903 Make duplicate updates docs more explicit.
333b49c Rename W, b -> scale, shift.
09da496 Add batch_normalization_updates.
d81c03c Annotate population parameters with brick.
474d3f1 Fix subtle bug with mutable metadata.
b03fffc Fix typo in epsilon assignment.
6895b87 Remove needless properties.
f41c286 Improve _add_batch_axis.
cf4f4db Ensure correct nesting of context managers.
9805b73 Improve docs. Add example for apply_...
7efa956 Correct the assertion.
5ba9b38 Add comment and assertion following review.
c70a26b Fix Scrutinizer error from 6ba3364.
2ea41a8 Improve robustness of context manager logic.
e22b151 Add an assertion following review.
a26e2c6 Simplify generator expression after review.
d35c22b Amend error message following review.
2e7cd0e Rename save_memory -> conserve_memory.
3dc9135 More tests, move graph tests to tests/graph.
ad3a84d Fix doctest.
73a6902 Comments explaining tests.
93e55f6 Big refactor to enable context manager operation.
59fd7cd Add brick filter and smoke test for bug fixed.
b358849 Remove leading batch axis from BN shared variables.
02982e9 Correctly crawl the graph backward.
f26c1e5 Expose save_memory in BatchNormalizedMLP.
7ede89b Make batch_normalize return the pop->minibatch map.
9ff63b0 Add tests.
6b52b35 Refactor batchnorm + graph module.
9b94116 Reorganize bricks subpackage.
0f6623f Initial batch-normalization implementation.
2046525 Merge pull request #944 from mila-udem/bound_application_error_msg
07d43ae Correct error message in filter.py.
07cec2c Merge pull request #774 from lamblin/update_doc
25c5141 Merge pull request #940 from rizar/without_namespace_packages
c0640b0 Disable namespace packages suppport
83c5d2a Merge pull request #838 from matthiasreisser/docstring_additions
260296d Merge pull request #929 from dwf/conv_doc_fixes
acf9c80 Merge pull request #930 from r2007/patch-1
353d525 Update bricks_overview.rst
506dfe1 Merge pull request #928 from dwf/shared_floatx_kwargs
b7f5df3 Pass along kwargs in shared_like.
fce7f6b Merge pull request #925 from dwf/get_output_shape_update
ff70906 More robust get_output_shape call.
928dcbb Better document get_output_shape.
c6cd413 Update ConvolutionalSequence docs.
5928f34 Pass shared_floatx kwargs to theano.shared.
3eddcf8 Merge pull request #921 from dwf/die_conv_layer_die
65e9952 Replace deprecated getOutputShape interface.
589543e Remove mention of `ConvolutionalLayer` from a docstring.
3cd7bfd Get rid of ConvolutionalLayer. Fixes #912.
df5c55c Merge pull request #924 from dwf/fix_doctest_debugprint
eedab38 Fix debugprint output for Theano/Theano#1953.
6827a9e Merge pull request #891 from dmitriy-serdyuk/release-doc
1d63fa8 Merge pull request #899 from dwf/conv_improvements
769eb70 Test for overzealous support code.
6177388 Making unpickling robust to MaxPooling change.
24a489a Merge pull request #909 from akhti/doc_fix
7734be3 Increase font size in the generator scheme
04783b7 Merge pull request #886 from sotelo/return_states_seq_gen
f7866d8 Merge pull request #892 from akhti/fix_bidir
05fea66 Merge pull request #907 from dmitriy-serdyuk/fix-log-doc
7576bcc Add API sections for two log backends
b87ad2e Make ignore_border=True the default.
c374557 Test Pooling Bricks work in ConvolutionalSequence.
27d71a5 Tests for AveragePooling, new MaxPooling flags.
f1e5908 AveragePooling Brick.
ac9f095 ConvolutionalSequence: use num_output_channels.
dbb407c Refactor MaxPooling.
414519d Merge pull request #903 from dwf/fix_travis_again
e185f3d Hardcode a prefix for Travis Miniconda install.
7e10ed5 Merge pull request #897 from dwf/custom_scaling_dropout
c4426c3 Notes on dropout usage.
07c98e2 Add dropout tests, including custom_divisor.
d7d8b62 apply_dropout: support a custom divisor.
9ffc3e1 Improve apply_dropout documentation.
23d94b2 Fix label in sequence generator scheme
6750051 Merge pull request #896 from rizar/add_original_svg
d7eb341 Update after review.
8950f73 Backport fixes done to Fuel's install doc
68842e7 Update developer doc to mention Blocks and Fuel
ea34217 Add the original for SequenceGenerator picture
455d65f Improve docs for making new release
bc4f62c Fix too long line error
46652f7 Add test for a stack of bidirectional layers
98aed8e Add instructions to make a new release
3ec837a Merge pull request #887 from rizar/fix_confpy_and_bump_version
4f74b4c Merge pull request #888 from akhti/fix_bidir
69bc613 Add get_dim to Bidirectional
0a4e3b3 Fixes conf.py and bumps version to 0.1.1
afaa45f Added the final values of states as auxiliary variables in a sequence generator so they can be reused.
12e50d9 Merge pull request #772 from adbrebs/doc_brick
92654e1 Merge pull request #881 from mila-udem/correct_main_loop_error_message
5dc2bdd brick tutorial improvements
168c7a8 Merge pull request #878 from rizar/release-0.1
f275332 Merge pull request #879 from sotelo/generator_cost_documentation
e43cfbe Correct main loop message for the interrupt
6c99076 Added the missing links.
c11698c Merge pull request #880 from sotelo/missing_apply_decorator_documentation
679b01c Improved the error message when an application decorator is missing.
efe3585 Solved the flake8 mistake.
6ebcb37 Added documentation about the emitter cost in sequence generators.
a06878f Bump version number
0f889aa Merge pull request #875 from dwf/use_bias_convolutional_activation_layer
0ca4e86 Merge pull request #873 from dwf/border_mode_conv_sequence
d1af6c9 ConvolutionalSequence: propagate use_bias.
7bc11d4 Convolutional{Activation,Layer}: respect use_bias.
457049d Convolutional{Activation,Layer}: Refactor alloc.
6dec566 ConvolutionalSequence: Don't auto push border_mode.
b3754dc Merge pull request #849 from mila-udem/redefine_modelr
837da08 Merge pull request #868 from lukemetz/lm/set_parameter_values
8a06204 warn if setting wrong shape parameters
c843fa8 Merge pull request #867 from dwf/conv2d_impl
620bc7b Additional developer documentation.
8a8e8c1 Fix Scrutinizer complaint about whitespace.
c2ebc25 Make Convolutional's conv implementation hackable.
c427fa7 Merge pull request #864 from mila-udem/rtd2
377688c Add mocking back in
2fc06f3 Merge pull request #861 from mila-udem/rtd
c5b9f1c Changes
743cbf3 Add a test and fix imports
fe9daeb Back to items()
bb81302 Call __init__ and add iteritems
a9751cb Merge pull request #852 from rizar/testing_utils
5772cc3 Fix formatting
8fd7da4 Improve documentation
0cc3031 Fix imports in doctests
3100e11 Move testing utilities to make them importable from outside
5899425 Merge pull request #844 from mila-udem/mention_extras_and_examples
b46dbe0 Refactor model and write a bit of documentation
f6a99bc Mention examples in the documentation as well.
f38881a Add forgotten period<F2>
3c1fdb5 Also refer to Fuel
96d63bf Better looking reference names
bb3f6c8 Mention blocks-extras and blocks-examples in README.md
8bf07e7 typos
4b83783 Merge pull request #839 from mila-udem/deps
1436c25 Add nose
59a9553 Update deps
f295d76 Added line break
cba09d2 Changed docstring to plural
bba2e2d added docstring to apply method of LSTM
d576831 Small fixes in create your own brick tutorial
bdff06b Merge pull request #1 from dmitriy-serdyuk/doc_brick
5da4696 Separate paragraph
536bf18 Make small fixes in create your own brick
e377e43 Merge pull request #834 from yingzha/dev
bd12f44 Rephrase docs
f6e9896 Merge pull request #777 from galv/master
03a491b Add new line to comment block.
ecd121d Pass input from ComputationGraph.get_theano_function() to theano.function()
53979de Example of lazy brick with get_dim
5628491 More explanations in the tutorial to create a custom brick
627da45 Refactoring. Brick example with children.
ca1c7cf Very first draft of the tutorial to create a brick.
7e2535d Merge pull request #835 from johnarevalo/patch-2
e844b92 Allow uint type for lookup indexing
cebec4e Replace uses of named_copy with var.copy(name=...)
7ff0f6b Merge pull request #827 from vzhong/conv-1d
bc140ba fix for case in which no image size is specified for Convolution #825
9e4d0e4 Merge pull request #826 from lukemetz/lm/algorithm_roles
a976b9f add roles and names to algorithm shared variables
98ed3d1 Merge pull request #823 from rizar/sgd_profile
d335fd0 use parens
1f16ba5 add test
a4b50e0 add theano_func_kwargs to algorithm init and use respectively
3403846 add kwargs to base class initialize func
53f292e remove unnecessary vars
3b0bbd0 add profile via kwargs to gradient descent initialize
0f6b4e0 Merge pull request #819 from rizar/fix_iterate_false
67f7388 Remove old code
2a8dff3 Arguments for SimpleRecurrent should be required
69b8ce2 Merge pull request #818 from ASalvail/master
436134a Reformated modified doc for docstrings requirements.
0e15726 Reformated modified doc for docstrings requirements.
e5e14a4 Elaborate LSTM inputs argument to apply method
da5d320 Merge pull request #815 from yingzha/dev
f88a638 Merge pull request #2 from dwf/yingzha_ccw
79243e5 Simplify documentation.
dce9c48 Merge pull request #813 from dwf/parallel_routing
c7d3540 Use OrderedDict per Dima's review.
b17fc13 Refactor into utils func, per Dima's request.
23fd90a Merge pull request #1 from dwf/yingzha_ccw
d3c5caa Add test for argument Parallel.apply argument validation.
16c50ab Fix Parallel.apply argument routing logic.
e26d04d Fix for str config with default of None.
3144fc9 remove dir argument in test_config
0fe9a92 fixed flake8/pep errors
29099d3 Override base path for temporary file creation
f2c9f3e Merge pull request #798 from ASalvail/master
760b144 Merge pull request #808 from mila-udem/selector_docs
d7bf7f7 Remove unnecessary import in doctest.
45c400a Eschew needless initialization.
4f3a075 Reword per Dima's review.
f95f919 Improve Selector.get_parameter documentation.
325a431 Merge pull request #805 from mila-udem/cost_cost_matrix_args_kwargs
83151ca Make Cost and CostMatrix more general.
9c2a1a1 Merge pull request #803 from Tejas-Khot/Tejas-Khot-patch-1
5c1062b made line length less than 75 characters
d299457 corrected trailing whitespace problem
968edbc added docstring for tied_biases
0a8cc3d Corrected docstring of Softplus.
e48bbb9 Add activation tests (Softplus, Softmax, Logistic)
9ad61bc Softplus docs and reference.
fab1fbe Added a Softplus brick.
c72a833 Merge pull request #759 from rizar/good_softmax_for_all
4da97cd Merge pull request #790 from mila-udem/linear_docs
c689353 Make Linear docstring clearer.

git-subtree-dir: libs/blocks
git-subtree-split: 38535d89ec166d8b4bef912d4745a51231cdc6da
rizar committed Jan 27, 2016 (parent 9ce2ce1, commit a629e64)
Showing 60 changed files with 4,703 additions and 1,407 deletions.
4 changes: 2 additions & 2 deletions .travis.yml
@@ -25,13 +25,13 @@ before_install:
- # Setup Python environment with BLAS libraries
- wget -q http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh
- chmod +x miniconda.sh
- ./miniconda.sh -b
- ./miniconda.sh -b -p $HOME/miniconda
- export PATH=$HOME/miniconda/bin:$PATH
- conda update -q --yes conda
- export FUEL_DATA_PATH=$TRAVIS_BUILD_DIR/data
install:
# Install all Python dependencies
- conda install -q --yes python=$TRAVIS_PYTHON_VERSION mkl --file req-travis-conda.txt
- conda install -q --yes python=$TRAVIS_PYTHON_VERSION --file req-travis-conda.txt
- pip install -q -r req-travis-pip.txt
script:
- pip install -e . -r requirements.txt # Tests setup.py
8 changes: 8 additions & 0 deletions README.rst
@@ -33,6 +33,11 @@ In the future we also hope to support:

* Dimension, type and axes-checking

See Also:
* `Fuel`_, the data processing engine developed primarily for Blocks.
* `Blocks-examples`_ for maintained examples of scripts using Blocks.
* `Blocks-extras`_ for semi-maintained additional Blocks components.

Citing Blocks
If you use Blocks or Fuel in your work, we'd really appreciate it if you could cite the following paper:

@@ -47,3 +52,6 @@ Contributing
.. _documentation: http://blocks.readthedocs.org
.. _developer guidelines: http://blocks.readthedocs.org/en/latest/development/index.html
.. _Blocks and Fuel\: Frameworks for deep learning: http://arxiv.org/abs/1506.00619
.. _Blocks-examples: https://github.com/mila-udem/blocks-examples
.. _Blocks-extras: https://github.com/mila-udem/blocks-extras
.. _Fuel: https://github.com/mila-udem/fuel
5 changes: 1 addition & 4 deletions blocks/__init__.py
@@ -1,5 +1,2 @@
"""The blocks library for parametrized Theano ops."""
# Scary warning: Adding code to this file can break namespace packages
# See https://pythonhosted.org/setuptools/setuptools.html#namespace-packages
__import__("pkg_resources").declare_namespace(__name__)
__version__ = '0.0.1'
__version__ = '0.1.1'
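As a quick, hypothetical sanity check (not part of the commit): with the namespace-package declaration removed, blocks imports as an ordinary package and exposes the bumped version string.

# Hypothetical check, assuming this revision of blocks is installed;
# not part of the commit itself.
import blocks

assert blocks.__version__ == '0.1.1'
assert blocks.__doc__.startswith("The blocks library")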
76 changes: 51 additions & 25 deletions blocks/algorithms/__init__.py
@@ -12,8 +12,10 @@
from theano import tensor

from blocks.graph import ComputationGraph
from blocks.utils import dict_subset, named_copy, pack, shared_floatx
from blocks.roles import add_role, ALGORITHM_HYPERPARAMETER, ALGORITHM_BUFFER
from blocks.theano_expressions import l2_norm
from blocks.utils import (dict_subset, pack, shared_floatx,
shared_floatx_zeros_matching)

logger = logging.getLogger(__name__)

@@ -30,7 +32,7 @@ class TrainingAlgorithm(object):
"""
@abstractmethod
def initialize(self):
def initialize(self, **kwargs):
"""Initialize the training algorithm."""
pass

@@ -191,6 +193,10 @@ class GradientDescent(DifferentiableCostMinimizer):
be backpropagated. Only makes sense when `gradients` is `None`.
on_unused_sources : str, one of 'raise' (default), 'ignore', 'warn'
Controls behavior when not all sources are used.
theano_func_kwargs : dict, optional
A passthrough to `theano.function` for additional arguments.
Useful for passing `profile` or `mode` arguments to the theano
function that will be compiled for the algorithm.
Attributes
----------
@@ -201,9 +207,8 @@ class GradientDescent(DifferentiableCostMinimizer):
"""
def __init__(self, step_rule=None, gradients=None, known_grads=None,
consider_constant=None,
on_unused_sources='raise',
**kwargs):
consider_constant=None, on_unused_sources='raise',
theano_func_kwargs=None, **kwargs):
if gradients:
kwargs.setdefault("parameters", gradients.keys())
super(GradientDescent, self).__init__(**kwargs)
@@ -226,13 +231,15 @@ def __init__(self, step_rule=None, gradients=None, known_grads=None,
"gradients are passed in")
self.step_rule = step_rule if step_rule else Scale()

self.total_gradient_norm = named_copy(l2_norm(self.gradients.values()),
"total_gradient_norm")
self.total_gradient_norm = l2_norm(
self.gradients.values()).copy(name="total_gradient_norm")
self.steps, self.step_rule_updates = (
self.step_rule.compute_steps(self.gradients))
self.total_step_norm = named_copy(l2_norm(self.steps.values()),
"total_step_norm")
self.total_step_norm = l2_norm(
self.steps.values()).copy(name="total_step_norm")
self.on_unused_sources = on_unused_sources
self.theano_func_kwargs = (theano_func_kwargs if theano_func_kwargs
is not None else dict())

def initialize(self):
logger.info("Initializing the training algorithm")
@@ -243,7 +250,8 @@ def initialize(self):
for parameter in self.parameters:
all_updates.append((parameter, parameter - self.steps[parameter]))
all_updates += self.step_rule_updates
self._function = theano.function(self.inputs, [], updates=all_updates)
self._function = theano.function(
self.inputs, [], updates=all_updates, **self.theano_func_kwargs)
logger.info("The training algorithm is initialized")

def _validate_source_names(self, batch):
@@ -386,7 +394,8 @@ class Scale(StepRule):
"""
def __init__(self, learning_rate=1.0):
self.learning_rate = shared_floatx(learning_rate)
self.learning_rate = shared_floatx(learning_rate, "learning_rate")
add_role(self.learning_rate, ALGORITHM_HYPERPARAMETER)

def compute_step(self, parameter, previous_step):
return self.learning_rate * previous_step, []
@@ -408,10 +417,12 @@ class BasicMomentum(StepRule):
"""
def __init__(self, momentum=0.):
self.momentum = shared_floatx(momentum)
self.momentum = shared_floatx(momentum, "momentum")
add_role(self.momentum, ALGORITHM_HYPERPARAMETER)

def compute_step(self, parameter, previous_step):
velocity = shared_floatx(parameter.get_value() * 0.)
velocity = shared_floatx_zeros_matching(parameter, "velocity")
add_role(velocity, ALGORITHM_BUFFER)
step = self.momentum * velocity + previous_step
updates = [(velocity, step)]
return step, updates
@@ -471,12 +482,18 @@ class AdaDelta(StepRule):
def __init__(self, decay_rate=0.95, epsilon=1e-6):
if not 0.0 <= decay_rate <= 1.0:
raise ValueError("decay rate needs to be in [0, 1]")
self.decay_rate = shared_floatx(decay_rate)
self.epsilon = shared_floatx(epsilon)
self.decay_rate = shared_floatx(decay_rate, "decay_rate")
add_role(self.decay_rate, ALGORITHM_HYPERPARAMETER)
self.epsilon = shared_floatx(epsilon, "epsilon")
add_role(self.epsilon, ALGORITHM_HYPERPARAMETER)

def compute_step(self, parameter, previous_step):
mean_square_step_tm1 = shared_floatx(parameter.get_value() * 0.)
mean_square_delta_x_tm1 = shared_floatx(parameter.get_value() * 0.)
mean_square_step_tm1 = shared_floatx_zeros_matching(
parameter, "mean_square_step_tm1")
add_role(mean_square_step_tm1, ALGORITHM_BUFFER)
mean_square_delta_x_tm1 = shared_floatx_zeros_matching(
parameter, "mean_square_delta_x_tm1")
add_role(mean_square_delta_x_tm1, ALGORITHM_BUFFER)

mean_square_step_t = (
self.decay_rate * mean_square_step_tm1 +
@@ -529,14 +546,18 @@ def __init__(self, decay_rate=0.9, max_scaling=1e5):
raise ValueError("decay rate needs to be in [0, 1]")
if max_scaling <= 0:
raise ValueError("max. scaling needs to be greater than 0")
self.decay_rate = shared_floatx(decay_rate)
self.decay_rate = shared_floatx(decay_rate, "decay_rate")
add_role(self.decay_rate, ALGORITHM_HYPERPARAMETER)
self.epsilon = 1. / max_scaling

def compute_step(self, parameter, previous_step):
mean_square_step_tm1 = shared_floatx(parameter.get_value() * 0.)
mean_square_step_tm1 = shared_floatx_zeros_matching(
parameter, "mean_square_step_tm1")
add_role(mean_square_step_tm1, ALGORITHM_BUFFER)
mean_square_step_t = (
self.decay_rate * mean_square_step_tm1 +
(1 - self.decay_rate) * tensor.sqr(previous_step))
add_role(mean_square_step_t, ALGORITHM_BUFFER)
rms_step_t = tensor.maximum(
tensor.sqrt(mean_square_step_t), self.epsilon)
step = previous_step / rms_step_t
@@ -607,7 +628,8 @@ class StepClipping(StepRule):
"""
def __init__(self, threshold=None):
if threshold:
self.threshold = shared_floatx(threshold)
self.threshold = shared_floatx(threshold, "threshold")
add_role(self.threshold, ALGORITHM_HYPERPARAMETER)

def compute_steps(self, previous_steps):
if not hasattr(self, 'threshold'):
@@ -671,7 +693,8 @@ class VariableClipping(StepRule):
def __init__(self, threshold, axis=None):
axis = pack(axis) if axis is not None else ()
self.axis = set(axis)
self.threshold = shared_floatx(threshold)
self.threshold = shared_floatx(threshold, "threshold")
add_role(self.threshold, ALGORITHM_HYPERPARAMETER)
if len(axis) != len(self.axis):
raise ValueError("axis must be unique")

@@ -727,8 +750,8 @@ def compute_step(self, parameter, previous_step):
name = 'adagrad_sqs'
if parameter.name:
name += '_' + parameter.name
ssq = shared_floatx(parameter.get_value() * 0.,
name=name)
ssq = shared_floatx_zeros_matching(parameter, name=name)
add_role(ssq, ALGORITHM_BUFFER)

ssq_t = (tensor.sqr(previous_step) + ssq)
step = (self.learning_rate * previous_step /
@@ -773,9 +796,12 @@ def __init__(self, learning_rate=0.002,
self.decay_factor = decay_factor

def compute_step(self, parameter, previous_step):
mean = shared_floatx(parameter.get_value() * 0., 'mean')
variance = shared_floatx(parameter.get_value() * 0., 'variance')
mean = shared_floatx_zeros_matching(parameter, 'mean')
add_role(mean, ALGORITHM_BUFFER)
variance = shared_floatx_zeros_matching(parameter, 'variance')
add_role(variance, ALGORITHM_BUFFER)
time = shared_floatx(0., 'time')
add_role(time, ALGORITHM_BUFFER)

t1 = time + 1
learning_rate = (self.learning_rate *
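To make the algorithm changes above concrete, here is a hedged usage sketch (not part of the commit). It assumes this revision of blocks with a working Theano install and the usual training interface (process_batch); the toy one-parameter cost and all variable names are purely illustrative. It exercises the new theano_func_kwargs passthrough to theano.function and the named, role-tagged step-rule variables introduced in the hunks above.

# Hedged usage sketch for the changes above -- not part of this commit.
# Assumes this revision of blocks plus Theano; the toy cost is illustrative.
from theano import tensor

from blocks.algorithms import GradientDescent, Scale
from blocks.roles import ALGORITHM_HYPERPARAMETER
from blocks.utils import shared_floatx

# A toy scalar cost with a single trainable shared parameter.
w = shared_floatx(0.5, name='w')
x = tensor.scalar('x')
cost = (w * x - 1) ** 2

# theano_func_kwargs is forwarded verbatim to theano.function when
# initialize() compiles the training function (e.g. profile or mode).
algorithm = GradientDescent(cost=cost, parameters=[w],
                            step_rule=Scale(learning_rate=0.1),
                            theano_func_kwargs={'profile': False})
algorithm.initialize()
algorithm.process_batch({'x': 2.0})

# The step-rule hyperparameter is now a named shared variable carrying the
# ALGORITHM_HYPERPARAMETER role, so it can be located after construction.
lr = algorithm.step_rule.learning_rate
assert lr.name == 'learning_rate'
assert ALGORITHM_HYPERPARAMETER in getattr(lr.tag, 'roles', [])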
(Diffs for the remaining 56 changed files are not shown here.)
