Commit deeb41b: fix rnnoutput, update readme

ekinakyurek committed Jan 10, 2019 (1 parent: b735316)

Showing 4 changed files with 33 additions and 126 deletions.
README.md (2 additions, 58 deletions)

@@ -67,65 +67,9 @@ lstm.gatesview

 1) [ResNet](./examples/resnet.jl)
 
-2) An example of a sequence-to-sequence model that learns to sort integers.
-```JULIA
-using KnetLayers
-
-struct S2S # model definition
-    encoder
-    decoder
-    output
-    loss
-end
-
-# initialize model
-model = S2S(LSTM(input=11,hidden=128,embed=9),
-            LSTM(input=11,hidden=128,embed=9),
-            Multiply(input=128,output=11),
-            CrossEntropyLoss())
-
-# helper functions for padding
-leftpad(p::Int,x::Array)=cat(p*ones(Int,size(x,1)),x;dims=2)
-rightpad(x::Array,p::Int)=cat(x,p*ones(Int,size(x,1));dims=2)
-
-# forward functions
-(m::S2S)(x) = m.output(m.decoder(leftpad(10,sort(x,dims=2)), m.encoder(x;hy=true).hidden).y)
-predict(m,x) = getindex.(argmax(Array(m(x)), dims=1)[1,:,:], 1)
-loss(m,x,ygold) = m.loss(m(x),ygold)
-
-# create sorting data; 10 is the start token and 11 is the stop token
-dataxy(x) = (x, rightpad(sort(x, dims=2), 11))
-B, maxL = 64, 15 # batch size and maximum sequence length for training
-data = [dataxy([rand(1:9) for j=1:B, k=1:rand(1:maxL)]) for i=1:10000]
-
-# train the model
-train!(model, data; loss=loss, optimizer=Adam())
-
-# julia> predict(model,[3 2 1 4 5 9 3 5 6 6 1 2 5;])
-# 1×14 Array{Int64,2}:
-#  1  2  2  2  3  4  4  5  5  5  6  7  9  11
-#
-# julia> sort([3 2 1 4 5 9 3 5 6 6 1 2 5;];dims=2)
-# 1×13 Array{Int64,2}:
-#  1  1  2  2  3  3  4  5  5  5  6  6  9
-```
+2) [Seq2Seq](./examples/s2smodel.jl)
 
-## Exported Layers
-```
-Core:
-    Multiply, Linear, Embed, Dense
-CNN:
-    Conv, DeConv, Pool, UnPool
-MLP
-RNN:
-    LSTM, GRU, SRNN
-Loss:
-    CrossEntropyLoss, BCELoss, LogisticLoss
-NonLinear:
-    Sigm, Tanh, ReLU, ELU,
-    LogSoftMax, LogSumExp, SoftMax,
-    Dropout
-```
+## [Exported Layers](https://ekinakyurek.github.io/KnetLayers.jl/latest/reference.html#Function-Index-1)
 
 ## TO-DO
 3) Examples
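A note on the sequence-to-sequence example removed above (now linked as [Seq2Seq](./examples/s2smodel.jl)): the decoder is teacher-forced — its input is the sorted target left-padded with the start token 10, while the gold output is the sorted target right-padded with the stop token 11. A tiny illustration of the two padding helpers, reusing their definitions from the removed code:

```Julia
leftpad(p::Int,x::Array)  = cat(p*ones(Int,size(x,1)), x; dims=2)
rightpad(x::Array,p::Int) = cat(x, p*ones(Int,size(x,1)); dims=2)

x = [3 1 2]                     # one unsorted sequence (a batch of one row)
leftpad(10, sort(x, dims=2))    # decoder input:  [10 1 2 3]
rightpad(sort(x, dims=2), 11)   # gold output:    [1 2 3 11]
```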
docs/src/index.md (23 additions, 62 deletions)

@@ -8,7 +8,18 @@ Welcome to KnetLayers.jl's documentation!

 KnetLayers provides configurable deep learning layers for Knet, fostering your model development. You are able to use Knet and AutoGrad functionality without adding them to the current workspace.
 
-## Example Layer Usages
+## What does it look like?
+```Julia
+model = Chain(
+          Dense(input=768, output=128, activation=Sigm()),
+          Dense(input=128, output=10, activation=nothing),
+          CrossEntropyLoss()
+        )
+
+loss(x, y) = model[end](model[1:end-1](x), y)
+```
+
+## Example Layers and Usage
 ```JULIA
 using KnetLayers
 
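To see the `Chain` snippet added above end to end, here is a minimal, hypothetical usage sketch — the 768/128/10 sizes and the `model[end]`/`model[1:end-1]` indexing come from the snippet itself, while the batch size, random inputs, and integer labels are made up for illustration:

```Julia
using KnetLayers

model = Chain(
          Dense(input=768, output=128, activation=Sigm()),
          Dense(input=128, output=10, activation=nothing),
          CrossEntropyLoss()
        )
loss(x, y) = model[end](model[1:end-1](x), y)

x = randn(Float32, 768, 32)  # a made-up batch of 32 inputs
y = rand(1:10, 32)           # made-up gold class labels
J = loss(x, y)               # cross-entropy loss over the batch
```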
@@ -19,7 +30,7 @@ mlp = MLP(100,50,20; activation=Sigm()) # input size=100, hidden=50, output=20
 prediction = mlp(randn(Float32,100,1))
 
 # Instantiate a convolutional layer with random parameters
-cnn = Conv(height=3, width=3, channels=3, filters=10, padding=1, stride=1) # A conv layer
+cnn = Conv(height=3, width=3, inout=3=>10, padding=1, stride=1) # A conv layer
 
 # Filter your input with the convolutional layer
 output = cnn(randn(Float32,224,224,3,1))
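The new `inout=3=>10` keyword added here replaces the separate `channels=3, filters=10` pair from the removed line. A short sketch of the updated call; the expected output shape follows from the usual convolution arithmetic (a sanity check on my part, not library output):

```Julia
using KnetLayers

# input=>output channel counts are now given as a Pair
cnn = Conv(height=3, width=3, inout=3=>10, padding=1, stride=1)

# A 3×3 kernel with padding=1, stride=1 preserves spatial size:
y = cnn(randn(Float32, 224, 224, 3, 1))
size(y)  # expected: (224, 224, 10, 1)
```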
@@ -53,71 +64,21 @@ rnnoutput = lstm([[1,2,3,4],[5,6]];sorted=true,hy=true,cy=true)
 lstm.gatesview
 ```
 
-## Example Model
+## Example Models
 
-An example of a sequence-to-sequence model that learns to sort integers.
-```JULIA
-using KnetLayers
-[... the same inline S2S example that was removed from README.md above ...]
-```
+1) [ResNet](./examples/resnet.jl)
+
+2) [Seq2Seq](./examples/s2smodel.jl)
 
-## Exported Layers
-```
-[... the same layer list that was removed from README.md above ...]
-```
+## [Exported Layers](https://ekinakyurek.github.io/KnetLayers.jl/latest/reference.html#Function-Index-1)
 
-## Function Documentation
+## TO-DO
+3) Examples
+4) Special layers such as Google's `inception`
+5) Known embeddings such as `GloVe`
+6) Pretrained Models
+
+## Function Documentation
 ```@contents
 Pages = [
    "reference.md",
src/KnetLayers.jl (6 additions, 4 deletions)

@@ -2,11 +2,13 @@ module KnetLayers

 using Knet
 import Knet: save, load
-export gpu,knetgc,KnetArray,relu,sigm,elu,invx,mat,
-Data,minibatch,train!,Train,param,param0,params,
-logp, logsumexp, nll, bce, logistic, accuracy,zeroone,dropout,
+export gpu, knetgc, KnetArray, relu, sigm, elu, invx, mat, bmm,
+       Data, minibatch,
+       param, param0, params,
+       logp, logsumexp, nll, bce, logistic, accuracy, zeroone, dropout,
        SGD, Sgd, Momentum, Nesterov, Adam, Adagrad, Adadelta, Rmsprop, update!, optimizers,
-       gaussian, xavier, bilinear, setseed, train!,
+       gaussian, xavier, bilinear, setseed,
+       train!, train, converge, converge!,
        hyperband, goldensection, cpucopy, gpucopy,
        value, grad, cat1d, Param, @diff, @zerograd
src/rnn.jl (2 additions, 2 deletions)

@@ -22,10 +22,10 @@ See `indices` and `PadRNNOutput` to get correct time outputs for a specific instance.
 `indices` gives the corresponding instance indices for your `RNNOutput.y`. You may call `yi = y[:,indices[i]]`.
 """
-struct RNNOutput{T,V}
+struct RNNOutput{T,V,Z}
     y::T
     hidden::V
-    memory::V
+    memory::Z
     indices::Union{Vector{Vector{Int}},Nothing}
 end
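The extra type parameter `Z` lets `hidden` and `memory` take different types — presumably needed because `memory` is `nothing` whenever the caller does not request the cell state (or for RNN types that have none), while `hidden` is an array; with `RNNOutput{T,V}` the two fields were forced to share `V`. A stripped-down sketch with two hypothetical toy structs:

```Julia
struct OutOld{T,V}   y::T; hidden::V; memory::V end  # the old layout
struct OutNew{T,V,Z} y::T; hidden::V; memory::Z end  # this commit's layout

y, h = rand(Float32, 8, 4, 7), rand(Float32, 8, 1, 4)

OutNew(y, h, nothing)  # fine: V = Array{Float32,3}, Z = Nothing
OutOld(y, h, nothing)  # MethodError: no single V matches both Array and Nothing
```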
