From 0748d78bc945752ea13b6dbcf9d8c22fe49e8f0b Mon Sep 17 00:00:00 2001 From: wangwei Date: Tue, 14 Apr 2020 22:26:57 +0800 Subject: [PATCH] update the rnn example --- examples/rnn/sample.py | 107 ----------------------------------------- examples/rnn/train.py | 10 ++-- 2 files changed, 5 insertions(+), 112 deletions(-) delete mode 100644 examples/rnn/sample.py diff --git a/examples/rnn/sample.py b/examples/rnn/sample.py deleted file mode 100644 index eb745cff0..000000000 --- a/examples/rnn/sample.py +++ /dev/null @@ -1,107 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================= -'''Sample characters from the pre-trained model''' - -from __future__ import division -from __future__ import print_function -from builtins import range -import sys -import numpy as np -import argparse -try: - import pickle -except ImportError: - import cPickle as pickle - -from singa import layer -from singa import tensor -from singa import device - - -def sample(model_path, nsamples=100, seed_text='', do_sample=True): - with open(model_path, 'rb') as fd: - d = pickle.load(fd) - rnn_w = tensor.from_numpy(d['rnn_w']) - idx_to_char = d['idx_to_char'] - char_to_idx = d['char_to_idx'] - vocab_size = len(idx_to_char) - dense_w = tensor.from_numpy(d['dense_w']) - dense_b = tensor.from_numpy(d['dense_b']) - hidden_size = d['hidden_size'] - num_stacks = d['num_stacks'] - dropout = d['dropout'] - - cuda = device.create_cuda_gpu() - rnn = layer.LSTM(name='lstm', hidden_size=hidden_size, - num_stacks=num_stacks, dropout=dropout, - input_sample_shape=(len(idx_to_char),)) - rnn.to_device(cuda) - rnn.param_values()[0].copy_data(rnn_w) - dense = layer.Dense('dense', vocab_size, input_sample_shape=(hidden_size,)) - dense.to_device(cuda) - dense.param_values()[0].copy_data(dense_w) - dense.param_values()[1].copy_data(dense_b) - hx = tensor.Tensor((num_stacks, 1, hidden_size), cuda) - cx = tensor.Tensor((num_stacks, 1, hidden_size), cuda) - hx.set_value(0.0) - cx.set_value(0.0) - if len(seed_text) > 0: - for c in seed_text: - x = np.zeros((1, vocab_size), dtype=np.float32) - x[0, char_to_idx[c]] = 1 - tx = tensor.from_numpy(x) - tx.to_device(cuda) - inputs = [tx, hx, cx] - outputs = rnn.forward(False, inputs) - y = dense.forward(False, outputs[0]) - y = tensor.softmax(y) - hx = outputs[1] - cx = outputs[2] - sys.stdout.write(seed_text) - else: - y = tensor.Tensor((1, vocab_size), cuda) - y.set_value(1.0 / vocab_size) - - for i in range(nsamples): - y.to_host() - prob = tensor.to_numpy(y)[0] - if do_sample: - cur = np.random.choice(vocab_size, 1, p=prob)[0] - else: - cur = np.argmax(prob) - sys.stdout.write(idx_to_char[cur]) - x = np.zeros((1, vocab_size), dtype=np.float32) - x[0, cur] = 1 - tx = tensor.from_numpy(x) - tx.to_device(cuda) - inputs = [tx, hx, cx] - outputs = rnn.forward(False, inputs) - y = dense.forward(False, outputs[0]) - y = tensor.softmax(y) - hx = outputs[1] - cx = outputs[2] - print('') - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='sample chars from char-rnn') - parser.add_argument('model', help='the model checkpoint file') - parser.add_argument('n', type=int, help='num of characters to sample') - parser.add_argument('--seed', help='seed text string which warms up the ' - ' rnn states for sampling', default='') - args = parser.parse_args() - assert args.n > 0, 'n must > 0' - sample(args.model, args.n, seed_text=args.seed) diff --git a/examples/rnn/train.py b/examples/rnn/train.py index 30ce680ee..0060d062e 100644 --- a/examples/rnn/train.py +++ b/examples/rnn/train.py @@ -86,7 +86,7 @@ def __init__(self, fpath, batch_size=32, seq_length=100, train_ratio=0.8): data = [self.char_to_idx[c] for c in self.raw_data] # seq_length + 1 for the data + label nsamples = len(data) // (1 + seq_length) - data = data[0:300 * (1 + seq_length)] + data = data[0: nsamples * (1 + seq_length)] data = np.asarray(data, dtype=np.int32) data = np.reshape(data, (-1, seq_length + 1)) # shuffle all sequences @@ -172,13 +172,13 @@ def sample(model, data, dev, nsamples=100, use_max=False): y = tensor.softmax(outputs[-1]) -def evaluate(model, data, batch_size, seq_length, dev): +def evaluate(model, data, batch_size, seq_length, dev, inputs, labels): model.eval() val_loss = 0.0 for b in range(data.num_test_batch): batch = data.val_dat[b * batch_size:(b + 1) * batch_size] inputs, labels = convert(batch, batch_size, seq_length, data.vocab_size, - dev) + dev, inputs, labels) model.reset_states(dev) y = model(inputs) loss = model.loss(y, labels)[0] @@ -217,8 +217,8 @@ def train(data, print('\nEpoch %d, train loss is %f' % (epoch, train_loss / data.num_train_batch / seq_length)) - # evaluate(model, data, batch_size, seq_length, cuda, inputs, labels) - # sample(model, data, cuda) + evaluate(model, data, batch_size, seq_length, cuda, inputs, labels) + sample(model, data, cuda) if __name__ == '__main__':