import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
import time
from scipy.sparse import csr_matrix
from tensorboardX import SummaryWriter
%matplotlib inline
Import the libraries; you learned these back in kindergarten, right?
with open('./mfjsml.txt', 'r', encoding='utf-8') as f:
    data = f.readlines()
Read the dataset. The name mfjsml can be changed to whatever you like, but keep the dataset in the same folder as the Jupyter Lab notebook!
data = ''.join(data)  # merge the list of lines into a single string
print(data[:100])
Show a small part of the dataset.
chars = list(set(data))
data_size, vocab_size = len(data), len(chars)
print(f'data has {data_size} characters, {vocab_size} unique.')
char_to_ix = { ch:i for i,ch in enumerate(chars) }
ix_to_char = { i:ch for i,ch in enumerate(chars) }
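A quick sanity check (an extra cell, not part of the original post) to confirm that the two lookup tables are inverses of each other:

# round-trip check: every character maps to an index and back to itself
c = data[0]
assert ix_to_char[char_to_ix[c]] == c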
Next, let's start building the LSTM model.
X_train = csr_matrix((len(data), len(chars)), dtype=np.int8)  # np.int was removed from NumPy; int8 is enough for a 0/1 matrix
char_id = np.array([char_to_ix[c] for c in data])   # index of every character in the text
X_train[np.arange(len(data)), char_id] = 1          # one-hot encode each character as a sparse row
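A small check (added here, not in the original) that each row of the sparse matrix really is a one-hot vector for the corresponding character:

# each row should contain exactly one 1, in the column of that character
i = 0
assert X_train[i].toarray().sum() == 1
assert X_train[i, char_to_ix[data[i]]] == 1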
y_train = np.roll(char_id, -1)  # the target at position i is the character at position i+1
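To see what np.roll does here, compare the first few characters of the text with their decoded targets (an added check, not in the original); note that the very last target wraps around to the first character of the text:

print(data[:6])                                        # the first few input characters
print(''.join(ix_to_char[ix] for ix in y_train[:5]))   # their targets: the same text shifted left by one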
X_train.shape
y_train.shape
def get_batch(X_train, y_train, seq_length):
    '''Yield (input, target) chunks of seq_length consecutive characters.'''
    X = X_train
    #X = torch.from_numpy(X_train).float()
    y = torch.from_numpy(y_train).long()
    for i in range(0, len(y), seq_length):
        id_stop = i + seq_length if i + seq_length < len(y) else len(y)
        # densify the sparse one-hot slice only when it is actually needed
        yield [torch.from_numpy(X[i:id_stop].toarray().astype(np.float32)), y[i:id_stop]]
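For example, grabbing the first batch (assuming seq_length = 100, a value the snippet above does not fix) shows the shapes the generator produces:

X_batch, y_batch = next(get_batch(X_train, y_train, 100))
print(X_batch.shape)  # torch.Size([100, vocab_size])
print(y_batch.shape)  # torch.Size([100])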
def sample_chars(rnn, X_seed, h_prev, length=20):
    '''Generate text using trained model'''
    X_next = X_seed
    results = []
    with torch.no_grad():
        for i in range(length):
            y_score, h_prev = rnn(X_next.view(1, 1, -1), h_prev)
            y_prob = nn.Softmax(0)(y_score.view(-1)).detach().numpy()
            y_prob = y_prob / y_prob.sum()  # guard against float32 rounding so the probabilities sum to exactly 1
            y_pred = np.random.choice(chars, 1, p=y_prob).item()
            results.append(y_pred)
            # feed the sampled character back in as a one-hot vector
            X_next = torch.zeros_like(X_seed)
            X_next[chars.index(y_pred)] = 1
    return ''.join(results)
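The training loop below uses rnn, train, writer, and seq_length, which are defined elsewhere in the post and not repeated in this section. For reference, here is a minimal sketch of what they could look like, assuming a single-layer nn.LSTM followed by a linear layer, cross-entropy loss, and Adam; the class name CharLSTM and the values hidden_size=128, lr=1e-3, seq_length=100 are illustrative choices, not taken from the original.

# A minimal sketch, not the original definitions.
class CharLSTM(nn.Module):
    def __init__(self, vocab_size, hidden_size=128):
        super().__init__()
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(vocab_size, hidden_size)   # input: one-hot characters
        self.fc = nn.Linear(hidden_size, vocab_size)   # hidden state -> vocabulary scores

    def forward(self, x, hidden):
        out, hidden = self.lstm(x, hidden)             # x: (seq_len, batch=1, vocab_size)
        return self.fc(out.view(-1, self.hidden_size)), hidden

    def initHidden(self):
        # (h_0, c_0), each of shape (num_layers, batch, hidden_size)
        return (torch.zeros(1, 1, self.hidden_size),
                torch.zeros(1, 1, self.hidden_size))

rnn = CharLSTM(vocab_size)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(rnn.parameters(), lr=1e-3)
seq_length = 100
writer = SummaryWriter()

def train(X_batch, y_batch):
    # one optimisation step over one chunk of the text
    hidden = rnn.initHidden()
    optimizer.zero_grad()
    y_score, hidden = rnn(X_batch.view(len(X_batch), 1, -1), hidden)
    loss = criterion(y_score, y_batch)
    loss.backward()
    optimizer.step()
    return y_score, loss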
all_losses = []
print_every = 100
for epoch in range(20):
    for batch in get_batch(X_train, y_train, seq_length):
        X_batch, y_batch = batch
        _, batch_loss = train(X_batch, y_batch)
        all_losses.append(batch_loss.item())
        if len(all_losses) % print_every == 1:
            print(f'----\nTraining in progress, please wait patiently. Loss: {np.mean(all_losses[-print_every:])} at iter: {len(all_losses)}\n----')
            # log to TensorBoard every print_every iterations; can be removed if TensorBoard is not installed
            writer.add_scalar('loss', np.mean(all_losses[-100:]), len(all_losses))
            # generate a text sample every print_every iterations
            print(sample_chars(rnn, X_batch[0], rnn.initHidden(), 200))
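Once training finishes, the collected losses can be plotted directly with the matplotlib import from the top of the notebook. This plotting cell is an addition, not part of the original code:

# plot the raw per-batch losses plus a simple moving average
plt.figure(figsize=(8, 4))
plt.plot(all_losses, alpha=0.3, label='per-batch loss')
window = 100
if len(all_losses) >= window:
    smoothed = np.convolve(all_losses, np.ones(window) / window, mode='valid')
    plt.plot(range(window - 1, len(all_losses)), smoothed, label=f'{window}-batch average')
plt.xlabel('iteration')
plt.ylabel('loss')
plt.legend()
plt.show()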