Since the last assignment already covered Logistic Regression, this time I'll write the parser instead. The underlying ideas are much the same; the main addition is dropout.
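As a quick aside on the dropout part (a minimal sketch, separate from the parser code below): nn.Dropout(p) zeroes each activation with probability p during training, rescales the survivors by 1/(1-p), and becomes a no-op in eval mode, which is why the training loop later switches between parser.train() and parser.eval().

import torch
import torch.nn as nn

drop = nn.Dropout(p=0.5)     # p is the probability of zeroing an element
x = torch.ones(2, 6)

drop.train()                 # training mode: about half the entries become 0,
print(drop(x))               # the rest are scaled by 1 / (1 - p) = 2

drop.eval()                  # eval mode: dropout is the identity
print(drop(x))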
class ParserModel(nn.Module):
    def __init__(self, config, word_embeddings=None, pos_embeddings=None, dep_embeddings=None):
        super(ParserModel, self).__init__()
        self.config = config
        n_w = config.word_features_types  # 18
        n_p = config.pos_features_types   # 18
        n_d = config.dep_features_types   # 12
        self.word_embeddings = word_embeddings
        self.pos_embeddings = pos_embeddings
        self.dep_embeddings = dep_embeddings
        # one hidden layer over the concatenated embeddings, e.g. 2400 -> 200
        self.layer1 = nn.Linear((n_w + n_p + n_d) * self.config.embedding_dim, self.config.l1_hidden_size)
        # nn.Dropout takes the drop probability, so convert from the keep probability
        self.dropout = nn.Dropout(1 - self.config.keep_prob)
        self.outlayer = nn.Linear(self.config.l1_hidden_size, self.config.num_classes)
        self.init_weights()
    def init_weights(self):
        initrange = 0.01
        self.layer1.weight.data.uniform_(-initrange, initrange)
        self.layer1.bias.data.zero_()
        self.outlayer.weight.data.uniform_(-initrange, initrange)
        self.outlayer.bias.data.zero_()

    def cubic(self, x):
        # cubic activation over the hidden layer
        return x ** 3
    def lookup_embeddings(self, word_indices, pos_indices, dep_indices, keep_pos=1):
        # map index tensors to their embedding vectors
        w_embeddings = self.word_embeddings(word_indices)
        p_embeddings = self.pos_embeddings(pos_indices)
        d_embeddings = self.dep_embeddings(dep_indices)
        return w_embeddings, p_embeddings, d_embeddings
    def forward(self, word_indices, pos_indices, dep_indices):
        """
        Computes the next transition step (shift, reduce-left, reduce-right)
        based on the current state of the input.

        The indices here represent the words/pos/dependencies in the current
        context, which we'll need to turn into vectors.
        """
        w_embeddings, p_embeddings, d_embeddings = self.lookup_embeddings(word_indices, pos_indices, dep_indices)

        # flatten each (batch, n_features, embedding_dim) tensor to (batch, n_features * embedding_dim)
        w_embeddings = w_embeddings.view(w_embeddings.shape[0], -1)
        p_embeddings = p_embeddings.view(p_embeddings.shape[0], -1)
        d_embeddings = d_embeddings.view(d_embeddings.shape[0], -1)

        # concatenate word, POS, and dependency features into one input vector
        input_embed = torch.cat((w_embeddings, p_embeddings, d_embeddings), 1)
        layer1_out = self.layer1(input_embed)
        cubic_out = self.cubic(layer1_out)
        drop_out = self.dropout(cubic_out)
        output = self.outlayer(drop_out)  # raw logits; CrossEntropyLoss applies softmax internally
        return output
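For reference, here is a minimal forward-pass smoke test. The config field values and vocabulary sizes are assumptions I filled in from the shape comments above (18 + 18 + 12 features, 2400 hidden-layer inputs), not part of the assignment code.

# Hypothetical smoke test: dummy config and random indices, just to check tensor shapes.
import torch
import torch.nn as nn

class DummyConfig:
    word_features_types = 18
    pos_features_types = 18
    dep_features_types = 12
    embedding_dim = 50          # 48 features * 50 dims = 2400 inputs to layer1
    l1_hidden_size = 200
    keep_prob = 0.5
    num_classes = 3             # shift, reduce-left, reduce-right (unlabeled case)

config = DummyConfig()
word_emb = nn.Embedding(5000, config.embedding_dim)
pos_emb = nn.Embedding(50, config.embedding_dim)
dep_emb = nn.Embedding(50, config.embedding_dim)

model = ParserModel(config, word_emb, pos_emb, dep_emb)
w = torch.randint(0, 5000, (4, config.word_features_types))
p = torch.randint(0, 50, (4, config.pos_features_types))
d = torch.randint(0, 50, (4, config.dep_features_types))
print(model(w, p, d).shape)     # expected: torch.Size([4, 3])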
The training part
def train(save_dir='saved_weights', parser_name='parser', num_epochs=5, max_iters=-1,
          print_every_iters=10):
    """
    Trains the model.

    parser_name is the string prefix used for the filename where the parser is
    saved after every epoch.
    """
    # load dataset
    load_existing_dump = False
    print('Loading dataset for training')
    dataset = load_datasets(load_existing_dump)
    config = dataset.model_config

    print('Loading embeddings')
    # load_type and embed_file are module-level settings for the embedding source
    word_embeddings, pos_embeddings, dep_embeddings = load_embeddings(config, emb_type=load_type, emb_file_name=embed_file)

    # build the parser and move it to GPU if one is available
    parser = ParserModel(config, word_embeddings, pos_embeddings, dep_embeddings)
    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    parser.to(device)

    # make sure the save directory exists
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    # loss and optimizer
    loss_fn = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(parser.parameters(), lr=LEARNING_RATE)

    uas_scores = []  # track validation UAS after each epoch
    for epoch in range(1, num_epochs + 1):
        ###### Training #####
        for i, (train_x, train_y) in enumerate(get_minibatches([dataset.train_inputs,
                dataset.train_targets], config.batch_size, is_multi_feature_input=True)):

            word_inputs_batch, pos_inputs_batch, dep_inputs_batch = train_x

            # .to(device) is not in-place for tensors, so keep the returned tensor
            word_inputs_batch = torch.LongTensor(word_inputs_batch).to(device)
            pos_inputs_batch = torch.LongTensor(pos_inputs_batch).to(device)
            dep_inputs_batch = torch.LongTensor(dep_inputs_batch).to(device)

            # targets come in one-hot form; CrossEntropyLoss wants class indices
            labels = [np.argmax(x) for x in train_y]
            labels = torch.LongTensor(labels).to(device)

            if max_iters >= 0 and i > max_iters:
                break

            # Some debugging information for you
            if i == 0 and epoch == 1:
                print("size of word inputs: ", word_inputs_batch.size())
                print("size of pos inputs: ", pos_inputs_batch.size())
                print("size of dep inputs: ", dep_inputs_batch.size())
                print("size of labels: ", labels.size())

            optimizer.zero_grad()
            outputs = parser(word_inputs_batch, pos_inputs_batch, dep_inputs_batch)
            loss = loss_fn(outputs, labels)
            loss.backward()
            optimizer.step()

            if i % print_every_iters == 0:
                print('Epoch: %d [%d], loss: %1.3f, acc: %1.3f'
                      % (epoch, i, loss.item(),
                         int((outputs.argmax(1) == labels).sum()) / len(labels)))

        print("End of epoch")

        # save model
        save_file = os.path.join(save_dir, '%s-epoch-%d.mdl' % (parser_name, epoch))
        print('Saving current state of model to %s' % save_file)
        torch.save(parser, save_file)
        ###### Validation #####
        print('Evaluating on validation data after epoch %d' % epoch)
        parser.eval()
        compute_dependencies(parser, device, dataset.valid_data, dataset)
        valid_UAS = get_UAS(dataset.valid_data)
        print("- validation UAS: {:.2f}".format(valid_UAS * 100.0))
        uas_scores.append(valid_UAS)
        parser.train()

    return parser
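Finally, a usage sketch (the argument values here are just examples): calling train() runs the whole loop, saves a checkpoint per epoch, and returns the trained parser.

# Hypothetical call; adjust num_epochs / save_dir to taste.
parser = train(save_dir='saved_weights', parser_name='parser', num_epochs=5)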