
Dependency Parser (PyTorch)

Since the previous assignment already covered Logistic Regression, this time I'll write up the parser instead. The idea is much the same; the main addition is dropout.
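
One detail worth flagging before the code: nn.Dropout takes the probability of *dropping* a unit, whereas the config used below stores a keep probability (keep_prob), so the value has to be converted. A minimal standalone check, with a made-up keep_prob:

import torch
import torch.nn as nn

keep_prob = 0.5                      # illustrative value only, not from the assignment config
drop = nn.Dropout(p=1 - keep_prob)   # nn.Dropout's argument is the *drop* probability

x = torch.ones(2, 6)
drop.train()
print(drop(x))   # some entries zeroed, survivors scaled by 1 / (1 - p)
drop.eval()
print(drop(x))   # identity at evaluation time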

import torch
import torch.nn as nn


class ParserModel(nn.Module):
	def __init__(self, config, word_embeddings=None, pos_embeddings=None, dep_embeddings=None):
		super(ParserModel, self).__init__()
		self.config = config
		n_w = config.word_features_types # 18
		n_p = config.pos_features_types # 18
		n_d = config.dep_features_types # 12
		
		self.word_embeddings = word_embeddings
		self.pos_embeddings = pos_embeddings
		self.dep_embeddings = dep_embeddings
		
		# input: (n_w + n_p + n_d) * embedding_dim = 2400, hidden size: 200
		self.layer1 = nn.Linear((n_w + n_p + n_d) * self.config.embedding_dim, self.config.l1_hidden_size)

		# nn.Dropout expects the probability of dropping a unit, while the config
		# stores the keep probability, so convert it here.
		self.dropout = nn.Dropout(1 - self.config.keep_prob)

		self.outlayer = nn.Linear(self.config.l1_hidden_size, self.config.num_classes)

		self.init_weights()
		
	def init_weights(self):
		
		initrange = 0.01
		self.layer1.weight.data.uniform_(-initrange, initrange)
		self.layer1.bias.data.zero_()
		self.outlayer.weight.data.uniform_(-initrange, initrange)
		self.outlayer.bias.data.zero_()
		

	def cubic(self, x):
		# cube activation from Chen and Manning (2014): g(x) = x ** 3
		return x ** 3
		
	def lookup_embeddings(self, word_indices, pos_indices, dep_indices, keep_pos=1):
		# Map each batch of index tensors to its embedding vectors via the nn.Embedding layers.
		w_embeddings = self.word_embeddings(word_indices)
		p_embeddings = self.pos_embeddings(pos_indices)
		d_embeddings = self.dep_embeddings(dep_indices)
		return w_embeddings, p_embeddings, d_embeddings

	def forward(self, word_indices, pos_indices, dep_indices):
		"""
		Computes the next transition step (shift, reduce-left, reduce-right)
		based on the current state of the input.
		

		The indices here represent the words/pos/dependencies in the current
		context, which we'll need to turn into vectors.
		"""
		
	
		w_embeddings, p_embeddings, d_embeddings = self.lookup_embeddings(
			word_indices, pos_indices, dep_indices)

		# Flatten each (batch, n_features, embedding_dim) tensor into
		# (batch, n_features * embedding_dim) before concatenating.
		w_embeddings = w_embeddings.view(w_embeddings.shape[0], -1)
		p_embeddings = p_embeddings.view(p_embeddings.shape[0], -1)
		d_embeddings = d_embeddings.view(d_embeddings.shape[0], -1)

		input_embed = torch.cat((w_embeddings, p_embeddings, d_embeddings), 1)

		# hidden layer -> cube activation -> dropout -> output logits
		layer1_out = self.layer1(input_embed)
		cubic_out = self.cubic(layer1_out)
		drop_out = self.dropout(cubic_out)
		output = self.outlayer(drop_out)

		return output
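
A quick sanity check, separate from the assignment code: assuming a config object with the attributes used above, the model can be exercised with random indices to confirm the shapes line up. The vocabulary sizes, batch size and config values here are made up purely for illustration.

from types import SimpleNamespace
import torch
import torch.nn as nn

# hypothetical config, chosen only so the dimensions work out (2400 -> 200 -> 3)
config = SimpleNamespace(word_features_types=18, pos_features_types=18, dep_features_types=12,
                         embedding_dim=50, l1_hidden_size=200, num_classes=3, keep_prob=0.5)

word_emb = nn.Embedding(1000, config.embedding_dim)   # made-up vocabulary sizes
pos_emb = nn.Embedding(50, config.embedding_dim)
dep_emb = nn.Embedding(50, config.embedding_dim)

model = ParserModel(config, word_emb, pos_emb, dep_emb)
w = torch.randint(0, 1000, (4, 18))   # batch of 4 states, 18 word features each
p = torch.randint(0, 50, (4, 18))
d = torch.randint(0, 50, (4, 12))
print(model(w, p, d).shape)           # expected: torch.Size([4, 3])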

           

The training part

import os

import numpy as np
import torch
import torch.nn as nn

# load_datasets, load_embeddings, get_minibatches, compute_dependencies and get_UAS
# come from the assignment's utility code.

def train(save_dir='saved_weights', parser_name='parser', num_epochs=5, max_iters=-1,
		  print_every_iters=10):
	"""
	Trains the model.

	parser_name is the string prefix used for the filename where the parser is
	saved after every epoch    
	"""
	
	# load dataset
	load_existing_dump=False
	print('Loading dataset for training')
	dataset = load_datasets(load_existing_dump)
	
	config = dataset.model_config

	print('Loading embeddings')
	# load_type and embed_file are assumed to be module-level settings chosen elsewhere
	word_embeddings, pos_embeddings, dep_embeddings = load_embeddings(
		config, emb_type=load_type, emb_file_name=embed_file)

	
	# load parser object
	parser = ParserModel(config, word_embeddings, pos_embeddings, dep_embeddings)
	device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
	parser.to(device)

	# set save_dir for model
	if not os.path.exists(save_dir):
		os.makedirs(save_dir)

	# create object for loss function
	loss_fn = nn.CrossEntropyLoss()
	
	# LEARNING_RATE is assumed to be a module-level constant
	optimizer = torch.optim.Adam(parser.parameters(), lr=LEARNING_RATE)

	uas_scores = []  # validation UAS after each epoch

	for epoch in range(1, num_epochs+1):
		
		###### Training #####

		for i, (train_x, train_y) in enumerate(get_minibatches(
				[dataset.train_inputs, dataset.train_targets],
				config.batch_size, is_multi_feature_input=True)):

			word_inputs_batch, pos_inputs_batch, dep_inputs_batch = train_x

			# Tensor.to() is not in-place, so keep the returned tensors
			word_inputs_batch = torch.LongTensor(word_inputs_batch).to(device)
			pos_inputs_batch = torch.LongTensor(pos_inputs_batch).to(device)
			dep_inputs_batch = torch.LongTensor(dep_inputs_batch).to(device)

			# targets come in one-hot form; CrossEntropyLoss expects class indices
			labels = [np.argmax(x) for x in train_y]
			labels = torch.LongTensor(labels).to(device)

			if max_iters >= 0 and i > max_iters:
				break

			# Some debugging information for you
			if i==0 and epoch==1:
				print("size of word inputs: ",word_inputs_batch.size())
				print("size of pos inputs: ",pos_inputs_batch.size())
				print("size of dep inputs: ",dep_inputs_batch.size())
				print("size of labels: ",labels.size())

			optimizer.zero_grad()
		
			outputs = parser(word_inputs_batch, pos_inputs_batch, dep_inputs_batch)
			loss = loss_fn(outputs, labels)
			loss.backward()
		
			optimizer.step()
			
			if i % print_every_iters == 0:
				print ('Epoch: %d [%d], loss: %1.3f, acc: %1.3f' \
					   % (epoch, i, loss.item(),
						  int((outputs.argmax(1)==labels).sum())/len(labels)))

		print("End of epoch")

		# save model
		save_file = os.path.join(save_dir, '%s-epoch-%d.mdl' % (parser_name, epoch))
		print('Saving current state of model to %s' % save_file)
		torch.save(parser, save_file)

		###### Validation #####
		print('Evaluating on validation data after epoch %d' % epoch)
		
		parser.eval()

		compute_dependencies(parser, device, dataset.valid_data, dataset)
		valid_UAS = get_UAS(dataset.valid_data)
		print ("- validation UAS: {:.2f}".format(valid_UAS * 100.0))
		uas_scores.append(valid_UAS)
		
		parser.train()

	return parser
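
Since train() saves the whole parser object with torch.save, a saved epoch can be restored later and scored the same way the validation loop does. A rough sketch along those lines: the helper name evaluate_saved is made up, while compute_dependencies, get_UAS and dataset.valid_data are the same ones used above (on newer PyTorch releases torch.load may additionally need weights_only=False to unpickle a full module).

def evaluate_saved(save_file, dataset):
	# torch.save(parser, ...) pickles the whole module, so torch.load restores it
	# directly; the ParserModel class must be importable when this runs.
	device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
	parser = torch.load(save_file, map_location=device)
	parser.eval()  # switch off dropout for deterministic predictions

	compute_dependencies(parser, device, dataset.valid_data, dataset)
	print("UAS: {:.2f}".format(get_UAS(dataset.valid_data) * 100.0))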