diff --git a/data_preprocess.py b/data_preprocess.py index b393016..f3067c2 100644 --- a/data_preprocess.py +++ b/data_preprocess.py @@ -75,7 +75,7 @@ def lemmatize(): answer = line_info[5] question = ' '.join(map(lambda x: wn_lemmatizer.lemmatize(x), nltk.word_tokenize(question))) answer = ' '.join(map(lambda x: wn_lemmatizer.lemmatize(x), nltk.word_tokenize(answer))) - if set_name != 'test': + if 'test' not in set_name: label = line_info[6] fout.write('\t'.join([q_id, question, a_id, answer, label]) + '\n') else: @@ -207,4 +207,4 @@ def data_transform(embedding_size): gen_vocab() # 4.生成相应的embedding - data_transform(300) \ No newline at end of file + data_transform(300)