Skip to content

Commit

Permalink
setup of pypi
Browse files Browse the repository at this point in the history
  • Loading branch information
yongzhuo committed Sep 27, 2021
1 parent 0240a44 commit acb5cdb
Show file tree
Hide file tree
Showing 60 changed files with 76 additions and 40 deletions.
8 changes: 6 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -109,9 +109,11 @@ import json
import sys
import os
path_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
sys.path.append(os.path.join(path_root, "pytorch_textclassification"))
path_sys = os.path.join(path_root, "pytorch_nlu", "pytorch_textclassification")
# Put the relocated package directory on sys.path; without this the tc*
# modules imported below cannot be resolved.
sys.path.append(path_sys)
print(path_root)
# 分类下的引入, pytorch_textclassification
from tcConfig import model_config
os.environ["CUDA_VISIBLE_DEVICES"] = model_config.get("CUDA_VISIBLE_DEVICES", "0")
from tcTools import get_current_time
from tcRun import TextClassification
from tcConfig import model_config
Expand Down Expand Up @@ -157,11 +159,13 @@ import json
import sys
import os
path_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
path_sys = os.path.join(path_root, "pytorch_sequencelabeling")
path_sys = os.path.join(path_root, "pytorch_nlu", "pytorch_sequencelabeling")
sys.path.append(path_sys)
print(path_root)
print(path_sys)
# Imports for sequence labeling, pytorch_sequencelabeling.
# Read the GPU setting from slConfig: only the sequence-labeling directory was
# added to sys.path above, so tcConfig is not importable from here.
from slConfig import model_config
# Environment values must be str, so coerce in case the config holds an int.
os.environ["CUDA_VISIBLE_DEVICES"] = str(model_config.get("CUDA_VISIBLE_DEVICES", "0"))
from slTools import get_current_time
from slRun import SequenceLabeling
from slConfig import model_config
Expand Down
2 changes: 1 addition & 1 deletion output/__init__.py → pytorch_nlu/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# !/usr/bin/python
# -*- coding: utf-8 -*-
# @time : 2021/7/27 19:28
# @time : 2021/9/27 23:18
# @author : Mo
# @function:
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
5 changes: 5 additions & 0 deletions pytorch_nlu/output/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# !/usr/bin/python
# -*- coding: utf-8 -*-
# @time : 2021/9/27 23:32
# @author : Mo
# @function:
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ import json
import sys
import os
path_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
path_sys = os.path.join(path_root, "pytorch_sequencelabeling")
path_sys = os.path.join(path_root, "pytorch_nlu", "pytorch_sequencelabeling")
sys.path.append(path_sys)
print(path_root)
print(path_sys)
Expand Down
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
# @function: config of sequence-labeling, 超参数/类


import os
os.environ["USE_TORCH"] = "1"
from transformers import BertTokenizer, RobertaTokenizer, AlbertTokenizer, XLNetTokenizer, ElectraTokenizer, XLMTokenizer, AutoTokenizer
from transformers import BertConfig, RobertaConfig, AlbertConfig, XLNetConfig, ElectraConfig, XLMConfig, AutoConfig
from transformers import BertModel, RobertaModel, AlbertModel, XLNetModel, ElectraModel, XLMModel, AutoModel
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
import os
path_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "."))
sys.path.append(path_root)
# os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
# Take the GPU setting from this package's own config. tcConfig belongs to the
# text-classification package, whose directory is not added to sys.path here.
from slConfig import model_config
# Environment values must be str, so coerce in case the config holds an int.
os.environ["CUDA_VISIBLE_DEVICES"] = str(model_config.get("CUDA_VISIBLE_DEVICES", "0"))
from slConfig import _SL_MODEL_SOFTMAX, _SL_MODEL_GRID, _SL_MODEL_SPAN, _SL_MODEL_CRF
from slConfig import _SL_DATA_CONLL, _SL_DATA_SPAN
from slTools import get_logger, load_json
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
path_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "."))
sys.path.append(path_root)

# Take the GPU setting from this package's own config. tcConfig belongs to the
# text-classification package, whose directory is not added to sys.path here.
from slConfig import model_config
# Environment values must be str, so coerce in case the config holds an int.
os.environ["CUDA_VISIBLE_DEVICES"] = str(model_config.get("CUDA_VISIBLE_DEVICES", "0"))
from slConfig import _SL_MODEL_SOFTMAX, _SL_MODEL_GRID, _SL_MODEL_SPAN, _SL_MODEL_CRF
from slConfig import _SL_DATA_CONLL, _SL_DATA_SPAN
from slTools import get_logger
Expand Down
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ import json
import sys
import os
path_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
sys.path.append(os.path.join(path_root, "pytorch_textclassification"))
path_sys = os.path.join(path_root, "pytorch_nlu", "pytorch_textclassification")
# Put the relocated package directory on sys.path; without this the tc*
# modules imported below cannot be resolved.
sys.path.append(path_sys)
print(path_root)
# 分类下的引入, pytorch_textclassification
from tcTools import get_current_time
Expand Down
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
# @author : Mo
# @function: config of transformers and graph-model


import os
os.environ["USE_TORCH"] = "1"
from transformers import BertTokenizer, RobertaTokenizer, AlbertTokenizer, XLNetTokenizer, ElectraTokenizer, XLMTokenizer, AutoTokenizer
from transformers import BertConfig, RobertaConfig, AlbertConfig, XLNetConfig, ElectraConfig, XLMConfig, AutoConfig
from transformers import BertModel, RobertaModel, AlbertModel, XLNetModel, ElectraModel, XLMModel, AutoModel
Expand Down Expand Up @@ -36,7 +37,7 @@

# model算法超参数
model_config = {
"CUDA_VISIBLE_DEVICES": "1", # 环境, GPU-CPU, "-1"/"0"/"1"/"2"...
"CUDA_VISIBLE_DEVICES": "0", # 环境, GPU-CPU, "-1"/"0"/"1"/"2"...
"output_hidden_states": None, # [6,11] # 输出层, 即取第几层transformer的隐藏输出, list
"pretrained_model_name_or_path": "", # 预训练模型地址
"model_save_path": "save_path", # 训练模型保存-训练完毕模型目录
Expand Down
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -188,14 +188,14 @@ def __init__(self, reduction="mean", inf=1e12):

def forward(self, logits, labels):
logits = (1 - 2 * labels) * logits # <3, 4>
logits_neg = logits - labels * self.inf # <3, 4>
logits_pos = logits - (1 - labels) * self.inf # <3, 4>
logits_neg = logits - labels * self.inf # <3, 4>, 减去选中多标签的index
logits_pos = logits - (1 - labels) * self.inf # <3, 4>, 减去其他不需要的多标签Index
zeros = torch.zeros_like(logits[..., :1]) # <3, 1>
logits_neg = torch.cat([logits_neg, zeros], dim=-1) # <3, 5>
logits_pos = torch.cat([logits_pos, zeros], dim=-1) # <3, 5>
neg_loss = torch.logsumexp(logits_neg, dim=-1) # <3, >
pos_loss = torch.logsumexp(logits_pos, dim=-1) # <3, >
loss = neg_loss + pos_loss
loss = neg_loss + pos_loss # pos比零大, neg比零小
if "mean" == self.reduction:
loss = loss.mean()
else:
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
import os
path_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "."))
sys.path.append(path_root)
# os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
from tcConfig import model_config
os.environ["CUDA_VISIBLE_DEVICES"] = model_config.get("CUDA_VISIBLE_DEVICES", "0")
from tcTools import get_logger, load_json
from tcOffice import Office
from tcData import Corpus
Expand Down Expand Up @@ -60,7 +61,8 @@ def predict(self, texts, logits_type="sigmoid"):


if __name__ == "__main__":

# BERT-base = 8109M
# path_config = "../output/text_classification/model_BERT/tc.config"
path_config = "../output/text_classification/model_ERNIE/tc.config"
tcp = TextClassificationPredict(path_config)
texts = [{"text": "平乐县,古称昭州,隶属于广西壮族自治区桂林市,位于广西东北部,桂林市东南部,东临钟山县,南接昭平,西北毗邻阳朔,北连恭城,总面积1919.34平方公里。"},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
import os
path_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "."))
sys.path.append(path_root)
from tcConfig import model_config
os.environ["CUDA_VISIBLE_DEVICES"] = model_config.get("CUDA_VISIBLE_DEVICES", "0")
from tcConfig import _TC_MULTI_CLASS, _TC_MULTI_LABEL
from tcTools import get_logger
from tcOffice import Office
Expand Down Expand Up @@ -123,11 +125,10 @@ def eval(self):
save_steps = 320 # 存储步数
ee = 0
# 训练-验证语料地址, 可以只输入训练地址
# path_corpus = path_root + "/corpus/text_classification/school"
path_corpus = path_root + "/corpus/text_classification/org_tnews"

path_train = os.path.join(path_corpus, "train.json")
path_dev = os.path.join(path_corpus, "dev.json")
path_corpus = path_root + "/corpus/text_classification/org_multi-label_school"
# path_corpus = path_root + "/corpus/text_classification/org_tnews"
path_train = os.path.join(path_corpus, "train.json.augment")
path_dev = os.path.join(path_corpus, "dev.json.augment")
model_config["evaluate_steps"] = evaluate_steps # 评估步数
model_config["save_steps"] = save_steps # 存储步数
model_config["path_train"] = path_train
Expand All @@ -145,17 +146,17 @@ def eval(self):
"ROBERTA": pretrained_model_dir + "/chinese_roberta_wwm_ext_pytorch",
"ALBERT": pretrained_model_dir + "/albert_base_v1",
"XLNET": pretrained_model_dir + "/chinese_xlnet_mid_pytorch",
"ERNIE": pretrained_model_dir + "/ERNIE_stable-1.0.1-pytorch",
# "ERNIE": pretrained_model_dir + "/ernie-tiny",
# "ERNIE": pretrained_model_dir + "/ERNIE_stable-1.0.1-pytorch",
"ERNIE": pretrained_model_dir + "/ernie-tiny",
"BERT": pretrained_model_dir + "/bert-base-chinese",
}
idx = 0 # 选择的预训练模型类型---model_type
idx = 1 # 选择的预训练模型类型---model_type
model_config["pretrained_model_name_or_path"] = pretrained_model_name_or_path[model_type[idx]]
# model_config["model_save_path"] = "../output/text_classification/model_{}".format(model_type[idx] + "_" + str(get_current_time()))
model_config["model_save_path"] = "../output/text_classification/model_{}".format(model_type[idx])
model_config["model_type"] = model_type[idx]

os.environ["CUDA_VISIBLE_DEVICES"] = str(model_config["CUDA_VISIBLE_DEVICES"])
# os.environ["CUDA_VISIBLE_DEVICES"] = str(model_config["CUDA_VISIBLE_DEVICES"])

# main
lc = TextClassification(model_config)
Expand Down
File renamed without changes.
File renamed without changes.
8 changes: 8 additions & 0 deletions pytorch_nlu/version.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# !/usr/bin/python
# -*- coding: utf-8 -*-
# @time : 2020/12/21 22:24
# @author : Mo
# @function: version of Pytorch-NLU


# Package version string; setup.py imports it and passes it as `version`.
__version__ = "0.0.1"
18 changes: 14 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
# @function :setup of Pytorch-NLU


from pytorch_nlu.version import __version__
from setuptools import find_packages, setup
import codecs

Expand All @@ -23,23 +24,32 @@
install_requires = list(map(lambda x: x.strip(), reader.readlines()))

# Package metadata for PyPI. NAME, DESCRIPTION, long_description, AUTHOR,
# EMAIL, URL, LICENSE and install_requires are defined earlier in setup.py.
setup(
    name=NAME,
    version=__version__,  # imported from pytorch_nlu/version.py
    description=DESCRIPTION,
    long_description=long_description,
    long_description_content_type="text/markdown",
    author=AUTHOR,
    author_email=EMAIL,
    url=URL,
    packages=find_packages(),
    install_requires=install_requires,
    include_package_data=True,
    # Every pattern needs its own trailing comma: adjacent string literals are
    # silently concatenated into a single glob that matches nothing.
    package_data={
        "pytorch_nlu": [
            "*.*",
            "corpus/*",
            "pytorch_textclassification/*",
            "pytorch_sequencelabeling/*",
            "corpus/text_classification/*",
            "corpus/sequence_labeling/*",
            "corpus/text_classification/school/*",
            "corpus/text_classification/tnews/*",
            "corpus/sequence_labeling/ner_china_people_daily_1998_conll/*",
            "corpus/sequence_labeling/ner_china_people_daily_1998_span/*",
        ],
    },
    license=LICENSE,
    classifiers=[
        # Must be an exact registered trove classifier, or the upload is rejected.
        "License :: OSI Approved :: Apache Software License",
        "Programming Language :: Python :: 3.4",
        "Programming Language :: Python :: 3.5",
        "Programming Language :: Python :: 3.6",
        "Programming Language :: Python :: 3.7",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: Implementation :: CPython",
        "Programming Language :: Python :: Implementation :: PyPy",
    ],
)

Expand Down
2 changes: 1 addition & 1 deletion test/sl/tet_sl_base_crf.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import sys
import os
path_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
path_sys = os.path.join(path_root, "pytorch_sequencelabeling")
path_sys = os.path.join(path_root, "pytorch_nlu", "pytorch_sequencelabeling")
sys.path.append(path_sys)
print(path_root)
print(path_sys)
Expand Down
2 changes: 1 addition & 1 deletion test/sl/tet_sl_base_crf_ernie.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import sys
import os
path_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
path_sys = os.path.join(path_root, "pytorch_sequencelabeling")
path_sys = os.path.join(path_root, "pytorch_nlu", "pytorch_sequencelabeling")
sys.path.append(path_sys)
print(path_root)
print(path_sys)
Expand Down
2 changes: 1 addition & 1 deletion test/sl/tet_sl_base_data_conll.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import sys
import os
path_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
path_sys = os.path.join(path_root, "pytorch_sequencelabeling")
path_sys = os.path.join(path_root, "pytorch_nlu", "pytorch_sequencelabeling")
sys.path.append(path_sys)
print(path_root)
print(path_sys)
Expand Down
2 changes: 1 addition & 1 deletion test/sl/tet_sl_base_data_span.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import sys
import os
path_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
path_sys = os.path.join(path_root, "pytorch_sequencelabeling")
path_sys = os.path.join(path_root, "pytorch_nlu", "pytorch_sequencelabeling")
sys.path.append(path_sys)
print(path_root)
print(path_sys)
Expand Down
2 changes: 1 addition & 1 deletion test/sl/tet_sl_base_grid.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import sys
import os
path_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
path_sys = os.path.join(path_root, "pytorch_sequencelabeling")
path_sys = os.path.join(path_root, "pytorch_nlu", "pytorch_sequencelabeling")
sys.path.append(path_sys)
print(path_root)
print(path_sys)
Expand Down
2 changes: 1 addition & 1 deletion test/sl/tet_sl_base_predict.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import sys
import os
path_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
path_sys = os.path.join(path_root, "pytorch_sequencelabeling")
path_sys = os.path.join(path_root, "pytorch_nlu", "pytorch_sequencelabeling")
sys.path.append(path_sys)
print(path_root)
print(path_sys)
Expand Down
2 changes: 1 addition & 1 deletion test/sl/tet_sl_base_softmax.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import sys
import os
path_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
path_sys = os.path.join(path_root, "pytorch_sequencelabeling")
path_sys = os.path.join(path_root, "pytorch_nlu", "pytorch_sequencelabeling")
sys.path.append(path_sys)
print(path_root)
print(path_sys)
Expand Down
2 changes: 1 addition & 1 deletion test/sl/tet_sl_base_span.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import sys
import os
path_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
path_sys = os.path.join(path_root, "pytorch_sequencelabeling")
path_sys = os.path.join(path_root, "pytorch_nlu", "pytorch_sequencelabeling")
sys.path.append(path_sys)
print(path_root)
print(path_sys)
Expand Down
2 changes: 1 addition & 1 deletion test/tc/tet_tc_base_multi_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import sys
import os
path_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
sys.path.append(os.path.join(path_root, "pytorch_textclassification"))
path_sys = os.path.join(path_root, "pytorch_nlu", "pytorch_textclassification")
# Put the relocated package directory on sys.path; without this the tc*
# modules imported below cannot be resolved.
sys.path.append(path_sys)
print(path_root)
# 分类下的引入, pytorch_textclassification
from tcTools import get_current_time
Expand Down
2 changes: 1 addition & 1 deletion test/tc/tet_tc_base_multi_label.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import sys
import os
path_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
sys.path.append(os.path.join(path_root, "pytorch_textclassification"))
path_sys = os.path.join(path_root, "pytorch_nlu", "pytorch_textclassification")
# Put the relocated package directory on sys.path; without this the tc*
# modules imported below cannot be resolved.
sys.path.append(path_sys)
print(path_root)
# 分类下的引入, pytorch_textclassification
from tcTools import get_current_time
Expand Down
2 changes: 1 addition & 1 deletion test/tc/tet_tc_base_multi_label_focalloss.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import sys
import os
path_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
sys.path.append(os.path.join(path_root, "pytorch_textclassification"))
path_sys = os.path.join(path_root, "pytorch_nlu", "pytorch_textclassification")
# Put the relocated package directory on sys.path; without this the tc*
# modules imported below cannot be resolved.
sys.path.append(path_sys)
print(path_root)
# 分类下的引入, pytorch_textclassification
from tcTools import get_current_time
Expand Down
2 changes: 1 addition & 1 deletion test/tc/tet_tc_base_multi_label_isadv.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import sys
import os
path_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
sys.path.append(os.path.join(path_root, "pytorch_textclassification"))
path_sys = os.path.join(path_root, "pytorch_nlu", "pytorch_textclassification")
# Put the relocated package directory on sys.path; without this the tc*
# modules imported below cannot be resolved.
sys.path.append(path_sys)
print(path_root)
# 分类下的引入, pytorch_textclassification
from tcTools import get_current_time
Expand Down
2 changes: 1 addition & 1 deletion test/tc/tet_tc_base_predict_multiclass.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import sys
import os
path_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
sys.path.append(os.path.join(path_root, "pytorch_textclassification"))
path_sys = os.path.join(path_root, "pytorch_nlu", "pytorch_textclassification")
# Put the relocated package directory on sys.path; without this the tc*
# modules imported below cannot be resolved.
sys.path.append(path_sys)
print(path_root)
# os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
from tcPredict import TextClassificationPredict
Expand Down
2 changes: 1 addition & 1 deletion test/tc/tet_tc_base_predict_multilabel.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import sys
import os
path_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
sys.path.append(os.path.join(path_root, "pytorch_textclassification"))
path_sys = os.path.join(path_root, "pytorch_nlu", "pytorch_textclassification")
# Put the relocated package directory on sys.path; without this the tc*
# modules imported below cannot be resolved.
sys.path.append(path_sys)
print(path_root)
# os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
from tcPredict import TextClassificationPredict
Expand Down

0 comments on commit acb5cdb

Please sign in to comment.