Skip to content

Commit

Permalink
fix: 拼音转换错误:芃 #13
Browse files Browse the repository at this point in the history
  • Loading branch information
toolgood committed Jun 6, 2018
1 parent b8c291a commit bba3a1c
Show file tree
Hide file tree
Showing 4 changed files with 85 additions and 40 deletions.
110 changes: 76 additions & 34 deletions ToolGood.PinYin.Build/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,31 +24,47 @@ static void buildPinYinIndex()
buildPinYinIndex(0x4e00, 0x9fcb, "pyIndex.txt", "pyData.txt");
buildPinYinIndex(0x3400, 0x4DB5, "pyIndex2.txt", "pyData2.txt");
}
static void buildPinYinIndex(int start,int end,string outIndexFile,string outDataFile)
static void buildPinYinIndex(int start, int end, string outIndexFile, string outDataFile)
{
var index = start;
List<HashSet<int>> t = new List<HashSet<int>>();
while (index <= end) {
while (index <= end)
{
var ch = (char)index;

HashSet<int> ls = new HashSet<int>();
var p1 = getPinYin(ch);
if (p1 >= 0) ls.Add(p1);
var p1 = getPinYin(ch);// This lookup returns the most commonly used pinyin; it can be wrong here, e.g. for the character 芃

getPinYin2(ch, ls);
HashSet<int> ls2 = new HashSet<int>();
getPinYin2(ch, ls2);
if (ls2.Count>0)
{
if (ls2.Contains(p1))
{
ls.Add(p1);
}
foreach (var item in ls2)
{
ls.Add(item);
}
}


if (ls.Count == 0) {
if (ls.Count == 0)
{
char c;
Dict.TraditionalToSimplified(ch, out c);
getPinYin2(c, ls);
}

if (ls.Count == 0) {
if (ls.Count == 0)
{
var py3 = GetPyName2(ch.ToString());
if (py3 > 0) ls.Add(py3);
if (ch == '刓') ls.Add(GetPyName("Liang"));
}
if (ls.Count == 0) {
if (ls.Count == 0)
{
getPinYin3(ch, ls);
}

Expand All @@ -59,24 +75,31 @@ static void buildPinYinIndex(int start,int end,string outIndexFile,string outDat
List<short> node = new List<short>();
//StringBuilder node = new StringBuilder();
StringBuilder sb = new StringBuilder();
for (int i = 0; i < t.Count; i++) {
for (int i = 0; i < t.Count; i++)
{
var item = t[i];
//node.Append(",");
if (item.Count == 0) {
if (item.Count == 0)
{
node.Add(-1);
} else {
}
else
{
node.Add((short)index);
index += item.Count;
foreach (var pyNum in item) {
foreach (var pyNum in item)
{
sb.Append(",");
sb.Append(pyNum);
}
}
}
//index++;
node.Add((short)index);
for (int i = node.Count - 1; i >= 0; i--) {
if (node[i] == -1) {
for (int i = node.Count - 1; i >= 0; i--)
{
if (node[i] == -1)
{
node[i] = node[i + 1];
}

Expand All @@ -88,42 +111,53 @@ static void buildPinYinIndex(int start,int end,string outIndexFile,string outDat

/// <summary>
/// Asks <c>PinYinConverter</c> for the pinyin of a single character and maps it to
/// its index via <c>GetPyName</c>.
/// </summary>
/// <param name="ch">Character to look up.</param>
/// <returns>
/// The index reported by <c>GetPyName</c> when it is strictly positive; otherwise -1.
/// -1 is also returned when the converter echoes the input back unchanged or when
/// any exception is raised during the lookup.
/// </returns>
static int getPinYin(char ch)
{
    try
    {
        var text = ch.ToString();
        var converted = PinYinConverter.Get(text);

        // An unchanged result means the converter had no pinyin for this character.
        if (converted == text)
        {
            return -1;
        }

        var position = GetPyName(converted);
        // Only strictly positive indexes are accepted, exactly as before.
        return position > 0 ? position : -1;
    }
    catch (Exception)
    {
        // Best-effort lookup: any failure is reported as "not found".
        return -1;
    }
}
/// <summary>
/// Collects every reading that <c>ChineseChar</c> reports for <paramref name="ch"/>
/// and adds the corresponding <c>GetPyName</c> index to <paramref name="ls"/>.
/// </summary>
/// <param name="ch">Character to look up.</param>
/// <param name="ls">Set that receives the pinyin indexes; it is only ever added to.</param>
/// <remarks>
/// Processing stops at the first reading that <c>GetPyName</c> cannot resolve (-1);
/// indexes added before that point stay in the set. Any exception raised during the
/// lookup is swallowed, which also leaves earlier additions in place.
/// </remarks>
static void getPinYin2(char ch, HashSet<int> ls)
{
    try
    {
        var info = new ChineseChar(ch);
        var total = info.PinyinCount;
        var n = 0;
        while (n < total)
        {
            // The original left a "YAI" -> "YA" replacement commented out on this value.
            var position = GetPyName(info.Pinyins[n]);
            if (position == -1)
            {
                // Unknown reading: abandon the remaining readings.
                return;
            }
            ls.Add(position);
            n++;
        }
    }
    catch (Exception)
    {
        // Best-effort lookup: failures simply end the collection.
    }
}
/// <summary>
/// Scans the text file <c>_pyyin.txt</c> for lines whose first character is
/// <paramref name="ch"/> and adds the <c>GetPyName</c> index of every pinyin listed
/// on those lines to <paramref name="ls"/>.
/// </summary>
/// <param name="ch">Character to look up.</param>
/// <param name="ls">Set that receives the pinyin indexes.</param>
/// <remarks>
/// Each line is trimmed and split on ':'; field 0 is compared by its first character,
/// and fields 1..n are passed to <c>GetPyName</c>. Blank lines are skipped instead of
/// throwing an <see cref="IndexOutOfRangeException"/> on the empty first field.
/// </remarks>
static void getPinYin3(char ch, HashSet<int> ls)
{
    var texts = File.ReadAllLines("_pyyin.txt");
    foreach (var text in texts)
    {
        var sp = text.Trim().Split(':');

        // A blank line splits into a single empty field; indexing sp[0][0] would throw.
        if (sp[0].Length == 0 || sp[0][0] != ch)
        {
            continue;
        }

        for (int i = 1; i < sp.Length; i++)
        {
            ls.Add(GetPyName(sp[i]));
        }
    }
}
Expand All @@ -134,11 +168,14 @@ static void getPinYin3(char ch, HashSet<int> ls)
static void buildPinYinDict()
{
var texts = File.ReadAllLines("_pyyin.txt");
foreach (var text in texts) {
foreach (var text in texts)
{
var sp = text.Trim().Split(':');
for (int i = 1; i < sp.Length; i++) {
for (int i = 1; i < sp.Length; i++)
{
var py = GetPy(sp[i]);
if (pyName.Contains(py) == false) {
if (pyName.Contains(py) == false)
{
pyName.Add(py);
}

Expand All @@ -156,7 +193,8 @@ private static string GetPy(string name)
.Replace("5", "").Replace("6", "").Replace("7", "").Replace("8", "").Replace("9", "");

name = name.ToUpper();
if (name.Length > 1) {
if (name.Length > 1)
{
name = name[0] + name.Substring(1).ToLower();
}
return name;
Expand Down Expand Up @@ -214,12 +252,14 @@ private static string GetPy(string name)

private static Dictionary<string, string> getDict()
{
if (_dict == null) {
if (_dict == null)
{
var dict2 = "诘|Ji|揲|Ye|棓|Bei|足|Ju|栟|Ben|咯|Luo|迹|Gui|欻|Chua|耨|Nou|埏|Yan|囋|Can|噭|Chi|案|Wan|燝|Zhu|膻|Dan|汝|Zhuang|艹|Ao|磹|Tan|厖|Pang|观|Guang|窾|Kua|搂|Sou|继|Xu|房|Pang|黮|Shen|愬|Shuo|矜|Guan|盻|Pan|射|Ye|景|Ying|潠|Xun|蓧|Di|黈|Tou|从|Zong|洞|Tong|譳|Rou|鸊|Pi|桁|Hang|槱|Chao|被|Pi|擘|Bai|岂|Kai|铦|Kuo|瑱|Zhen|囝|Nan|嬛|Huan|乐|Lao|崚|Leng|蹻|Jue|浰|Li|摵|Se|梴|Yan|嶰|Jie|谌|Shen|撍|Qian|穞|Lu|黾|Meng|隩|Ao|刓|Liang|墄|Qi|擿|Zhe|能|Nan|居|Ji|及|Xi|揭|Qi|吾|Yu|扐|Cai|刓|Shu|啜|Shu|晻|Yan|兼|Xian|忒|Tei|痁|Dian|莫|Mu|宕|Tan|摘|Ti|灒|Cuan|什|Za|适|Di|逤|Suo|螫|Zhe|伈|Xin|扢|Jie|花|Hu|么|Mo|餧|Si|箐|Jing|禜|Ying|庳|Bei|硾|Chui|燋|Zhuo|棽|Shen|濊|Hun|泽|Shi|漱|Shou|摄|Nie|耆|Shi";
dict2 += "|㘄|Leng|䉄|Leng|䬋|Leng|䮚|Leng|䚏|Leng|䚏|Li|䚏|Lin|㭁|Reng|䖆|Niang";
var sp = dict2.Split('|');
_dict = new Dictionary<string, string>();
for (int i = 0; i < sp.Length; i += 2) {
for (int i = 0; i < sp.Length; i += 2)
{
_dict[sp[i]] = sp[i + 1];
}
}
Expand All @@ -230,7 +270,8 @@ private static int GetPyName2(string key)
{
var dict = getDict();
string py;
if (dict.TryGetValue(key, out py)) {
if (dict.TryGetValue(key, out py))
{
return pyName.IndexOf(py);
}
return -1;
Expand All @@ -242,7 +283,8 @@ private static int GetPyName(string name)
name = name.Replace("0", "").Replace("1", "").Replace("2", "").Replace("3", "").Replace("4", "")
.Replace("5", "").Replace("6", "").Replace("7", "").Replace("8", "").Replace("9", "");
name = name.ToUpper();
if (name.Length > 1) {
if (name.Length > 1)
{
name = name[0] + name.Substring(1).ToLower();
}
return pyName.IndexOf(name);
Expand Down
5 changes: 4 additions & 1 deletion ToolGood.Words.Test/WordHelper/WordHelperTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ class WordHelperTest
[Test]
public void GetPinYin()
{
var t = WordsHelper.GetAllPinYin('芃');
Assert.AreEqual("Peng", t[0]);

var a = WordsHelper.GetPinYinFast("");
Assert.AreEqual("A", a);

Expand All @@ -23,7 +26,7 @@ public void GetPinYin()
b = WordsHelper.GetPinYin("秘鲁");
Assert.AreEqual("BiLu", b);



var py = WordsHelper.GetPinYinFast("我爱中国");
Assert.AreEqual("WoAiZhongGuo", py);
Expand Down
6 changes: 3 additions & 3 deletions ToolGood.Words/ToolGood.Words.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@
<RootNamespace>ToolGood.Words</RootNamespace>
<GeneratePackageOnBuild>true</GeneratePackageOnBuild>
<PackageRequireLicenseAcceptance>true</PackageRequireLicenseAcceptance>
<FileVersion>2.3.3.1</FileVersion>
<Version>2.3.3.1</Version>
<FileVersion>2.3.3.2</FileVersion>
<Version>2.3.3.2</Version>
<PackageReleaseNotes></PackageReleaseNotes>
<AssemblyVersion>2.3.3.1</AssemblyVersion>
<AssemblyVersion>2.3.3.2</AssemblyVersion>
</PropertyGroup>

<ItemGroup Condition=" '$(TargetFramework)' == 'net40' ">
Expand Down
4 changes: 2 additions & 2 deletions ToolGood.Words/internals/PinYinDict.cs

Large diffs are not rendered by default.

0 comments on commit bba3a1c

Please sign in to comment.