Skip to content

Commit

Permalink
Merge pull request #2591 from MediaBrowser/beta
Browse files Browse the repository at this point in the history
Beta
  • Loading branch information
LukePulverenti authored Apr 21, 2017
2 parents 4b695bf + 30ad22a commit cd78f7b
Show file tree
Hide file tree
Showing 318 changed files with 13,786 additions and 2,441 deletions.
5 changes: 5 additions & 0 deletions Emby.Common.Implementations/BaseApplicationHost.cs
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,11 @@ public string SystemId
}
}

/// <summary>
/// Gets the package version class reported as the system update level.
/// This base implementation always reports <c>PackageVersionClass.Release</c>;
/// the property is virtual so derived hosts can report something else.
/// </summary>
public virtual PackageVersionClass SystemUpdateLevel
{
    get
    {
        var level = PackageVersionClass.Release;
        return level;
    }
}

public virtual string OperatingSystemDisplayName
{
get { return EnvironmentInfo.OperatingSystemName; }
Expand Down
170 changes: 159 additions & 11 deletions Emby.Common.Implementations/TextEncoding/TextEncoding.cs
Original file line number Diff line number Diff line change
@@ -1,33 +1,34 @@
using System.Text;
using System;
using System.Text;
using MediaBrowser.Model.IO;
using MediaBrowser.Model.Text;
using System.IO;
using System.Threading;
using System.Threading.Tasks;
using MediaBrowser.Model.MediaInfo;
using MediaBrowser.Model.Logging;
using UniversalDetector;

namespace Emby.Common.Implementations.TextEncoding
{
public class TextEncoding : ITextEncoding
{
// File system abstraction handed in through the constructor.
private readonly IFileSystem _fileSystem;
// Logger handed in through the constructor; used for debug output when resolving charset names.
private readonly ILogger _logger;

public TextEncoding(IFileSystem fileSystem)
/// <summary>
/// Creates the text encoding service and stores the supplied collaborators.
/// </summary>
/// <param name="fileSystem">File system abstraction kept in <c>_fileSystem</c>.</param>
/// <param name="logger">Logger kept in <c>_logger</c>.</param>
public TextEncoding(IFileSystem fileSystem, ILogger logger)
{
    this._logger = logger;
    this._fileSystem = fileSystem;
}

/// <summary>
/// Returns the framework's shared ASCII encoding instance.
/// </summary>
/// <returns><see cref="Encoding.ASCII"/>.</returns>
public Encoding GetASCIIEncoding()
{
    Encoding ascii = Encoding.ASCII;
    return ascii;
}

public Encoding GetFileEncoding(string srcFile)
private Encoding GetInitialEncoding(byte[] buffer)
{
// *** Detect byte order mark if any - otherwise assume default
var buffer = new byte[5];

using (var file = _fileSystem.OpenRead(srcFile))
{
file.Read(buffer, 0, 5);
}

if (buffer[0] == 0xef && buffer[1] == 0xbb && buffer[2] == 0xbf)
return Encoding.UTF8;
if (buffer[0] == 0xfe && buffer[1] == 0xff)
Expand All @@ -37,7 +38,154 @@ public Encoding GetFileEncoding(string srcFile)
if (buffer[0] == 0x2b && buffer[1] == 0x2f && buffer[2] == 0x76)
return Encoding.UTF7;

var result = new TextEncodingDetect().DetectEncoding(buffer, buffer.Length);

switch (result)
{
case TextEncodingDetect.CharacterEncoding.Ansi:
return Encoding.ASCII;
case TextEncodingDetect.CharacterEncoding.Ascii:
return Encoding.ASCII;
case TextEncodingDetect.CharacterEncoding.Utf16BeBom:
return Encoding.UTF32;
case TextEncodingDetect.CharacterEncoding.Utf16BeNoBom:
return Encoding.UTF32;
case TextEncodingDetect.CharacterEncoding.Utf16LeBom:
return Encoding.UTF32;
case TextEncodingDetect.CharacterEncoding.Utf16LeNoBom:
return Encoding.UTF32;
case TextEncodingDetect.CharacterEncoding.Utf8Bom:
return Encoding.UTF8;
case TextEncodingDetect.CharacterEncoding.Utf8Nobom:
return Encoding.UTF8;
default:
return null;
}
}

/// <summary>
/// Works out a charset name for the given bytes, combining the initial encoding check,
/// the charset detector and, as a last resort, a per-language default.
/// </summary>
/// <param name="bytes">Raw content to inspect.</param>
/// <param name="language">Optional language code used as a fallback hint; may be null or blank.</param>
/// <returns>
/// "utf-8" when either check reports UTF-8; the detected charset when it is anything other than
/// "windows-1252"; otherwise the language-based charset, or null when no language was supplied.
/// </returns>
public string GetDetectedEncodingName(byte[] bytes, string language)
{
    // A UTF-8 verdict from the initial check is accepted without consulting the detector.
    var initial = GetInitialEncoding(bytes);
    if (initial != null && initial.Equals(Encoding.UTF8))
    {
        return "utf-8";
    }

    var detected = DetectCharset(bytes, language);
    var hasDetected = !string.IsNullOrWhiteSpace(detected);

    if (hasDetected && string.Equals(detected, "utf-8", StringComparison.OrdinalIgnoreCase))
    {
        return "utf-8";
    }

    // A "windows-1252" result is not returned directly; the language hint decides instead.
    if (hasDetected && !string.Equals(detected, "windows-1252", StringComparison.OrdinalIgnoreCase))
    {
        return detected;
    }

    if (string.IsNullOrWhiteSpace(language))
    {
        return null;
    }

    return GetFileCharacterSetFromLanguage(language);
}

/// <summary>
/// Resolves a charset name to an <see cref="Encoding"/> instance.
/// </summary>
/// <param name="charset">Charset name to look up; must not be null, empty or whitespace.</param>
/// <returns>The encoding returned by <see cref="Encoding.GetEncoding(string)"/>.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="charset"/> is null or blank.</exception>
/// <exception cref="ArgumentException">
/// Propagated from the second lookup when the name is not recognised even with its hyphens removed.
/// </exception>
public Encoding GetEncodingFromCharset(string charset)
{
    if (string.IsNullOrWhiteSpace(charset))
    {
        throw new ArgumentNullException("charset");
    }

    _logger.Debug("Getting encoding object for character set: {0}", charset);

    Encoding resolved;

    try
    {
        resolved = Encoding.GetEncoding(charset);
    }
    catch (ArgumentException)
    {
        // Retry once with the hyphens stripped from the name.
        var withoutHyphens = charset.Replace("-", string.Empty);
        _logger.Debug("Getting encoding object for character set: {0}", withoutHyphens);

        resolved = Encoding.GetEncoding(withoutHyphens);
    }

    return resolved;
}

/// <summary>
/// Detects the charset of the given bytes and returns the matching <see cref="Encoding"/>.
/// </summary>
/// <param name="bytes">Raw content to inspect.</param>
/// <param name="language">Optional language code used as a fallback hint.</param>
/// <returns>The encoding resolved from the detected charset name.</returns>
/// <exception cref="ArgumentNullException">
/// Thrown by the charset lookup when no charset name could be determined.
/// </exception>
public Encoding GetDetectedEncoding(byte[] bytes, string language)
{
    return GetEncodingFromCharset(GetDetectedEncodingName(bytes, language));
}

/// <summary>
/// Maps a three-letter language code to the charset name assumed for text in that language.
/// </summary>
/// <param name="language">Language code such as "rus" or "tur"; matched case-insensitively. Must not be null.</param>
/// <returns>The charset name for the language, or "windows-1252" when the code is not listed.</returns>
private string GetFileCharacterSetFromLanguage(string language)
{
    // https://developer.xamarin.com/api/type/System.Text.Encoding/

    // Lower-case with the invariant culture: a culture-sensitive ToLower() turns 'I' into a
    // dotless 'ı' under Turkish cultures, so codes such as "VIE" or "SQI" would miss their case.
    switch (language.ToLowerInvariant())
    {
        // NOTE(review): Hungarian is usually associated with windows-1250; the original mapping
        // is kept here to preserve behavior - confirm whether this is intentional.
        case "hun":
            return "windows-1252";
        case "pol":
        case "cze":
        case "ces":
        case "slo":
        case "slk":
        case "slv":
        case "srp":
        case "hrv":
        case "rum":
        case "ron":
        case "rup":
        case "alb":
        case "sqi":
            return "windows-1250";
        case "ara":
            return "windows-1256";
        case "heb":
            return "windows-1255";
        case "grc":
        case "gre":
            return "windows-1253";
        case "crh":
        case "ota":
        case "tur":
            return "windows-1254";
        case "rus":
            return "windows-1251";
        case "vie":
            return "windows-1258";
        case "kor":
            return "cp949";
        default:
            return "windows-1252";
    }
}

/// <summary>
/// Feeds the bytes to a <c>CharsetDetector</c> and returns the charset name it reports.
/// </summary>
/// <param name="bytes">Raw content to analyse; the whole array is fed to the detector.</param>
/// <param name="language">
/// Optional language hint; only used to decide whether an "x-mac-cyrillic" result is discarded.
/// </param>
/// <returns>
/// The detector's charset value, or null when it reports "x-mac-cyrillic" and a language hint is available.
/// </returns>
private string DetectCharset(byte[] bytes, string language)
{
    var sniffer = new CharsetDetector();
    sniffer.Feed(bytes, 0, bytes.Length);
    sniffer.DataEnd();

    var detected = sniffer.Charset;

    // "x-mac-cyrillic" is often detected incorrectly. When a language hint exists, drop the
    // result so the caller can fall back to other techniques instead.
    var isSuspect = string.Equals("x-mac-cyrillic", detected, StringComparison.OrdinalIgnoreCase);
    if (isSuspect && !string.IsNullOrWhiteSpace(language))
    {
        return null;
    }

    return detected;
}
}
}
Loading

0 comments on commit cd78f7b

Please sign in to comment.