@ -27,18 +27,33 @@ namespace Emby.Server.Implementations.TextEncoding
return Encoding . ASCII ;
}
private Encoding GetInitialEncoding ( byte [ ] buffer )
private Encoding GetInitialEncoding ( byte [ ] buffer , int count )
{
if ( buffer [ 0 ] = = 0xef & & buffer [ 1 ] = = 0xbb & & buffer [ 2 ] = = 0xbf )
return Encoding . UTF8 ;
if ( buffer [ 0 ] = = 0xfe & & buffer [ 1 ] = = 0xff )
return Encoding . Unicode ;
if ( buffer [ 0 ] = = 0 & & buffer [ 1 ] = = 0 & & buffer [ 2 ] = = 0xfe & & buffer [ 3 ] = = 0xff )
return Encoding . UTF32 ;
if ( buffer [ 0 ] = = 0x2b & & buffer [ 1 ] = = 0x2f & & buffer [ 2 ] = = 0x76 )
return Encoding . UTF7 ;
if ( count > = 3 )
{
if ( buffer [ 0 ] = = 0xef & & buffer [ 1 ] = = 0xbb & & buffer [ 2 ] = = 0xbf )
return Encoding . UTF8 ;
}
if ( count > = 2 )
{
if ( buffer [ 0 ] = = 0xfe & & buffer [ 1 ] = = 0xff )
return Encoding . Unicode ;
}
if ( count > = 4 )
{
if ( buffer [ 0 ] = = 0 & & buffer [ 1 ] = = 0 & & buffer [ 2 ] = = 0xfe & & buffer [ 3 ] = = 0xff )
return Encoding . UTF32 ;
}
var result = new TextEncodingDetect ( ) . DetectEncoding ( buffer , buffer . Length ) ;
if ( count > = 3 )
{
if ( buffer [ 0 ] = = 0x2b & & buffer [ 1 ] = = 0x2f & & buffer [ 2 ] = = 0x76 )
return Encoding . UTF7 ;
}
var result = new TextEncodingDetect ( ) . DetectEncoding ( buffer , count ) ;
switch ( result )
{
@ -64,9 +79,11 @@ namespace Emby.Server.Implementations.TextEncoding
}
private bool _langDetectInitialized ;
public string GetDetectedEncodingName ( byte [ ] bytes , string language , bool enableLanguageDetection )
public string GetDetectedEncodingName ( byte [ ] bytes , int count , string language , bool enableLanguageDetection )
{
var encoding = GetInitialEncoding ( bytes ) ;
var index = 0 ;
var encoding = GetInitialEncoding ( bytes , count ) ;
if ( encoding ! = null & & encoding . Equals ( Encoding . UTF8 ) )
{
@ -81,7 +98,7 @@ namespace Emby.Server.Implementations.TextEncoding
LanguageDetector . Initialize ( _json ) ;
}
language = DetectLanguage ( bytes );
language = DetectLanguage ( bytes , index , count );
if ( ! string . IsNullOrWhiteSpace ( language ) )
{
@ -89,7 +106,7 @@ namespace Emby.Server.Implementations.TextEncoding
}
}
var charset = DetectCharset ( bytes , language) ;
var charset = DetectCharset ( bytes , index, count , language) ;
if ( ! string . IsNullOrWhiteSpace ( charset ) )
{
@ -112,11 +129,11 @@ namespace Emby.Server.Implementations.TextEncoding
return null ;
}
private string DetectLanguage ( byte [ ] bytes )
private string DetectLanguage ( byte [ ] bytes , int index , int count )
{
try
{
return LanguageDetector . DetectLanguage ( Encoding . UTF8 . GetString ( bytes )) ;
return LanguageDetector . DetectLanguage ( Encoding . UTF8 . GetString ( bytes , index , count )) ;
}
catch ( NLangDetectException ex )
{
@ -124,7 +141,7 @@ namespace Emby.Server.Implementations.TextEncoding
try
{
return LanguageDetector . DetectLanguage ( Encoding . ASCII . GetString ( bytes )) ;
return LanguageDetector . DetectLanguage ( Encoding . ASCII . GetString ( bytes , index , count )) ;
}
catch ( NLangDetectException ex )
{
@ -132,7 +149,7 @@ namespace Emby.Server.Implementations.TextEncoding
try
{
return LanguageDetector . DetectLanguage ( Encoding . Unicode . GetString ( bytes )) ;
return LanguageDetector . DetectLanguage ( Encoding . Unicode . GetString ( bytes , index , count )) ;
}
catch ( NLangDetectException ex )
{
@ -163,9 +180,9 @@ namespace Emby.Server.Implementations.TextEncoding
}
}
// Detects the encoding of a byte buffer: obtains a charset name from
// GetDetectedEncodingName and converts it with GetEncodingFromCharset.
//
// NOTE(review): this span is diff residue, so the signature and the first
// statement each appear twice. The variants that carry `size` look like the
// added (post-change) lines, since they match the four-argument
// GetDetectedEncodingName overload; the variants without it look like the
// removed lines - confirm against the applied file.
//
// bytes: buffer holding the data to inspect.
// size: forwarded unchanged as the second argument of GetDetectedEncodingName;
//       presumably the number of valid bytes in `bytes` - TODO confirm with callers.
// language / enableLanguageDetection: forwarded unchanged to GetDetectedEncodingName.
public Encoding GetDetectedEncoding ( byte [ ] bytes , string language , bool enableLanguageDetection )
public Encoding GetDetectedEncoding ( byte [ ] bytes , int size , string language , bool enableLanguageDetection )
{
// Resolve the charset name first; the result is passed straight through
// without any check in this method.
var charset = GetDetectedEncodingName ( bytes , language, enableLanguageDetection ) ;
var charset = GetDetectedEncodingName ( bytes , size, language, enableLanguageDetection ) ;
// Map the charset name to an Encoding instance.
return GetEncodingFromCharset ( charset ) ;
}
@ -225,10 +242,10 @@ namespace Emby.Server.Implementations.TextEncoding
}
}
private string DetectCharset ( byte [ ] bytes , string language )
private string DetectCharset ( byte [ ] bytes , int index , int count , string language )
{
var detector = new CharsetDetector ( ) ;
detector . Feed ( bytes , 0 , bytes . Length ) ;
detector . Feed ( bytes , index , count ) ;
detector . DataEnd ( ) ;
var charset = detector . Charset ;