using System;
using System.Collections.Generic;
using System.Collections.Specialized;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Security.Cryptography;
using System.Text;
using System.Text.RegularExpressions;
using AngleSharp.Dom;
using AngleSharp.Html;
using NzbDrone.Common.Extensions;
namespace NzbDrone.Core.Parser
{
public static class StringUtil
{
// Matches a run of one or more whitespace characters or separator characters (. , _ - = ' |).
private static readonly Regex WordDelimiterRegex = new Regex(@"(\s|\.|,|_|-|=|'|\|)+", RegexOptions.Compiled);
// Matches a run of one or more of the characters & : \ /
private static readonly Regex SpecialCharRegex = new Regex(@"(\&|\:|\\|\/)+", RegexOptions.Compiled);
// Matches any single character that is neither a word character nor whitespace.
private static readonly Regex PunctuationRegex = new Regex(@"[^\w\s]", RegexOptions.Compiled);
// Matches the whole words a, an, the, and, or, of (case-insensitive) plus at most one trailing whitespace character.
private static readonly Regex CommonWordRegex = new Regex(@"\b(a|an|the|and|or|of)\b\s?", RegexOptions.IgnoreCase | RegexOptions.Compiled);
// Matches two or more consecutive whitespace characters.
private static readonly Regex DuplicateSpacesRegex = new Regex(@"\s{2,}", RegexOptions.Compiled);
/// <summary>
/// Removes or substitutes characters that are problematic in file names.
/// </summary>
/// <param name="name">The raw name to clean.</param>
/// <param name="replace">
/// When true, each problematic character is swapped for its substitute (which may be empty);
/// when false, every problematic character is simply removed.
/// </param>
/// <returns>The cleaned name with leading spaces/dots and trailing spaces trimmed.</returns>
public static string CleanFileName(string name, bool replace = true)
{
    // Each row pairs a problematic character with the text used in its place when replacing.
    var substitutions = new[,]
    {
        { "\\", "+" },
        { "/", "+" },
        { "<", "" },
        { ">", "" },
        { "?", "!" },
        { "*", "-" },
        { ":", "-" },
        { "|", "" },
        { "\"", "" }
    };

    // A colon followed by a space reads better as " - " than as a bare dash.
    var cleaned = replace ? name.Replace(": ", " - ") : name;

    for (var row = 0; row < substitutions.GetLength(0); row++)
    {
        var substitute = replace ? substitutions[row, 1] : string.Empty;
        cleaned = cleaned.Replace(substitutions[row, 0], substitute);
    }

    return cleaned.TrimStart(' ', '.').TrimEnd(' ');
}
/// <summary>
/// Replaces every character that is not an ASCII letter, ASCII digit, space or hyphen.
/// </summary>
/// <param name="str">The text to filter.</param>
/// <param name="replacement">Text substituted for each disallowed character; empty by default.</param>
/// <returns>The filtered text.</returns>
public static string StripNonAlphaNumeric(this string str, string replacement = "")
{
    const string disallowedPattern = "[^a-zA-Z0-9 -]";
    return Regex.Replace(str, disallowedPattern, replacement);
}
/// <summary>
/// Replaces every match of a regular expression in a string.
/// </summary>
/// <param name="str">The text to process.</param>
/// <param name="regex">The regular expression pattern to match.</param>
/// <param name="replacement">Text substituted for each match; empty by default.</param>
/// <returns>The text with all matches replaced.</returns>
public static string StripRegex(string str, string regex, string replacement = "") =>
    new Regex(regex).Replace(str, replacement);
/// <summary>
/// Replaces accented characters with their base characters (e.g. è becomes e) by decomposing
/// the text (Unicode form D) and dropping every non-spacing mark.
/// </summary>
/// <param name="s">The text to process.</param>
/// <returns>The decomposed text without non-spacing marks.</returns>
public static string RemoveDiacritics(string s)
{
    var baseChars = s.Normalize(NormalizationForm.FormD)
        .Where(ch => CharUnicodeInfo.GetUnicodeCategory(ch) != UnicodeCategory.NonSpacingMark)
        .ToArray();

    return new string(baseChars);
}
/// <summary>
/// Decodes a Base64 string and interprets the resulting bytes as UTF-8 text.
/// </summary>
/// <param name="str">The Base64-encoded text.</param>
/// <returns>The decoded string.</returns>
public static string FromBase64(string str)
{
    var decodedBytes = Convert.FromBase64String(str);
    return Encoding.UTF8.GetString(decodedBytes);
}
/// <summary>
/// Convert an array of bytes to a string of hex digits
/// </summary>
/// <param name="bytes">array of bytes</param>
/// <returns>String of upper-case hex digits, two per byte</returns>
public static string HexStringFromBytes(byte[] bytes)
{
    var hex = bytes.Aggregate(
        new StringBuilder(),
        (builder, value) => builder.Append(value.ToString("X2")));

    return hex.ToString();
}
/// <summary>
/// Compute the SHA-1 hash of a string encoded as UTF8
/// </summary>
/// <param name="s">String to be hashed</param>
/// <returns>40-character hex string</returns>
public static string HashSHA1(string s)
{
    // The hash algorithm instance is disposable; release it as soon as the digest is computed.
    using (var sha1 = SHA1.Create())
    {
        var hashBytes = sha1.ComputeHash(Encoding.UTF8.GetBytes(s));
        return HexStringFromBytes(hashBytes);
    }
}
/// <summary>
/// Compute the MD5 hash of a string encoded as ASCII
/// </summary>
/// <param name="s">String to be hashed</param>
/// <returns>32-character hex string</returns>
public static string Hash(string s)
{
    // The hash algorithm instance is disposable; release it as soon as the digest is computed.
    using (var md5 = MD5.Create())
    {
        // NOTE(review): ASCII encoding maps every non-ASCII character to '?', so inputs that differ
        // only in such characters produce the same hash. Kept as-is so existing hash values stay stable.
        var inputBytes = Encoding.ASCII.GetBytes(s);
        var hashBytes = md5.ComputeHash(inputBytes);
        return HexStringFromBytes(hashBytes);
    }
}
// Is never used
// remove in favor of Exception.ToString() ?
/// <summary>
/// Lists the public properties of an exception as "Name = Value" lines.
/// </summary>
/// <param name="exception">The exception to describe.</param>
/// <returns>One line per public property, joined with "\n"; null values are rendered as empty text.</returns>
public static string GetExceptionDetails(this Exception exception)
{
    var lines = new List<string>();

    foreach (var property in exception.GetType().GetProperties())
    {
        var value = property.GetValue(exception, null);
        var text = value != null ? value.ToString() : string.Empty;
        lines.Add(string.Format("{0} = {1}", property.Name, text));
    }

    return string.Join("\n", lines);
}
// Cached result of Path.GetInvalidFileNameChars(), filled on first use by MakeValidFileName.
private static char[] MakeValidFileName_invalids;

/// <summary>
/// Replaces characters in <paramref name="text"/> that are not allowed in
/// file names with the specified replacement character.
/// </summary>
/// <param name="text">Text to make into a valid filename. The same string is returned if it is valid already.</param>
/// <param name="replacement">Replacement character, or null to simply remove bad characters.</param>
/// <param name="fancy">
/// Whether to replace quotes and slashes with the non-ASCII characters ” and ⁄.
/// This only applies to characters that Path.GetInvalidFileNameChars() reports as invalid,
/// and it takes effect even when <paramref name="replacement"/> is null.
/// </param>
/// <returns>A string that can be used as a filename. If the output string would otherwise be empty, returns "_".</returns>
public static string MakeValidFileName(string text, char? replacement = '_', bool fancy = true)
{
    var output = new StringBuilder(text.Length);

    // Look up the platform's invalid characters once and reuse them on later calls.
    var forbidden = MakeValidFileName_invalids;
    if (forbidden == null)
    {
        forbidden = Path.GetInvalidFileNameChars();
        MakeValidFileName_invalids = forbidden;
    }

    var modified = false;

    foreach (var ch in text)
    {
        if (Array.IndexOf(forbidden, ch) < 0)
        {
            output.Append(ch);
            continue;
        }

        modified = true;

        // '\0' doubles as the "drop this character" marker.
        var substitute = replacement.GetValueOrDefault();
        if (fancy)
        {
            switch (ch)
            {
                case '"':
                    substitute = '”'; // U+201D right double quotation mark
                    break;
                case '\'':
                    substitute = '’'; // U+2019 right single quotation mark
                    break;
                case '/':
                    substitute = '⁄'; // U+2044 fraction slash
                    break;
            }
        }

        if (substitute != '\0')
        {
            output.Append(substitute);
        }
    }

    if (output.Length == 0)
    {
        return "_";
    }

    return modified ? output.ToString() : text;
}
/// <summary>
/// Converts a NameValueCollection to an appropriately formatted query string.
/// Duplicate keys are allowed in a NameValueCollection, but are stored as a csv string in Value.
/// This function handles leaving the values together in the csv string or splitting the value into separate keys
/// </summary>
/// <param name="collection">The NameValueCollection being converted</param>
/// <param name="encoding">The Encoding to use in url encoding Value</param>
/// <param name="duplicateKeysIfMulti">Duplicate keys are handled as true => {"Key=Val1", "Key=Val2"} or false => {"Key=Val1,Val2"}</param>
/// <param name="separator">The string used to separate each query value</param>
/// <returns>A web encoded string of key=value parameters separated by the separator</returns>
public static string GetQueryString(this NameValueCollection collection,
    Encoding encoding = null,
    bool duplicateKeysIfMulti = false,
    string separator = "&")
{
    var pairs = collection.ToEnumerable(duplicateKeysIfMulti);
    return pairs.GetQueryString(encoding, separator);
}
/// <summary>
/// Formats a sequence of key/value pairs as a query string of key=value entries.
/// Only the value is passed through UrlEncode; the key is written as-is.
/// </summary>
/// <param name="collection">The key/value pairs to format</param>
/// <param name="encoding">The Encoding to use in url encoding Value; UTF-8 when null</param>
/// <param name="separator">The string used to separate each query value</param>
/// <returns>The key=value entries joined by the separator</returns>
public static string GetQueryString(this IEnumerable<KeyValuePair<string, string>> collection,
    Encoding encoding = null,
    string separator = "&")
{
    // Resolve the default once instead of once per pair.
    var effectiveEncoding = encoding ?? Encoding.UTF8;

    return string.Join(separator,
        collection.Select(a => $"{a.Key}={a.Value.UrlEncode(effectiveEncoding)}"));
}
/// <summary>
/// Appends a key/value pair to a collection of string pairs without the caller having to
/// construct the KeyValuePair itself.
/// </summary>
/// <param name="collection">The collection to append to</param>
/// <param name="key">The key of the new pair</param>
/// <param name="value">The value of the new pair</param>
public static void Add(this ICollection<KeyValuePair<string, string>> collection, string key, string value) =>
    collection.Add(new KeyValuePair<string, string>(key, value));
/// <summary>
/// Enumerates a NameValueCollection as key/value pairs.
/// </summary>
/// <param name="collection">The NameValueCollection to enumerate</param>
/// <param name="duplicateKeysIfMulti">
/// When true, each value is split on ',' and one pair is produced per part;
/// when false, one pair is produced per key with the value as stored.
/// </param>
/// <returns>A lazily evaluated sequence of key/value pairs</returns>
public static IEnumerable<KeyValuePair<string, string>> ToEnumerable(
    this NameValueCollection collection, bool duplicateKeysIfMulti = false)
{
    foreach (string key in collection.Keys)
    {
        var value = collection[key];

        // A key stored without any value reads back as null; there is nothing to split,
        // so it is emitted as a single pair exactly like in the non-splitting mode.
        if (duplicateKeysIfMulti && value != null)
        {
            foreach (var val in value.Split(','))
            {
                yield return new KeyValuePair<string, string>(key, val);
            }
        }
        else
        {
            yield return new KeyValuePair<string, string>(key, value);
        }
    }
}
/// <summary>
/// Serializes an element to HTML using a PrettyMarkupFormatter.
/// </summary>
/// <param name="element">The element to serialize; may be null.</param>
/// <returns>The formatted markup, or an empty string when <paramref name="element"/> is null.</returns>
public static string ToHtmlPretty(this IElement element)
{
    if (element != null)
    {
        var buffer = new StringBuilder();
        var writer = new StringWriter(buffer);
        element.ToHtml(writer, new PrettyMarkupFormatter());
        return buffer.ToString();
    }

    return "";
}
/// <summary>
/// Generates a random string of lower-case letters and digits using a cryptographic
/// random number generator.
/// </summary>
/// <param name="length">Number of characters to generate.</param>
/// <returns>A string of exactly <paramref name="length"/> characters drawn from a-z and 0-9.</returns>
public static string GenerateRandom(int length)
{
    const string chars = "abcdefghijklmnopqrstuvwxyz0123456789";

    // 256 is not a multiple of the alphabet size, so a plain "byte % size" would favour the
    // first few characters. Bytes at or above this bound are discarded to keep every
    // character equally likely.
    var unbiasedLimit = 256 - (256 % chars.Length);

    var randBytes = new byte[length];
    var key = new char[length];
    var filled = 0;

    using (var rng = RandomNumberGenerator.Create())
    {
        // Some bytes are rejected, so keep drawing fresh batches until the key is full.
        while (filled < length)
        {
            rng.GetBytes(randBytes);

            for (var i = 0; i < randBytes.Length && filled < length; i++)
            {
                if (randBytes[i] < unbiasedLimit)
                {
                    key[filled++] = chars[randBytes[i] % chars.Length];
                }
            }
        }
    }

    return new string(key);
}
/// <summary>
/// Reduces a title to a simplified lower-case form: separators become spaces, punctuation
/// and the words a/an/the/and/or/of are removed, and whitespace is collapsed and trimmed.
/// </summary>
/// <param name="title">The title to normalize.</param>
/// <returns>The normalized, lower-cased title.</returns>
public static string NormalizeTitle(this string title)
{
    // The order matters: separators are turned into spaces before punctuation is stripped,
    // so words that were joined by a separator stay separate.
    title = WordDelimiterRegex.Replace(title, " ");
    title = PunctuationRegex.Replace(title, string.Empty);
    title = CommonWordRegex.Replace(title, string.Empty);
    title = DuplicateSpacesRegex.Replace(title, " ");
    title = SpecialCharRegex.Replace(title, string.Empty);

    // Lower-case with the invariant culture so the result does not depend on the
    // current culture of the machine (e.g. Turkish casing of 'I').
    return title.Trim().ToLowerInvariant();
}
}
}