New: Better Fuzzy DateTime Parse

pull/686/head
Qstick 3 years ago
parent 4a7bf39723
commit 6b886b938c

@ -633,7 +633,7 @@ namespace NzbDrone.Core.Indexers.Cardigann
var date = DateTimeUtil.ParseDateTimeGoLang(data, layout);
data = date.ToString(DateTimeUtil.Rfc1123ZPattern);
}
catch (FormatException ex)
catch (InvalidDateException ex)
{
_logger.Debug(ex.Message);
}

@ -0,0 +1,352 @@
//********************************************************************************************
//Author: Sergey Stoyan, CliverSoft.com
// http://cliversoft.com
// stoyan@cliversoft.com
// sergey.stoyan@gmail.com
// 27 February 2007
//********************************************************************************************
using System;
using System.Text.RegularExpressions;
namespace NzbDrone.Core.Parser
{
/// <summary>
/// Heuristic ("fuzzy") extraction of a date and/or a time from free-form text.
/// All parsing is regex based; the Try* methods report failure through their
/// return value instead of throwing.
/// </summary>
public static class DateTimeRoutines
{
    /// <summary>
    /// Result of a parse: the value that was found and where in the input it was found.
    /// </summary>
    public class ParsedDateTime
    {
        /// <summary>Index of the first character of the date in the input, or -1 if no date was found.</summary>
        public readonly int IndexOfDate;

        /// <summary>Length of the date text in the input, or -1 if no date was found.</summary>
        public readonly int LengthOfDate;

        /// <summary>Index of the first character of the time in the input, or -1 if no time was found.</summary>
        public readonly int IndexOfTime;

        /// <summary>Length of the time text in the input, or -1 if no time was found.</summary>
        public readonly int LengthOfTime;

        /// <summary>The parsed value; parts that were not found are filled in by the parsing method.</summary>
        public readonly DateTime DateTime;

        /// <summary>True when <see cref="IndexOfDate"/> is zero or greater.</summary>
        public readonly bool IsDateFound;

        /// <summary>True when <see cref="IndexOfTime"/> is zero or greater.</summary>
        public readonly bool IsTimeFound;

        internal ParsedDateTime(int index_of_date, int length_of_date, int index_of_time, int length_of_time, DateTime date_time)
        {
            IndexOfDate = index_of_date;
            LengthOfDate = length_of_date;
            IndexOfTime = index_of_time;
            LengthOfTime = length_of_time;
            DateTime = date_time;
            IsDateFound = index_of_date > -1;
            IsTimeFound = index_of_time > -1;
        }
    }

    /// <summary>
    /// Date used to fill in parts missing from the input (the whole date for a
    /// time-only string, the year for a "month day" string). Returns the current
    /// local time while <see cref="DefaultDateIsNow"/> is true; assigning a value
    /// switches <see cref="DefaultDateIsNow"/> off.
    /// </summary>
    public static DateTime DefaultDate
    {
        get
        {
            return DefaultDateIsNow ? DateTime.Now : _defaultDate;
        }

        set
        {
            _defaultDate = value;
            DefaultDateIsNow = false;
        }
    }

    private static DateTime _defaultDate = DateTime.Now;

    /// <summary>When true, <see cref="DefaultDate"/> ignores the stored value and returns <c>DateTime.Now</c>.</summary>
    public static bool DefaultDateIsNow = true;

    /// <summary>
    /// Decides how an all-numeric date such as 01/02/2020 is read.
    /// </summary>
    public enum DateTimeFormat
    {
        /// <summary>Month first: mm/dd/yy.</summary>
        USDate,

        /// <summary>Day first: dd/mm/yy.</summary>
        UKDate,
    }

    /// <summary>
    /// Looks for a date, a time, or both in <paramref name="str"/>.
    /// </summary>
    /// <param name="str">Text to search; null or empty yields false.</param>
    /// <param name="default_format">Order of day and month in all-numeric dates.</param>
    /// <param name="parsed_date_time">
    /// On success, the combined result. A missing date is taken from <see cref="DefaultDate"/>;
    /// a missing time is 00:00:00. Null on failure.
    /// </param>
    /// <returns>True if at least a date or a time was found.</returns>
    public static bool TryParseDateOrTime(this string str, DateTimeFormat default_format, out ParsedDateTime parsed_date_time)
    {
        parsed_date_time = null;

        ParsedDateTime parsed_date;
        ParsedDateTime parsed_time;

        if (!TryParseDate(str, default_format, out parsed_date))
        {
            if (!TryParseTime(str, default_format, out parsed_time, null))
            {
                return false;
            }

            // Read DefaultDate once so year, month and day cannot come from two different days.
            var fallback = DefaultDate;
            var timeOnly = new DateTime(fallback.Year, fallback.Month, fallback.Day, parsed_time.DateTime.Hour, parsed_time.DateTime.Minute, parsed_time.DateTime.Second);
            parsed_date_time = new ParsedDateTime(-1, -1, parsed_time.IndexOfTime, parsed_time.LengthOfTime, timeOnly);
            return true;
        }

        var day = parsed_date.DateTime;

        if (!TryParseTime(str, default_format, out parsed_time, parsed_date))
        {
            var dateOnly = new DateTime(day.Year, day.Month, day.Day, 0, 0, 0);
            parsed_date_time = new ParsedDateTime(parsed_date.IndexOfDate, parsed_date.LengthOfDate, -1, -1, dateOnly);
            return true;
        }

        var combined = new DateTime(day.Year, day.Month, day.Day, parsed_time.DateTime.Hour, parsed_time.DateTime.Minute, parsed_time.DateTime.Second);
        parsed_date_time = new ParsedDateTime(parsed_date.IndexOfDate, parsed_date.LengthOfDate, parsed_time.IndexOfTime, parsed_time.LengthOfTime, combined);
        return true;
    }

    /// <summary>
    /// Looks for a time of the form [h]h:mm[:ss] [AM/PM] in <paramref name="str"/>.
    /// </summary>
    /// <param name="str">Text to search; null yields false.</param>
    /// <param name="default_format">Not used by the time parser; kept for signature symmetry.</param>
    /// <param name="parsed_time">
    /// On success, a result whose DateTime carries the time on 0001-01-01 and whose
    /// IndexOfTime/LengthOfTime locate the match within <paramref name="str"/>. Null on failure.
    /// </param>
    /// <param name="parsed_date">
    /// A date previously found in the same string, or null. When given, the time is searched
    /// directly after the date first and then before it.
    /// </param>
    /// <returns>True if a valid time was found.</returns>
    public static bool TryParseTime(this string str, DateTimeFormat default_format, out ParsedDateTime parsed_time, ParsedDateTime parsed_date)
    {
        parsed_time = null;

        if (str == null)
        {
            return false;
        }

        Match m;

        // Offset of the searched substring within str, so the reported index refers to str itself.
        var offset = 0;

        if (parsed_date != null && parsed_date.IndexOfDate > -1)
        {
            //look around the found date
            //look for <date> [h]h:mm[:ss] [PM/AM]
            var afterDate = parsed_date.IndexOfDate + parsed_date.LengthOfDate;
            m = Regex.Match(str.Substring(afterDate), @"(?<=^\s*,?\s+|^\s*at\s*|^\s*[T\-]\s*)(?'hour'\d{1,2})\s*:\s*(?'minute'\d{2})\s*(?::\s*(?'second'\d{2}))?(?:\s*(?'ampm'AM|am|PM|pm))?(?=$|[^\d\w])", RegexOptions.Compiled);
            if (m.Success)
            {
                offset = afterDate;
            }
            else
            {
                //look for [h]h:mm:ss <date>
                m = Regex.Match(str.Substring(0, parsed_date.IndexOfDate), @"(?<=^|[^\d])(?'hour'\d{1,2})\s*:\s*(?'minute'\d{2})\s*(?::\s*(?'second'\d{2}))?(?:\s*(?'ampm'AM|am|PM|pm))?(?=$|[\s,]+)", RegexOptions.Compiled);
            }
        }
        else
        {
            //look anywhere within string
            //look for [h]h:mm[:ss] [PM/AM]
            m = Regex.Match(str, @"(?<=^|\s+|\s*T\s*)(?'hour'\d{1,2})\s*:\s*(?'minute'\d{2})\s*(?::\s*(?'second'\d{2}))?(?:\s*(?'ampm'AM|am|PM|pm))?(?=$|[^\d\w])", RegexOptions.Compiled);
        }

        if (!m.Success)
        {
            return false;
        }

        // \d also matches non-ASCII digits that int parsing rejects, hence TryParse.
        int hour;
        if (!int.TryParse(m.Groups["hour"].Value, out hour) || hour < 0 || hour > 23)
        {
            return false;
        }

        int minute;
        if (!int.TryParse(m.Groups["minute"].Value, out minute) || minute < 0 || minute > 59)
        {
            return false;
        }

        var second = 0;
        var secondText = m.Groups["second"].Value;
        if (!string.IsNullOrEmpty(secondText))
        {
            if (!int.TryParse(secondText, out second) || second < 0 || second > 59)
            {
                return false;
            }
        }

        var meridiem = m.Groups["ampm"].Value;
        if (string.Equals(meridiem, "PM", StringComparison.OrdinalIgnoreCase) && hour < 12)
        {
            hour += 12;
        }
        else if (string.Equals(meridiem, "AM", StringComparison.OrdinalIgnoreCase) && hour == 12)
        {
            // 12 AM is midnight.
            hour -= 12;
        }

        var date_time = new DateTime(1, 1, 1, hour, minute, second);
        parsed_time = new ParsedDateTime(-1, -1, offset + m.Index, m.Length, date_time);
        return true;
    }

    /// <summary>
    /// Looks for a date in <paramref name="str"/>. Tried in order: dd/mm/yy (or mm/dd/yy),
    /// [yy]yy-mm-dd, "month dd yyyy", "dd month [yy]yy", "yyyy month dd", "month dd [yyyy]".
    /// Month names are matched by their first three letters, in any letter case.
    /// </summary>
    /// <param name="str">Text to search; null or empty yields false.</param>
    /// <param name="default_format">Order of day and month in the all-numeric slash/dot form.</param>
    /// <param name="parsed_date">
    /// On success, the date (time 00:00:00) and the location of the match. A missing year is
    /// taken from <see cref="DefaultDate"/>. Null on failure.
    /// </param>
    /// <returns>True if a valid calendar date was found.</returns>
    public static bool TryParseDate(this string str, DateTimeFormat default_format, out ParsedDateTime parsed_date)
    {
        parsed_date = null;

        if (string.IsNullOrEmpty(str))
        {
            return false;
        }

        const RegexOptions options = RegexOptions.Compiled | RegexOptions.IgnoreCase;

        int year;
        int month;
        int day;
        DateTime date;

        //look for dd/mm/yy
        var m = Regex.Match(str, @"(?<=^|[^\d])(?'day'\d{1,2})\s*(?'separator'[\\/\.])+\s*(?'month'\d{1,2})\s*\'separator'+\s*(?'year'\d{2}|\d{4})(?=$|[^\d])", options);
        if (m.Success)
        {
            int first;
            int second;
            if (!TryReadGroupAsInt(m, "year", out year) || !TryReadGroupAsInt(m, "day", out first) || !TryReadGroupAsInt(m, "month", out second))
            {
                return false;
            }

            // The pattern names its groups for day-first order; US dates are month-first.
            var monthFirst = default_format == DateTimeFormat.USDate;
            month = monthFirst ? first : second;
            day = monthFirst ? second : first;

            if (!ConvertToDate(year, month, day, out date))
            {
                return false;
            }

            parsed_date = new ParsedDateTime(m.Index, m.Length, -1, -1, date);
            return true;
        }

        //look for [yy]yy-mm-dd
        m = Regex.Match(str, @"(?<=^|[^\d])(?'year'\d{2}|\d{4})\s*(?'separator'[\-])\s*(?'month'\d{1,2})\s*\'separator'+\s*(?'day'\d{1,2})(?=$|[^\d])", options);
        if (m.Success)
        {
            if (!TryReadGroupAsInt(m, "year", out year) || !TryReadGroupAsInt(m, "month", out month) || !TryReadGroupAsInt(m, "day", out day))
            {
                return false;
            }

            if (!ConvertToDate(year, month, day, out date))
            {
                return false;
            }

            parsed_date = new ParsedDateTime(m.Index, m.Length, -1, -1, date);
            return true;
        }

        //look for month dd yyyy
        m = Regex.Match(str, @"(?:^|[^\d\w])(?'month'Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[uarychilestmbro]*\s+(?'day'\d{1,2})(?:-?st|-?th|-?rd|-?nd)?\s*,?\s*(?'year'\d{4})(?=$|[^\d\w])", options);
        if (!m.Success)
        {
            //look for dd month [yy]yy
            m = Regex.Match(str, @"(?:^|[^\d\w:])(?'day'\d{1,2})(?:-?st\s+|-?th\s+|-?rd\s+|-?nd\s+|-|\s+)(?'month'Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[uarychilestmbro]*(?:\s*,?\s*|-)'?(?'year'\d{2}|\d{4})(?=$|[^\d\w])", options);
        }

        if (!m.Success)
        {
            //look for yyyy month dd
            m = Regex.Match(str, @"(?:^|[^\d\w])(?'year'\d{4})\s+(?'month'Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[uarychilestmbro]*\s+(?'day'\d{1,2})(?:-?st|-?th|-?rd|-?nd)?(?=$|[^\d\w])", options);
        }

        if (!m.Success)
        {
            //look for month dd [yyyy]
            m = Regex.Match(str, @"(?:^|[^\d\w])(?'month'Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[uarychilestmbro]*\s+(?'day'\d{1,2})(?:-?st|-?th|-?rd|-?nd)?(?:\s*,?\s*(?'year'\d{4}))?(?=$|[^\d\w])", options);
        }

        if (!m.Success)
        {
            return false;
        }

        // The patterns match case-insensitively, so the lookup must be too.
        month = MonthFromAbbreviation(m.Groups["month"].Value);

        var yearText = m.Groups["year"].Value;
        if (string.IsNullOrEmpty(yearText))
        {
            year = DefaultDate.Year;
        }
        else if (!int.TryParse(yearText, out year))
        {
            return false;
        }

        if (!TryReadGroupAsInt(m, "day", out day) || !ConvertToDate(year, month, day, out date))
        {
            return false;
        }

        parsed_date = new ParsedDateTime(m.Index, m.Length, -1, -1, date);
        return true;
    }

    // Reads a named regex group as an integer without throwing on digits int parsing rejects.
    private static bool TryReadGroupAsInt(Match match, string groupName, out int value)
    {
        return int.TryParse(match.Groups[groupName].Value, out value);
    }

    // Maps a three-letter English month abbreviation (any letter case) to 1-12, or -1 if unknown.
    private static int MonthFromAbbreviation(string abbreviation)
    {
        switch (abbreviation.ToUpperInvariant())
        {
            case "JAN":
                return 1;
            case "FEB":
                return 2;
            case "MAR":
                return 3;
            case "APR":
                return 4;
            case "MAY":
                return 5;
            case "JUN":
                return 6;
            case "JUL":
                return 7;
            case "AUG":
                return 8;
            case "SEP":
                return 9;
            case "OCT":
                return 10;
            case "NOV":
                return 11;
            case "DEC":
                return 12;
            default:
                return -1;
        }
    }

    /// <summary>
    /// Builds a date from numeric parts. Two-digit years 31-99 become 1931-1999 and
    /// 0-30 become 2000-2030; three-digit years are rejected.
    /// </summary>
    /// <returns>
    /// True and the date if the parts form a valid calendar date; otherwise false and 0001-01-01.
    /// </returns>
    private static bool ConvertToDate(int year, int month, int day, out DateTime date)
    {
        date = DateTime.MinValue;

        if (year >= 100)
        {
            if (year < 1000)
            {
                return false;
            }
        }
        else if (year > 30)
        {
            year += 1900;
        }
        else
        {
            year += 2000;
        }

        // Validate up front instead of relying on the DateTime constructor throwing.
        if (year < 1 || year > 9999 || month < 1 || month > 12)
        {
            return false;
        }

        if (day < 1 || day > DateTime.DaysInMonth(year, month))
        {
            return false;
        }

        date = new DateTime(year, month, day);
        return true;
    }
}
}

@ -91,7 +91,7 @@ namespace NzbDrone.Core.Parser
}
else
{
throw new Exception("TimeAgo parsing failed, unknown unit: " + unit);
throw new InvalidDateException("TimeAgo parsing failed, unknown unit: " + unit);
}
}
@ -102,19 +102,17 @@ namespace NzbDrone.Core.Parser
// http://www.codeproject.com/Articles/33298/C-Date-Time-Parser
/// <summary>
/// Parses a loosely formatted date and/or time string using <c>DateTimeRoutines.TryParseDateOrTime</c>.
/// </summary>
/// <param name="str">The text to parse.</param>
/// <param name="format">
/// "UK" reads all-numeric dates day-first (dd/mm/yy); any other value, including null,
/// reads them month-first (mm/dd/yy).
/// </param>
/// <returns>The parsed date and time.</returns>
/// <exception cref="InvalidDateException">No date or time could be found in <paramref name="str"/>.</exception>
public static DateTime FromFuzzyTime(string str, string format = null)
{
    // The non-UK branch previously also selected UKDate, which made the format argument a no-op.
    var dtFormat = format == "UK" ?
        DateTimeRoutines.DateTimeFormat.UKDate :
        DateTimeRoutines.DateTimeFormat.USDate;

    if (DateTimeRoutines.TryParseDateOrTime(
        str, dtFormat, out var dt))
    {
        return dt.DateTime;
    }

    throw new InvalidDateException($"FromFuzzyTime parsing failed for string {str}");
}
public static DateTime FromUnknown(string str, string format = null)
@ -244,7 +242,7 @@ namespace NzbDrone.Core.Parser
}
catch (Exception ex)
{
throw new Exception($"DateTime parsing failed for \"{str}\": {ex}");
throw new InvalidDateException($"DateTime parsing failed for \"{str}\": {ex}");
}
}
@ -314,7 +312,7 @@ namespace NzbDrone.Core.Parser
}
catch (FormatException ex)
{
throw new FormatException($"Error while parsing DateTime \"{date}\", using layout \"{layout}\" ({pattern}): {ex.Message}");
throw new InvalidDateException($"Error while parsing DateTime \"{date}\", using layout \"{layout}\" ({pattern}): {ex.Message}");
}
}

Loading…
Cancel
Save