From 6b886b938c08801c2854a55e7e257045e12b9e06 Mon Sep 17 00:00:00 2001
From: Qstick
Date: Tue, 7 Dec 2021 18:42:38 -0600
Subject: [PATCH] New: Better Fuzzy DateTime Parse

---
 .../Definitions/Cardigann/CardigannBase.cs    |   2 +-
 src/NzbDrone.Core/Parser/DateTimeRoutines.cs  | 383 ++++++++++++++++++
 src/NzbDrone.Core/Parser/DateTimeUtil.cs      |  24 +-
 3 files changed, 395 insertions(+), 14 deletions(-)
 create mode 100644 src/NzbDrone.Core/Parser/DateTimeRoutines.cs

diff --git a/src/NzbDrone.Core/Indexers/Definitions/Cardigann/CardigannBase.cs b/src/NzbDrone.Core/Indexers/Definitions/Cardigann/CardigannBase.cs
index 1b46b05e7..05fdeda25 100644
--- a/src/NzbDrone.Core/Indexers/Definitions/Cardigann/CardigannBase.cs
+++ b/src/NzbDrone.Core/Indexers/Definitions/Cardigann/CardigannBase.cs
@@ -633,7 +633,7 @@ namespace NzbDrone.Core.Indexers.Cardigann
                             var date = DateTimeUtil.ParseDateTimeGoLang(data, layout);
                             data = date.ToString(DateTimeUtil.Rfc1123ZPattern);
                         }
-                        catch (FormatException ex)
+                        catch (InvalidDateException ex)
                         {
                             _logger.Debug(ex.Message);
                         }
diff --git a/src/NzbDrone.Core/Parser/DateTimeRoutines.cs b/src/NzbDrone.Core/Parser/DateTimeRoutines.cs
new file mode 100644
index 000000000..27cdcc17b
--- /dev/null
+++ b/src/NzbDrone.Core/Parser/DateTimeRoutines.cs
@@ -0,0 +1,383 @@
+//********************************************************************************************
+//Author: Sergey Stoyan, CliverSoft.com
+//        http://cliversoft.com
+//        stoyan@cliversoft.com
+//        sergey.stoyan@gmail.com
+//        27 February 2007
+//********************************************************************************************
+using System;
+using System.Text.RegularExpressions;
+
+namespace NzbDrone.Core.Parser
+{
+    /// <summary>
+    /// Heuristic extraction of a date and/or a time of day from free-form text.
+    /// </summary>
+    public static class DateTimeRoutines
+    {
+        /// <summary>
+        /// Result of a parse: the resolved value plus where the date and time fragments were matched.
+        /// An index or length of -1 means that fragment was not found.
+        /// </summary>
+        public class ParsedDateTime
+        {
+            public readonly int IndexOfDate = -1;
+            public readonly int LengthOfDate = -1;
+            public readonly int IndexOfTime = -1;
+            public readonly int LengthOfTime = -1;
+            public readonly DateTime DateTime;
+            public readonly bool IsDateFound;
+            public readonly bool IsTimeFound;
+
+            internal ParsedDateTime(int index_of_date, int length_of_date, int index_of_time, int length_of_time, DateTime date_time)
+            {
+                IndexOfDate = index_of_date;
+                LengthOfDate = length_of_date;
+                IndexOfTime = index_of_time;
+                LengthOfTime = length_of_time;
+                DateTime = date_time;
+                IsDateFound = index_of_date > -1;
+                IsTimeFound = index_of_time > -1;
+            }
+        }
+
+        /// <summary>
+        /// Date that supplies the parts missing from the input (the day when only a time is found, the year
+        /// when none is given). While DefaultDateIsNow is true this is DateTime.Now; assigning clears the flag.
+        /// </summary>
+        public static DateTime DefaultDate
+        {
+            get
+            {
+                if (DefaultDateIsNow)
+                {
+                    return DateTime.Now;
+                }
+                else
+                {
+                    return _DefaultDate;
+                }
+            }
+            set
+            {
+                _DefaultDate = value;
+                DefaultDateIsNow = false;
+            }
+        }
+
+        private static DateTime _DefaultDate = DateTime.Now;
+
+        public static bool DefaultDateIsNow = true;
+
+        /// <summary>
+        /// Field order assumed for all-numeric dates: USDate reads mm/dd/yy, UKDate reads dd/mm/yy.
+        /// </summary>
+        public enum DateTimeFormat
+        {
+            USDate,
+            UKDate,
+        }
+
+        /// <summary>
+        /// Looks for a date and/or a time of day in <paramref name="str"/> and combines whatever was found.
+        /// A missing date falls back to <see cref="DefaultDate"/>; a missing time falls back to midnight.
+        /// </summary>
+        /// <returns>True when at least one of date or time was found; otherwise false.</returns>
+        public static bool TryParseDateOrTime(this string str, DateTimeFormat default_format, out ParsedDateTime parsed_date_time)
+        {
+            parsed_date_time = null;
+
+            ParsedDateTime parsed_date;
+            ParsedDateTime parsed_time;
+            if (!TryParseDate(str, default_format, out parsed_date))
+            {
+                if (!TryParseTime(str, default_format, out parsed_time, null))
+                {
+                    return false;
+                }
+
+                var date_time = new DateTime(DefaultDate.Year, DefaultDate.Month, DefaultDate.Day, parsed_time.DateTime.Hour, parsed_time.DateTime.Minute, parsed_time.DateTime.Second);
+                parsed_date_time = new ParsedDateTime(-1, -1, parsed_time.IndexOfTime, parsed_time.LengthOfTime, date_time);
+            }
+            else
+            {
+                if (!TryParseTime(str, default_format, out parsed_time, parsed_date))
+                {
+                    var date_time = new DateTime(parsed_date.DateTime.Year, parsed_date.DateTime.Month, parsed_date.DateTime.Day, 0, 0, 0);
+                    parsed_date_time = new ParsedDateTime(parsed_date.IndexOfDate, parsed_date.LengthOfDate, -1, -1, date_time);
+                }
+                else
+                {
+                    var date_time = new DateTime(parsed_date.DateTime.Year, parsed_date.DateTime.Month, parsed_date.DateTime.Day, parsed_time.DateTime.Hour, parsed_time.DateTime.Minute, parsed_time.DateTime.Second);
+                    parsed_date_time = new ParsedDateTime(parsed_date.IndexOfDate, parsed_date.LengthOfDate, parsed_time.IndexOfTime, parsed_time.LengthOfTime, date_time);
+                }
+            }
+
+            return true;
+        }
+
+        /// <summary>
+        /// Looks for a time of day written as [h]h:mm[:ss] with an optional AM/PM suffix.
+        /// When <paramref name="parsed_date"/> carries a date match, the text right after that date is
+        /// searched first and then the text before it; otherwise the whole string is searched.
+        /// </summary>
+        /// <returns>True when a time with in-range hour, minute and second was found; otherwise false.</returns>
+        public static bool TryParseTime(this string str, DateTimeFormat default_format, out ParsedDateTime parsed_time, ParsedDateTime parsed_date)
+        {
+            parsed_time = null;
+
+            //same empty-input guard as TryParseDate: Regex.Match throws on a null input
+            if (string.IsNullOrEmpty(str))
+            {
+                return false;
+            }
+
+            Match m;
+            if (parsed_date != null && parsed_date.IndexOfDate > -1)
+            {
+                //look around the found date
+                //look for [h]h:mm[:ss] [PM/AM]
+                m = Regex.Match(str.Substring(parsed_date.IndexOfDate + parsed_date.LengthOfDate), @"(?<=^\s*,?\s+|^\s*at\s*|^\s*[T\-]\s*)(?'hour'\d{1,2})\s*:\s*(?'minute'\d{2})\s*(?::\s*(?'second'\d{2}))?(?:\s*(?'ampm'AM|am|PM|pm))?(?=$|[^\d\w])", RegexOptions.Compiled);
+                if (!m.Success)
+                {
+                    //look for [h]h:mm:ss
+                    m = Regex.Match(str.Substring(0, parsed_date.IndexOfDate), @"(?<=^|[^\d])(?'hour'\d{1,2})\s*:\s*(?'minute'\d{2})\s*(?::\s*(?'second'\d{2}))?(?:\s*(?'ampm'AM|am|PM|pm))?(?=$|[\s,]+)", RegexOptions.Compiled);
+                }
+            }
+            else
+            {
+                //look anywhere within string
+                //look for [h]h:mm[:ss] [PM/AM]
+                m = Regex.Match(str, @"(?<=^|\s+|\s*T\s*)(?'hour'\d{1,2})\s*:\s*(?'minute'\d{2})\s*(?::\s*(?'second'\d{2}))?(?:\s*(?'ampm'AM|am|PM|pm))?(?=$|[^\d\w])", RegexOptions.Compiled);
+            }
+
+            if (m.Success)
+            {
+                try
+                {
+                    var hour = int.Parse(m.Groups["hour"].Value);
+                    if (hour < 0 || hour > 23)
+                    {
+                        return false;
+                    }
+
+                    var minute = int.Parse(m.Groups["minute"].Value);
+                    if (minute < 0 || minute > 59)
+                    {
+                        return false;
+                    }
+
+                    var second = 0;
+                    if (!string.IsNullOrEmpty(m.Groups["second"].Value))
+                    {
+                        second = int.Parse(m.Groups["second"].Value);
+                        if (second < 0 || second > 59)
+                        {
+                            return false;
+                        }
+                    }
+
+                    if (string.Compare(m.Groups["ampm"].Value, "PM", true) == 0 && hour < 12)
+                    {
+                        hour += 12;
+                    }
+                    else if (string.Compare(m.Groups["ampm"].Value, "AM", true) == 0 && hour == 12)
+                    {
+                        hour -= 12;
+                    }
+
+                    var date_time = new DateTime(1, 1, 1, hour, minute, second);
+                    parsed_time = new ParsedDateTime(-1, -1, m.Index, m.Length, date_time);
+                }
+                catch
+                {
+                    return false;
+                }
+
+                return true;
+            }
+
+            return false;
+        }
+
+        /// <summary>
+        /// Looks for a calendar date, trying in order: numeric d/m/y (field order chosen by
+        /// <paramref name="default_format"/>), [yy]yy-mm-dd, then forms with an English month name.
+        /// </summary>
+        /// <returns>True when a valid date was found; otherwise false.</returns>
+        public static bool TryParseDate(this string str, DateTimeFormat default_format, out ParsedDateTime parsed_date)
+        {
+            parsed_date = null;
+
+            if (string.IsNullOrEmpty(str))
+            {
+                return false;
+            }
+
+            //look for dd/mm/yy
+            var m = Regex.Match(str, @"(?<=^|[^\d])(?'day'\d{1,2})\s*(?'separator'[\\/\.])+\s*(?'month'\d{1,2})\s*\'separator'+\s*(?'year'\d{2}|\d{4})(?=$|[^\d])", RegexOptions.Compiled | RegexOptions.IgnoreCase);
+            if (m.Success)
+            {
+                DateTime date;
+                if ((default_format ^ DateTimeFormat.USDate) == DateTimeFormat.USDate)
+                {
+                    //US order: the first number (captured as 'day') is the month
+                    if (!ConvertToDate(int.Parse(m.Groups["year"].Value), int.Parse(m.Groups["day"].Value), int.Parse(m.Groups["month"].Value), out date))
+                    {
+                        return false;
+                    }
+                }
+                else
+                {
+                    if (!ConvertToDate(int.Parse(m.Groups["year"].Value), int.Parse(m.Groups["month"].Value), int.Parse(m.Groups["day"].Value), out date))
+                    {
+                        return false;
+                    }
+                }
+
+                parsed_date = new ParsedDateTime(m.Index, m.Length, -1, -1, date);
+                return true;
+            }
+
+            //look for [yy]yy-mm-dd
+            m = Regex.Match(str, @"(?<=^|[^\d])(?'year'\d{2}|\d{4})\s*(?'separator'[\-])\s*(?'month'\d{1,2})\s*\'separator'+\s*(?'day'\d{1,2})(?=$|[^\d])", RegexOptions.Compiled | RegexOptions.IgnoreCase);
+            if (m.Success)
+            {
+                DateTime date;
+                if (!ConvertToDate(int.Parse(m.Groups["year"].Value), int.Parse(m.Groups["month"].Value), int.Parse(m.Groups["day"].Value), out date))
+                {
+                    return false;
+                }
+
+                parsed_date = new ParsedDateTime(m.Index, m.Length, -1, -1, date);
+                return true;
+            }
+
+            //look for month dd yyyy
+            m = Regex.Match(str, @"(?:^|[^\d\w])(?'month'Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[uarychilestmbro]*\s+(?'day'\d{1,2})(?:-?st|-?th|-?rd|-?nd)?\s*,?\s*(?'year'\d{4})(?=$|[^\d\w])", RegexOptions.Compiled | RegexOptions.IgnoreCase);
+            if (!m.Success)
+            {
+                //look for dd month [yy]yy
+                m = Regex.Match(str, @"(?:^|[^\d\w:])(?'day'\d{1,2})(?:-?st\s+|-?th\s+|-?rd\s+|-?nd\s+|-|\s+)(?'month'Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[uarychilestmbro]*(?:\s*,?\s*|-)'?(?'year'\d{2}|\d{4})(?=$|[^\d\w])", RegexOptions.Compiled | RegexOptions.IgnoreCase);
+            }
+
+            if (!m.Success)
+            {
+                //look for yyyy month dd
+                m = Regex.Match(str, @"(?:^|[^\d\w])(?'year'\d{4})\s+(?'month'Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[uarychilestmbro]*\s+(?'day'\d{1,2})(?:-?st|-?th|-?rd|-?nd)?(?=$|[^\d\w])", RegexOptions.Compiled | RegexOptions.IgnoreCase);
+            }
+
+            if (!m.Success)
+            {
+                //look for month dd [yyyy]
+                m = Regex.Match(str, @"(?:^|[^\d\w])(?'month'Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[uarychilestmbro]*\s+(?'day'\d{1,2})(?:-?st|-?th|-?rd|-?nd)?(?:\s*,?\s*(?'year'\d{4}))?(?=$|[^\d\w])", RegexOptions.Compiled | RegexOptions.IgnoreCase);
+            }
+
+            if (m.Success)
+            {
+                var month = -1;
+                var index_of_date = m.Index;
+                var length_of_date = m.Length;
+
+                //the patterns above are case-insensitive, so normalise the capture before mapping it
+                switch (m.Groups["month"].Value.ToUpperInvariant())
+                {
+                    case "JAN":
+                        month = 1;
+                        break;
+                    case "FEB":
+                        month = 2;
+                        break;
+                    case "MAR":
+                        month = 3;
+                        break;
+                    case "APR":
+                        month = 4;
+                        break;
+                    case "MAY":
+                        month = 5;
+                        break;
+                    case "JUN":
+                        month = 6;
+                        break;
+                    case "JUL":
+                        month = 7;
+                        break;
+                    case "AUG":
+                        month = 8;
+                        break;
+                    case "SEP":
+                        month = 9;
+                        break;
+                    case "OCT":
+                        month = 10;
+                        break;
+                    case "NOV":
+                        month = 11;
+                        break;
+                    case "DEC":
+                        month = 12;
+                        break;
+                }
+
+                int year;
+                if (!string.IsNullOrEmpty(m.Groups["year"].Value))
+                {
+                    year = int.Parse(m.Groups["year"].Value);
+                }
+                else
+                {
+                    year = DefaultDate.Year;
+                }
+
+                DateTime date;
+                if (!ConvertToDate(year, month, int.Parse(m.Groups["day"].Value), out date))
+                {
+                    return false;
+                }
+
+                parsed_date = new ParsedDateTime(index_of_date, length_of_date, -1, -1, date);
+                return true;
+            }
+
+            return false;
+        }
+
+        /// <summary>
+        /// Builds a date from its parts. Years below 100 are expanded: above 30 becomes 19xx, otherwise 20xx.
+        /// Years from 100 to 999 and parts the DateTime constructor refuses make the method return false.
+        /// </summary>
+        private static bool ConvertToDate(int year, int month, int day, out DateTime date)
+        {
+            if (year >= 100)
+            {
+                if (year < 1000)
+                {
+                    date = new DateTime(1, 1, 1);
+                    return false;
+                }
+            }
+            else
+            if (year > 30)
+            {
+                year += 1900;
+            }
+            else
+            {
+                year += 2000;
+            }
+
+            try
+            {
+                date = new DateTime(year, month, day);
+            }
+            catch
+            {
+                date = new DateTime(1, 1, 1);
+                return false;
+            }
+
+            return true;
+        }
+    }
+}
diff --git a/src/NzbDrone.Core/Parser/DateTimeUtil.cs b/src/NzbDrone.Core/Parser/DateTimeUtil.cs
index 2e1422da8..2f6596350 100644
--- a/src/NzbDrone.Core/Parser/DateTimeUtil.cs
+++ b/src/NzbDrone.Core/Parser/DateTimeUtil.cs
@@ -91,7 +91,7 @@ namespace NzbDrone.Core.Parser
                 }
                 else
                 {
-                    throw new Exception("TimeAgo parsing failed, unknown unit: " + unit);
+                    throw new InvalidDateException("TimeAgo parsing failed, unknown unit: " + unit);
                 }
             }
 
@@ -102,19 +102,17 @@ namespace NzbDrone.Core.Parser
         // http://www.codeproject.com/Articles/33298/C-Date-Time-Parser
         public static DateTime FromFuzzyTime(string str, string format = null)
         {
-            //var dtFormat = format == "UK" ?
-            //    DateTimeRoutines.DateTimeRoutines.DateTimeFormat.UkDate :
-            //    DateTimeRoutines.DateTimeRoutines.DateTimeFormat.UsaDate;
-
-            //if (DateTimeRoutines.DateTimeRoutines.TryParseDateOrTime(
-            //    str, dtFormat, out DateTimeRoutines.DateTimeRoutines.ParsedDateTime dt))
-            //    return dt.DateTime;
-            if (DateTime.TryParse(str, out var dateTimeParsed))
+            var dtFormat = format == "UK" ?
+                DateTimeRoutines.DateTimeFormat.UKDate :
+                DateTimeRoutines.DateTimeFormat.USDate;
+
+            if (DateTimeRoutines.TryParseDateOrTime(
+                str, dtFormat, out var dt))
             {
-                return dateTimeParsed;
+                return dt.DateTime;
             }
 
-            throw new Exception($"FromFuzzyTime parsing failed for string {str}");
+            throw new InvalidDateException($"FromFuzzyTime parsing failed for string {str}");
         }
 
         public static DateTime FromUnknown(string str, string format = null)
@@ -244,7 +242,7 @@ namespace NzbDrone.Core.Parser
             }
             catch (Exception ex)
             {
-                throw new Exception($"DateTime parsing failed for \"{str}\": {ex}");
+                throw new InvalidDateException($"DateTime parsing failed for \"{str}\": {ex}");
             }
         }
 
@@ -314,7 +312,7 @@ namespace NzbDrone.Core.Parser
             }
             catch (FormatException ex)
             {
-                throw new FormatException($"Error while parsing DateTime \"{date}\", using layout \"{layout}\" ({pattern}): {ex.Message}");
+                throw new InvalidDateException($"Error while parsing DateTime \"{date}\", using layout \"{layout}\" ({pattern}): {ex.Message}");
             }
         }
 