From caab337379bac985cc8afc5e09ff79f14cff7622 Mon Sep 17 00:00:00 2001 From: Bogdan Date: Thu, 23 Feb 2023 08:02:37 +0200 Subject: [PATCH] Fixed: (Cardigann) Parse text templates only if necessary --- .../Definitions/Cardigann/CardigannBase.cs | 6 +- .../Definitions/Cardigann/CardigannParser.cs | 316 +++++++++--------- 2 files changed, 156 insertions(+), 166 deletions(-) diff --git a/src/NzbDrone.Core/Indexers/Definitions/Cardigann/CardigannBase.cs b/src/NzbDrone.Core/Indexers/Definitions/Cardigann/CardigannBase.cs index f4e98c1be..140a26d31 100644 --- a/src/NzbDrone.Core/Indexers/Definitions/Cardigann/CardigannBase.cs +++ b/src/NzbDrone.Core/Indexers/Definitions/Cardigann/CardigannBase.cs @@ -349,11 +349,13 @@ namespace NzbDrone.Core.Indexers.Cardigann public string ApplyGoTemplateText(string template, Dictionary variables = null, TemplateTextModifier modifier = null) { - if (variables == null) + if (template.IsNullOrWhiteSpace() || !template.Contains("{{")) { - variables = GetBaseTemplateVariables(); + return template; } + variables ??= GetBaseTemplateVariables(); + // handle re_replace expression // Example: {{ re_replace .Query.Keywords "[^a-zA-Z0-9]+" "%" }} var reReplaceRegex = new Regex(@"{{\s*re_replace\s+(\..+?)\s+""(.*?)""\s+""(.*?)""\s*}}"); diff --git a/src/NzbDrone.Core/Indexers/Definitions/Cardigann/CardigannParser.cs b/src/NzbDrone.Core/Indexers/Definitions/Cardigann/CardigannParser.cs index 5e841d7c9..ed2fb6e75 100644 --- a/src/NzbDrone.Core/Indexers/Definitions/Cardigann/CardigannParser.cs +++ b/src/NzbDrone.Core/Indexers/Definitions/Cardigann/CardigannParser.cs @@ -165,196 +165,188 @@ namespace NzbDrone.Core.Indexers.Cardigann } else { - try + IHtmlCollection rowsDom; + + if (request.SearchPath.Response != null && request.SearchPath.Response.Type.Equals("xml")) { - IHtmlCollection rowsDom; + var searchResultParser = new XmlParser(); + var searchResultDocument = searchResultParser.ParseDocument(results); - if (request.SearchPath.Response != null && request.SearchPath.Response.Type.Equals("xml")) + if (search.Preprocessingfilters != null) { - var searchResultParser = new XmlParser(); - var searchResultDocument = searchResultParser.ParseDocument(results); - - if (search.Preprocessingfilters != null) - { - results = ApplyFilters(results, search.Preprocessingfilters, variables); - searchResultDocument = searchResultParser.ParseDocument(results); - _logger.Trace(string.Format("CardigannIndexer ({0}): result after preprocessingfilters: {1}", _definition.Id, results)); - } - - var rowsSelector = ApplyGoTemplateText(search.Rows.Selector, variables); - rowsDom = searchResultDocument.QuerySelectorAll(rowsSelector); + results = ApplyFilters(results, search.Preprocessingfilters, variables); + searchResultDocument = searchResultParser.ParseDocument(results); + _logger.Trace(string.Format("CardigannIndexer ({0}): result after preprocessingfilters: {1}", _definition.Id, results)); } - else - { - var searchResultParser = new HtmlParser(); - var searchResultDocument = searchResultParser.ParseDocument(results); - if (search.Preprocessingfilters != null) - { - results = ApplyFilters(results, search.Preprocessingfilters, variables); - searchResultDocument = searchResultParser.ParseDocument(results); - _logger.Trace(string.Format("CardigannIndexer ({0}): result after preprocessingfilters: {1}", _definition.Id, results)); - } - - var rowsSelector = ApplyGoTemplateText(search.Rows.Selector, variables); - rowsDom = searchResultDocument.QuerySelectorAll(rowsSelector); - } + var rowsSelector = ApplyGoTemplateText(search.Rows.Selector, variables); + rowsDom = searchResultDocument.QuerySelectorAll(rowsSelector); + } + else + { + var searchResultParser = new HtmlParser(); + var searchResultDocument = searchResultParser.ParseDocument(results); - var rows = new List(); - foreach (var rowDom in rowsDom) + if (search.Preprocessingfilters != null) { - rows.Add(rowDom); + results = ApplyFilters(results, search.Preprocessingfilters, variables); + searchResultDocument = searchResultParser.ParseDocument(results); + _logger.Trace(string.Format("CardigannIndexer ({0}): result after preprocessingfilters: {1}", _definition.Id, results)); } - // merge following rows for After selector - var after = search.Rows.After; - if (after > 0) + var rowsSelector = ApplyGoTemplateText(search.Rows.Selector, variables); + rowsDom = searchResultDocument.QuerySelectorAll(rowsSelector); + } + + var rows = new List(); + foreach (var rowDom in rowsDom) + { + rows.Add(rowDom); + } + + // merge following rows for After selector + var after = search.Rows.After; + if (after > 0) + { + for (var i = 0; i < rows.Count; i += 1) { - for (var i = 0; i < rows.Count; i += 1) + var currentRow = rows[i]; + for (var j = 0; j < after; j += 1) { - var currentRow = rows[i]; - for (var j = 0; j < after; j += 1) + var mergeRowIndex = i + j + 1; + var mergeRow = rows[mergeRowIndex]; + var mergeNodes = new List(); + foreach (var node in mergeRow.ChildNodes) { - var mergeRowIndex = i + j + 1; - var mergeRow = rows[mergeRowIndex]; - var mergeNodes = new List(); - foreach (var node in mergeRow.ChildNodes) - { - mergeNodes.Add(node); - } - - currentRow.Append(mergeNodes.ToArray()); + mergeNodes.Add(node); } - rows.RemoveRange(i + 1, after); + currentRow.Append(mergeNodes.ToArray()); } + + rows.RemoveRange(i + 1, after); } + } - foreach (var row in rows) + foreach (var row in rows) + { + try { - try + var release = new TorrentInfo(); + + // Parse fields + foreach (var field in search.Fields) { - var release = new TorrentInfo(); + var fieldParts = field.Key.Split('|'); + var fieldName = fieldParts[0]; + var fieldModifiers = new List(); + for (var i = 1; i < fieldParts.Length; i++) + { + fieldModifiers.Add(fieldParts[i]); + } - // Parse fields - foreach (var field in search.Fields) + string value = null; + var variablesKey = ".Result." + fieldName; + var isOptional = OptionalFields.Contains(field.Key) || fieldModifiers.Contains("optional") || field.Value.Optional; + try { - var fieldParts = field.Key.Split('|'); - var fieldName = fieldParts[0]; - var fieldModifiers = new List(); - for (var i = 1; i < fieldParts.Length; i++) + value = HandleSelector(field.Value, row, variables, !isOptional); + + if (isOptional && string.IsNullOrWhiteSpace(value)) { - fieldModifiers.Add(fieldParts[i]); + variables[variablesKey] = null; + continue; } - string value = null; - var variablesKey = ".Result." + fieldName; - var isOptional = OptionalFields.Contains(field.Key) || fieldModifiers.Contains("optional") || field.Value.Optional; - try + variables[variablesKey] = ParseFields(value, fieldName, release, fieldModifiers, searchUrlUri); + } + catch (Exception ex) + { + if (!variables.ContainsKey(variablesKey)) { - value = HandleSelector(field.Value, row, variables, !isOptional); - - if (isOptional && string.IsNullOrWhiteSpace(value)) - { - variables[variablesKey] = null; - continue; - } - - variables[variablesKey] = ParseFields(value, fieldName, release, fieldModifiers, searchUrlUri); + variables[variablesKey] = null; } - catch (Exception ex) - { - if (!variables.ContainsKey(variablesKey)) - { - variables[variablesKey] = null; - } - if (OptionalFields.Contains(field.Key) || fieldModifiers.Contains("optional") || field.Value.Optional) - { - variables[variablesKey] = null; - continue; - } + if (OptionalFields.Contains(field.Key) || fieldModifiers.Contains("optional") || field.Value.Optional) + { + variables[variablesKey] = null; + continue; + } - if (indexerLogging) - { - _logger.Trace("Error while parsing field={0}, selector={1}, value={2}: {3}", field.Key, field.Value.Selector, value == null ? "" : value, ex.Message); - } + if (indexerLogging) + { + _logger.Trace("Error while parsing field={0}, selector={1}, value={2}: {3}", field.Key, field.Value.Selector, value == null ? "" : value, ex.Message); } } + } - var filters = search.Rows.Filters; - var skipRelease = ParseRowFilters(filters, release, variables, row.ToHtmlPretty()); + var filters = search.Rows.Filters; + var skipRelease = ParseRowFilters(filters, release, variables, row.ToHtmlPretty()); + + if (skipRelease) + { + continue; + } - if (skipRelease) + // if DateHeaders is set go through the previous rows and look for the header selector + var dateHeaders = _definition.Search.Rows.Dateheaders; + if (release.PublishDate == DateTime.MinValue && dateHeaders != null) + { + var prevRow = row.PreviousElementSibling; + string value = null; + if (prevRow == null) { - continue; + // continue with parent + var parent = row.ParentElement; + if (parent != null) + { + prevRow = parent.PreviousElementSibling; + } } - // if DateHeaders is set go through the previous rows and look for the header selector - var dateHeaders = _definition.Search.Rows.Dateheaders; - if (release.PublishDate == DateTime.MinValue && dateHeaders != null) + while (prevRow != null) { - var prevRow = row.PreviousElementSibling; - string value = null; + var curRow = prevRow; + _logger.Debug(prevRow.OuterHtml); + try + { + value = HandleSelector(dateHeaders, curRow); + break; + } + catch (Exception) + { + // do nothing + } + + prevRow = curRow.PreviousElementSibling; if (prevRow == null) { // continue with parent - var parent = row.ParentElement; + var parent = curRow.ParentElement; if (parent != null) { prevRow = parent.PreviousElementSibling; } } + } - while (prevRow != null) - { - var curRow = prevRow; - _logger.Debug(prevRow.OuterHtml); - try - { - value = HandleSelector(dateHeaders, curRow); - break; - } - catch (Exception) - { - // do nothing - } - - prevRow = curRow.PreviousElementSibling; - if (prevRow == null) - { - // continue with parent - var parent = curRow.ParentElement; - if (parent != null) - { - prevRow = parent.PreviousElementSibling; - } - } - } - - if (value == null && dateHeaders.Optional == false) - { - throw new CardigannException(string.Format("No date header row found for {0}", release.ToString())); - } - - if (value != null) - { - release.PublishDate = DateTimeUtil.FromUnknown(value); - } + if (value == null && dateHeaders.Optional == false) + { + throw new CardigannException(string.Format("No date header row found for {0}", release.ToString())); } - releases.Add(release); - } - catch (Exception ex) - { - _logger.Error(ex, "CardigannIndexer ({0}): Error while parsing row '{1}':\n\n{2}", _definition.Id, row.ToHtmlPretty()); + if (value != null) + { + release.PublishDate = DateTimeUtil.FromUnknown(value); + } } + + releases.Add(release); + } + catch (Exception ex) + { + _logger.Error(ex, "CardigannIndexer ({0}): Error while parsing row '{1}':\n\n{2}", _definition.Id, row.ToHtmlPretty()); } - } - catch (Exception) - { - // OnParseError(results, ex); - throw; } } @@ -425,11 +417,7 @@ namespace NzbDrone.Core.Indexers.Cardigann break; case "comments": var commentsUrl = ResolvePath(value, searchUrlUri); - if (release.CommentUrl == null) - { - release.CommentUrl = commentsUrl.AbsoluteUri; - } - + release.CommentUrl ??= commentsUrl.AbsoluteUri; value = commentsUrl.ToString(); break; case "title": @@ -554,38 +542,38 @@ namespace NzbDrone.Core.Indexers.Cardigann value = release.ImdbId.ToString(); break; case "tmdbid": - var tmdbIDRegEx = new Regex(@"(\d+)", RegexOptions.Compiled); - var tmdbIDMatch = tmdbIDRegEx.Match(value); - var tmdbID = tmdbIDMatch.Groups[1].Value; - release.TmdbId = (int)ParseUtil.CoerceLong(tmdbID); + var tmdbIdRegex = new Regex(@"(\d+)", RegexOptions.Compiled); + var tmdbIdMatch = tmdbIdRegex.Match(value); + var tmdbId = tmdbIdMatch.Groups[1].Value; + release.TmdbId = (int)ParseUtil.CoerceLong(tmdbId); value = release.TmdbId.ToString(); break; case "rageid": - var rageIDRegEx = new Regex(@"(\d+)", RegexOptions.Compiled); - var rageIDMatch = rageIDRegEx.Match(value); - var rageID = rageIDMatch.Groups[1].Value; - release.TvRageId = (int)ParseUtil.CoerceLong(rageID); + var rageIdRegex = new Regex(@"(\d+)", RegexOptions.Compiled); + var rageIdMatch = rageIdRegex.Match(value); + var rageId = rageIdMatch.Groups[1].Value; + release.TvRageId = (int)ParseUtil.CoerceLong(rageId); value = release.TvRageId.ToString(); break; case "traktid": - var traktIDRegEx = new Regex(@"(\d+)", RegexOptions.Compiled); - var traktIDMatch = traktIDRegEx.Match(value); - var traktID = traktIDMatch.Groups[1].Value; - release.TraktId = (int)ParseUtil.CoerceLong(traktID); + var traktIdRegex = new Regex(@"(\d+)", RegexOptions.Compiled); + var traktIdMatch = traktIdRegex.Match(value); + var traktId = traktIdMatch.Groups[1].Value; + release.TraktId = (int)ParseUtil.CoerceLong(traktId); value = release.TraktId.ToString(); break; case "tvdbid": - var tvdbIdRegEx = new Regex(@"(\d+)", RegexOptions.Compiled); - var tvdbIdMatch = tvdbIdRegEx.Match(value); + var tvdbIdRegex = new Regex(@"(\d+)", RegexOptions.Compiled); + var tvdbIdMatch = tvdbIdRegex.Match(value); var tvdbId = tvdbIdMatch.Groups[1].Value; release.TvdbId = (int)ParseUtil.CoerceLong(tvdbId); value = release.TvdbId.ToString(); break; case "doubanid": - var doubanIDRegEx = new Regex(@"(\d+)", RegexOptions.Compiled); - var doubanIDMatch = doubanIDRegEx.Match(value); - var doubanID = doubanIDMatch.Groups[1].Value; - release.DoubanId = (int)ParseUtil.CoerceLong(doubanID); + var doubanIdRegex = new Regex(@"(\d+)", RegexOptions.Compiled); + var doubanIdMatch = doubanIdRegex.Match(value); + var doubanId = doubanIdMatch.Groups[1].Value; + release.DoubanId = (int)ParseUtil.CoerceLong(doubanId); value = release.DoubanId.ToString(); break; case "poster":