diff --git a/src/NzbDrone.Core/Indexers/Definitions/RuTracker.cs b/src/NzbDrone.Core/Indexers/Definitions/RuTracker.cs index 633a6bf85..074c58bb7 100644 --- a/src/NzbDrone.Core/Indexers/Definitions/RuTracker.cs +++ b/src/NzbDrone.Core/Indexers/Definitions/RuTracker.cs @@ -2,7 +2,6 @@ using System; using System.Collections.Generic; using System.Collections.Specialized; using System.Linq; -using System.Net.Http; using System.Text; using System.Text.RegularExpressions; using System.Threading.Tasks; @@ -25,10 +24,13 @@ namespace NzbDrone.Core.Indexers.Definitions public class RuTracker : TorrentIndexerBase { public override string Name => "RuTracker"; - public override string[] IndexerUrls => new[] { "https://rutracker.org/", "https://rutracker.net/" }; - private string LoginUrl => Settings.BaseUrl + "forum/login.php"; + public override string[] IndexerUrls => new[] + { + "https://rutracker.org/", + "https://rutracker.net/" + }; public override string Description => "RuTracker is a Semi-Private Russian torrent site with a thriving file-sharing community"; - public override string Language => "ru-org"; + public override string Language => "ru-RU"; public override Encoding Encoding => Encoding.GetEncoding("windows-1251"); public override DownloadProtocol Protocol => DownloadProtocol.Torrent; public override IndexerPrivacy Privacy => IndexerPrivacy.SemiPrivate; @@ -51,21 +53,24 @@ namespace NzbDrone.Core.Indexers.Definitions protected override async Task DoLogin() { - var requestBuilder = new HttpRequestBuilder(LoginUrl) + var loginUrl = $"{Settings.BaseUrl}forum/login.php"; + + var requestBuilder = new HttpRequestBuilder(loginUrl) { LogResponseContent = true, - AllowAutoRedirect = true, - Method = HttpMethod.Post + AllowAutoRedirect = true }; var cookies = Cookies; Cookies = null; - var authLoginRequest = requestBuilder + var authLoginRequest = requestBuilder.Post() .AddFormParameter("login_username", Settings.Username) .AddFormParameter("login_password", Settings.Password) .AddFormParameter("login", "Login") + .AddFormParameter("redirect", "index.php") .SetHeader("Content-Type", "application/x-www-form-urlencoded") + .SetHeader("Referer", loginUrl) .Build(); var response = await ExecuteAuth(authLoginRequest); @@ -1416,21 +1421,6 @@ namespace NzbDrone.Core.Indexers.Definitions return caps; } - - public override object RequestAction(string action, IDictionary query) - { - if (action == "getUrls") - { - var links = IndexerUrls; - - return new - { - options = links.Select(d => new { Value = d, Name = d }) - }; - } - - return null; - } } public class RuTrackerRequestGenerator : IIndexerRequestGenerator @@ -1446,16 +1436,14 @@ namespace NzbDrone.Core.Indexers.Definitions private IEnumerable GetPagedRequests(string term, int[] categories, int season = 0) { - var searchUrl = $"{_settings.BaseUrl.TrimEnd('/')}/forum/tracker.php"; - - var queryCollection = new NameValueCollection(); + var parameters = new NameValueCollection(); var searchString = term; // if the search string is empty use the getnew view if (searchString.IsNullOrWhiteSpace()) { - queryCollection.Add("nm", searchString); + parameters.Set("nm", searchString); } else { @@ -1466,19 +1454,28 @@ namespace NzbDrone.Core.Indexers.Definitions searchString += " Сезон: " + season; } - queryCollection.Add("nm", searchString); + parameters.Set("nm", searchString); } if (categories != null && categories.Length > 0) { - queryCollection.Add("f", string.Join(",", _capabilities.Categories.MapTorznabCapsToTrackers(categories))); + parameters.Set("f", string.Join(",", _capabilities.Categories.MapTorznabCapsToTrackers(categories))); } - searchUrl = searchUrl + "?" + queryCollection.GetQueryString(); + var searchUrl = $"{_settings.BaseUrl}forum/tracker.php"; - var request = new IndexerRequest(searchUrl, HttpAccept.Html); + if (parameters.Count > 0) + { + searchUrl += $"?{parameters.GetQueryString()}"; + } - request.HttpRequest.AllowAutoRedirect = false; + var request = new IndexerRequest(searchUrl, HttpAccept.Html) + { + HttpRequest = + { + AllowAutoRedirect = false + } + }; yield return request; } @@ -1542,6 +1539,8 @@ namespace NzbDrone.Core.Indexers.Definitions private readonly RuTrackerSettings _settings; private readonly IndexerCapabilitiesCategories _categories; + private readonly RuTrackerTitleParser _titleParser = new (); + public RuTrackerParser(RuTrackerSettings settings, IndexerCapabilitiesCategories categories) { _settings = settings; @@ -1578,12 +1577,12 @@ namespace NzbDrone.Core.Indexers.Definitions return null; } - var link = _settings.BaseUrl + "forum/" + qDownloadLink.GetAttribute("href"); - var qDetailsLink = row.QuerySelector("td.t-title-col > div.t-title > a.tLink"); - var details = _settings.BaseUrl + "forum/" + qDetailsLink.GetAttribute("href"); + var infoUrl = _settings.BaseUrl + "forum/" + qDetailsLink.GetAttribute("href"); + var downloadUrl = _settings.BaseUrl + "forum/" + qDownloadLink.GetAttribute("href"); - var category = GetCategoryOfRelease(row); + var title = qDetailsLink.TextContent.Trim(); + var categories = GetCategoryOfRelease(row); var size = GetSizeOfRelease(row); @@ -1596,139 +1595,224 @@ namespace NzbDrone.Core.Indexers.Definitions var release = new TorrentInfo { - MinimumRatio = 1, - MinimumSeedTime = 0, - Title = qDetailsLink.TextContent, - InfoUrl = details, - DownloadUrl = link, - Guid = details, + Guid = infoUrl, + InfoUrl = infoUrl, + DownloadUrl = downloadUrl, + Title = _titleParser.Parse(title, categories, _settings.RussianLetters, _settings.MoveFirstTagsToEndOfReleaseTitle, _settings.MoveAllTagsToEndOfReleaseTitle), + Description = title, + Categories = categories, Size = size, Seeders = seeders, Peers = leechers + seeders, Grabs = grabs, PublishDate = publishDate, - Categories = category, DownloadVolumeFactor = 1, - UploadVolumeFactor = 1 + UploadVolumeFactor = 1, + MinimumRatio = 1, + MinimumSeedTime = 0 }; - // TODO finish extracting release variables to simplify release initialization - if (IsAnyTvCategory(release.Categories)) + return release; + } + + private int GetSeedersOfRelease(in IElement row) + { + var seeders = 0; + var qSeeders = row.QuerySelector("td:nth-child(7)"); + if (qSeeders != null && !qSeeders.TextContent.Contains("дн")) { - // extract season and episodes - // should also handle multi-season releases listed as Сезон: 1-8 and Сезоны: 1-8 - var regex = new Regex(@".+\/\s([^а-яА-я\/]+)\s\/.+Сезон.\s*[:]*\s+(\d*\-?\d*).+(?:Серии|Эпизод)+\s*[:]*\s+(\d+-?\d*).+(\[.*\])[\s]?(.*)"); - - var title = regex.Replace(release.Title, "$1 - S$2E$3 - rus $4 $5"); - title = Regex.Replace(title, "-Rip", "Rip", RegexOptions.IgnoreCase); - title = Regex.Replace(title, "WEB-DLRip", "WEBDL", RegexOptions.IgnoreCase); - title = Regex.Replace(title, "WEB-DL", "WEBDL", RegexOptions.IgnoreCase); - title = Regex.Replace(title, "HDTVRip", "HDTV", RegexOptions.IgnoreCase); - title = Regex.Replace(title, "Кураж-Бамбей", "kurazh", RegexOptions.IgnoreCase); - - release.Title = title; + var seedersString = qSeeders.QuerySelector("b").TextContent; + if (!string.IsNullOrWhiteSpace(seedersString)) + { + seeders = ParseUtil.CoerceInt(seedersString); + } } - else if (IsAnyMovieCategory(release.Categories)) + + return seeders; + } + + private ICollection GetCategoryOfRelease(in IElement row) + { + var forum = row.QuerySelector("td.f-name-col > div.f-name > a")?.GetAttribute("href"); + var cat = ParseUtil.GetArgumentFromQueryString(forum, "f"); + + return _categories.MapTrackerCatToNewznab(cat); + } + + private long GetSizeOfRelease(in IElement row) + { + return ParseUtil.GetBytes(row.QuerySelector("td.tor-size").GetAttribute("data-ts_text")); + } + + private DateTime GetPublishDateOfRelease(in IElement row) + { + return DateTimeUtil.UnixTimestampToDateTime(long.Parse(row.QuerySelector("td:nth-child(10)").GetAttribute("data-ts_text"))); + } + + public Action, DateTime?> CookiesUpdater { get; set; } + } + + public class RuTrackerTitleParser + { + private static readonly List FindTagsInTitlesRegexList = new () + { + new Regex(@"\((?>\((?)|[^()]+|\)(?<-c>))*(?(c)(?!))\)"), + new Regex(@"\[(?>\[(?)|[^\[\]]+|\](?<-c>))*(?(c)(?!))\]") + }; + + private readonly Regex _stripCyrillicRegex = new (@"(\([\p{IsCyrillic}\W]+\))|(^[\p{IsCyrillic}\W\d]+\/ )|([\p{IsCyrillic} \-]+,+)|([\p{IsCyrillic}]+)", RegexOptions.Compiled | RegexOptions.IgnoreCase); + + private readonly Regex _tvTitleCommaRegex = new (@"\s(\d+),(\d+)", RegexOptions.Compiled); + private readonly Regex _tvTitleCyrillicXRegex = new (@"([\s-])Х+([\s\)\]])", RegexOptions.Compiled | RegexOptions.IgnoreCase); + + private readonly Regex _tvTitleRusSeasonEpisodeOfRegex = new (@"Сезон\s*[:]*\s+(\d+).+(?:Серии|Эпизод|Выпуски)+\s*[:]*\s+(\d+(?:-\d+)?)\s*из\s*([\w?])", RegexOptions.Compiled | RegexOptions.IgnoreCase); + private readonly Regex _tvTitleRusSeasonEpisodeRegex = new (@"Сезон\s*[:]*\s+(\d+).+(?:Серии|Эпизод|Выпуски)+\s*[:]*\s+(\d+(?:-\d+)?)", RegexOptions.Compiled | RegexOptions.IgnoreCase); + private readonly Regex _tvTitleRusSeasonRegex = new (@"Сезон\s*[:]*\s+(\d+(?:-\d+)?)", RegexOptions.Compiled | RegexOptions.IgnoreCase); + private readonly Regex _tvTitleRusEpisodeOfRegex = new (@"(?:Серии|Эпизод|Выпуски)+\s*[:]*\s+(\d+(?:-\d+)?)\s*из\s*([\w?])", RegexOptions.Compiled | RegexOptions.IgnoreCase); + private readonly Regex _tvTitleRusEpisodeRegex = new (@"(?:Серии|Эпизод|Выпуски)+\s*[:]*\s+(\d+(?:-\d+)?)", RegexOptions.Compiled | RegexOptions.IgnoreCase); + + public string Parse(string title, ICollection categories, bool stripCyrillicLetters = true, bool moveFirstTagsToEndOfReleaseTitle = false, bool moveAllTagsToEndOfReleaseTitle = false) + { + // https://www.fileformat.info/info/unicode/category/Pd/list.htm + title = Regex.Replace(title, @"\p{Pd}", "-", RegexOptions.Compiled | RegexOptions.IgnoreCase); + + // replace double 4K quality in title + title = Regex.Replace(title, @"\b(2160p), 4K\b", "$1", RegexOptions.Compiled | RegexOptions.IgnoreCase); + + if (IsAnyTvCategory(categories)) + { + title = _tvTitleCommaRegex.Replace(title, " $1-$2"); + title = _tvTitleCyrillicXRegex.Replace(title, "$1XX$2"); + + title = _tvTitleRusSeasonEpisodeOfRegex.Replace(title, "S$1E$2 of $3"); + title = _tvTitleRusSeasonEpisodeRegex.Replace(title, "S$1E$2"); + title = _tvTitleRusSeasonRegex.Replace(title, "S$1"); + title = _tvTitleRusEpisodeOfRegex.Replace(title, "E$1 of $2"); + title = _tvTitleRusEpisodeRegex.Replace(title, "E$1"); + } + else if (IsAnyMovieCategory(categories)) { // Bluray quality fix: radarr parse Blu-ray Disc as Bluray-1080p but should be BR-DISK - release.Title = Regex.Replace(release.Title, "Blu-ray Disc", "BR-DISK", RegexOptions.IgnoreCase); + title = Regex.Replace(title, @"\bBlu-ray Disc\b", "BR-DISK", RegexOptions.Compiled | RegexOptions.IgnoreCase); } - if (IsAnyTvCategory(release.Categories) | IsAnyMovieCategory(release.Categories)) + if (IsAnyTvCategory(categories) | IsAnyMovieCategory(categories)) { // remove director's name from title // rutracker movies titles look like: russian name / english name (russian director / english director) other stuff // Ирландец / The Irishman (Мартин Скорсезе / Martin Scorsese) [2019, США, криминал, драма, биография, WEB-DL 1080p] Dub (Пифагор) + MVO (Jaskier) + AVO (Юрий Сербин) + Sub Rus, Eng + Original Eng // this part should be removed: (Мартин Скорсезе / Martin Scorsese) - //var director = new Regex(@"(\([А-Яа-яЁё\W]+)\s/\s(.+?)\)"); - var director = new Regex(@"(\([А-Яа-яЁё\W].+?\))"); - release.Title = director.Replace(release.Title, ""); + title = Regex.Replace(title, @"(\([\p{IsCyrillic}\W]+)\s/\s(.+?)\)", string.Empty, RegexOptions.Compiled | RegexOptions.IgnoreCase); // Remove VO, MVO and DVO from titles var vo = new Regex(@".VO\s\(.+?\)"); - release.Title = vo.Replace(release.Title, ""); + title = vo.Replace(title, string.Empty); // Remove R5 and (R5) from release names var r5 = new Regex(@"(.*)(.R5.)(.*)"); - release.Title = r5.Replace(release.Title, "$1"); + title = r5.Replace(title, "$1"); // Remove Sub languages from release names - var sub = new Regex(@"(Sub.*\+)|(Sub.*$)"); - release.Title = sub.Replace(release.Title, ""); + title = Regex.Replace(title, @"(\bSub\b.*$|\b[\+]*Sub[\+]*\b)", string.Empty); // language fix: all rutracker releases contains russian track - if (release.Title.IndexOf("rus", StringComparison.OrdinalIgnoreCase) < 0) + if (title.IndexOf("rus", StringComparison.OrdinalIgnoreCase) < 0) { - release.Title += " rus"; - } - - // remove russian letters - if (_settings.RussianLetters == true) - { - //Strip russian letters - var rusRegex = new Regex(@"(\([А-Яа-яЁё\W]+\))|(^[А-Яа-яЁё\W\d]+\/ )|([а-яА-ЯЁё \-]+,+)|([а-яА-ЯЁё]+)"); - - release.Title = rusRegex.Replace(release.Title, ""); - - // Replace everything after first forward slash with a year (to avoid filtering away releases with an fwdslash after title+year, like: Title Year [stuff / stuff]) - var fwdslashRegex = new Regex(@"(\/\s.+?\[)"); - release.Title = fwdslashRegex.Replace(release.Title, "["); + title += " rus"; } } - return release; - } + if (stripCyrillicLetters) + { + title = _stripCyrillicRegex.Replace(title, string.Empty).Trim(' ', '-'); + } - private int GetSeedersOfRelease(in IElement row) - { - var seeders = 0; - var qSeeders = row.QuerySelector("td:nth-child(7)"); - if (qSeeders != null && !qSeeders.TextContent.Contains("дн")) + if (moveAllTagsToEndOfReleaseTitle) { - var seedersString = qSeeders.QuerySelector("b").TextContent; - if (!string.IsNullOrWhiteSpace(seedersString)) - { - seeders = ParseUtil.CoerceInt(seedersString); - } + title = MoveAllTagsToEndOfReleaseTitle(title); + } + else if (moveFirstTagsToEndOfReleaseTitle) + { + title = MoveFirstTagsToEndOfReleaseTitle(title); } - return seeders; - } + title = Regex.Replace(title, @"\b-Rip\b", "Rip", RegexOptions.Compiled | RegexOptions.IgnoreCase); + title = Regex.Replace(title, @"\bHDTVRip\b", "HDTV", RegexOptions.Compiled | RegexOptions.IgnoreCase); + title = Regex.Replace(title, @"\bWEB-DLRip\b", "WEB-DL", RegexOptions.Compiled | RegexOptions.IgnoreCase); + title = Regex.Replace(title, @"\bWEBDLRip\b", "WEB-DL", RegexOptions.Compiled | RegexOptions.IgnoreCase); + title = Regex.Replace(title, @"\bWEBDL\b", "WEB-DL", RegexOptions.Compiled | RegexOptions.IgnoreCase); + title = Regex.Replace(title, @"\bКураж-Бамбей\b", "kurazh", RegexOptions.Compiled | RegexOptions.IgnoreCase); - private ICollection GetCategoryOfRelease(in IElement row) - { - var forum = row.QuerySelector("td.f-name-col > div.f-name > a"); - var forumid = forum.GetAttribute("href").Split('=')[1]; - return _categories.MapTrackerCatToNewznab(forumid); + title = Regex.Replace(title, @"\(\s*\/\s*", "(", RegexOptions.Compiled); + title = Regex.Replace(title, @"\s*\/\s*\)", ")", RegexOptions.Compiled); + + title = Regex.Replace(title, @"[\[\(]\s*[\)\]]", "", RegexOptions.Compiled); + + title = Regex.Replace(title, @"\s+\+(?:\s+\+)+\s+", " + ", RegexOptions.Compiled); + + title = title.Trim(' ', '&', ',', '.', '!', '?', '+', '-', '_', '|', '/', '\\', ':'); + + // replace multiple spaces with a single space + title = Regex.Replace(title, @"\s+", " "); + + return title.Trim(); } - private long GetSizeOfRelease(in IElement row) + private static bool IsAnyTvCategory(ICollection category) { - var qSize = row.QuerySelector("td.tor-size"); - var size = ParseUtil.GetBytes(qSize.GetAttribute("data-ts_text")); - return size; + return category.Contains(NewznabStandardCategory.TV) || NewznabStandardCategory.TV.SubCategories.Any(subCat => category.Contains(subCat)); } - private DateTime GetPublishDateOfRelease(in IElement row) + private static bool IsAnyMovieCategory(ICollection category) { - var timestr = row.QuerySelector("td:nth-child(10)").GetAttribute("data-ts_text"); - var publishDate = DateTimeUtil.UnixTimestampToDateTime(long.Parse(timestr)); - return publishDate; + return category.Contains(NewznabStandardCategory.Movies) || NewznabStandardCategory.Movies.SubCategories.Any(subCat => category.Contains(subCat)); } - private bool IsAnyTvCategory(ICollection category) + private static string MoveAllTagsToEndOfReleaseTitle(string input) { - return category.Contains(NewznabStandardCategory.TV) - || NewznabStandardCategory.TV.SubCategories.Any(subCat => category.Contains(subCat)); + var output = input; + foreach (var findTagsRegex in FindTagsInTitlesRegexList) + { + foreach (Match match in findTagsRegex.Matches(input)) + { + var tag = match.ToString(); + output = $"{output.Replace(tag, "")} {tag}".Trim(); + } + } + + return output.Trim(); } - private bool IsAnyMovieCategory(ICollection category) + private static string MoveFirstTagsToEndOfReleaseTitle(string input) { - return category.Contains(NewznabStandardCategory.Movies) - || NewznabStandardCategory.Movies.SubCategories.Any(subCat => category.Contains(subCat)); - } + var output = input; + foreach (var findTagsRegex in FindTagsInTitlesRegexList) + { + var expectedIndex = 0; + foreach (Match match in findTagsRegex.Matches(output)) + { + if (match.Index > expectedIndex) + { + var substring = output.Substring(expectedIndex, match.Index - expectedIndex); + if (string.IsNullOrWhiteSpace(substring)) + { + expectedIndex = match.Index; + } + else + { + break; + } + } + + var tag = match.ToString(); + var regex = new Regex(Regex.Escape(tag)); + output = $"{regex.Replace(output, string.Empty, 1)} {tag}".Trim(); + expectedIndex += tag.Length; + } + } - public Action, DateTime?> CookiesUpdater { get; set; } + return output.Trim(); + } } public class RuTrackerSettings : UserPassTorrentBaseSettings @@ -1736,9 +1820,17 @@ namespace NzbDrone.Core.Indexers.Definitions public RuTrackerSettings() { RussianLetters = false; + MoveFirstTagsToEndOfReleaseTitle = false; + MoveAllTagsToEndOfReleaseTitle = false; } - [FieldDefinition(4, Label = "Strip Russian letters", Type = FieldType.Checkbox, SelectOptionsProviderAction = "stripRussian", HelpText = "Removes russian letters")] + [FieldDefinition(4, Label = "Strip Russian letters", Type = FieldType.Checkbox, HelpText = "Removes russian letters")] public bool RussianLetters { get; set; } + + [FieldDefinition(5, Label = "Move first tags to end of release title", Type = FieldType.Checkbox)] + public bool MoveFirstTagsToEndOfReleaseTitle { get; set; } + + [FieldDefinition(6, Label = "Move all tags to end of release title", Type = FieldType.Checkbox)] + public bool MoveAllTagsToEndOfReleaseTitle { get; set; } } } diff --git a/src/NzbDrone.Core/Indexers/Definitions/Toloka.cs b/src/NzbDrone.Core/Indexers/Definitions/Toloka.cs index 5d85c206d..4aef0d744 100644 --- a/src/NzbDrone.Core/Indexers/Definitions/Toloka.cs +++ b/src/NzbDrone.Core/Indexers/Definitions/Toloka.cs @@ -1,7 +1,6 @@ using System; using System.Collections.Generic; using System.Linq; -using System.Net.Http; using System.Text; using System.Text.RegularExpressions; using System.Threading.Tasks; @@ -52,16 +51,15 @@ namespace NzbDrone.Core.Indexers.Definitions protected override async Task DoLogin() { - var loginUrl = Settings.BaseUrl + "login.php"; + var loginUrl = $"{Settings.BaseUrl}login.php"; var requestBuilder = new HttpRequestBuilder(loginUrl) { LogResponseContent = true, - AllowAutoRedirect = true, - Method = HttpMethod.Post + AllowAutoRedirect = true }; - var authLoginRequest = requestBuilder + var authLoginRequest = requestBuilder.Post() .AddFormParameter("username", Settings.Username) .AddFormParameter("password", Settings.Password) .AddFormParameter("autologin", "on") @@ -76,8 +74,6 @@ namespace NzbDrone.Core.Indexers.Definitions if (CheckIfLoginNeeded(response)) { - _logger.Debug(response.Content); - var parser = new HtmlParser(); var dom = parser.ParseDocument(response.Content); var errorMessage = dom.QuerySelector("table.forumline table span.gen")?.FirstChild?.TextContent; @@ -328,16 +324,12 @@ namespace NzbDrone.Core.Indexers.Definitions }; var queryCats = _capabilities.Categories.MapTorznabCapsToTrackers(categories); - if (queryCats.Any()) { - foreach (var cat in queryCats) - { - parameters.Add("f[]", $"{cat}"); - } + queryCats.ForEach(cat => parameters.Add("f[]", $"{cat}")); } - var searchUrl = _settings.BaseUrl + "tracker.php"; + var searchUrl = $"{_settings.BaseUrl}tracker.php"; if (parameters.Count > 0) { @@ -358,6 +350,8 @@ namespace NzbDrone.Core.Indexers.Definitions private readonly TolokaSettings _settings; private readonly IndexerCapabilitiesCategories _categories; + private readonly TolokaTitleParser _titleParser = new (); + public TolokaParser(TolokaSettings settings, IndexerCapabilitiesCategories categories) { _settings = settings; @@ -383,10 +377,9 @@ namespace NzbDrone.Core.Indexers.Definitions } var infoUrl = _settings.BaseUrl + row.QuerySelector("td:nth-child(3) > a")?.GetAttribute("href"); + var title = row.QuerySelector("td:nth-child(3) > a")?.TextContent.Trim() ?? string.Empty; - var title = row.QuerySelector("td:nth-child(3) > a").TextContent.Trim(); - - var categoryLink = row.QuerySelector("td:nth-child(2) > a").GetAttribute("href"); + var categoryLink = row.QuerySelector("td:nth-child(2) > a")?.GetAttribute("href") ?? string.Empty; var cat = ParseUtil.GetArgumentFromQueryString(categoryLink, "f"); var categories = _categories.MapTrackerCatToNewznab(cat); @@ -394,14 +387,15 @@ namespace NzbDrone.Core.Indexers.Definitions var peers = seeders + ParseUtil.CoerceInt(row.QuerySelector("td:nth-child(11) > b")?.TextContent.Trim()); // 2023-01-21 - var added = row.QuerySelector("td:nth-child(13)").TextContent.Trim(); + var added = row.QuerySelector("td:nth-child(13)")?.TextContent.Trim() ?? string.Empty; var release = new TorrentInfo { Guid = infoUrl, InfoUrl = infoUrl, DownloadUrl = _settings.BaseUrl + downloadUrl, - Title = CleanTitle(title, categories, _settings.StripCyrillicLetters), + Title = _titleParser.Parse(title, categories, _settings.StripCyrillicLetters), + Description = title, Categories = categories, Seeders = seeders, Peers = peers, @@ -420,27 +414,65 @@ namespace NzbDrone.Core.Indexers.Definitions return releaseInfos.ToArray(); } - private static bool IsAnyTvCategory(ICollection category) - { - return category.Contains(NewznabStandardCategory.TV) || NewznabStandardCategory.TV.SubCategories.Any(subCategory => category.Contains(subCategory)); - } + public Action, DateTime?> CookiesUpdater { get; set; } + } - private static string CleanTitle(string title, ICollection categories, bool stripCyrillicLetters = true) + public class TolokaTitleParser + { + private static readonly List FindTagsInTitlesRegexList = new () { - var tvShowTitleRegex = new Regex(".+\\/\\s([^а-яА-я\\/]+)\\s\\/.+Сезон\\s*[:]*\\s+(\\d+).+(?:Серії|Епізод)+\\s*[:]*\\s+(\\d+-*\\d*).+,\\s+(.+)\\]\\s(.+)", RegexOptions.Compiled | RegexOptions.IgnoreCase); - var stripCyrillicRegex = new Regex(@"(\([\p{IsCyrillic}\W]+\))|(^[\p{IsCyrillic}\W\d]+\/ )|([\p{IsCyrillic} \-]+,+)|([\p{IsCyrillic}]+)", RegexOptions.Compiled | RegexOptions.IgnoreCase); + new Regex(@"\((?>\((?)|[^()]+|\)(?<-c>))*(?(c)(?!))\)"), + new Regex(@"\[(?>\[(?)|[^\[\]]+|\](?<-c>))*(?(c)(?!))\]") + }; + + private readonly Regex _tvTitleCommaRegex = new (@"\s(\d+),(\d+)", RegexOptions.Compiled); + private readonly Regex _tvTitleCyrillicXRegex = new (@"([\s-])Х+([\)\]])", RegexOptions.Compiled | RegexOptions.IgnoreCase); + + private readonly Regex _tvTitleMultipleSeasonsRegex = new (@"(?:Сезон|Seasons?)\s*[:]*\s+(\d+-\d+)", RegexOptions.Compiled | RegexOptions.IgnoreCase); + private readonly Regex _tvTitleUkrSeasonEpisodeOfRegex = new (@"Сезон\s*[:]*\s+(\d+).+(?:Серії|Серія|Серій|Епізод)+\s*[:]*\s+(\d+(?:-\d+)?)\s*з\s*([\w?])", RegexOptions.Compiled | RegexOptions.IgnoreCase); + private readonly Regex _tvTitleUkrSeasonEpisodeRegex = new (@"Сезон\s*[:]*\s+(\d+).+(?:Серії|Серія|Серій|Епізод)+\s*[:]*\s+(\d+(?:-\d+)?)", RegexOptions.Compiled | RegexOptions.IgnoreCase); + private readonly Regex _tvTitleUkrSeasonRegex = new (@"Сезон\s*[:]*\s+(\d+)", RegexOptions.Compiled | RegexOptions.IgnoreCase); + private readonly Regex _tvTitleUkrEpisodeOfRegex = new (@"(?:Серії|Серія|Серій|Епізод)+\s*[:]*\s+(\d+(?:-\d+)?)\s*з\s*([\w?])", RegexOptions.Compiled | RegexOptions.IgnoreCase); + private readonly Regex _tvTitleUkrEpisodeRegex = new (@"(?:Серії|Серія|Серій|Епізод)+\s*[:]*\s+(\d+(?:-\d+)?)", RegexOptions.Compiled | RegexOptions.IgnoreCase); + + private readonly Regex _tvTitleEngSeasonEpisodeOfRegex = new (@"Season\s*[:]*\s+(\d+).+(?:Episodes?)+\s*[:]*\s+(\d+(?:-\d+)?)\s*of\s*([\w?])", RegexOptions.Compiled | RegexOptions.IgnoreCase); + private readonly Regex _tvTitleEngSeasonEpisodeRegex = new (@"Season\s*[:]*\s+(\d+).+(?:Episodes?)+\s*[:]*\s+(\d+(?:-\d+)?)", RegexOptions.Compiled | RegexOptions.IgnoreCase); + private readonly Regex _tvTitleEngSeasonRegex = new (@"Season\s*[:]*\s+(\d+(?:-\d+)?)", RegexOptions.Compiled | RegexOptions.IgnoreCase); + private readonly Regex _tvTitleEngEpisodeOfRegex = new (@"(?:Episodes?)+\s*[:]*\s+(\d+(?:-\d+)?)\s*of\s*([\w?])", RegexOptions.Compiled | RegexOptions.IgnoreCase); + private readonly Regex _tvTitleEngEpisodeRegex = new (@"(?:Episodes?)+\s*[:]+\s*[:]*\s+(\d+(?:-\d+)?)", RegexOptions.Compiled | RegexOptions.IgnoreCase); + + private readonly Regex _stripCyrillicRegex = new (@"(\([\p{IsCyrillic}\W]+\))|(^[\p{IsCyrillic}\W\d]+\/ )|([\p{IsCyrillic} \-]+,+)|([\p{IsCyrillic}]+)", RegexOptions.Compiled | RegexOptions.IgnoreCase); + + public string Parse(string title, ICollection categories, bool stripCyrillicLetters = true) + { // https://www.fileformat.info/info/unicode/category/Pd/list.htm - title = Regex.Replace(title, "\\p{Pd}", "-", RegexOptions.Compiled | RegexOptions.IgnoreCase); + title = Regex.Replace(title, @"\p{Pd}", "-", RegexOptions.Compiled | RegexOptions.IgnoreCase); if (IsAnyTvCategory(categories)) { - // extract season and episodes - title = tvShowTitleRegex.Replace(title, "$1 - S$2E$3 - rus $4 $5"); + title = _tvTitleCommaRegex.Replace(title, " $1-$2"); + title = _tvTitleCyrillicXRegex.Replace(title, "$1XX$2"); + + // special case for multiple seasons + title = _tvTitleMultipleSeasonsRegex.Replace(title, "S$1"); + + title = _tvTitleUkrSeasonEpisodeOfRegex.Replace(title, "S$1E$2 of $3"); + title = _tvTitleUkrSeasonEpisodeRegex.Replace(title, "S$1E$2"); + title = _tvTitleUkrSeasonRegex.Replace(title, "S$1"); + title = _tvTitleUkrEpisodeOfRegex.Replace(title, "E$1 of $2"); + title = _tvTitleUkrEpisodeRegex.Replace(title, "E$1"); + + title = _tvTitleEngSeasonEpisodeOfRegex.Replace(title, "S$1E$2 of $3"); + title = _tvTitleEngSeasonEpisodeRegex.Replace(title, "S$1E$2"); + title = _tvTitleEngSeasonRegex.Replace(title, "S$1"); + title = _tvTitleEngEpisodeOfRegex.Replace(title, "E$1 of $2"); + title = _tvTitleEngEpisodeRegex.Replace(title, "E$1"); } - else if (stripCyrillicLetters) + + if (stripCyrillicLetters) { - title = stripCyrillicRegex.Replace(title, string.Empty); + title = _stripCyrillicRegex.Replace(title, string.Empty).Trim(' ', '-'); } title = Regex.Replace(title, @"\b-Rip\b", "Rip", RegexOptions.Compiled | RegexOptions.IgnoreCase); @@ -449,10 +481,56 @@ namespace NzbDrone.Core.Indexers.Definitions title = Regex.Replace(title, @"\bWEBDLRip\b", "WEB-DL", RegexOptions.Compiled | RegexOptions.IgnoreCase); title = Regex.Replace(title, @"\bWEBDL\b", "WEB-DL", RegexOptions.Compiled | RegexOptions.IgnoreCase); - return title.Trim(' ', '.', '-', '_', '|', '/', '\''); + title = MoveFirstTagsToEndOfReleaseTitle(title); + + title = Regex.Replace(title, @"\(\s*\/\s*", "(", RegexOptions.Compiled); + title = Regex.Replace(title, @"\s*\/\s*\)", ")", RegexOptions.Compiled); + + title = Regex.Replace(title, @"[\[\(]\s*[\)\]]", "", RegexOptions.Compiled); + + title = title.Trim(' ', '&', ',', '.', '!', '?', '+', '-', '_', '|', '/', '\\', ':'); + + // replace multiple spaces with a single space + title = Regex.Replace(title, @"\s+", " "); + + return title.Trim(); } - public Action, DateTime?> CookiesUpdater { get; set; } + private static bool IsAnyTvCategory(ICollection category) + { + return category.Contains(NewznabStandardCategory.TV) || NewznabStandardCategory.TV.SubCategories.Any(subCategory => category.Contains(subCategory)); + } + + private static string MoveFirstTagsToEndOfReleaseTitle(string input) + { + var output = input; + foreach (var findTagsRegex in FindTagsInTitlesRegexList) + { + var expectedIndex = 0; + foreach (Match match in findTagsRegex.Matches(output)) + { + if (match.Index > expectedIndex) + { + var substring = output.Substring(expectedIndex, match.Index - expectedIndex); + if (string.IsNullOrWhiteSpace(substring)) + { + expectedIndex = match.Index; + } + else + { + break; + } + } + + var tag = match.ToString(); + var regex = new Regex(Regex.Escape(tag)); + output = $"{regex.Replace(output, string.Empty, 1)} {tag}".Trim(); + expectedIndex += tag.Length; + } + } + + return output.Trim(); + } } public class TolokaSettings : UserPassTorrentBaseSettings