Fixed: (RuTracker/Toloka) Clean title

pull/1419/head
Bogdan 2 years ago
parent f99a2e1164
commit 8b898733ab

@ -2,7 +2,6 @@ using System;
using System.Collections.Generic; using System.Collections.Generic;
using System.Collections.Specialized; using System.Collections.Specialized;
using System.Linq; using System.Linq;
using System.Net.Http;
using System.Text; using System.Text;
using System.Text.RegularExpressions; using System.Text.RegularExpressions;
using System.Threading.Tasks; using System.Threading.Tasks;
@ -25,10 +24,13 @@ namespace NzbDrone.Core.Indexers.Definitions
public class RuTracker : TorrentIndexerBase<RuTrackerSettings> public class RuTracker : TorrentIndexerBase<RuTrackerSettings>
{ {
public override string Name => "RuTracker"; public override string Name => "RuTracker";
public override string[] IndexerUrls => new[] { "https://rutracker.org/", "https://rutracker.net/" }; public override string[] IndexerUrls => new[]
private string LoginUrl => Settings.BaseUrl + "forum/login.php"; {
"https://rutracker.org/",
"https://rutracker.net/"
};
public override string Description => "RuTracker is a Semi-Private Russian torrent site with a thriving file-sharing community"; public override string Description => "RuTracker is a Semi-Private Russian torrent site with a thriving file-sharing community";
public override string Language => "ru-org"; public override string Language => "ru-RU";
public override Encoding Encoding => Encoding.GetEncoding("windows-1251"); public override Encoding Encoding => Encoding.GetEncoding("windows-1251");
public override DownloadProtocol Protocol => DownloadProtocol.Torrent; public override DownloadProtocol Protocol => DownloadProtocol.Torrent;
public override IndexerPrivacy Privacy => IndexerPrivacy.SemiPrivate; public override IndexerPrivacy Privacy => IndexerPrivacy.SemiPrivate;
@ -51,21 +53,24 @@ namespace NzbDrone.Core.Indexers.Definitions
protected override async Task DoLogin() protected override async Task DoLogin()
{ {
var requestBuilder = new HttpRequestBuilder(LoginUrl) var loginUrl = $"{Settings.BaseUrl}forum/login.php";
var requestBuilder = new HttpRequestBuilder(loginUrl)
{ {
LogResponseContent = true, LogResponseContent = true,
AllowAutoRedirect = true, AllowAutoRedirect = true
Method = HttpMethod.Post
}; };
var cookies = Cookies; var cookies = Cookies;
Cookies = null; Cookies = null;
var authLoginRequest = requestBuilder var authLoginRequest = requestBuilder.Post()
.AddFormParameter("login_username", Settings.Username) .AddFormParameter("login_username", Settings.Username)
.AddFormParameter("login_password", Settings.Password) .AddFormParameter("login_password", Settings.Password)
.AddFormParameter("login", "Login") .AddFormParameter("login", "Login")
.AddFormParameter("redirect", "index.php")
.SetHeader("Content-Type", "application/x-www-form-urlencoded") .SetHeader("Content-Type", "application/x-www-form-urlencoded")
.SetHeader("Referer", loginUrl)
.Build(); .Build();
var response = await ExecuteAuth(authLoginRequest); var response = await ExecuteAuth(authLoginRequest);
@ -1416,21 +1421,6 @@ namespace NzbDrone.Core.Indexers.Definitions
return caps; return caps;
} }
public override object RequestAction(string action, IDictionary<string, string> query)
{
if (action == "getUrls")
{
var links = IndexerUrls;
return new
{
options = links.Select(d => new { Value = d, Name = d })
};
}
return null;
}
} }
public class RuTrackerRequestGenerator : IIndexerRequestGenerator public class RuTrackerRequestGenerator : IIndexerRequestGenerator
@ -1446,16 +1436,14 @@ namespace NzbDrone.Core.Indexers.Definitions
private IEnumerable<IndexerRequest> GetPagedRequests(string term, int[] categories, int season = 0) private IEnumerable<IndexerRequest> GetPagedRequests(string term, int[] categories, int season = 0)
{ {
var searchUrl = $"{_settings.BaseUrl.TrimEnd('/')}/forum/tracker.php"; var parameters = new NameValueCollection();
var queryCollection = new NameValueCollection();
var searchString = term; var searchString = term;
// if the search string is empty use the getnew view // if the search string is empty use the getnew view
if (searchString.IsNullOrWhiteSpace()) if (searchString.IsNullOrWhiteSpace())
{ {
queryCollection.Add("nm", searchString); parameters.Set("nm", searchString);
} }
else else
{ {
@ -1466,19 +1454,28 @@ namespace NzbDrone.Core.Indexers.Definitions
searchString += " Сезон: " + season; searchString += " Сезон: " + season;
} }
queryCollection.Add("nm", searchString); parameters.Set("nm", searchString);
} }
if (categories != null && categories.Length > 0) if (categories != null && categories.Length > 0)
{ {
queryCollection.Add("f", string.Join(",", _capabilities.Categories.MapTorznabCapsToTrackers(categories))); parameters.Set("f", string.Join(",", _capabilities.Categories.MapTorznabCapsToTrackers(categories)));
} }
searchUrl = searchUrl + "?" + queryCollection.GetQueryString(); var searchUrl = $"{_settings.BaseUrl}forum/tracker.php";
var request = new IndexerRequest(searchUrl, HttpAccept.Html); if (parameters.Count > 0)
{
searchUrl += $"?{parameters.GetQueryString()}";
}
request.HttpRequest.AllowAutoRedirect = false; var request = new IndexerRequest(searchUrl, HttpAccept.Html)
{
HttpRequest =
{
AllowAutoRedirect = false
}
};
yield return request; yield return request;
} }
@ -1542,6 +1539,8 @@ namespace NzbDrone.Core.Indexers.Definitions
private readonly RuTrackerSettings _settings; private readonly RuTrackerSettings _settings;
private readonly IndexerCapabilitiesCategories _categories; private readonly IndexerCapabilitiesCategories _categories;
private readonly RuTrackerTitleParser _titleParser = new ();
public RuTrackerParser(RuTrackerSettings settings, IndexerCapabilitiesCategories categories) public RuTrackerParser(RuTrackerSettings settings, IndexerCapabilitiesCategories categories)
{ {
_settings = settings; _settings = settings;
@ -1578,12 +1577,12 @@ namespace NzbDrone.Core.Indexers.Definitions
return null; return null;
} }
var link = _settings.BaseUrl + "forum/" + qDownloadLink.GetAttribute("href");
var qDetailsLink = row.QuerySelector("td.t-title-col > div.t-title > a.tLink"); var qDetailsLink = row.QuerySelector("td.t-title-col > div.t-title > a.tLink");
var details = _settings.BaseUrl + "forum/" + qDetailsLink.GetAttribute("href"); var infoUrl = _settings.BaseUrl + "forum/" + qDetailsLink.GetAttribute("href");
var downloadUrl = _settings.BaseUrl + "forum/" + qDownloadLink.GetAttribute("href");
var category = GetCategoryOfRelease(row); var title = qDetailsLink.TextContent.Trim();
var categories = GetCategoryOfRelease(row);
var size = GetSizeOfRelease(row); var size = GetSizeOfRelease(row);
@ -1596,139 +1595,224 @@ namespace NzbDrone.Core.Indexers.Definitions
var release = new TorrentInfo var release = new TorrentInfo
{ {
MinimumRatio = 1, Guid = infoUrl,
MinimumSeedTime = 0, InfoUrl = infoUrl,
Title = qDetailsLink.TextContent, DownloadUrl = downloadUrl,
InfoUrl = details, Title = _titleParser.Parse(title, categories, _settings.RussianLetters, _settings.MoveFirstTagsToEndOfReleaseTitle, _settings.MoveAllTagsToEndOfReleaseTitle),
DownloadUrl = link, Description = title,
Guid = details, Categories = categories,
Size = size, Size = size,
Seeders = seeders, Seeders = seeders,
Peers = leechers + seeders, Peers = leechers + seeders,
Grabs = grabs, Grabs = grabs,
PublishDate = publishDate, PublishDate = publishDate,
Categories = category,
DownloadVolumeFactor = 1, DownloadVolumeFactor = 1,
UploadVolumeFactor = 1 UploadVolumeFactor = 1,
MinimumRatio = 1,
MinimumSeedTime = 0
}; };
// TODO finish extracting release variables to simplify release initialization return release;
if (IsAnyTvCategory(release.Categories)) }
private int GetSeedersOfRelease(in IElement row)
{
var seeders = 0;
var qSeeders = row.QuerySelector("td:nth-child(7)");
if (qSeeders != null && !qSeeders.TextContent.Contains("дн"))
{ {
// extract season and episodes var seedersString = qSeeders.QuerySelector("b").TextContent;
// should also handle multi-season releases listed as Сезон: 1-8 and Сезоны: 1-8 if (!string.IsNullOrWhiteSpace(seedersString))
var regex = new Regex(@".+\/\s([^а-яА-я\/]+)\s\/.+Сезон.\s*[:]*\s+(\d*\-?\d*).+(?:Серии|Эпизод)+\s*[:]*\s+(\d+-?\d*).+(\[.*\])[\s]?(.*)"); {
seeders = ParseUtil.CoerceInt(seedersString);
var title = regex.Replace(release.Title, "$1 - S$2E$3 - rus $4 $5"); }
title = Regex.Replace(title, "-Rip", "Rip", RegexOptions.IgnoreCase);
title = Regex.Replace(title, "WEB-DLRip", "WEBDL", RegexOptions.IgnoreCase);
title = Regex.Replace(title, "WEB-DL", "WEBDL", RegexOptions.IgnoreCase);
title = Regex.Replace(title, "HDTVRip", "HDTV", RegexOptions.IgnoreCase);
title = Regex.Replace(title, "Кураж-Бамбей", "kurazh", RegexOptions.IgnoreCase);
release.Title = title;
} }
else if (IsAnyMovieCategory(release.Categories))
return seeders;
}
private ICollection<IndexerCategory> GetCategoryOfRelease(in IElement row)
{
var forum = row.QuerySelector("td.f-name-col > div.f-name > a")?.GetAttribute("href");
var cat = ParseUtil.GetArgumentFromQueryString(forum, "f");
return _categories.MapTrackerCatToNewznab(cat);
}
private long GetSizeOfRelease(in IElement row)
{
return ParseUtil.GetBytes(row.QuerySelector("td.tor-size").GetAttribute("data-ts_text"));
}
private DateTime GetPublishDateOfRelease(in IElement row)
{
return DateTimeUtil.UnixTimestampToDateTime(long.Parse(row.QuerySelector("td:nth-child(10)").GetAttribute("data-ts_text")));
}
public Action<IDictionary<string, string>, DateTime?> CookiesUpdater { get; set; }
}
public class RuTrackerTitleParser
{
private static readonly List<Regex> FindTagsInTitlesRegexList = new ()
{
new Regex(@"\((?>\((?<c>)|[^()]+|\)(?<-c>))*(?(c)(?!))\)"),
new Regex(@"\[(?>\[(?<c>)|[^\[\]]+|\](?<-c>))*(?(c)(?!))\]")
};
private readonly Regex _stripCyrillicRegex = new (@"(\([\p{IsCyrillic}\W]+\))|(^[\p{IsCyrillic}\W\d]+\/ )|([\p{IsCyrillic} \-]+,+)|([\p{IsCyrillic}]+)", RegexOptions.Compiled | RegexOptions.IgnoreCase);
private readonly Regex _tvTitleCommaRegex = new (@"\s(\d+),(\d+)", RegexOptions.Compiled);
private readonly Regex _tvTitleCyrillicXRegex = new (@"([\s-])Х+([\s\)\]])", RegexOptions.Compiled | RegexOptions.IgnoreCase);
private readonly Regex _tvTitleRusSeasonEpisodeOfRegex = new (@"Сезон\s*[:]*\s+(\d+).+(?:Серии|Эпизод|Выпуски)+\s*[:]*\s+(\d+(?:-\d+)?)\s*из\s*([\w?])", RegexOptions.Compiled | RegexOptions.IgnoreCase);
private readonly Regex _tvTitleRusSeasonEpisodeRegex = new (@"Сезон\s*[:]*\s+(\d+).+(?:Серии|Эпизод|Выпуски)+\s*[:]*\s+(\d+(?:-\d+)?)", RegexOptions.Compiled | RegexOptions.IgnoreCase);
private readonly Regex _tvTitleRusSeasonRegex = new (@"Сезон\s*[:]*\s+(\d+(?:-\d+)?)", RegexOptions.Compiled | RegexOptions.IgnoreCase);
private readonly Regex _tvTitleRusEpisodeOfRegex = new (@"(?:Серии|Эпизод|Выпуски)+\s*[:]*\s+(\d+(?:-\d+)?)\s*из\s*([\w?])", RegexOptions.Compiled | RegexOptions.IgnoreCase);
private readonly Regex _tvTitleRusEpisodeRegex = new (@"(?:Серии|Эпизод|Выпуски)+\s*[:]*\s+(\d+(?:-\d+)?)", RegexOptions.Compiled | RegexOptions.IgnoreCase);
public string Parse(string title, ICollection<IndexerCategory> categories, bool stripCyrillicLetters = true, bool moveFirstTagsToEndOfReleaseTitle = false, bool moveAllTagsToEndOfReleaseTitle = false)
{
// https://www.fileformat.info/info/unicode/category/Pd/list.htm
title = Regex.Replace(title, @"\p{Pd}", "-", RegexOptions.Compiled | RegexOptions.IgnoreCase);
// replace double 4K quality in title
title = Regex.Replace(title, @"\b(2160p), 4K\b", "$1", RegexOptions.Compiled | RegexOptions.IgnoreCase);
if (IsAnyTvCategory(categories))
{
title = _tvTitleCommaRegex.Replace(title, " $1-$2");
title = _tvTitleCyrillicXRegex.Replace(title, "$1XX$2");
title = _tvTitleRusSeasonEpisodeOfRegex.Replace(title, "S$1E$2 of $3");
title = _tvTitleRusSeasonEpisodeRegex.Replace(title, "S$1E$2");
title = _tvTitleRusSeasonRegex.Replace(title, "S$1");
title = _tvTitleRusEpisodeOfRegex.Replace(title, "E$1 of $2");
title = _tvTitleRusEpisodeRegex.Replace(title, "E$1");
}
else if (IsAnyMovieCategory(categories))
{ {
// Bluray quality fix: radarr parse Blu-ray Disc as Bluray-1080p but should be BR-DISK // Bluray quality fix: radarr parse Blu-ray Disc as Bluray-1080p but should be BR-DISK
release.Title = Regex.Replace(release.Title, "Blu-ray Disc", "BR-DISK", RegexOptions.IgnoreCase); title = Regex.Replace(title, @"\bBlu-ray Disc\b", "BR-DISK", RegexOptions.Compiled | RegexOptions.IgnoreCase);
} }
if (IsAnyTvCategory(release.Categories) | IsAnyMovieCategory(release.Categories)) if (IsAnyTvCategory(categories) | IsAnyMovieCategory(categories))
{ {
// remove director's name from title // remove director's name from title
// rutracker movies titles look like: russian name / english name (russian director / english director) other stuff // rutracker movies titles look like: russian name / english name (russian director / english director) other stuff
// Ирландец / The Irishman (Мартин Скорсезе / Martin Scorsese) [2019, США, криминал, драма, биография, WEB-DL 1080p] Dub (Пифагор) + MVO (Jaskier) + AVO (Юрий Сербин) + Sub Rus, Eng + Original Eng // Ирландец / The Irishman (Мартин Скорсезе / Martin Scorsese) [2019, США, криминал, драма, биография, WEB-DL 1080p] Dub (Пифагор) + MVO (Jaskier) + AVO (Юрий Сербин) + Sub Rus, Eng + Original Eng
// this part should be removed: (Мартин Скорсезе / Martin Scorsese) // this part should be removed: (Мартин Скорсезе / Martin Scorsese)
//var director = new Regex(@"(\([А-Яа-яЁё\W]+)\s/\s(.+?)\)"); title = Regex.Replace(title, @"(\([\p{IsCyrillic}\W]+)\s/\s(.+?)\)", string.Empty, RegexOptions.Compiled | RegexOptions.IgnoreCase);
var director = new Regex(@"(\([А-Яа-яЁё\W].+?\))");
release.Title = director.Replace(release.Title, "");
// Remove VO, MVO and DVO from titles // Remove VO, MVO and DVO from titles
var vo = new Regex(@".VO\s\(.+?\)"); var vo = new Regex(@".VO\s\(.+?\)");
release.Title = vo.Replace(release.Title, ""); title = vo.Replace(title, string.Empty);
// Remove R5 and (R5) from release names // Remove R5 and (R5) from release names
var r5 = new Regex(@"(.*)(.R5.)(.*)"); var r5 = new Regex(@"(.*)(.R5.)(.*)");
release.Title = r5.Replace(release.Title, "$1"); title = r5.Replace(title, "$1");
// Remove Sub languages from release names // Remove Sub languages from release names
var sub = new Regex(@"(Sub.*\+)|(Sub.*$)"); title = Regex.Replace(title, @"(\bSub\b.*$|\b[\+]*Sub[\+]*\b)", string.Empty);
release.Title = sub.Replace(release.Title, "");
// language fix: all rutracker releases contains russian track // language fix: all rutracker releases contains russian track
if (release.Title.IndexOf("rus", StringComparison.OrdinalIgnoreCase) < 0) if (title.IndexOf("rus", StringComparison.OrdinalIgnoreCase) < 0)
{ {
release.Title += " rus"; title += " rus";
}
// remove russian letters
if (_settings.RussianLetters == true)
{
//Strip russian letters
var rusRegex = new Regex(@"(\([А-Яа-яЁё\W]+\))|(^[А-Яа-яЁё\W\d]+\/ )|([а-яА-ЯЁё \-]+,+)|([а-яА-ЯЁё]+)");
release.Title = rusRegex.Replace(release.Title, "");
// Replace everything after first forward slash with a year (to avoid filtering away releases with an fwdslash after title+year, like: Title Year [stuff / stuff])
var fwdslashRegex = new Regex(@"(\/\s.+?\[)");
release.Title = fwdslashRegex.Replace(release.Title, "[");
} }
} }
return release; if (stripCyrillicLetters)
} {
title = _stripCyrillicRegex.Replace(title, string.Empty).Trim(' ', '-');
}
private int GetSeedersOfRelease(in IElement row) if (moveAllTagsToEndOfReleaseTitle)
{
var seeders = 0;
var qSeeders = row.QuerySelector("td:nth-child(7)");
if (qSeeders != null && !qSeeders.TextContent.Contains("дн"))
{ {
var seedersString = qSeeders.QuerySelector("b").TextContent; title = MoveAllTagsToEndOfReleaseTitle(title);
if (!string.IsNullOrWhiteSpace(seedersString)) }
{ else if (moveFirstTagsToEndOfReleaseTitle)
seeders = ParseUtil.CoerceInt(seedersString); {
} title = MoveFirstTagsToEndOfReleaseTitle(title);
} }
return seeders; title = Regex.Replace(title, @"\b-Rip\b", "Rip", RegexOptions.Compiled | RegexOptions.IgnoreCase);
} title = Regex.Replace(title, @"\bHDTVRip\b", "HDTV", RegexOptions.Compiled | RegexOptions.IgnoreCase);
title = Regex.Replace(title, @"\bWEB-DLRip\b", "WEB-DL", RegexOptions.Compiled | RegexOptions.IgnoreCase);
title = Regex.Replace(title, @"\bWEBDLRip\b", "WEB-DL", RegexOptions.Compiled | RegexOptions.IgnoreCase);
title = Regex.Replace(title, @"\bWEBDL\b", "WEB-DL", RegexOptions.Compiled | RegexOptions.IgnoreCase);
title = Regex.Replace(title, @"\bКураж-Бамбей\b", "kurazh", RegexOptions.Compiled | RegexOptions.IgnoreCase);
private ICollection<IndexerCategory> GetCategoryOfRelease(in IElement row) title = Regex.Replace(title, @"\(\s*\/\s*", "(", RegexOptions.Compiled);
{ title = Regex.Replace(title, @"\s*\/\s*\)", ")", RegexOptions.Compiled);
var forum = row.QuerySelector("td.f-name-col > div.f-name > a");
var forumid = forum.GetAttribute("href").Split('=')[1]; title = Regex.Replace(title, @"[\[\(]\s*[\)\]]", "", RegexOptions.Compiled);
return _categories.MapTrackerCatToNewznab(forumid);
title = Regex.Replace(title, @"\s+\+(?:\s+\+)+\s+", " + ", RegexOptions.Compiled);
title = title.Trim(' ', '&', ',', '.', '!', '?', '+', '-', '_', '|', '/', '\\', ':');
// replace multiple spaces with a single space
title = Regex.Replace(title, @"\s+", " ");
return title.Trim();
} }
private long GetSizeOfRelease(in IElement row) private static bool IsAnyTvCategory(ICollection<IndexerCategory> category)
{ {
var qSize = row.QuerySelector("td.tor-size"); return category.Contains(NewznabStandardCategory.TV) || NewznabStandardCategory.TV.SubCategories.Any(subCat => category.Contains(subCat));
var size = ParseUtil.GetBytes(qSize.GetAttribute("data-ts_text"));
return size;
} }
private DateTime GetPublishDateOfRelease(in IElement row) private static bool IsAnyMovieCategory(ICollection<IndexerCategory> category)
{ {
var timestr = row.QuerySelector("td:nth-child(10)").GetAttribute("data-ts_text"); return category.Contains(NewznabStandardCategory.Movies) || NewznabStandardCategory.Movies.SubCategories.Any(subCat => category.Contains(subCat));
var publishDate = DateTimeUtil.UnixTimestampToDateTime(long.Parse(timestr));
return publishDate;
} }
private bool IsAnyTvCategory(ICollection<IndexerCategory> category) private static string MoveAllTagsToEndOfReleaseTitle(string input)
{ {
return category.Contains(NewznabStandardCategory.TV) var output = input;
|| NewznabStandardCategory.TV.SubCategories.Any(subCat => category.Contains(subCat)); foreach (var findTagsRegex in FindTagsInTitlesRegexList)
{
foreach (Match match in findTagsRegex.Matches(input))
{
var tag = match.ToString();
output = $"{output.Replace(tag, "")} {tag}".Trim();
}
}
return output.Trim();
} }
private bool IsAnyMovieCategory(ICollection<IndexerCategory> category) private static string MoveFirstTagsToEndOfReleaseTitle(string input)
{ {
return category.Contains(NewznabStandardCategory.Movies) var output = input;
|| NewznabStandardCategory.Movies.SubCategories.Any(subCat => category.Contains(subCat)); foreach (var findTagsRegex in FindTagsInTitlesRegexList)
} {
var expectedIndex = 0;
foreach (Match match in findTagsRegex.Matches(output))
{
if (match.Index > expectedIndex)
{
var substring = output.Substring(expectedIndex, match.Index - expectedIndex);
if (string.IsNullOrWhiteSpace(substring))
{
expectedIndex = match.Index;
}
else
{
break;
}
}
var tag = match.ToString();
var regex = new Regex(Regex.Escape(tag));
output = $"{regex.Replace(output, string.Empty, 1)} {tag}".Trim();
expectedIndex += tag.Length;
}
}
public Action<IDictionary<string, string>, DateTime?> CookiesUpdater { get; set; } return output.Trim();
}
} }
public class RuTrackerSettings : UserPassTorrentBaseSettings public class RuTrackerSettings : UserPassTorrentBaseSettings
@ -1736,9 +1820,17 @@ namespace NzbDrone.Core.Indexers.Definitions
public RuTrackerSettings() public RuTrackerSettings()
{ {
RussianLetters = false; RussianLetters = false;
MoveFirstTagsToEndOfReleaseTitle = false;
MoveAllTagsToEndOfReleaseTitle = false;
} }
[FieldDefinition(4, Label = "Strip Russian letters", Type = FieldType.Checkbox, SelectOptionsProviderAction = "stripRussian", HelpText = "Removes russian letters")] [FieldDefinition(4, Label = "Strip Russian letters", Type = FieldType.Checkbox, HelpText = "Removes russian letters")]
public bool RussianLetters { get; set; } public bool RussianLetters { get; set; }
[FieldDefinition(5, Label = "Move first tags to end of release title", Type = FieldType.Checkbox)]
public bool MoveFirstTagsToEndOfReleaseTitle { get; set; }
[FieldDefinition(6, Label = "Move all tags to end of release title", Type = FieldType.Checkbox)]
public bool MoveAllTagsToEndOfReleaseTitle { get; set; }
} }
} }

@ -1,7 +1,6 @@
using System; using System;
using System.Collections.Generic; using System.Collections.Generic;
using System.Linq; using System.Linq;
using System.Net.Http;
using System.Text; using System.Text;
using System.Text.RegularExpressions; using System.Text.RegularExpressions;
using System.Threading.Tasks; using System.Threading.Tasks;
@ -52,16 +51,15 @@ namespace NzbDrone.Core.Indexers.Definitions
protected override async Task DoLogin() protected override async Task DoLogin()
{ {
var loginUrl = Settings.BaseUrl + "login.php"; var loginUrl = $"{Settings.BaseUrl}login.php";
var requestBuilder = new HttpRequestBuilder(loginUrl) var requestBuilder = new HttpRequestBuilder(loginUrl)
{ {
LogResponseContent = true, LogResponseContent = true,
AllowAutoRedirect = true, AllowAutoRedirect = true
Method = HttpMethod.Post
}; };
var authLoginRequest = requestBuilder var authLoginRequest = requestBuilder.Post()
.AddFormParameter("username", Settings.Username) .AddFormParameter("username", Settings.Username)
.AddFormParameter("password", Settings.Password) .AddFormParameter("password", Settings.Password)
.AddFormParameter("autologin", "on") .AddFormParameter("autologin", "on")
@ -76,8 +74,6 @@ namespace NzbDrone.Core.Indexers.Definitions
if (CheckIfLoginNeeded(response)) if (CheckIfLoginNeeded(response))
{ {
_logger.Debug(response.Content);
var parser = new HtmlParser(); var parser = new HtmlParser();
var dom = parser.ParseDocument(response.Content); var dom = parser.ParseDocument(response.Content);
var errorMessage = dom.QuerySelector("table.forumline table span.gen")?.FirstChild?.TextContent; var errorMessage = dom.QuerySelector("table.forumline table span.gen")?.FirstChild?.TextContent;
@ -328,16 +324,12 @@ namespace NzbDrone.Core.Indexers.Definitions
}; };
var queryCats = _capabilities.Categories.MapTorznabCapsToTrackers(categories); var queryCats = _capabilities.Categories.MapTorznabCapsToTrackers(categories);
if (queryCats.Any()) if (queryCats.Any())
{ {
foreach (var cat in queryCats) queryCats.ForEach(cat => parameters.Add("f[]", $"{cat}"));
{
parameters.Add("f[]", $"{cat}");
}
} }
var searchUrl = _settings.BaseUrl + "tracker.php"; var searchUrl = $"{_settings.BaseUrl}tracker.php";
if (parameters.Count > 0) if (parameters.Count > 0)
{ {
@ -358,6 +350,8 @@ namespace NzbDrone.Core.Indexers.Definitions
private readonly TolokaSettings _settings; private readonly TolokaSettings _settings;
private readonly IndexerCapabilitiesCategories _categories; private readonly IndexerCapabilitiesCategories _categories;
private readonly TolokaTitleParser _titleParser = new ();
public TolokaParser(TolokaSettings settings, IndexerCapabilitiesCategories categories) public TolokaParser(TolokaSettings settings, IndexerCapabilitiesCategories categories)
{ {
_settings = settings; _settings = settings;
@ -383,10 +377,9 @@ namespace NzbDrone.Core.Indexers.Definitions
} }
var infoUrl = _settings.BaseUrl + row.QuerySelector("td:nth-child(3) > a")?.GetAttribute("href"); var infoUrl = _settings.BaseUrl + row.QuerySelector("td:nth-child(3) > a")?.GetAttribute("href");
var title = row.QuerySelector("td:nth-child(3) > a")?.TextContent.Trim() ?? string.Empty;
var title = row.QuerySelector("td:nth-child(3) > a").TextContent.Trim(); var categoryLink = row.QuerySelector("td:nth-child(2) > a")?.GetAttribute("href") ?? string.Empty;
var categoryLink = row.QuerySelector("td:nth-child(2) > a").GetAttribute("href");
var cat = ParseUtil.GetArgumentFromQueryString(categoryLink, "f"); var cat = ParseUtil.GetArgumentFromQueryString(categoryLink, "f");
var categories = _categories.MapTrackerCatToNewznab(cat); var categories = _categories.MapTrackerCatToNewznab(cat);
@ -394,14 +387,15 @@ namespace NzbDrone.Core.Indexers.Definitions
var peers = seeders + ParseUtil.CoerceInt(row.QuerySelector("td:nth-child(11) > b")?.TextContent.Trim()); var peers = seeders + ParseUtil.CoerceInt(row.QuerySelector("td:nth-child(11) > b")?.TextContent.Trim());
// 2023-01-21 // 2023-01-21
var added = row.QuerySelector("td:nth-child(13)").TextContent.Trim(); var added = row.QuerySelector("td:nth-child(13)")?.TextContent.Trim() ?? string.Empty;
var release = new TorrentInfo var release = new TorrentInfo
{ {
Guid = infoUrl, Guid = infoUrl,
InfoUrl = infoUrl, InfoUrl = infoUrl,
DownloadUrl = _settings.BaseUrl + downloadUrl, DownloadUrl = _settings.BaseUrl + downloadUrl,
Title = CleanTitle(title, categories, _settings.StripCyrillicLetters), Title = _titleParser.Parse(title, categories, _settings.StripCyrillicLetters),
Description = title,
Categories = categories, Categories = categories,
Seeders = seeders, Seeders = seeders,
Peers = peers, Peers = peers,
@ -420,27 +414,65 @@ namespace NzbDrone.Core.Indexers.Definitions
return releaseInfos.ToArray(); return releaseInfos.ToArray();
} }
private static bool IsAnyTvCategory(ICollection<IndexerCategory> category) public Action<IDictionary<string, string>, DateTime?> CookiesUpdater { get; set; }
{ }
return category.Contains(NewznabStandardCategory.TV) || NewznabStandardCategory.TV.SubCategories.Any(subCategory => category.Contains(subCategory));
}
private static string CleanTitle(string title, ICollection<IndexerCategory> categories, bool stripCyrillicLetters = true) public class TolokaTitleParser
{
private static readonly List<Regex> FindTagsInTitlesRegexList = new ()
{ {
var tvShowTitleRegex = new Regex(".+\\/\\s([^а-яА-я\\/]+)\\s\\/.+Сезон\\s*[:]*\\s+(\\d+).+(?:Серії|Епізод)+\\s*[:]*\\s+(\\d+-*\\d*).+,\\s+(.+)\\]\\s(.+)", RegexOptions.Compiled | RegexOptions.IgnoreCase); new Regex(@"\((?>\((?<c>)|[^()]+|\)(?<-c>))*(?(c)(?!))\)"),
var stripCyrillicRegex = new Regex(@"(\([\p{IsCyrillic}\W]+\))|(^[\p{IsCyrillic}\W\d]+\/ )|([\p{IsCyrillic} \-]+,+)|([\p{IsCyrillic}]+)", RegexOptions.Compiled | RegexOptions.IgnoreCase); new Regex(@"\[(?>\[(?<c>)|[^\[\]]+|\](?<-c>))*(?(c)(?!))\]")
};
private readonly Regex _tvTitleCommaRegex = new (@"\s(\d+),(\d+)", RegexOptions.Compiled);
private readonly Regex _tvTitleCyrillicXRegex = new (@"([\s-])Х+([\)\]])", RegexOptions.Compiled | RegexOptions.IgnoreCase);
private readonly Regex _tvTitleMultipleSeasonsRegex = new (@"(?:Сезон|Seasons?)\s*[:]*\s+(\d+-\d+)", RegexOptions.Compiled | RegexOptions.IgnoreCase);
private readonly Regex _tvTitleUkrSeasonEpisodeOfRegex = new (@"Сезон\s*[:]*\s+(\d+).+(?:Серії|Серія|Серій|Епізод)+\s*[:]*\s+(\d+(?:-\d+)?)\s*з\s*([\w?])", RegexOptions.Compiled | RegexOptions.IgnoreCase);
private readonly Regex _tvTitleUkrSeasonEpisodeRegex = new (@"Сезон\s*[:]*\s+(\d+).+(?:Серії|Серія|Серій|Епізод)+\s*[:]*\s+(\d+(?:-\d+)?)", RegexOptions.Compiled | RegexOptions.IgnoreCase);
private readonly Regex _tvTitleUkrSeasonRegex = new (@"Сезон\s*[:]*\s+(\d+)", RegexOptions.Compiled | RegexOptions.IgnoreCase);
private readonly Regex _tvTitleUkrEpisodeOfRegex = new (@"(?:Серії|Серія|Серій|Епізод)+\s*[:]*\s+(\d+(?:-\d+)?)\s*з\s*([\w?])", RegexOptions.Compiled | RegexOptions.IgnoreCase);
private readonly Regex _tvTitleUkrEpisodeRegex = new (@"(?:Серії|Серія|Серій|Епізод)+\s*[:]*\s+(\d+(?:-\d+)?)", RegexOptions.Compiled | RegexOptions.IgnoreCase);
private readonly Regex _tvTitleEngSeasonEpisodeOfRegex = new (@"Season\s*[:]*\s+(\d+).+(?:Episodes?)+\s*[:]*\s+(\d+(?:-\d+)?)\s*of\s*([\w?])", RegexOptions.Compiled | RegexOptions.IgnoreCase);
private readonly Regex _tvTitleEngSeasonEpisodeRegex = new (@"Season\s*[:]*\s+(\d+).+(?:Episodes?)+\s*[:]*\s+(\d+(?:-\d+)?)", RegexOptions.Compiled | RegexOptions.IgnoreCase);
private readonly Regex _tvTitleEngSeasonRegex = new (@"Season\s*[:]*\s+(\d+(?:-\d+)?)", RegexOptions.Compiled | RegexOptions.IgnoreCase);
private readonly Regex _tvTitleEngEpisodeOfRegex = new (@"(?:Episodes?)+\s*[:]*\s+(\d+(?:-\d+)?)\s*of\s*([\w?])", RegexOptions.Compiled | RegexOptions.IgnoreCase);
private readonly Regex _tvTitleEngEpisodeRegex = new (@"(?:Episodes?)+\s*[:]+\s*[:]*\s+(\d+(?:-\d+)?)", RegexOptions.Compiled | RegexOptions.IgnoreCase);
private readonly Regex _stripCyrillicRegex = new (@"(\([\p{IsCyrillic}\W]+\))|(^[\p{IsCyrillic}\W\d]+\/ )|([\p{IsCyrillic} \-]+,+)|([\p{IsCyrillic}]+)", RegexOptions.Compiled | RegexOptions.IgnoreCase);
public string Parse(string title, ICollection<IndexerCategory> categories, bool stripCyrillicLetters = true)
{
// https://www.fileformat.info/info/unicode/category/Pd/list.htm // https://www.fileformat.info/info/unicode/category/Pd/list.htm
title = Regex.Replace(title, "\\p{Pd}", "-", RegexOptions.Compiled | RegexOptions.IgnoreCase); title = Regex.Replace(title, @"\p{Pd}", "-", RegexOptions.Compiled | RegexOptions.IgnoreCase);
if (IsAnyTvCategory(categories)) if (IsAnyTvCategory(categories))
{ {
// extract season and episodes title = _tvTitleCommaRegex.Replace(title, " $1-$2");
title = tvShowTitleRegex.Replace(title, "$1 - S$2E$3 - rus $4 $5"); title = _tvTitleCyrillicXRegex.Replace(title, "$1XX$2");
// special case for multiple seasons
title = _tvTitleMultipleSeasonsRegex.Replace(title, "S$1");
title = _tvTitleUkrSeasonEpisodeOfRegex.Replace(title, "S$1E$2 of $3");
title = _tvTitleUkrSeasonEpisodeRegex.Replace(title, "S$1E$2");
title = _tvTitleUkrSeasonRegex.Replace(title, "S$1");
title = _tvTitleUkrEpisodeOfRegex.Replace(title, "E$1 of $2");
title = _tvTitleUkrEpisodeRegex.Replace(title, "E$1");
title = _tvTitleEngSeasonEpisodeOfRegex.Replace(title, "S$1E$2 of $3");
title = _tvTitleEngSeasonEpisodeRegex.Replace(title, "S$1E$2");
title = _tvTitleEngSeasonRegex.Replace(title, "S$1");
title = _tvTitleEngEpisodeOfRegex.Replace(title, "E$1 of $2");
title = _tvTitleEngEpisodeRegex.Replace(title, "E$1");
} }
else if (stripCyrillicLetters)
if (stripCyrillicLetters)
{ {
title = stripCyrillicRegex.Replace(title, string.Empty); title = _stripCyrillicRegex.Replace(title, string.Empty).Trim(' ', '-');
} }
title = Regex.Replace(title, @"\b-Rip\b", "Rip", RegexOptions.Compiled | RegexOptions.IgnoreCase); title = Regex.Replace(title, @"\b-Rip\b", "Rip", RegexOptions.Compiled | RegexOptions.IgnoreCase);
@ -449,10 +481,56 @@ namespace NzbDrone.Core.Indexers.Definitions
title = Regex.Replace(title, @"\bWEBDLRip\b", "WEB-DL", RegexOptions.Compiled | RegexOptions.IgnoreCase); title = Regex.Replace(title, @"\bWEBDLRip\b", "WEB-DL", RegexOptions.Compiled | RegexOptions.IgnoreCase);
title = Regex.Replace(title, @"\bWEBDL\b", "WEB-DL", RegexOptions.Compiled | RegexOptions.IgnoreCase); title = Regex.Replace(title, @"\bWEBDL\b", "WEB-DL", RegexOptions.Compiled | RegexOptions.IgnoreCase);
return title.Trim(' ', '.', '-', '_', '|', '/', '\''); title = MoveFirstTagsToEndOfReleaseTitle(title);
title = Regex.Replace(title, @"\(\s*\/\s*", "(", RegexOptions.Compiled);
title = Regex.Replace(title, @"\s*\/\s*\)", ")", RegexOptions.Compiled);
title = Regex.Replace(title, @"[\[\(]\s*[\)\]]", "", RegexOptions.Compiled);
title = title.Trim(' ', '&', ',', '.', '!', '?', '+', '-', '_', '|', '/', '\\', ':');
// replace multiple spaces with a single space
title = Regex.Replace(title, @"\s+", " ");
return title.Trim();
} }
public Action<IDictionary<string, string>, DateTime?> CookiesUpdater { get; set; } private static bool IsAnyTvCategory(ICollection<IndexerCategory> category)
{
return category.Contains(NewznabStandardCategory.TV) || NewznabStandardCategory.TV.SubCategories.Any(subCategory => category.Contains(subCategory));
}
private static string MoveFirstTagsToEndOfReleaseTitle(string input)
{
var output = input;
foreach (var findTagsRegex in FindTagsInTitlesRegexList)
{
var expectedIndex = 0;
foreach (Match match in findTagsRegex.Matches(output))
{
if (match.Index > expectedIndex)
{
var substring = output.Substring(expectedIndex, match.Index - expectedIndex);
if (string.IsNullOrWhiteSpace(substring))
{
expectedIndex = match.Index;
}
else
{
break;
}
}
var tag = match.ToString();
var regex = new Regex(Regex.Escape(tag));
output = $"{regex.Replace(output, string.Empty, 1)} {tag}".Trim();
expectedIndex += tag.Length;
}
}
return output.Trim();
}
} }
public class TolokaSettings : UserPassTorrentBaseSettings public class TolokaSettings : UserPassTorrentBaseSettings

Loading…
Cancel
Save