Fixed: (pornolab) Improvements generator/parsing

pull/1386/head
Bogdan 2 years ago
parent 6b62504916
commit ec389987df

@ -1,11 +1,11 @@
using System; using System;
using System.Collections.Generic; using System.Collections.Generic;
using System.Linq;
using System.Net.Http; using System.Net.Http;
using System.Text; using System.Text;
using System.Text.RegularExpressions; using System.Text.RegularExpressions;
using System.Threading.Tasks; using System.Threading.Tasks;
using AngleSharp.Html.Parser; using AngleSharp.Html.Parser;
using FluentValidation;
using NLog; using NLog;
using NzbDrone.Common.Extensions; using NzbDrone.Common.Extensions;
using NzbDrone.Common.Http; using NzbDrone.Common.Http;
@ -17,14 +17,13 @@ using NzbDrone.Core.IndexerSearch.Definitions;
using NzbDrone.Core.Messaging.Events; using NzbDrone.Core.Messaging.Events;
using NzbDrone.Core.Parser; using NzbDrone.Core.Parser;
using NzbDrone.Core.Parser.Model; using NzbDrone.Core.Parser.Model;
using NzbDrone.Core.Validation;
namespace NzbDrone.Core.Indexers.Definitions namespace NzbDrone.Core.Indexers.Definitions
{ {
public class PornoLab : TorrentIndexerBase<PornoLabSettings> public class PornoLab : TorrentIndexerBase<PornoLabSettings>
{ {
public override string Name => "PornoLab"; public override string Name => "PornoLab";
public override string[] IndexerUrls => new string[] { "https://pornolab.net/" }; public override string[] IndexerUrls => new[] { "https://pornolab.net/" };
private string LoginUrl => Settings.BaseUrl + "forum/login.php"; private string LoginUrl => Settings.BaseUrl + "forum/login.php";
public override string Description => "PornoLab is a Semi-Private Russian site for Adult content"; public override string Description => "PornoLab is a Semi-Private Russian site for Adult content";
public override string Language => "ru-RU"; public override string Language => "ru-RU";
@ -40,7 +39,7 @@ namespace NzbDrone.Core.Indexers.Definitions
public override IIndexerRequestGenerator GetRequestGenerator() public override IIndexerRequestGenerator GetRequestGenerator()
{ {
return new PornoLabRequestGenerator() { Settings = Settings, Capabilities = Capabilities }; return new PornoLabRequestGenerator(Settings, Capabilities);
} }
public override IParseIndexerResponse GetParser() public override IParseIndexerResponse GetParser()
@ -68,38 +67,26 @@ namespace NzbDrone.Core.Indexers.Definitions
if (CheckIfLoginNeeded(response)) if (CheckIfLoginNeeded(response))
{ {
var errorMessage = "Unknown error message, please report";
var loginResultParser = new HtmlParser(); var loginResultParser = new HtmlParser();
var loginResultDocument = loginResultParser.ParseDocument(response.Content); var loginResultDocument = loginResultParser.ParseDocument(response.Content);
var errormsg = loginResultDocument.QuerySelector("h4[class=\"warnColor1 tCenter mrg_16\"]"); var errorMessage = loginResultDocument.QuerySelector("h4[class=\"warnColor1 tCenter mrg_16\"]")?.TextContent;
if (errormsg != null)
{
errorMessage = errormsg.TextContent;
}
throw new IndexerAuthException(errorMessage); throw new IndexerAuthException(errorMessage ?? "Unknown error message, please report");
} }
UpdateCookies(response.GetCookies(), DateTime.Now + TimeSpan.FromDays(30)); UpdateCookies(response.GetCookies(), DateTime.Now + TimeSpan.FromDays(30));
_logger.Debug("PornoLab authentication succeeded"); _logger.Debug("Authentication succeeded");
} }
protected override bool CheckIfLoginNeeded(HttpResponse httpResponse) protected override bool CheckIfLoginNeeded(HttpResponse httpResponse)
{ {
if (!httpResponse.Content.Contains("Вы зашли как:")) return !httpResponse.Content.Contains("Вы зашли как:");
{
return true;
}
return false;
} }
private IndexerCapabilities SetCapabilities() private IndexerCapabilities SetCapabilities()
{ {
var caps = new IndexerCapabilities var caps = new IndexerCapabilities();
{
};
caps.Categories.AddCategoryMapping(1768, NewznabStandardCategory.XXX, "Эротические фильмы / Erotic Movies"); caps.Categories.AddCategoryMapping(1768, NewznabStandardCategory.XXX, "Эротические фильмы / Erotic Movies");
caps.Categories.AddCategoryMapping(60, NewznabStandardCategory.XXX, "Документальные фильмы / Documentary & Reality"); caps.Categories.AddCategoryMapping(60, NewznabStandardCategory.XXX, "Документальные фильмы / Documentary & Reality");
@ -246,45 +233,37 @@ namespace NzbDrone.Core.Indexers.Definitions
public class PornoLabRequestGenerator : IIndexerRequestGenerator public class PornoLabRequestGenerator : IIndexerRequestGenerator
{ {
public PornoLabSettings Settings { get; set; } private readonly PornoLabSettings _settings;
public IndexerCapabilities Capabilities { get; set; } private readonly IndexerCapabilities _capabilities;
public PornoLabRequestGenerator() public PornoLabRequestGenerator(PornoLabSettings settings, IndexerCapabilities capabilities)
{ {
_settings = settings;
_capabilities = capabilities;
} }
private IEnumerable<IndexerRequest> GetPagedRequests(string term, int[] categories) private IEnumerable<IndexerRequest> GetPagedRequests(string term, int[] categories)
{ {
var searchUrl = string.Format("{0}/forum/tracker.php", Settings.BaseUrl.TrimEnd('/')); var parameters = new List<KeyValuePair<string, string>>
var searchString = term;
// NameValueCollection don't support cat[]=19&cat[]=6
var qc = new List<KeyValuePair<string, string>>
{ {
{ "o", "1" }, { "o", "1" },
{ "s", "2" } { "s", "2" },
{ "nm", term.IsNotNullOrWhiteSpace() ? term.Replace("-", " ") : "" }
}; };
// if the search string is empty use the getnew view var queryCats = _capabilities.Categories.MapTorznabCapsToTrackers(categories);
if (string.IsNullOrWhiteSpace(searchString)) if (queryCats.Any())
{
qc.Add("nm", searchString);
}
else
{ {
// use the normal search queryCats.ForEach(cat => parameters.Add("f[]", $"{cat}"));
searchString = searchString.Replace("-", " ");
qc.Add("nm", searchString);
} }
foreach (var cat in Capabilities.Categories.MapTorznabCapsToTrackers(categories)) var searchUrl = $"{_settings.BaseUrl.TrimEnd('/')}/forum/tracker.php";
if (parameters.Count > 0)
{ {
qc.Add("f[]", cat); searchUrl += $"?{parameters.GetQueryString()}";
} }
searchUrl = searchUrl + "?" + qc.GetQueryString();
var request = new IndexerRequest(searchUrl, HttpAccept.Html); var request = new IndexerRequest(searchUrl, HttpAccept.Html);
yield return request; yield return request;
@ -294,7 +273,7 @@ namespace NzbDrone.Core.Indexers.Definitions
{ {
var pageableRequests = new IndexerPageableRequestChain(); var pageableRequests = new IndexerPageableRequestChain();
pageableRequests.Add(GetPagedRequests(string.Format("{0}", searchCriteria.SanitizedSearchTerm), searchCriteria.Categories)); pageableRequests.Add(GetPagedRequests($"{searchCriteria.SanitizedSearchTerm}", searchCriteria.Categories));
return pageableRequests; return pageableRequests;
} }
@ -303,7 +282,7 @@ namespace NzbDrone.Core.Indexers.Definitions
{ {
var pageableRequests = new IndexerPageableRequestChain(); var pageableRequests = new IndexerPageableRequestChain();
pageableRequests.Add(GetPagedRequests(string.Format("{0}", searchCriteria.SanitizedSearchTerm), searchCriteria.Categories)); pageableRequests.Add(GetPagedRequests($"{searchCriteria.SanitizedSearchTerm}", searchCriteria.Categories));
return pageableRequests; return pageableRequests;
} }
@ -312,7 +291,7 @@ namespace NzbDrone.Core.Indexers.Definitions
{ {
var pageableRequests = new IndexerPageableRequestChain(); var pageableRequests = new IndexerPageableRequestChain();
pageableRequests.Add(GetPagedRequests(string.Format("{0}", searchCriteria.SanitizedTvSearchString), searchCriteria.Categories)); pageableRequests.Add(GetPagedRequests($"{searchCriteria.SanitizedTvSearchString}", searchCriteria.Categories));
return pageableRequests; return pageableRequests;
} }
@ -321,7 +300,7 @@ namespace NzbDrone.Core.Indexers.Definitions
{ {
var pageableRequests = new IndexerPageableRequestChain(); var pageableRequests = new IndexerPageableRequestChain();
pageableRequests.Add(GetPagedRequests(string.Format("{0}", searchCriteria.SanitizedSearchTerm), searchCriteria.Categories)); pageableRequests.Add(GetPagedRequests($"{searchCriteria.SanitizedSearchTerm}", searchCriteria.Categories));
return pageableRequests; return pageableRequests;
} }
@ -330,7 +309,7 @@ namespace NzbDrone.Core.Indexers.Definitions
{ {
var pageableRequests = new IndexerPageableRequestChain(); var pageableRequests = new IndexerPageableRequestChain();
pageableRequests.Add(GetPagedRequests(string.Format("{0}", searchCriteria.SanitizedSearchTerm), searchCriteria.Categories)); pageableRequests.Add(GetPagedRequests($"{searchCriteria.SanitizedSearchTerm}", searchCriteria.Categories));
return pageableRequests; return pageableRequests;
} }
@ -344,7 +323,7 @@ namespace NzbDrone.Core.Indexers.Definitions
private readonly PornoLabSettings _settings; private readonly PornoLabSettings _settings;
private readonly IndexerCapabilitiesCategories _categories; private readonly IndexerCapabilitiesCategories _categories;
private readonly Logger _logger; private readonly Logger _logger;
private static readonly Regex StripRussianRegex = new Regex(@"(\([А-Яа-яЁё\W]+\))|(^[А-Яа-яЁё\W\d]+\/ )|([а-яА-ЯЁё \-]+,+)|([а-яА-ЯЁё]+)"); private static readonly Regex StripRussianRegex = new (@"(\([\p{IsCyrillic}\W]+\))|(^[\p{IsCyrillic}\W\d]+\/ )|([\p{IsCyrillic} \-]+,+)|([\p{IsCyrillic}]+)");
public PornoLabParser(PornoLabSettings settings, IndexerCapabilitiesCategories categories, Logger logger) public PornoLabParser(PornoLabSettings settings, IndexerCapabilitiesCategories categories, Logger logger)
{ {
@ -355,13 +334,12 @@ namespace NzbDrone.Core.Indexers.Definitions
public IList<ReleaseInfo> ParseResponse(IndexerResponse indexerResponse) public IList<ReleaseInfo> ParseResponse(IndexerResponse indexerResponse)
{ {
var torrentInfos = new List<ReleaseInfo>(); var releaseInfos = new List<ReleaseInfo>();
var rowsSelector = "table#tor-tbl > tbody > tr";
var searchResultParser = new HtmlParser(); var searchResultParser = new HtmlParser();
var searchResultDocument = searchResultParser.ParseDocument(indexerResponse.Content); var searchResultDocument = searchResultParser.ParseDocument(indexerResponse.Content);
var rows = searchResultDocument.QuerySelectorAll(rowsSelector);
var rows = searchResultDocument.QuerySelectorAll("table#tor-tbl > tbody > tr");
foreach (var row in rows) foreach (var row in rows)
{ {
try try
@ -377,48 +355,47 @@ namespace NzbDrone.Core.Indexers.Definitions
var qForumLink = row.QuerySelector("a.f"); var qForumLink = row.QuerySelector("a.f");
var qDetailsLink = row.QuerySelector("a.tLink"); var qDetailsLink = row.QuerySelector("a.tLink");
var qSize = row.QuerySelector("td:nth-child(6) u"); var qSize = row.QuerySelector("td:nth-child(6) u");
var link = new Uri(_settings.BaseUrl + "forum/" + qDetailsLink.GetAttribute("href")); var infoUrl = _settings.BaseUrl + "forum/" + qDetailsLink.GetAttribute("href");
var seederString = row.QuerySelector("td:nth-child(7) b").TextContent; var seederString = row.QuerySelector("td:nth-child(7) b").TextContent;
var seeders = string.IsNullOrWhiteSpace(seederString) ? 0 : ParseUtil.CoerceInt(seederString); var seeders = string.IsNullOrWhiteSpace(seederString) ? 0 : ParseUtil.CoerceInt(seederString);
var timestr = row.QuerySelector("td:nth-child(11) u").TextContent; var forumid = ParseUtil.GetArgumentFromQueryString(qForumLink?.GetAttribute("href"), "f");
var forum = qForumLink;
var forumid = forum.GetAttribute("href").Split('=')[1];
var title = _settings.StripRussianLetters var title = _settings.StripRussianLetters
? StripRussianRegex.Replace(qDetailsLink.TextContent, "") ? StripRussianRegex.Replace(qDetailsLink.TextContent, string.Empty)
: qDetailsLink.TextContent; : qDetailsLink.TextContent;
var size = ParseUtil.GetBytes(qSize.TextContent); var size = ParseUtil.GetBytes(qSize.TextContent);
var leechers = ParseUtil.CoerceInt(row.QuerySelector("td:nth-child(8)").TextContent); var leechers = ParseUtil.CoerceInt(row.QuerySelector("td:nth-child(8)").TextContent);
var grabs = ParseUtil.CoerceInt(row.QuerySelector("td:nth-child(9)").TextContent); var grabs = ParseUtil.CoerceInt(row.QuerySelector("td:nth-child(9)").TextContent);
var publishDate = DateTimeUtil.UnixTimestampToDateTime(long.Parse(timestr)); var publishDate = DateTimeUtil.UnixTimestampToDateTime(long.Parse(row.QuerySelector("td:nth-child(11) u").TextContent));
var release = new TorrentInfo var release = new TorrentInfo
{ {
MinimumRatio = 1, Guid = infoUrl,
MinimumSeedTime = 0, DownloadUrl = infoUrl,
InfoUrl = infoUrl,
Title = title, Title = title,
InfoUrl = link.AbsoluteUri,
Description = qForumLink.TextContent, Description = qForumLink.TextContent,
DownloadUrl = link.AbsoluteUri, Categories = _categories.MapTrackerCatToNewznab(forumid),
Guid = link.AbsoluteUri,
Size = size, Size = size,
Grabs = grabs,
Seeders = seeders, Seeders = seeders,
Peers = leechers + seeders, Peers = leechers + seeders,
Grabs = grabs,
PublishDate = publishDate, PublishDate = publishDate,
Categories = _categories.MapTrackerCatToNewznab(forumid),
DownloadVolumeFactor = 1, DownloadVolumeFactor = 1,
UploadVolumeFactor = 1 UploadVolumeFactor = 1,
MinimumRatio = 1,
MinimumSeedTime = 0,
}; };
torrentInfos.Add(release); releaseInfos.Add(release);
} }
catch (Exception ex) catch (Exception ex)
{ {
_logger.Error(string.Format("Pornolab: Error while parsing row '{0}':\n\n{1}", row.OuterHtml, ex)); _logger.Error($"Pornolab: Error while parsing row '{row.OuterHtml}':\n\n{ex}");
} }
} }
return torrentInfos.ToArray(); return releaseInfos.ToArray();
} }
public Action<IDictionary<string, string>, DateTime?> CookiesUpdater { get; set; } public Action<IDictionary<string, string>, DateTime?> CookiesUpdater { get; set; }
@ -428,6 +405,7 @@ namespace NzbDrone.Core.Indexers.Definitions
{ {
public PornoLabSettings() public PornoLabSettings()
{ {
StripRussianLetters = false;
} }
[FieldDefinition(4, Label = "Strip Russian Letters", HelpLink = "Strip Cyrillic letters from release names", Type = FieldType.Checkbox)] [FieldDefinition(4, Label = "Strip Russian Letters", HelpLink = "Strip Cyrillic letters from release names", Type = FieldType.Checkbox)]

Loading…
Cancel
Save