You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
Lidarr/src/NzbDrone.Core/Parser/QualityParser.cs

350 lines
16 KiB

using System;
using System.IO;
using System.Text.RegularExpressions;
using NLog;
using NzbDrone.Common.Extensions;
using NzbDrone.Common.Instrumentation;
using NzbDrone.Core.MediaFiles;
using NzbDrone.Core.Qualities;
namespace NzbDrone.Core.Parser
{
public class QualityParser
{
/// <summary>Logger used for the debug/trace messages emitted while parsing.</summary>
private static readonly Logger Logger = NzbDroneLogger.GetLogger(typeof(QualityParser));

/// <summary>Matches the whole words "proper", "repack" or "rerip", ignoring case.</summary>
private static readonly Regex ProperRegex = new Regex(@"\b(?<proper>proper|repack|rerip)\b",
    RegexOptions.Compiled | RegexOptions.IgnoreCase);

/// <summary>
/// Captures a single-digit version written either as digit + "v" + digit (e.g. "1v2")
/// or as "[v2]". The digit is exposed through the "version" group.
/// </summary>
private static readonly Regex VersionRegex = new Regex(@"\dv(?<version>\d)\b|\[v(?<version>\d)\]",
    RegexOptions.Compiled | RegexOptions.IgnoreCase);

/// <summary>Matches the whole word "REAL". Case-sensitive: only the upper-case spelling counts.</summary>
private static readonly Regex RealRegex = new Regex(@"\b(?<real>REAL)\b",
    RegexOptions.Compiled);

/// <summary>
/// Recognises bitrate hints. Each named group corresponds to the <see cref="BitRate"/> member of
/// the same name. Besides the plain numbers (optionally followed by "kbps" or wrapped in
/// brackets), "q5".."q10" map to B160..B500 and "itunes plus" maps to B256.
/// The pattern is compiled with IgnorePatternWhitespace, so the line breaks and indentation
/// inside the literal are not part of the expression.
/// </summary>
private static readonly Regex BitRateRegex = new Regex(@"\b(?:(?<B096>96[ ]?kbps|96|[\[\(].*96.*[\]\)])|
    (?<B128>128[ ]?kbps|128|[\[\(].*128.*[\]\)])|
    (?<B160>160[ ]?kbps|160|[\[\(].*160.*[\]\)]|q5)|
    (?<B192>192[ ]?kbps|192|[\[\(].*192.*[\]\)]|q6)|
    (?<B224>224[ ]?kbps|224|[\[\(].*224.*[\]\)]|q7)|
    (?<B256>256[ ]?kbps|256|itunes\splus|[\[\(].*256.*[\]\)]|q8)|
    (?<B320>320[ ]?kbps|320|[\[\(].*320.*[\]\)]|q9)|
    (?<B500>500[ ]?kbps|500|[\[\(].*500.*[\]\)]|q10)|
    (?<VBRV0>V0[ ]?kbps|V0|[\[\(].*V0.*[\]\)])|
    (?<VBRV2>V2[ ]?kbps|V2|[\[\(].*V2.*[\]\)]))\b",
    RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace);

/// <summary>
/// Recognises a 24-bit sample size hint ("24 bit", "24bit", or "24bit" inside brackets).
/// Case-sensitive; the only caller in this class passes an already lower-cased name.
/// Compiled like the other patterns of this class (matching behaviour is unchanged).
/// </summary>
private static readonly Regex SampleSizeRegex = new Regex(@"\b(?:(?<S24>24[ ]bit|24bit|[\[\(].*24bit.*[\]\)]))",
    RegexOptions.Compiled);

/// <summary>
/// Recognises codec hints. Each named group corresponds to the <see cref="Codec"/> member of
/// the same name. The optional ".5" in the "Version N(.5)" descriptions is matched as a literal
/// dot (it was previously an unescaped '.', which accepted any character in that position).
/// </summary>
private static readonly Regex CodecRegex = new Regex(@"\b(?:(?<MP1>MPEG Version \d(\.5)? Audio, Layer 1|MP1)|(?<MP2>MPEG Version \d(\.5)? Audio, Layer 2|MP2)|(?<MP3VBR>MP3.*VBR|MPEG Version \d(\.5)? Audio, Layer 3 vbr)|(?<MP3CBR>MP3|MPEG Version \d(\.5)? Audio, Layer 3)|(?<FLAC>flac)|(?<WAVPACK>wavpack|wv)|(?<ALAC>alac)|(?<WMA>WMA\d?)|(?<WAV>WAV|PCM)|(?<AAC>M4A|M4P|M4B|AAC|mp4a|MPEG-4 Audio(?!.*alac))|(?<OGG>OGG|OGA|Vorbis))\b|(?<APE>monkey's audio|[\[|\(].*\bape\b.*[\]|\)])|(?<OPUS>Opus Version \d(\.5)? Audio|[\[|\(].*\bopus\b.*[\]|\)])",
    RegexOptions.Compiled | RegexOptions.IgnoreCase);
/// <summary>
/// Works out the quality of a release/track from, in order of preference:
/// the codec description plus the file's bitrate and sample size, then hints found in the name,
/// and finally the file extension.
/// </summary>
/// <param name="name">Release title or file name. A null value is treated as an empty string.</param>
/// <param name="desc">
/// Codec description text. When it is not blank it is tried first, and a successful result is
/// flagged as <c>QualityDetectionSource.TagLib</c> (so it presumably originates from the file's
/// tags - confirm against callers).
/// </param>
/// <param name="fileBitrate">Bitrate of the file, compared against values such as 128, 192 or 320.</param>
/// <param name="fileSampleSize">Sample size of the file; a value of 24 selects the 24-bit FLAC quality.</param>
/// <returns>
/// A <see cref="QualityModel"/> carrying the revision modifiers found in the name and the detected
/// quality, or <c>Quality.Unknown</c> when none of the three strategies succeeds.
/// </returns>
public static QualityModel ParseQuality(string name, string desc, int fileBitrate, int fileSampleSize = 0)
{
    Logger.Debug("Trying to parse quality for {0}", name);

    // A missing name must not prevent detection from the description, so treat null as empty
    // instead of failing on the first string operation.
    name = name ?? string.Empty;

    // Invariant lower-casing: the culture-sensitive ToLower() maps 'I' to a dotless 'ı' under
    // Turkish cultures, which would stop case-sensitive patterns (e.g. "24bit") from matching.
    var normalizedName = name.Replace('_', ' ').Trim().ToLowerInvariant();
    var result = ParseQualityModifiers(name, normalizedName);

    if (desc.IsNotNullOrWhiteSpace())
    {
        var descCodec = ParseCodec(desc, "");
        Logger.Trace($"Got codec {descCodec}");

        result.Quality = FindQuality(descCodec, fileBitrate, fileSampleSize);

        if (result.Quality != Quality.Unknown)
        {
            result.QualityDetectionSource = QualityDetectionSource.TagLib;
            return result;
        }
    }

    // Fall back to hints embedded in the (normalized) name.
    result.Quality = FindQualityFromName(
        ParseCodec(normalizedName, name),
        ParseBitRate(normalizedName),
        ParseSampleSize(normalizedName));

    // Last resort: derive the quality from the file extension.
    if (result.Quality == Quality.Unknown && !name.ContainsInvalidPathChars())
    {
        try
        {
            result.Quality = MediaFileExtensions.GetQualityForExtension(Path.GetExtension(name));
            result.QualityDetectionSource = QualityDetectionSource.Extension;
        }
        catch (ArgumentException)
        {
            // Deliberately ignored: the name contains characters that are illegal in a path,
            // so there is no extension to look at and the quality stays Unknown.
        }
    }

    return result;
}

/// <summary>
/// Maps the codec, bitrate and sample-size hints parsed from a name onto a <see cref="Quality"/>.
/// Returns <c>Quality.Unknown</c> for combinations that have no dedicated quality.
/// </summary>
private static Quality FindQualityFromName(Codec codec, BitRate bitrate, SampleSize sampleSize)
{
    switch (codec)
    {
        case Codec.MP3VBR:
            switch (bitrate)
            {
                case BitRate.VBRV0: return Quality.MP3_VBR;
                case BitRate.VBRV2: return Quality.MP3_VBR_V2;
                default: return Quality.Unknown;
            }

        case Codec.MP3CBR:
            // NOTE(review): B224 has no mapping here although a 224 quality is used elsewhere
            // in this class - confirm whether that omission is intentional.
            switch (bitrate)
            {
                case BitRate.B096: return Quality.MP3_096;
                case BitRate.B128: return Quality.MP3_128;
                case BitRate.B160: return Quality.MP3_160;
                case BitRate.B192: return Quality.MP3_192;
                case BitRate.B256: return Quality.MP3_256;
                case BitRate.B320: return Quality.MP3_320;
                default: return Quality.Unknown;
            }

        case Codec.FLAC:
            return sampleSize == SampleSize.S24 ? Quality.FLAC_24 : Quality.FLAC;

        case Codec.ALAC:
            return Quality.ALAC;

        case Codec.WAVPACK:
            return Quality.WAVPACK;

        case Codec.APE:
            return Quality.APE;

        case Codec.WMA:
            return Quality.WMA;

        case Codec.WAV:
            return Quality.WAV;

        case Codec.AAC:
            // AAC without a recognised bitrate is reported as VBR.
            switch (bitrate)
            {
                case BitRate.B192: return Quality.AAC_192;
                case BitRate.B256: return Quality.AAC_256;
                case BitRate.B320: return Quality.AAC_320;
                default: return Quality.AAC_VBR;
            }

        case Codec.AACVBR:
            return Quality.AAC_VBR;

        case Codec.OGG:
        case Codec.OPUS:
            // Opus shares the Vorbis quality ladder.
            switch (bitrate)
            {
                case BitRate.B160: return Quality.VORBIS_Q5;
                case BitRate.B192: return Quality.VORBIS_Q6;
                case BitRate.B224: return Quality.VORBIS_Q7;
                case BitRate.B256: return Quality.VORBIS_Q8;
                case BitRate.B320: return Quality.VORBIS_Q9;
                case BitRate.B500: return Quality.VORBIS_Q10;
                default: return Quality.Unknown;
            }

        case Codec.Unknown:
            // No codec hint at all: a bare bitrate is assumed to describe an MP3.
            switch (bitrate)
            {
                case BitRate.B192: return Quality.MP3_192;
                case BitRate.B256: return Quality.MP3_256;
                case BitRate.B320: return Quality.MP3_320;
                case BitRate.VBR: return Quality.MP3_VBR_V2;
                default: return Quality.Unknown;
            }

        default:
            // Includes MP1 and MP2, which have no quality of their own.
            return Quality.Unknown;
    }
}
Whole album matching and fingerprinting (#592) * Cache result of GetAllArtists * Fixed: Manual import not respecting album import notifications * Fixed: partial album imports stay in queue, prompting manual import * Fixed: Allow release if tracks are missing * Fixed: Be tolerant of missing/extra "The" at start of artist name * Improve manual import UI * Omit video tracks from DB entirely * Revert "faster test packaging in build.sh" This reverts commit 2723e2a7b86bcbff9051fd2aced07dd807b4bcb7. -u and -T are not supported on macOS * Fix tests on linux and macOS * Actually lint on linux On linux yarn runs scripts with sh not bash so ** doesn't recursively glob * Match whole albums * Option to disable fingerprinting * Rip out MediaInfo * Don't split up things that have the same album selected in manual import * Try to speed up IndentificationService * More speedups * Some fixes and increase power of recording id * Fix NRE when no tags * Fix NRE when some (but not all) files in a directory have missing tags * Bump taglib, tidy up tag parsing * Add a health check * Remove media info setting * Tags -> audioTags * Add some tests where tags are null * Rename history events * Add missing method to interface * Reinstate MediaInfo tags and update info with artist scan Also adds migration to remove old format media info * This file no longer exists * Don't penalise year if missing from tags * Formatting improvements * Use correct system newline * Switch to the netstandard2.0 library to support net 461 * TagLib.File is IDisposable so should be in a using * Improve filename matching and add tests * Neater logging of parsed tags * Fix disk scan tests for new media info update * Fix quality detection source * Fix Inexact Artist/Album match * Add button to clear track mapping * Fix warning * Pacify eslint * Use \ not / * Fix UI updates * Fix media covers Prevent localizing URL propaging back to the metadata object * Reduce database overhead broadcasting UI updates * Relax timings a 
bit to make test pass * Remove irrelevant tests * Test framework for identification service * Fix PreferMissingToBadMatch test case * Make fingerprinting more robust * More logging * Penalize unknown media format and country * Prefer USA to UK * Allow Data CD * Fix exception if fingerprinting fails for all files * Fix tests * Fix NRE * Allow apostrophes and remove accents in filename aggregation * Address codacy issues * Cope with old versions of fpcalc and suggest upgrade * fpcalc health check passes if fingerprinting disabled * Get the Artist meta with the artist * Fix the mapper so that lazy loaded lists will be populated on Join And therefore we can join TrackFiles on Tracks by default and avoid an extra query * Rename subtitle -> lyric * Tidy up MediaInfoFormatter
5 years ago
/// <summary>
/// Codecs in the order their same-named regex groups are probed after a successful match.
/// Every entry's enum name is also the name of a group in the codec pattern.
/// </summary>
private static readonly Codec[] CodecProbeOrder =
{
    Codec.FLAC,
    Codec.ALAC,
    Codec.WMA,
    Codec.WAV,
    Codec.AAC,
    Codec.OGG,
    Codec.OPUS,
    Codec.MP1,
    Codec.MP2,
    Codec.MP3VBR,
    Codec.MP3CBR,
    Codec.WAVPACK,
    Codec.APE
};

/// <summary>
/// Detects the audio codec mentioned in <paramref name="name"/>.
/// </summary>
/// <param name="name">Text to search; null or whitespace yields <see cref="Codec.Unknown"/>.</param>
/// <param name="origName">Not read by this method; kept as part of the public signature.</param>
/// <returns>The codec whose pattern group matched, or <see cref="Codec.Unknown"/> when nothing matched.</returns>
public static Codec ParseCodec(string name, string origName)
{
    if (!name.IsNullOrWhiteSpace())
    {
        var codecMatch = CodecRegex.Match(name);

        if (codecMatch.Success)
        {
            foreach (var candidate in CodecProbeOrder)
            {
                if (codecMatch.Groups[candidate.ToString()].Success)
                {
                    return candidate;
                }
            }
        }
    }

    return Codec.Unknown;
}
/// <summary>
/// Detects a bitrate hint in <paramref name="name"/>.
/// </summary>
/// <param name="name">Text to search (the caller in this class passes the normalized name).</param>
/// <returns>The matching <see cref="BitRate"/>, or <see cref="BitRate.Unknown"/> when no hint is found.</returns>
private static BitRate ParseBitRate(string name)
{
    var bitRateMatch = BitRateRegex.Match(name);

    if (bitRateMatch.Success)
    {
        // Walk the enum in declaration order and probe the group carrying the same name.
        // Members without a same-named group in the pattern (VBR and Unknown, as the pattern
        // is written) yield an unsuccessful group and therefore never match.
        foreach (BitRate candidate in Enum.GetValues(typeof(BitRate)))
        {
            if (bitRateMatch.Groups[candidate.ToString()].Success)
            {
                return candidate;
            }
        }
    }

    return BitRate.Unknown;
}
/// <summary>
/// Detects a 24-bit sample size hint in <paramref name="name"/>.
/// </summary>
/// <param name="name">Text to search (the caller in this class passes the normalized name).</param>
/// <returns><see cref="SampleSize.S24"/> when the hint is present, otherwise <see cref="SampleSize.Unknown"/>.</returns>
private static SampleSize ParseSampleSize(string name)
{
    // A failed match reports every group as unsuccessful, so one check covers both outcomes.
    var is24Bit = SampleSizeRegex.Match(name).Groups["S24"].Success;

    return is24Bit ? SampleSize.S24 : SampleSize.Unknown;
}
/// <summary>
/// Maps a codec together with the file's measured bitrate and sample size onto a <see cref="Quality"/>.
/// </summary>
/// <param name="codec">Codec of the file.</param>
/// <param name="bitrate">Bitrate of the file; most codecs require an exact value such as 128 or 320.</param>
/// <param name="sampleSize">Sample size of the file; 24 selects the 24-bit FLAC quality.</param>
/// <returns>The matching quality, or <c>Quality.Unknown</c> when the combination is not recognised.</returns>
private static Quality FindQuality(Codec codec, int bitrate, int sampleSize = 0)
{
    switch (codec)
    {
        case Codec.MP3VBR:
            return Quality.MP3_VBR;
        case Codec.MP3CBR:
            return FindMp3CbrQuality(bitrate);
        case Codec.FLAC:
            return sampleSize == 24 ? Quality.FLAC_24 : Quality.FLAC;
        case Codec.ALAC:
            return Quality.ALAC;
        case Codec.WAVPACK:
            return Quality.WAVPACK;
        case Codec.APE:
            return Quality.APE;
        case Codec.WMA:
            return Quality.WMA;
        case Codec.WAV:
            return Quality.WAV;
        case Codec.AAC:
            return FindAacQuality(bitrate);
        case Codec.OGG:
            return FindVorbisQuality(bitrate);
        case Codec.OPUS:
            return FindOpusQuality(bitrate);
        default:
            // MP1, MP2 and every codec not listed above have no quality of their own.
            return Quality.Unknown;
    }
}

/// <summary>Constant-bitrate MP3: only the exact bitrates listed below are recognised.</summary>
private static Quality FindMp3CbrQuality(int bitrate)
{
    switch (bitrate)
    {
        case 8: return Quality.MP3_008;
        case 16: return Quality.MP3_016;
        case 24: return Quality.MP3_024;
        case 32: return Quality.MP3_032;
        case 40: return Quality.MP3_040;
        case 48: return Quality.MP3_048;
        case 56: return Quality.MP3_056;
        case 64: return Quality.MP3_064;
        case 80: return Quality.MP3_080;
        case 96: return Quality.MP3_096;
        case 112: return Quality.MP3_112;
        case 128: return Quality.MP3_128;
        case 160: return Quality.MP3_160;
        case 192: return Quality.MP3_192;
        case 224: return Quality.MP3_224;
        case 256: return Quality.MP3_256;
        case 320: return Quality.MP3_320;
        default: return Quality.Unknown;
    }
}

/// <summary>AAC: three exact bitrates are recognised, anything else is reported as VBR.</summary>
private static Quality FindAacQuality(int bitrate)
{
    switch (bitrate)
    {
        case 192: return Quality.AAC_192;
        case 256: return Quality.AAC_256;
        case 320: return Quality.AAC_320;
        default: return Quality.AAC_VBR;
    }
}

/// <summary>Vorbis: exact bitrates map onto the Q5..Q10 qualities.</summary>
private static Quality FindVorbisQuality(int bitrate)
{
    switch (bitrate)
    {
        case 160: return Quality.VORBIS_Q5;
        case 192: return Quality.VORBIS_Q6;
        case 224: return Quality.VORBIS_Q7;
        case 256: return Quality.VORBIS_Q8;
        case 320: return Quality.VORBIS_Q9;
        case 500: return Quality.VORBIS_Q10;
        default: return Quality.Unknown;
    }
}

/// <summary>Opus: bitrate ranges (upper bounds exclusive) map onto the Vorbis Q5..Q10 qualities.</summary>
private static Quality FindOpusQuality(int bitrate)
{
    if (bitrate < 130)
    {
        return Quality.Unknown;
    }
    else if (bitrate < 180)
    {
        return Quality.VORBIS_Q5;
    }
    else if (bitrate < 205)
    {
        return Quality.VORBIS_Q6;
    }
    else if (bitrate < 240)
    {
        return Quality.VORBIS_Q7;
    }
    else if (bitrate < 290)
    {
        return Quality.VORBIS_Q8;
    }
    else if (bitrate < 410)
    {
        return Quality.VORBIS_Q9;
    }

    return Quality.VORBIS_Q10;
}
/// <summary>
/// Creates a <see cref="QualityModel"/> with <c>Quality.Unknown</c> and fills in the revision
/// information (proper/repack, explicit version, "REAL" count) found in the name.
/// </summary>
/// <param name="name">Original name; searched case-sensitively for the word "REAL".</param>
/// <param name="normalizedName">Normalized name; searched for proper/repack/rerip and version markers.</param>
/// <returns>The model with its revision populated; the quality itself is left as Unknown.</returns>
private static QualityModel ParseQualityModifiers(string name, string normalizedName)
{
    var result = new QualityModel { Quality = Quality.Unknown };

    if (ProperRegex.IsMatch(normalizedName))
    {
        result.Revision.Version = 2;
    }

    // An explicit version marker takes precedence over the proper/repack default of 2.
    // TryParse is used because \d also matches non-ASCII decimal digits, which the integer
    // parser rejects; such a marker is ignored instead of raising a FormatException.
    var versionMatch = VersionRegex.Match(normalizedName);
    int version;
    if (versionMatch.Success && int.TryParse(versionMatch.Groups["version"].Value, out version))
    {
        result.Revision.Version = version;
    }

    // TODO: revisit once there is a reliable way to determine "real" releases.
    // TODO: Only treat it as a real if it comes AFTER the season/episode number
    var realCount = RealRegex.Matches(name).Count;
    if (realCount > 0)
    {
        result.Revision.Real = realCount;
    }

    return result;
}
}
/// <summary>
/// Audio codecs distinguished by QualityParser. Apart from AACVBR and Unknown, each member
/// shares its name with a named group of the parser's codec pattern.
/// </summary>
public enum Codec
{
/// <summary>MPEG audio, Layer 1 ("MP1").</summary>
MP1,
/// <summary>MPEG audio, Layer 2 ("MP2").</summary>
MP2,
/// <summary>MP3 without a VBR marker (MPEG audio, Layer 3).</summary>
MP3CBR,
/// <summary>MP3 with a VBR marker.</summary>
MP3VBR,
/// <summary>FLAC.</summary>
FLAC,
/// <summary>ALAC.</summary>
ALAC,
/// <summary>Monkey's Audio (APE).</summary>
APE,
/// <summary>WavPack ("wavpack" / "wv").</summary>
WAVPACK,
/// <summary>WMA.</summary>
WMA,
/// <summary>AAC (also detected from M4A/M4P/M4B, "mp4a" and "MPEG-4 Audio").</summary>
AAC,
/// <summary>
/// AAC treated as variable bitrate. ParseCodec in this file never returns it;
/// ParseQuality maps it to the AAC VBR quality if it is encountered.
/// </summary>
AACVBR,
/// <summary>Ogg Vorbis (OGG / OGA / "Vorbis").</summary>
OGG,
/// <summary>Opus.</summary>
OPUS,
/// <summary>WAV / PCM.</summary>
WAV,
/// <summary>No codec could be determined.</summary>
Unknown
}
/// <summary>
/// Bitrate hints that QualityParser can extract from a name. The B-prefixed members carry
/// the numeric value that is matched (e.g. B096 for "96" / "96 kbps").
/// </summary>
public enum BitRate
{
/// <summary>96 ("96", "96 kbps").</summary>
B096,
/// <summary>128.</summary>
B128,
/// <summary>160; also matched from "q5".</summary>
B160,
/// <summary>192; also matched from "q6".</summary>
B192,
/// <summary>224; also matched from "q7".</summary>
B224,
/// <summary>256; also matched from "q8" and "itunes plus".</summary>
B256,
/// <summary>320; also matched from "q9".</summary>
B320,
/// <summary>500; also matched from "q10".</summary>
B500,
/// <summary>
/// Generic variable bitrate. NOTE(review): the parser's bitrate pattern defines no group
/// named "VBR", so ParseBitRate does not appear to produce this value - confirm.
/// </summary>
VBR,
/// <summary>Matched from "V0".</summary>
VBRV0,
/// <summary>Matched from "V2".</summary>
VBRV2,
/// <summary>No bitrate hint was found.</summary>
Unknown,
}
/// <summary>Sample-size hints that QualityParser can extract from a name.</summary>
public enum SampleSize
{
/// <summary>24 bit ("24 bit" / "24bit").</summary>
S24,
/// <summary>No sample-size hint was found.</summary>
Unknown
}
}