From 5db22529ff815add601d01c51d9e04428724d60e Mon Sep 17 00:00:00 2001
From: Robert Dailey
Date: Mon, 4 Jul 2022 22:33:14 -0500
Subject: [PATCH] refactor: Custom format group table parser

---
 .../Guide/CustomFormatGroupParserTest.cs      | 115 +++++++++++++++++++
 .../Guide/CustomFormatGroupParser.cs          | 107 ++++++++++++++++++
 2 files changed, 222 insertions(+)
 create mode 100644 src/TrashLib.Tests/Radarr/CustomFormat/Guide/CustomFormatGroupParserTest.cs
 create mode 100644 src/TrashLib/Radarr/CustomFormat/Guide/CustomFormatGroupParser.cs

diff --git a/src/TrashLib.Tests/Radarr/CustomFormat/Guide/CustomFormatGroupParserTest.cs b/src/TrashLib.Tests/Radarr/CustomFormat/Guide/CustomFormatGroupParserTest.cs
new file mode 100644
index 00000000..5eb1035e
--- /dev/null
+++ b/src/TrashLib.Tests/Radarr/CustomFormat/Guide/CustomFormatGroupParserTest.cs
@@ -0,0 +1,115 @@
+using System.IO.Abstractions;
+using System.IO.Abstractions.TestingHelpers;
+using AutoFixture.NUnit3;
+using FluentAssertions;
+using NUnit.Framework;
+using TestLibrary.AutoFixture;
+using TrashLib.Radarr.CustomFormat.Guide;
+using TrashLib.Startup;
+
+namespace TrashLib.Tests.Radarr.CustomFormat.Guide;
+
+[TestFixture]
+[Parallelizable(ParallelScope.All)]
+public class CustomFormatGroupParserTest
+{
+    // Feeds the parser a two-table excerpt of the guide index and checks the per-column groups.
+    [Test, AutoMockData]
+    public void It_works(
+        [Frozen] IAppPaths paths,
+        [Frozen(Matching.ImplementedInterfaces)] MockFileSystem fs,
+        CustomFormatGroupParser sut)
+    {
+        const string markdown = @"
+## INDEX
+
+------
+
+| Audio Advanced #1 | Audio Advanced #2 |
+| ----------------------------------------- | ------------------------------- |
+| [TrueHD ATMOS](#truehd-atmos) | [FLAC](#flac) |
+| [DTS X](#dts-x) | [PCM](#pcm) |
+| [ATMOS (undefined)](#atmos-undefined) | [DTS-HD HRA](#dts-hd-hra) |
+| [DD+ ATMOS](#dd-atmos) | [AAC](#aac) |
+| [TrueHD](#truehd) | [DD](#dd) |
+| [DTS-HD MA](#dts-hd-ma) | [MP3](#mp3) |
+| [DD+](#ddplus) | [Opus](#opus) |
+| [DTS-ES](#dts-es) | |
+| [DTS](#dts) | |
+| | |
+
+------
+
+| Movie Versions | Unwanted |
+| --------------------------------------------- | ---------------------------------- |
+| [Hybrid](#hybrid) | [BR-DISK](#br-disk) |
+| [Remaster](#remaster) | [EVO (no WEBDL)](#evo-no-webdl) |
+| [4K Remaster](#4k-remaster) | [LQ](#lq) |
+| [Special Editions](#special-edition) | [x265 (720/1080p)](#x265-7201080p) |
+| [Criterion Collection](#criterion-collection) | [3D](#3d) |
+| [Theatrical Cut](#theatrical-cut) | [No-RlsGroup](#no-rlsgroup) |
+| [IMAX](#imax) | [Obfuscated](#obfuscated) |
+| [IMAX Enhanced](#imax-enhanced) | [DV (WEBDL)](#dv-webdl) |
+| | |
+
+------
+";
+
+        var file = paths.RepoDirectory
+            .SubDirectory("docs")
+            .SubDirectory("Radarr")
+            .File("Radarr-collection-of-custom-formats.md");
+
+        fs.AddFile(file.FullName, new MockFileData(markdown));
+
+        var result = sut.Parse();
+
+        result.Keys.Should().BeEquivalentTo(
+            "Audio Advanced #1",
+            "Audio Advanced #2",
+            "Movie Versions",
+            "Unwanted"
+        );
+
+        result.Should().ContainKey("Audio Advanced #1")
+            .WhoseValue.Should().BeEquivalentTo(new[]
+            {
+                new CustomFormatGroupItem("TrueHD ATMOS", "truehd-atmos"),
+                new CustomFormatGroupItem("DTS X", "dts-x"),
+                new CustomFormatGroupItem("ATMOS (undefined)", "atmos-undefined"),
+                new CustomFormatGroupItem("DD+ ATMOS", "dd-atmos"),
+                new CustomFormatGroupItem("TrueHD", "truehd"),
+                new CustomFormatGroupItem("DTS-HD MA", "dts-hd-ma"),
+                new CustomFormatGroupItem("DD+", "ddplus"),
+                new CustomFormatGroupItem("DTS-ES", "dts-es"),
+                new CustomFormatGroupItem("DTS", "dts")
+            });
+
+        // This column is padded with blank cells, which must not turn into items.
+        result.Should().ContainKey("Audio Advanced #2")
+            .WhoseValue.Should().BeEquivalentTo(new[]
+            {
+                new CustomFormatGroupItem("FLAC", "flac"),
+                new CustomFormatGroupItem("PCM", "pcm"),
+                new CustomFormatGroupItem("DTS-HD HRA", "dts-hd-hra"),
+                new CustomFormatGroupItem("AAC", "aac"),
+                new CustomFormatGroupItem("DD", "dd"),
+                new CustomFormatGroupItem("MP3", "mp3"),
+                new CustomFormatGroupItem("Opus", "opus")
+            });
+
+        // A column of the second table, including link text that itself contains parentheses.
+        result.Should().ContainKey("Unwanted")
+            .WhoseValue.Should().BeEquivalentTo(new[]
+            {
+                new CustomFormatGroupItem("BR-DISK", "br-disk"),
+                new CustomFormatGroupItem("EVO (no WEBDL)", "evo-no-webdl"),
+                new CustomFormatGroupItem("LQ", "lq"),
+                new CustomFormatGroupItem("x265 (720/1080p)", "x265-7201080p"),
+                new CustomFormatGroupItem("3D", "3d"),
+                new CustomFormatGroupItem("No-RlsGroup", "no-rlsgroup"),
+                new CustomFormatGroupItem("Obfuscated", "obfuscated"),
+                new CustomFormatGroupItem("DV (WEBDL)", "dv-webdl")
+            });
+    }
+}
diff --git a/src/TrashLib/Radarr/CustomFormat/Guide/CustomFormatGroupParser.cs b/src/TrashLib/Radarr/CustomFormat/Guide/CustomFormatGroupParser.cs
new file mode 100644
index 00000000..c8869228
--- /dev/null
+++ b/src/TrashLib/Radarr/CustomFormat/Guide/CustomFormatGroupParser.cs
@@ -0,0 +1,107 @@
+using System.Collections.ObjectModel;
+using System.IO.Abstractions;
+using System.Text.RegularExpressions;
+using Common.Extensions;
+using TrashLib.Startup;
+
+namespace TrashLib.Radarr.CustomFormat.Guide;
+
+/// <summary>
+/// One linked cell of a guide index table, e.g. <c>[DTS X](#dts-x)</c>.
+/// </summary>
+/// <param name="Name">The link text (<c>DTS X</c>).</param>
+/// <param name="Anchor">The link target without its leading <c>#</c> (<c>dts-x</c>).</param>
+public record CustomFormatGroupItem(string Name, string Anchor);
+
+/// <summary>
+/// Reads the Radarr custom format guide markdown and groups its linked entries by the heading
+/// of the table column they appear under.
+/// </summary>
+public class CustomFormatGroupParser
+{
+    // A markdown table line: optional whitespace, a pipe, the cells, then a closing pipe.
+    private static readonly Regex TableRegex = new(@"^\s*\|(.*)\|\s*$");
+
+    // A cell that is exactly one in-page link: [Name](#anchor)
+    private static readonly Regex LinkRegex = new(@"^\[(.+?)\]\(#(.+?)\)$");
+
+    // The first cell of the `|---|---|` row that separates a table heading from its data rows.
+    private static readonly Regex SeparatorRegex = new(@"^-+$");
+
+    private readonly IAppPaths _paths;
+
+    public CustomFormatGroupParser(IAppPaths paths)
+    {
+        _paths = paths;
+    }
+
+    /// <summary>
+    /// Parses every table in <c>docs/Radarr/Radarr-collection-of-custom-formats.md</c> under the
+    /// repo directory.
+    /// </summary>
+    /// <returns>
+    /// A dictionary keyed by column heading. Each value holds the links found in that column, in
+    /// table order; cells that are not a single <c>[Name](#anchor)</c> link are left out.
+    /// </returns>
+    /// <exception cref="ArgumentException">Two columns share the same heading text.</exception>
+    public IDictionary<string, ReadOnlyCollection<CustomFormatGroupItem>> Parse()
+    {
+        var mdFile = _paths.RepoDirectory
+            .SubDirectory("docs")
+            .SubDirectory("Radarr")
+            .File("Radarr-collection-of-custom-formats.md");
+
+        var columns = new List<List<string>>();
+
+        using var md = mdFile.OpenText();
+        while (!md.EndOfStream)
+        {
+            var rows = ParseTable(md);
+
+            // Pivot the data so that we have lists of columns instead of lists of rows
+            // Taken from: https://stackoverflow.com/a/39485441/157971
+            columns.AddRange(rows
+                .SelectMany(x => x.Select((value, index) => (value, index)))
+                .GroupBy(x => x.index, x => x.value)
+                .Select(x => x.ToList()));
+        }
+
+        // The first entry of every column is its heading; the rest are the data cells.
+        return columns.ToDictionary(
+            x => x[0],
+            x => x.Skip(1).Select(ParseLink).NotNull().ToList().AsReadOnly());
+    }
+
+    /// <summary>Converts a <c>[Name](#anchor)</c> cell to an item, or null for any other text.</summary>
+    private static CustomFormatGroupItem? ParseLink(string markdownLink)
+    {
+        var match = LinkRegex.Match(markdownLink);
+        return match.Success ? new CustomFormatGroupItem(match.Groups[1].Value, match.Groups[2].Value) : null;
+    }
+
+    /// <summary>Reads lines until one table has been consumed or the reader is exhausted.</summary>
+    /// <returns>Trimmed cells of each heading/data row (separator row excluded); empty if no table.</returns>
+    private static IEnumerable<List<string>> ParseTable(TextReader stream)
+    {
+        var tableRows = new List<List<string>>();
+
+        while (stream.ReadLine() is { } line)
+        {
+            // Blank lines never match either, so this one test covers every non-table line.
+            var match = TableRegex.Match(line);
+            if (match.Success)
+            {
+                // Split always yields at least one field, so a matched line is always a row.
+                tableRows.Add(match.Groups[1].Value.Split('|').Select(x => x.Trim()).ToList());
+            }
+            else if (tableRows.Any())
+            {
+                // The first non-table line after at least one row ends the table. Before that
+                // we are still skipping the text that precedes it.
+                break;
+            }
+        }
+
+        return tableRows.Where(x => !SeparatorRegex.IsMatch(x[0]));
+    }
+}