From bd98f4cb6a70a9a488f0c0ab1ef689e8c1060642 Mon Sep 17 00:00:00 2001 From: Oleksii Holub <1935960+Tyrrrz@users.noreply.github.com> Date: Sat, 5 Feb 2022 04:10:35 +0200 Subject: [PATCH] Refactor `StringPart` into `StringSegment` --- .../Markdown/Parsing/AggregateMatcher.cs | 8 ++-- .../Markdown/Parsing/IMatcher.cs | 47 ++++++++++++------- .../Markdown/Parsing/MarkdownParser.cs | 42 ++++++++--------- .../Markdown/Parsing/ParsedMatch.cs | 6 +-- .../Markdown/Parsing/RegexMatcher.cs | 16 +++---- .../Markdown/Parsing/StringMatcher.cs | 16 +++---- .../Markdown/Parsing/StringPart.cs | 21 --------- .../Markdown/Parsing/StringSegment.cs | 19 ++++++++ 8 files changed, 93 insertions(+), 82 deletions(-) delete mode 100644 DiscordChatExporter.Core/Markdown/Parsing/StringPart.cs create mode 100644 DiscordChatExporter.Core/Markdown/Parsing/StringSegment.cs diff --git a/DiscordChatExporter.Core/Markdown/Parsing/AggregateMatcher.cs b/DiscordChatExporter.Core/Markdown/Parsing/AggregateMatcher.cs index 450aa83..4a3f5dc 100644 --- a/DiscordChatExporter.Core/Markdown/Parsing/AggregateMatcher.cs +++ b/DiscordChatExporter.Core/Markdown/Parsing/AggregateMatcher.cs @@ -16,7 +16,7 @@ internal class AggregateMatcher : IMatcher { } - public ParsedMatch? TryMatch(StringPart stringPart) + public ParsedMatch? TryMatch(StringSegment segment) { ParsedMatch? earliestMatch = null; @@ -24,19 +24,19 @@ internal class AggregateMatcher : IMatcher foreach (var matcher in _matchers) { // Try to match - var match = matcher.TryMatch(stringPart); + var match = matcher.TryMatch(segment); // If there's no match - continue if (match is null) continue; // If this match is earlier than previous earliest - replace - if (earliestMatch is null || match.StringPart.StartIndex < earliestMatch.StringPart.StartIndex) + if (earliestMatch is null || match.Segment.StartIndex < earliestMatch.Segment.StartIndex) earliestMatch = match; // If the earliest match starts at the very beginning - break, // because it's impossible to find a match earlier than that - if (earliestMatch.StringPart.StartIndex == stringPart.StartIndex) + if (earliestMatch.Segment.StartIndex == segment.StartIndex) break; } diff --git a/DiscordChatExporter.Core/Markdown/Parsing/IMatcher.cs b/DiscordChatExporter.Core/Markdown/Parsing/IMatcher.cs index 175c312..bafddfc 100644 --- a/DiscordChatExporter.Core/Markdown/Parsing/IMatcher.cs +++ b/DiscordChatExporter.Core/Markdown/Parsing/IMatcher.cs @@ -5,44 +5,57 @@ namespace DiscordChatExporter.Core.Markdown.Parsing; internal interface IMatcher { - ParsedMatch? TryMatch(StringPart stringPart); + ParsedMatch? TryMatch(StringSegment segment); } internal static class MatcherExtensions { - public static IEnumerable> MatchAll(this IMatcher matcher, - StringPart stringPart, Func transformFallback) + public static IEnumerable> MatchAll( + this IMatcher matcher, + StringSegment segment, + Func transformFallback) { // Loop through segments divided by individual matches - var currentIndex = stringPart.StartIndex; - while (currentIndex < stringPart.EndIndex) + var currentIndex = segment.StartIndex; + while (currentIndex < segment.EndIndex) { // Find a match within this segment - var match = matcher.TryMatch(stringPart.Slice(currentIndex, stringPart.EndIndex - currentIndex)); + var match = matcher.TryMatch( + segment.Relocate( + currentIndex, + segment.EndIndex - currentIndex + ) + ); - // If there's no match - break if (match is null) break; - // If this match doesn't start immediately at current index - transform and yield fallback first - if (match.StringPart.StartIndex > currentIndex) + // If this match doesn't start immediately at the current position - transform and yield fallback first + if (match.Segment.StartIndex > currentIndex) { - var fallbackPart = stringPart.Slice(currentIndex, match.StringPart.StartIndex - currentIndex); - yield return new ParsedMatch(fallbackPart, transformFallback(fallbackPart)); + var fallbackSegment = segment.Relocate( + currentIndex, + match.Segment.StartIndex - currentIndex + ); + + yield return new ParsedMatch(fallbackSegment, transformFallback(fallbackSegment)); } - // Yield match yield return match; // Shift current index to the end of the match - currentIndex = match.StringPart.StartIndex + match.StringPart.Length; + currentIndex = match.Segment.StartIndex + match.Segment.Length; } - // If EOL wasn't reached - transform and yield remaining part as fallback - if (currentIndex < stringPart.EndIndex) + // If EOL hasn't been reached - transform and yield remaining part as fallback + if (currentIndex < segment.EndIndex) { - var fallbackPart = stringPart.Slice(currentIndex); - yield return new ParsedMatch(fallbackPart, transformFallback(fallbackPart)); + var fallbackSegment = segment.Relocate( + currentIndex, + segment.EndIndex - currentIndex + ); + + yield return new ParsedMatch(fallbackSegment, transformFallback(fallbackSegment)); } } } \ No newline at end of file diff --git a/DiscordChatExporter.Core/Markdown/Parsing/MarkdownParser.cs b/DiscordChatExporter.Core/Markdown/Parsing/MarkdownParser.cs index a0e9041..906bfac 100644 --- a/DiscordChatExporter.Core/Markdown/Parsing/MarkdownParser.cs +++ b/DiscordChatExporter.Core/Markdown/Parsing/MarkdownParser.cs @@ -23,7 +23,7 @@ internal static partial class MarkdownParser // Capture any character until the earliest double asterisk not followed by an asterisk private static readonly IMatcher BoldFormattingNodeMatcher = new RegexMatcher( new Regex("\\*\\*(.+?)\\*\\*(?!\\*)", DefaultRegexOptions | RegexOptions.Singleline), - (p, m) => new FormattingNode(FormattingKind.Bold, Parse(p.Slice(m.Groups[1]))) + (s, m) => new FormattingNode(FormattingKind.Bold, Parse(s.Relocate(m.Groups[1]))) ); // Capture any character until the earliest single asterisk not preceded or followed by an asterisk @@ -31,54 +31,54 @@ internal static partial class MarkdownParser // Closing asterisk must not be preceded by whitespace private static readonly IMatcher ItalicFormattingNodeMatcher = new RegexMatcher( new Regex("\\*(?!\\s)(.+?)(? new FormattingNode(FormattingKind.Italic, Parse(p.Slice(m.Groups[1]))) + (s, m) => new FormattingNode(FormattingKind.Italic, Parse(s.Relocate(m.Groups[1]))) ); // Capture any character until the earliest triple asterisk not followed by an asterisk private static readonly IMatcher ItalicBoldFormattingNodeMatcher = new RegexMatcher( new Regex("\\*(\\*\\*.+?\\*\\*)\\*(?!\\*)", DefaultRegexOptions | RegexOptions.Singleline), - (p, m) => new FormattingNode(FormattingKind.Italic, Parse(p.Slice(m.Groups[1]), BoldFormattingNodeMatcher)) + (s, m) => new FormattingNode(FormattingKind.Italic, Parse(s.Relocate(m.Groups[1]), BoldFormattingNodeMatcher)) ); // Capture any character except underscore until an underscore // Closing underscore must not be followed by a word character private static readonly IMatcher ItalicAltFormattingNodeMatcher = new RegexMatcher( new Regex("_([^_]+)_(?!\\w)", DefaultRegexOptions | RegexOptions.Singleline), - (p, m) => new FormattingNode(FormattingKind.Italic, Parse(p.Slice(m.Groups[1]))) + (s, m) => new FormattingNode(FormattingKind.Italic, Parse(s.Relocate(m.Groups[1]))) ); // Capture any character until the earliest double underscore not followed by an underscore private static readonly IMatcher UnderlineFormattingNodeMatcher = new RegexMatcher( new Regex("__(.+?)__(?!_)", DefaultRegexOptions | RegexOptions.Singleline), - (p, m) => new FormattingNode(FormattingKind.Underline, Parse(p.Slice(m.Groups[1]))) + (s, m) => new FormattingNode(FormattingKind.Underline, Parse(s.Relocate(m.Groups[1]))) ); // Capture any character until the earliest triple underscore not followed by an underscore private static readonly IMatcher ItalicUnderlineFormattingNodeMatcher = new RegexMatcher( new Regex("_(__.+?__)_(?!_)", DefaultRegexOptions | RegexOptions.Singleline), - (p, m) => new FormattingNode(FormattingKind.Italic, - Parse(p.Slice(m.Groups[1]), UnderlineFormattingNodeMatcher)) + (s, m) => new FormattingNode(FormattingKind.Italic, + Parse(s.Relocate(m.Groups[1]), UnderlineFormattingNodeMatcher)) ); // Capture any character until the earliest double tilde private static readonly IMatcher StrikethroughFormattingNodeMatcher = new RegexMatcher( new Regex("~~(.+?)~~", DefaultRegexOptions | RegexOptions.Singleline), - (p, m) => new FormattingNode(FormattingKind.Strikethrough, Parse(p.Slice(m.Groups[1]))) + (s, m) => new FormattingNode(FormattingKind.Strikethrough, Parse(s.Relocate(m.Groups[1]))) ); // Capture any character until the earliest double pipe private static readonly IMatcher SpoilerFormattingNodeMatcher = new RegexMatcher( new Regex("\\|\\|(.+?)\\|\\|", DefaultRegexOptions | RegexOptions.Singleline), - (p, m) => new FormattingNode(FormattingKind.Spoiler, Parse(p.Slice(m.Groups[1]))) + (s, m) => new FormattingNode(FormattingKind.Spoiler, Parse(s.Relocate(m.Groups[1]))) ); // Capture any character until the end of the line // Opening 'greater than' character must be followed by whitespace private static readonly IMatcher SingleLineQuoteNodeMatcher = new RegexMatcher( new Regex("^>\\s(.+\n?)", DefaultRegexOptions), - (p, m) => new FormattingNode(FormattingKind.Quote, Parse(p.Slice(m.Groups[1]))) + (s, m) => new FormattingNode(FormattingKind.Quote, Parse(s.Relocate(m.Groups[1]))) ); // Repeatedly capture any character until the end of the line @@ -97,7 +97,7 @@ internal static partial class MarkdownParser // Opening 'greater than' characters must be followed by whitespace private static readonly IMatcher MultiLineQuoteNodeMatcher = new RegexMatcher( new Regex("^>>>\\s(.+)", DefaultRegexOptions | RegexOptions.Singleline), - (p, m) => new FormattingNode(FormattingKind.Quote, Parse(p.Slice(m.Groups[1]))) + (s, m) => new FormattingNode(FormattingKind.Quote, Parse(s.Relocate(m.Groups[1]))) ); /* Code blocks */ @@ -185,7 +185,7 @@ internal static partial class MarkdownParser // Capture [title](link) private static readonly IMatcher TitledLinkNodeMatcher = new RegexMatcher( new Regex("\\[(.+?)\\]\\((.+?)\\)", DefaultRegexOptions), - (p, m) => new LinkNode(m.Groups[2].Value, Parse(p.Slice(m.Groups[1]))) + (s, m) => new LinkNode(m.Groups[2].Value, Parse(s.Relocate(m.Groups[1]))) ); // Capture any non-whitespace character after http:// or https:// @@ -207,7 +207,7 @@ internal static partial class MarkdownParser // This escapes it from matching for formatting private static readonly IMatcher ShrugTextNodeMatcher = new StringMatcher( @"¯\_(ツ)_/¯", - p => new TextNode(p.ToString()) + s => new TextNode(s.ToString()) ); // Capture some specific emoji that don't get rendered @@ -323,24 +323,24 @@ internal static partial class MarkdownParser UnixTimestampNodeMatcher ); - private static IReadOnlyList Parse(StringPart stringPart, IMatcher matcher) => + private static IReadOnlyList Parse(StringSegment segment, IMatcher matcher) => matcher - .MatchAll(stringPart, p => new TextNode(p.ToString())) + .MatchAll(segment, s => new TextNode(s.ToString())) .Select(r => r.Value) .ToArray(); } internal static partial class MarkdownParser { - private static IReadOnlyList Parse(StringPart stringPart) => - Parse(stringPart, AggregateNodeMatcher); + private static IReadOnlyList Parse(StringSegment segment) => + Parse(segment, AggregateNodeMatcher); - private static IReadOnlyList ParseMinimal(StringPart stringPart) => - Parse(stringPart, MinimalAggregateNodeMatcher); + private static IReadOnlyList ParseMinimal(StringSegment segment) => + Parse(segment, MinimalAggregateNodeMatcher); public static IReadOnlyList Parse(string input) => - Parse(new StringPart(input)); + Parse(new StringSegment(input)); public static IReadOnlyList ParseMinimal(string input) => - ParseMinimal(new StringPart(input)); + ParseMinimal(new StringSegment(input)); } \ No newline at end of file diff --git a/DiscordChatExporter.Core/Markdown/Parsing/ParsedMatch.cs b/DiscordChatExporter.Core/Markdown/Parsing/ParsedMatch.cs index c15afc4..a43dba2 100644 --- a/DiscordChatExporter.Core/Markdown/Parsing/ParsedMatch.cs +++ b/DiscordChatExporter.Core/Markdown/Parsing/ParsedMatch.cs @@ -2,13 +2,13 @@ internal class ParsedMatch { - public StringPart StringPart { get; } + public StringSegment Segment { get; } public T Value { get; } - public ParsedMatch(StringPart stringPart, T value) + public ParsedMatch(StringSegment segment, T value) { - StringPart = stringPart; + Segment = segment; Value = value; } } \ No newline at end of file diff --git a/DiscordChatExporter.Core/Markdown/Parsing/RegexMatcher.cs b/DiscordChatExporter.Core/Markdown/Parsing/RegexMatcher.cs index 592bdec..d0280d7 100644 --- a/DiscordChatExporter.Core/Markdown/Parsing/RegexMatcher.cs +++ b/DiscordChatExporter.Core/Markdown/Parsing/RegexMatcher.cs @@ -6,17 +6,17 @@ namespace DiscordChatExporter.Core.Markdown.Parsing; internal class RegexMatcher : IMatcher { private readonly Regex _regex; - private readonly Func _transform; + private readonly Func _transform; - public RegexMatcher(Regex regex, Func transform) + public RegexMatcher(Regex regex, Func transform) { _regex = regex; _transform = transform; } - public ParsedMatch? TryMatch(StringPart stringPart) + public ParsedMatch? TryMatch(StringSegment segment) { - var match = _regex.Match(stringPart.Target, stringPart.StartIndex, stringPart.Length); + var match = _regex.Match(segment.Source, segment.StartIndex, segment.Length); if (!match.Success) return null; @@ -25,14 +25,14 @@ internal class RegexMatcher : IMatcher // Which is super weird because regex.Match(string, int) takes the whole input in context. // So in order to properly account for ^/$ regex tokens, we need to make sure that // the expression also matches on the bigger part of the input. - if (!_regex.IsMatch(stringPart.Target[..stringPart.EndIndex], stringPart.StartIndex)) + if (!_regex.IsMatch(segment.Source[..segment.EndIndex], segment.StartIndex)) return null; - var stringPartMatch = stringPart.Slice(match.Index, match.Length); - var value = _transform(stringPartMatch, match); + var segmentMatch = segment.Relocate(match); + var value = _transform(segmentMatch, match); return value is not null - ? new ParsedMatch(stringPartMatch, value) + ? new ParsedMatch(segmentMatch, value) : null; } } \ No newline at end of file diff --git a/DiscordChatExporter.Core/Markdown/Parsing/StringMatcher.cs b/DiscordChatExporter.Core/Markdown/Parsing/StringMatcher.cs index 949fb5d..d086b70 100644 --- a/DiscordChatExporter.Core/Markdown/Parsing/StringMatcher.cs +++ b/DiscordChatExporter.Core/Markdown/Parsing/StringMatcher.cs @@ -6,31 +6,31 @@ internal class StringMatcher : IMatcher { private readonly string _needle; private readonly StringComparison _comparison; - private readonly Func _transform; + private readonly Func _transform; - public StringMatcher(string needle, StringComparison comparison, Func transform) + public StringMatcher(string needle, StringComparison comparison, Func transform) { _needle = needle; _comparison = comparison; _transform = transform; } - public StringMatcher(string needle, Func transform) + public StringMatcher(string needle, Func transform) : this(needle, StringComparison.Ordinal, transform) { } - public ParsedMatch? TryMatch(StringPart stringPart) + public ParsedMatch? TryMatch(StringSegment segment) { - var index = stringPart.Target.IndexOf(_needle, stringPart.StartIndex, stringPart.Length, _comparison); + var index = segment.Source.IndexOf(_needle, segment.StartIndex, segment.Length, _comparison); if (index < 0) return null; - var stringPartMatch = stringPart.Slice(index, _needle.Length); - var value = _transform(stringPartMatch); + var segmentMatch = segment.Relocate(index, _needle.Length); + var value = _transform(segmentMatch); return value is not null - ? new ParsedMatch(stringPartMatch, value) + ? new ParsedMatch(segmentMatch, value) : null; } } \ No newline at end of file diff --git a/DiscordChatExporter.Core/Markdown/Parsing/StringPart.cs b/DiscordChatExporter.Core/Markdown/Parsing/StringPart.cs deleted file mode 100644 index 46658a6..0000000 --- a/DiscordChatExporter.Core/Markdown/Parsing/StringPart.cs +++ /dev/null @@ -1,21 +0,0 @@ -using System.Text.RegularExpressions; - -namespace DiscordChatExporter.Core.Markdown.Parsing; - -internal readonly record struct StringPart(string Target, int StartIndex, int Length) -{ - public int EndIndex => StartIndex + Length; - - public StringPart(string target) - : this(target, 0, target.Length) - { - } - - public StringPart Slice(int newStartIndex, int newLength) => new(Target, newStartIndex, newLength); - - public StringPart Slice(int newStartIndex) => Slice(newStartIndex, EndIndex - newStartIndex); - - public StringPart Slice(Capture capture) => Slice(capture.Index, capture.Length); - - public override string ToString() => Target.Substring(StartIndex, Length); -} \ No newline at end of file diff --git a/DiscordChatExporter.Core/Markdown/Parsing/StringSegment.cs b/DiscordChatExporter.Core/Markdown/Parsing/StringSegment.cs new file mode 100644 index 0000000..5894a32 --- /dev/null +++ b/DiscordChatExporter.Core/Markdown/Parsing/StringSegment.cs @@ -0,0 +1,19 @@ +using System.Text.RegularExpressions; + +namespace DiscordChatExporter.Core.Markdown.Parsing; + +internal readonly record struct StringSegment(string Source, int StartIndex, int Length) +{ + public int EndIndex => StartIndex + Length; + + public StringSegment(string target) + : this(target, 0, target.Length) + { + } + + public StringSegment Relocate(int newStartIndex, int newLength) => new(Source, newStartIndex, newLength); + + public StringSegment Relocate(Capture capture) => Relocate(capture.Index, capture.Length); + + public override string ToString() => Source.Substring(StartIndex, Length); +} \ No newline at end of file