From ab933a72405f7397915a8bd5683dbe3f13a38e2c Mon Sep 17 00:00:00 2001 From: Tyrrrz <1935960+Tyrrrz@users.noreply.github.com> Date: Sun, 30 Apr 2023 20:05:31 +0300 Subject: [PATCH] Rename `TitledLinkNodeMatcher` to `MaskedLinkNodeMatcher` to align with Discord's own terminology --- .../Markdown/Parsing/MarkdownParser.cs | 118 +++++++++--------- 1 file changed, 58 insertions(+), 60 deletions(-) diff --git a/DiscordChatExporter.Core/Markdown/Parsing/MarkdownParser.cs b/DiscordChatExporter.Core/Markdown/Parsing/MarkdownParser.cs index 9d276d0..605e8ef 100644 --- a/DiscordChatExporter.Core/Markdown/Parsing/MarkdownParser.cs +++ b/DiscordChatExporter.Core/Markdown/Parsing/MarkdownParser.cs @@ -24,41 +24,41 @@ internal static partial class MarkdownParser /* Formatting */ private static readonly IMatcher BoldFormattingNodeMatcher = new RegexMatcher( - // Capture any character until the earliest double asterisk not followed by an asterisk + // Capture any character until the earliest double asterisk not followed by an asterisk. new Regex(@"\*\*(.+?)\*\*(?!\*)", DefaultRegexOptions | RegexOptions.Singleline), (s, m) => new FormattingNode(FormattingKind.Bold, Parse(s.Relocate(m.Groups[1]))) ); private static readonly IMatcher ItalicFormattingNodeMatcher = new RegexMatcher( - // Capture any character until the earliest single asterisk not preceded or followed by an asterisk - // Opening asterisk must not be followed by whitespace - // Closing asterisk must not be preceded by whitespace + // Capture any character until the earliest single asterisk not preceded or followed by an asterisk. + // Opening asterisk must not be followed by whitespace. + // Closing asterisk must not be preceded by whitespace. new Regex(@"\*(?!\s)(.+?)(? 
new FormattingNode(FormattingKind.Italic, Parse(s.Relocate(m.Groups[1]))) ); private static readonly IMatcher ItalicBoldFormattingNodeMatcher = new RegexMatcher( - // Capture any character until the earliest triple asterisk not followed by an asterisk + // Capture any character until the earliest triple asterisk not followed by an asterisk. new Regex(@"\*(\*\*.+?\*\*)\*(?!\*)", DefaultRegexOptions | RegexOptions.Singleline), (s, m) => new FormattingNode(FormattingKind.Italic, Parse(s.Relocate(m.Groups[1]), BoldFormattingNodeMatcher)) ); private static readonly IMatcher ItalicAltFormattingNodeMatcher = new RegexMatcher( - // Capture any character except underscore until an underscore - // Closing underscore must not be followed by a word character + // Capture any character except underscore until an underscore. + // Closing underscore must not be followed by a word character. new Regex(@"_([^_]+)_(?!\w)", DefaultRegexOptions | RegexOptions.Singleline), (s, m) => new FormattingNode(FormattingKind.Italic, Parse(s.Relocate(m.Groups[1]))) ); private static readonly IMatcher UnderlineFormattingNodeMatcher = new RegexMatcher( - // Capture any character until the earliest double underscore not followed by an underscore + // Capture any character until the earliest double underscore not followed by an underscore. new Regex(@"__(.+?)__(?!_)", DefaultRegexOptions | RegexOptions.Singleline), (s, m) => new FormattingNode(FormattingKind.Underline, Parse(s.Relocate(m.Groups[1]))) ); private static readonly IMatcher ItalicUnderlineFormattingNodeMatcher = new RegexMatcher( - // Capture any character until the earliest triple underscore not followed by an underscore + // Capture any character until the earliest triple underscore not followed by an underscore. 
new Regex(@"_(__.+?__)_(?!_)", DefaultRegexOptions | RegexOptions.Singleline), (s, m) => new FormattingNode( FormattingKind.Italic, @@ -66,44 +66,42 @@ internal static partial class MarkdownParser ) ); - private static readonly IMatcher StrikethroughFormattingNodeMatcher = - new RegexMatcher( - // Capture any character until the earliest double tilde - new Regex(@"~~(.+?)~~", DefaultRegexOptions | RegexOptions.Singleline), - (s, m) => new FormattingNode(FormattingKind.Strikethrough, Parse(s.Relocate(m.Groups[1]))) - ); + private static readonly IMatcher StrikethroughFormattingNodeMatcher = new RegexMatcher( + // Capture any character until the earliest double tilde. + new Regex(@"~~(.+?)~~", DefaultRegexOptions | RegexOptions.Singleline), + (s, m) => new FormattingNode(FormattingKind.Strikethrough, Parse(s.Relocate(m.Groups[1]))) + ); private static readonly IMatcher SpoilerFormattingNodeMatcher = new RegexMatcher( - // Capture any character until the earliest double pipe + // Capture any character until the earliest double pipe. new Regex(@"\|\|(.+?)\|\|", DefaultRegexOptions | RegexOptions.Singleline), (s, m) => new FormattingNode(FormattingKind.Spoiler, Parse(s.Relocate(m.Groups[1]))) ); private static readonly IMatcher SingleLineQuoteNodeMatcher = new RegexMatcher( - // Capture any character until the end of the line - // Opening 'greater than' character must be followed by whitespace - // Text content is optional + // Capture any character until the end of the line. + // Opening 'greater than' character must be followed by whitespace. + // Text content is optional. 
new Regex(@"^>\s(.*\n?)", DefaultRegexOptions), (s, m) => new FormattingNode(FormattingKind.Quote, Parse(s.Relocate(m.Groups[1]))) ); - private static readonly IMatcher RepeatedSingleLineQuoteNodeMatcher = - new RegexMatcher( - // Repeatedly capture any character until the end of the line - // This one is tricky as it ends up producing multiple separate captures which need to be joined - new Regex(@"(?:^>\s(.*\n?)){2,}", DefaultRegexOptions), - (_, m) => new FormattingNode( - FormattingKind.Quote, - Parse( - // Combine all captures into a single string - string.Concat(m.Groups[1].Captures.Select(c => c.Value)) - ) + private static readonly IMatcher RepeatedSingleLineQuoteNodeMatcher = new RegexMatcher( + // Repeatedly capture any character until the end of the line. + // This one is tricky as it ends up producing multiple separate captures which need to be joined. + new Regex(@"(?:^>\s(.*\n?)){2,}", DefaultRegexOptions), + (_, m) => new FormattingNode( + FormattingKind.Quote, + Parse( + // Combine all captures into a single string + string.Concat(m.Groups[1].Captures.Select(c => c.Value)) ) - ); + ) + ); private static readonly IMatcher MultiLineQuoteNodeMatcher = new RegexMatcher( - // Capture any character until the end of the input - // Opening 'greater than' characters must be followed by whitespace + // Capture any character until the end of the input. + // Opening 'greater than' characters must be followed by whitespace. 
new Regex(@"^>>>\s(.+)", DefaultRegexOptions | RegexOptions.Singleline), (s, m) => new FormattingNode(FormattingKind.Quote, Parse(s.Relocate(m.Groups[1]))) ); @@ -111,17 +109,17 @@ internal static partial class MarkdownParser /* Code blocks */ private static readonly IMatcher InlineCodeBlockNodeMatcher = new RegexMatcher( - // Capture any character except backtick until a backtick - // Blank lines at the beginning and end of content are trimmed - // There can be either one or two backticks, but equal number on both sides + // Capture any character except backtick until a backtick. + // Blank lines at the beginning and at the end of content are trimmed. + // There can be either one or two backticks, but equal number on both sides. new Regex(@"(`{1,2})([^`]+)\1", DefaultRegexOptions | RegexOptions.Singleline), (_, m) => new InlineCodeBlockNode(m.Groups[2].Value.Trim('\r', '\n')) ); private static readonly IMatcher MultiLineCodeBlockNodeMatcher = new RegexMatcher( - // Capture language identifier and then any character until the earliest triple backtick - // Language identifier is one word immediately after opening backticks, followed immediately by newline - // Blank lines at the beginning and end of content are trimmed + // Capture language identifier and then any character until the earliest triple backtick. + // Language identifier is one word immediately after opening backticks, followed immediately by newline. + // Blank lines at the beginning and at the end of content are trimmed. 
new Regex(@"```(?:(\w*)\n)?(.+?)```", DefaultRegexOptions | RegexOptions.Singleline), (_, m) => new MultiLineCodeBlockNode(m.Groups[1].Value, m.Groups[2].Value.Trim('\r', '\n')) ); @@ -224,12 +222,6 @@ internal static partial class MarkdownParser /* Links */ - private static readonly IMatcher TitledLinkNodeMatcher = new RegexMatcher( - // Capture [title](link) - new Regex(@"\[(.+?)\]\((.+?)\)", DefaultRegexOptions), - (s, m) => new LinkNode(m.Groups[2].Value, Parse(s.Relocate(m.Groups[1]))) - ); - private static readonly IMatcher AutoLinkNodeMatcher = new RegexMatcher( // Capture any non-whitespace character after http:// or https:// // until the last punctuation character or whitespace @@ -243,32 +235,38 @@ internal static partial class MarkdownParser (_, m) => new LinkNode(m.Groups[1].Value) ); + private static readonly IMatcher MaskedLinkNodeMatcher = new RegexMatcher( + // Capture [title](link) + new Regex(@"\[(.+?)\]\((.+?)\)", DefaultRegexOptions), + (s, m) => new LinkNode(m.Groups[2].Value, Parse(s.Relocate(m.Groups[1]))) + ); + /* Text */ private static readonly IMatcher ShrugTextNodeMatcher = new StringMatcher( - // Capture the shrug kaomoji - // This escapes it from matching for formatting + // Capture the shrug kaomoji. + // This escapes it from matching for formatting. @"¯\_(ツ)_/¯", s => new TextNode(s.ToString()) ); private static readonly IMatcher IgnoredEmojiTextNodeMatcher = new RegexMatcher( - // Capture some specific emoji that don't get rendered - // This escapes it from matching for emoji + // Capture some specific emoji that don't get rendered. + // This escapes them from matching for emoji. 
new Regex(@"([\u26A7\u2640\u2642\u2695\u267E\u00A9\u00AE\u2122])", DefaultRegexOptions), (_, m) => new TextNode(m.Groups[1].Value) ); private static readonly IMatcher EscapedSymbolTextNodeMatcher = new RegexMatcher( - // Capture any "symbol/other" character or surrogate pair preceded by a backslash - // This escapes it from matching for emoji + // Capture any "symbol/other" character or surrogate pair preceded by a backslash. + // This escapes them from matching for emoji. new Regex(@"\\(\p{So}|\p{Cs}{2})", DefaultRegexOptions), (_, m) => new TextNode(m.Groups[1].Value) ); private static readonly IMatcher EscapedCharacterTextNodeMatcher = new RegexMatcher( - // Capture any non-whitespace, non latin alphanumeric character preceded by a backslash - // This escapes it from matching for formatting or other tokens + // Capture any non-whitespace, non latin alphanumeric character preceded by a backslash. + // This escapes them from matching for formatting or other tokens. new Regex(@"\\([^a-zA-Z0-9\s])", DefaultRegexOptions), (_, m) => new TextNode(m.Groups[1].Value) ); @@ -310,9 +308,9 @@ internal static partial class MarkdownParser } ); - // Combine all matchers into one - // Matchers that have similar patterns are ordered from most specific to least specific - private static readonly IMatcher AggregateNodeMatcher = new AggregateMatcher( + // Combine all matchers into one. + // Matchers that have similar patterns are ordered from most specific to least specific. + private static readonly IMatcher NodeMatcher = new AggregateMatcher( // Escaped text ShrugTextNodeMatcher, IgnoredEmojiTextNodeMatcher, @@ -344,7 +342,7 @@ internal static partial class MarkdownParser RoleMentionNodeMatcher, // Links - TitledLinkNodeMatcher, + MaskedLinkNodeMatcher, AutoLinkNodeMatcher, HiddenLinkNodeMatcher, @@ -358,7 +356,7 @@ internal static partial class MarkdownParser ); // Minimal set of matchers for non-multimedia formats (e.g. 
plain text) - private static readonly IMatcher MinimalAggregateNodeMatcher = new AggregateMatcher( + private static readonly IMatcher MinimalNodeMatcher = new AggregateMatcher( // Mentions EveryoneMentionNodeMatcher, HereMentionNodeMatcher, @@ -383,13 +381,13 @@ internal static partial class MarkdownParser internal static partial class MarkdownParser { private static IReadOnlyList Parse(StringSegment segment) => - Parse(segment, AggregateNodeMatcher); + Parse(segment, NodeMatcher); public static IReadOnlyList Parse(string markdown) => Parse(new StringSegment(markdown)); private static IReadOnlyList ParseMinimal(StringSegment segment) => - Parse(segment, MinimalAggregateNodeMatcher); + Parse(segment, MinimalNodeMatcher); public static IReadOnlyList ParseMinimal(string markdown) => ParseMinimal(new StringSegment(markdown));