Rename `TitledLinkNodeMatcher` to `MaskedLinkNodeMatcher` to align with Discord's own terminology

1 year ago · ab933a7240
parent a9fc439cc5
commit ab933a7240
1 changed file with 58 additions and 60 deletions
--- a/DiscordChatExporter.Core/Markdown/Parsing/MarkdownParser.cs
+++ b/DiscordChatExporter.Core/Markdown/Parsing/MarkdownParser.cs
@@ -24,41 +24,41 @@ internal static partial class MarkdownParser
    /* Formatting */

    private static readonly IMatcher<MarkdownNode> BoldFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
-        // Capture any character until the earliest double asterisk not followed by an asterisk
+        // Capture any character until the earliest double asterisk not followed by an asterisk.
        new Regex(@"\*\*(.+?)\*\*(?!\*)", DefaultRegexOptions | RegexOptions.Singleline),
        (s, m) => new FormattingNode(FormattingKind.Bold, Parse(s.Relocate(m.Groups[1])))
    );

    private static readonly IMatcher<MarkdownNode> ItalicFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
-        // Capture any character until the earliest single asterisk not preceded or followed by an asterisk
-        // Opening asterisk must not be followed by whitespace
-        // Closing asterisk must not be preceded by whitespace
+        // Capture any character until the earliest single asterisk not preceded or followed by an asterisk.
+        // Opening asterisk must not be followed by whitespace.
+        // Closing asterisk must not be preceded by whitespace.
        new Regex(@"\*(?!\s)(.+?)(?<!\s|\*)\*(?!\*)", DefaultRegexOptions | RegexOptions.Singleline),
        (s, m) => new FormattingNode(FormattingKind.Italic, Parse(s.Relocate(m.Groups[1])))
    );

    private static readonly IMatcher<MarkdownNode> ItalicBoldFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
-        // Capture any character until the earliest triple asterisk not followed by an asterisk
+        // Capture any character until the earliest triple asterisk not followed by an asterisk.
        new Regex(@"\*(\*\*.+?\*\*)\*(?!\*)", DefaultRegexOptions | RegexOptions.Singleline),
        (s, m) => new FormattingNode(FormattingKind.Italic, Parse(s.Relocate(m.Groups[1]), BoldFormattingNodeMatcher))
    );

    private static readonly IMatcher<MarkdownNode> ItalicAltFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
-        // Capture any character except underscore until an underscore
-        // Closing underscore must not be followed by a word character
+        // Capture any character except underscore until an underscore.
+        // Closing underscore must not be followed by a word character.
        new Regex(@"_([^_]+)_(?!\w)", DefaultRegexOptions | RegexOptions.Singleline),
        (s, m) => new FormattingNode(FormattingKind.Italic, Parse(s.Relocate(m.Groups[1])))
    );

    private static readonly IMatcher<MarkdownNode> UnderlineFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
-        // Capture any character until the earliest double underscore not followed by an underscore
+        // Capture any character until the earliest double underscore not followed by an underscore.
        new Regex(@"__(.+?)__(?!_)", DefaultRegexOptions | RegexOptions.Singleline),
        (s, m) => new FormattingNode(FormattingKind.Underline, Parse(s.Relocate(m.Groups[1])))
    );

    private static readonly IMatcher<MarkdownNode> ItalicUnderlineFormattingNodeMatcher =
        new RegexMatcher<MarkdownNode>(
-            // Capture any character until the earliest triple underscore not followed by an underscore
+            // Capture any character until the earliest triple underscore not followed by an underscore.
            new Regex(@"_(__.+?__)_(?!_)", DefaultRegexOptions | RegexOptions.Singleline),
            (s, m) => new FormattingNode(
                FormattingKind.Italic,
@@ -66,44 +66,42 @@ internal static partial class MarkdownParser
            )
        );

-    private static readonly IMatcher<MarkdownNode> StrikethroughFormattingNodeMatcher =
-        new RegexMatcher<MarkdownNode>(
-            // Capture any character until the earliest double tilde
-            new Regex(@"~~(.+?)~~", DefaultRegexOptions | RegexOptions.Singleline),
-            (s, m) => new FormattingNode(FormattingKind.Strikethrough, Parse(s.Relocate(m.Groups[1])))
-        );
+    private static readonly IMatcher<MarkdownNode> StrikethroughFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
+        // Capture any character until the earliest double tilde.
+        new Regex(@"~~(.+?)~~", DefaultRegexOptions | RegexOptions.Singleline),
+        (s, m) => new FormattingNode(FormattingKind.Strikethrough, Parse(s.Relocate(m.Groups[1])))
+    );

    private static readonly IMatcher<MarkdownNode> SpoilerFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
-        // Capture any character until the earliest double pipe
+        // Capture any character until the earliest double pipe.
        new Regex(@"\|\|(.+?)\|\|", DefaultRegexOptions | RegexOptions.Singleline),
        (s, m) => new FormattingNode(FormattingKind.Spoiler, Parse(s.Relocate(m.Groups[1])))
    );

    private static readonly IMatcher<MarkdownNode> SingleLineQuoteNodeMatcher = new RegexMatcher<MarkdownNode>(
-        // Capture any character until the end of the line
-        // Opening 'greater than' character must be followed by whitespace
-        // Text content is optional
+        // Capture any character until the end of the line.
+        // Opening 'greater than' character must be followed by whitespace.
+        // Text content is optional.
        new Regex(@"^>\s(.*\n?)", DefaultRegexOptions),
        (s, m) => new FormattingNode(FormattingKind.Quote, Parse(s.Relocate(m.Groups[1])))
    );

-    private static readonly IMatcher<MarkdownNode> RepeatedSingleLineQuoteNodeMatcher =
-        new RegexMatcher<MarkdownNode>(
-            // Repeatedly capture any character until the end of the line
-            // This one is tricky as it ends up producing multiple separate captures which need to be joined
-            new Regex(@"(?:^>\s(.*\n?)){2,}", DefaultRegexOptions),
-            (_, m) => new FormattingNode(
-                FormattingKind.Quote,
-                Parse(
-                    // Combine all captures into a single string
-                    string.Concat(m.Groups[1].Captures.Select(c => c.Value))
-                )
+    private static readonly IMatcher<MarkdownNode> RepeatedSingleLineQuoteNodeMatcher = new RegexMatcher<MarkdownNode>(
+        // Repeatedly capture any character until the end of the line.
+        // This one is tricky as it ends up producing multiple separate captures which need to be joined.
+        new Regex(@"(?:^>\s(.*\n?)){2,}", DefaultRegexOptions),
+        (_, m) => new FormattingNode(
+            FormattingKind.Quote,
+            Parse(
+                // Combine all captures into a single string
+                string.Concat(m.Groups[1].Captures.Select(c => c.Value))
            )
-        );
+        )
+    );

    private static readonly IMatcher<MarkdownNode> MultiLineQuoteNodeMatcher = new RegexMatcher<MarkdownNode>(
-        // Capture any character until the end of the input
-        // Opening 'greater than' characters must be followed by whitespace
+        // Capture any character until the end of the input.
+        // Opening 'greater than' characters must be followed by whitespace.
        new Regex(@"^>>>\s(.+)", DefaultRegexOptions | RegexOptions.Singleline),
        (s, m) => new FormattingNode(FormattingKind.Quote, Parse(s.Relocate(m.Groups[1])))
    );
@@ -111,17 +109,17 @@ internal static partial class MarkdownParser
    /* Code blocks */

    private static readonly IMatcher<MarkdownNode> InlineCodeBlockNodeMatcher = new RegexMatcher<MarkdownNode>(
-        // Capture any character except backtick until a backtick
-        // Blank lines at the beginning and end of content are trimmed
-        // There can be either one or two backticks, but equal number on both sides
+        // Capture any character except backtick until a backtick.
+        // Blank lines at the beginning and at the end of content are trimmed.
+        // There can be either one or two backticks, but equal number on both sides.
        new Regex(@"(`{1,2})([^`]+)\1", DefaultRegexOptions | RegexOptions.Singleline),
        (_, m) => new InlineCodeBlockNode(m.Groups[2].Value.Trim('\r', '\n'))
    );

    private static readonly IMatcher<MarkdownNode> MultiLineCodeBlockNodeMatcher = new RegexMatcher<MarkdownNode>(
-        // Capture language identifier and then any character until the earliest triple backtick
-        // Language identifier is one word immediately after opening backticks, followed immediately by newline
-        // Blank lines at the beginning and end of content are trimmed
+        // Capture language identifier and then any character until the earliest triple backtick.
+        // Language identifier is one word immediately after opening backticks, followed immediately by newline.
+        // Blank lines at the beginning and at the end of content are trimmed.
        new Regex(@"```(?:(\w*)\n)?(.+?)```", DefaultRegexOptions | RegexOptions.Singleline),
        (_, m) => new MultiLineCodeBlockNode(m.Groups[1].Value, m.Groups[2].Value.Trim('\r', '\n'))
    );
@@ -224,12 +222,6 @@ internal static partial class MarkdownParser

    /* Links */

-    private static readonly IMatcher<MarkdownNode> TitledLinkNodeMatcher = new RegexMatcher<MarkdownNode>(
-        // Capture [title](link)
-        new Regex(@"\[(.+?)\]\((.+?)\)", DefaultRegexOptions),
-        (s, m) => new LinkNode(m.Groups[2].Value, Parse(s.Relocate(m.Groups[1])))
-    );
-
    private static readonly IMatcher<MarkdownNode> AutoLinkNodeMatcher = new RegexMatcher<MarkdownNode>(
        // Capture any non-whitespace character after http:// or https://
        // until the last punctuation character or whitespace
@@ -243,32 +235,38 @@ internal static partial class MarkdownParser
        (_, m) => new LinkNode(m.Groups[1].Value)
    );

+    private static readonly IMatcher<MarkdownNode> MaskedLinkNodeMatcher = new RegexMatcher<MarkdownNode>(
+        // Capture [title](link)
+        new Regex(@"\[(.+?)\]\((.+?)\)", DefaultRegexOptions),
+        (s, m) => new LinkNode(m.Groups[2].Value, Parse(s.Relocate(m.Groups[1])))
+    );
+
    /* Text */

    private static readonly IMatcher<MarkdownNode> ShrugTextNodeMatcher = new StringMatcher<MarkdownNode>(
-        // Capture the shrug kaomoji
-        // This escapes it from matching for formatting
+        // Capture the shrug kaomoji.
+        // This escapes it from matching for formatting.
        @"¯\_(ツ)_/¯",
        s => new TextNode(s.ToString())
    );

    private static readonly IMatcher<MarkdownNode> IgnoredEmojiTextNodeMatcher = new RegexMatcher<MarkdownNode>(
-        // Capture some specific emoji that don't get rendered
-        // This escapes it from matching for emoji
+        // Capture some specific emoji that don't get rendered.
+        // This escapes them from matching for emoji.
        new Regex(@"([\u26A7\u2640\u2642\u2695\u267E\u00A9\u00AE\u2122])", DefaultRegexOptions),
        (_, m) => new TextNode(m.Groups[1].Value)
    );

    private static readonly IMatcher<MarkdownNode> EscapedSymbolTextNodeMatcher = new RegexMatcher<MarkdownNode>(
-        // Capture any "symbol/other" character or surrogate pair preceded by a backslash
-        // This escapes it from matching for emoji
+        // Capture any "symbol/other" character or surrogate pair preceded by a backslash.
+        // This escapes them from matching for emoji.
        new Regex(@"\\(\p{So}|\p{Cs}{2})", DefaultRegexOptions),
        (_, m) => new TextNode(m.Groups[1].Value)
    );

    private static readonly IMatcher<MarkdownNode> EscapedCharacterTextNodeMatcher = new RegexMatcher<MarkdownNode>(
-        // Capture any non-whitespace, non latin alphanumeric character preceded by a backslash
-        // This escapes it from matching for formatting or other tokens
+        // Capture any non-whitespace, non latin alphanumeric character preceded by a backslash.
+        // This escapes them from matching for formatting or other tokens.
        new Regex(@"\\([^a-zA-Z0-9\s])", DefaultRegexOptions),
        (_, m) => new TextNode(m.Groups[1].Value)
    );
@@ -310,9 +308,9 @@ internal static partial class MarkdownParser
        }
    );

-    // Combine all matchers into one
-    // Matchers that have similar patterns are ordered from most specific to least specific
-    private static readonly IMatcher<MarkdownNode> AggregateNodeMatcher = new AggregateMatcher<MarkdownNode>(
+    // Combine all matchers into one.
+    // Matchers that have similar patterns are ordered from most specific to least specific.
+    private static readonly IMatcher<MarkdownNode> NodeMatcher = new AggregateMatcher<MarkdownNode>(
        // Escaped text
        ShrugTextNodeMatcher,
        IgnoredEmojiTextNodeMatcher,
@@ -344,7 +342,7 @@ internal static partial class MarkdownParser
        RoleMentionNodeMatcher,

        // Links
-        TitledLinkNodeMatcher,
+        MaskedLinkNodeMatcher,
        AutoLinkNodeMatcher,
        HiddenLinkNodeMatcher,

@@ -358,7 +356,7 @@ internal static partial class MarkdownParser
    );

    // Minimal set of matchers for non-multimedia formats (e.g. plain text)
-    private static readonly IMatcher<MarkdownNode> MinimalAggregateNodeMatcher = new AggregateMatcher<MarkdownNode>(
+    private static readonly IMatcher<MarkdownNode> MinimalNodeMatcher = new AggregateMatcher<MarkdownNode>(
        // Mentions
        EveryoneMentionNodeMatcher,
        HereMentionNodeMatcher,
@@ -383,13 +381,13 @@ internal static partial class MarkdownParser
 internal static partial class MarkdownParser
 {
    private static IReadOnlyList<MarkdownNode> Parse(StringSegment segment) =>
-        Parse(segment, AggregateNodeMatcher);
+        Parse(segment, NodeMatcher);

    public static IReadOnlyList<MarkdownNode> Parse(string markdown) =>
        Parse(new StringSegment(markdown));

    private static IReadOnlyList<MarkdownNode> ParseMinimal(StringSegment segment) =>
-        Parse(segment, MinimalAggregateNodeMatcher);
+        Parse(segment, MinimalNodeMatcher);

    public static IReadOnlyList<MarkdownNode> ParseMinimal(string markdown) =>
        ParseMinimal(new StringSegment(markdown));