|
|
|
@ -24,41 +24,41 @@ internal static partial class MarkdownParser
|
|
|
|
|
/* Formatting */
|
|
|
|
|
|
|
|
|
|
// Matcher for bold text written as **content**.
// Group 1 (the text between the delimiters) is passed back to Parse, so markdown nested inside is parsed too.
private static readonly IMatcher<MarkdownNode> BoldFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
|
|
|
|
|
// Capture any character until the earliest double asterisk not followed by an asterisk
|
|
|
|
|
// Capture any character until the earliest double asterisk not followed by an asterisk.
|
|
|
|
|
// RegexOptions.Singleline makes '.' match '\n' as well, so the span may cover several lines.
new Regex(@"\*\*(.+?)\*\*(?!\*)", DefaultRegexOptions | RegexOptions.Singleline),
|
|
|
|
|
// NOTE(review): Relocate is defined elsewhere; presumably it re-targets the segment to group 1 — confirm.
(s, m) => new FormattingNode(FormattingKind.Bold, Parse(s.Relocate(m.Groups[1])))
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
// Matcher for italic text written as *content*.
// Group 1 (the text between the asterisks) is passed back to Parse, so markdown nested inside is parsed too.
private static readonly IMatcher<MarkdownNode> ItalicFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
|
|
|
|
|
// Capture any character until the earliest single asterisk not preceded or followed by an asterisk
|
|
|
|
|
// Opening asterisk must not be followed by whitespace
|
|
|
|
|
// Closing asterisk must not be preceded by whitespace
|
|
|
|
|
// Capture any character until the earliest single asterisk not preceded or followed by an asterisk.
|
|
|
|
|
// Opening asterisk must not be followed by whitespace.
|
|
|
|
|
// Closing asterisk must not be preceded by whitespace.
|
|
|
|
|
// (?!\s) guards the opening asterisk; (?<!\s|\*) and (?!\*) guard the closing one.
// RegexOptions.Singleline makes '.' match '\n' as well, so the span may cover several lines.
new Regex(@"\*(?!\s)(.+?)(?<!\s|\*)\*(?!\*)", DefaultRegexOptions | RegexOptions.Singleline),
|
|
|
|
|
(s, m) => new FormattingNode(FormattingKind.Italic, Parse(s.Relocate(m.Groups[1])))
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
// Matcher for combined italic + bold text written as ***content***.
// The pattern is one asterisk around a **content** group, so group 1 still contains the bold delimiters.
private static readonly IMatcher<MarkdownNode> ItalicBoldFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
|
|
|
|
|
// Capture any character until the earliest triple asterisk not followed by an asterisk
|
|
|
|
|
// Capture any character until the earliest triple asterisk not followed by an asterisk.
|
|
|
|
|
new Regex(@"\*(\*\*.+?\*\*)\*(?!\*)", DefaultRegexOptions | RegexOptions.Singleline),
|
|
|
|
|
// Produces an Italic node whose children come from parsing group 1 with BoldFormattingNodeMatcher only
// (not the full matcher set), which is expected to yield the inner Bold node.
(s, m) => new FormattingNode(FormattingKind.Italic, Parse(s.Relocate(m.Groups[1]), BoldFormattingNodeMatcher))
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
// Matcher for the underscore form of italic text: _content_.
// The content class [^_] excludes underscores, so the span ends at the first underscore after the opening one.
private static readonly IMatcher<MarkdownNode> ItalicAltFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
|
|
|
|
|
// Capture any character except underscore until an underscore
|
|
|
|
|
// Closing underscore must not be followed by a word character
|
|
|
|
|
// Capture any character except underscore until an underscore.
|
|
|
|
|
// Closing underscore must not be followed by a word character.
|
|
|
|
|
// (?!\w) rejects a closing underscore that is followed by a word character (e.g. inside snake_case_names).
new Regex(@"_([^_]+)_(?!\w)", DefaultRegexOptions | RegexOptions.Singleline),
|
|
|
|
|
(s, m) => new FormattingNode(FormattingKind.Italic, Parse(s.Relocate(m.Groups[1])))
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
// Matcher for underlined text written as __content__.
// Group 1 (the text between the delimiters) is passed back to Parse, so markdown nested inside is parsed too.
private static readonly IMatcher<MarkdownNode> UnderlineFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
|
|
|
|
|
// Capture any character until the earliest double underscore not followed by an underscore
|
|
|
|
|
// Capture any character until the earliest double underscore not followed by an underscore.
|
|
|
|
|
// RegexOptions.Singleline makes '.' match '\n' as well, so the span may cover several lines.
new Regex(@"__(.+?)__(?!_)", DefaultRegexOptions | RegexOptions.Singleline),
|
|
|
|
|
(s, m) => new FormattingNode(FormattingKind.Underline, Parse(s.Relocate(m.Groups[1])))
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
private static readonly IMatcher<MarkdownNode> ItalicUnderlineFormattingNodeMatcher =
|
|
|
|
|
new RegexMatcher<MarkdownNode>(
|
|
|
|
|
// Capture any character until the earliest triple underscore not followed by an underscore
|
|
|
|
|
// Capture any character until the earliest triple underscore not followed by an underscore.
|
|
|
|
|
new Regex(@"_(__.+?__)_(?!_)", DefaultRegexOptions | RegexOptions.Singleline),
|
|
|
|
|
(s, m) => new FormattingNode(
|
|
|
|
|
FormattingKind.Italic,
|
|
|
|
@ -66,31 +66,29 @@ internal static partial class MarkdownParser
|
|
|
|
|
)
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
// Matcher for struck-through text written as ~~content~~.
// Group 1 (the text between the delimiters) is passed back to Parse, so markdown nested inside is parsed too.
private static readonly IMatcher<MarkdownNode> StrikethroughFormattingNodeMatcher =
|
|
|
|
|
new RegexMatcher<MarkdownNode>(
|
|
|
|
|
// Capture any character until the earliest double tilde
|
|
|
|
|
private static readonly IMatcher<MarkdownNode> StrikethroughFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
|
|
|
|
|
// Capture any character until the earliest double tilde.
|
|
|
|
|
// Lazy (.+?) stops at the first closing "~~"; Singleline lets the span cover several lines.
new Regex(@"~~(.+?)~~", DefaultRegexOptions | RegexOptions.Singleline),
|
|
|
|
|
(s, m) => new FormattingNode(FormattingKind.Strikethrough, Parse(s.Relocate(m.Groups[1])))
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
// Matcher for spoiler text written as ||content||.
// Group 1 (the text between the delimiters) is passed back to Parse, so markdown nested inside is parsed too.
private static readonly IMatcher<MarkdownNode> SpoilerFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
|
|
|
|
|
// Capture any character until the earliest double pipe
|
|
|
|
|
// Capture any character until the earliest double pipe.
|
|
|
|
|
// Pipes are escaped (\|) because '|' is the regex alternation operator; Singleline lets the span cover several lines.
new Regex(@"\|\|(.+?)\|\|", DefaultRegexOptions | RegexOptions.Singleline),
|
|
|
|
|
(s, m) => new FormattingNode(FormattingKind.Spoiler, Parse(s.Relocate(m.Groups[1])))
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
// Matcher for a one-line quote written as "> content".
// Group 1 holds the rest of the line (possibly empty) plus its trailing '\n' when present, and is re-parsed.
private static readonly IMatcher<MarkdownNode> SingleLineQuoteNodeMatcher = new RegexMatcher<MarkdownNode>(
|
|
|
|
|
// Capture any character until the end of the line
|
|
|
|
|
// Opening 'greater than' character must be followed by whitespace
|
|
|
|
|
// Text content is optional
|
|
|
|
|
// Capture any character until the end of the line.
|
|
|
|
|
// Opening 'greater than' character must be followed by whitespace.
|
|
|
|
|
// Text content is optional.
|
|
|
|
|
// RegexOptions.Singleline is not added here, unlike the inline formatting matchers.
// NOTE(review): where '^' may match and whether '.' stops at '\n' depend on DefaultRegexOptions (declared elsewhere) — confirm.
new Regex(@"^>\s(.*\n?)", DefaultRegexOptions),
|
|
|
|
|
(s, m) => new FormattingNode(FormattingKind.Quote, Parse(s.Relocate(m.Groups[1])))
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
private static readonly IMatcher<MarkdownNode> RepeatedSingleLineQuoteNodeMatcher =
|
|
|
|
|
new RegexMatcher<MarkdownNode>(
|
|
|
|
|
// Repeatedly capture any character until the end of the line
|
|
|
|
|
// This one is tricky as it ends up producing multiple separate captures which need to be joined
|
|
|
|
|
private static readonly IMatcher<MarkdownNode> RepeatedSingleLineQuoteNodeMatcher = new RegexMatcher<MarkdownNode>(
|
|
|
|
|
// Repeatedly capture any character until the end of the line.
|
|
|
|
|
// This one is tricky as it ends up producing multiple separate captures which need to be joined.
|
|
|
|
|
new Regex(@"(?:^>\s(.*\n?)){2,}", DefaultRegexOptions),
|
|
|
|
|
(_, m) => new FormattingNode(
|
|
|
|
|
FormattingKind.Quote,
|
|
|
|
@ -102,8 +100,8 @@ internal static partial class MarkdownParser
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
// Matcher for a block quote opened with ">>> " that extends to the end of the input.
// Group 1 (everything after the marker and its whitespace) is re-parsed and wrapped in a Quote node.
private static readonly IMatcher<MarkdownNode> MultiLineQuoteNodeMatcher = new RegexMatcher<MarkdownNode>(
|
|
|
|
|
// Capture any character until the end of the input
|
|
|
|
|
// Opening 'greater than' characters must be followed by whitespace
|
|
|
|
|
// Capture any character until the end of the input.
|
|
|
|
|
// Opening 'greater than' characters must be followed by whitespace.
|
|
|
|
|
// Greedy (.+) combined with Singleline ('.' matches '\n') consumes all remaining text, line breaks included.
new Regex(@"^>>>\s(.+)", DefaultRegexOptions | RegexOptions.Singleline),
|
|
|
|
|
(s, m) => new FormattingNode(FormattingKind.Quote, Parse(s.Relocate(m.Groups[1])))
|
|
|
|
|
);
|
|
|
|
@ -111,17 +109,17 @@ internal static partial class MarkdownParser
|
|
|
|
|
/* Code blocks */
|
|
|
|
|
|
|
|
|
|
// Matcher for inline code written as `content` or ``content``.
// The content is taken verbatim (it is not passed to Parse), so markdown inside code is left untouched.
private static readonly IMatcher<MarkdownNode> InlineCodeBlockNodeMatcher = new RegexMatcher<MarkdownNode>(
|
|
|
|
|
// Capture any character except backtick until a backtick
|
|
|
|
|
// Blank lines at the beginning and end of content are trimmed
|
|
|
|
|
// There can be either one or two backticks, but equal number on both sides
|
|
|
|
|
// Capture any character except backtick until a backtick.
|
|
|
|
|
// Blank lines at the beginning and at the end of content are trimmed.
|
|
|
|
|
// There can be either one or two backticks, but equal number on both sides.
|
|
|
|
|
// Group 1 is the opening backtick run; the backreference \1 requires the identical run to close the span.
new Regex(@"(`{1,2})([^`]+)\1", DefaultRegexOptions | RegexOptions.Singleline),
|
|
|
|
|
// Trim('\r', '\n') removes only leading/trailing CR and LF characters; spaces and tabs are preserved.
(_, m) => new InlineCodeBlockNode(m.Groups[2].Value.Trim('\r', '\n'))
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
// Matcher for fenced code blocks written as ```lang\ncontent```.
// Group 1 is the optional language identifier, group 2 the code; the code is taken verbatim (not passed to Parse).
private static readonly IMatcher<MarkdownNode> MultiLineCodeBlockNodeMatcher = new RegexMatcher<MarkdownNode>(
|
|
|
|
|
// Capture language identifier and then any character until the earliest triple backtick
|
|
|
|
|
// Language identifier is one word immediately after opening backticks, followed immediately by newline
|
|
|
|
|
// Blank lines at the beginning and end of content are trimmed
|
|
|
|
|
// Capture language identifier and then any character until the earliest triple backtick.
|
|
|
|
|
// Language identifier is one word immediately after opening backticks, followed immediately by newline.
|
|
|
|
|
// Blank lines at the beginning and at the end of content are trimmed.
|
|
|
|
|
// The (?:(\w*)\n)? prefix is optional as a whole and \w* may match nothing, so group 1 can be an empty string.
new Regex(@"```(?:(\w*)\n)?(.+?)```", DefaultRegexOptions | RegexOptions.Singleline),
|
|
|
|
|
// Trim('\r', '\n') removes only leading/trailing CR and LF characters; indentation inside the code is preserved.
(_, m) => new MultiLineCodeBlockNode(m.Groups[1].Value, m.Groups[2].Value.Trim('\r', '\n'))
|
|
|
|
|
);
|
|
|
|
@ -224,12 +222,6 @@ internal static partial class MarkdownParser
|
|
|
|
|
|
|
|
|
|
/* Links */
|
|
|
|
|
|
|
|
|
|
// Matcher for links written as [title](link).
// Group 2 (the link target) is stored as a raw string; group 1 (the title) is re-parsed into child nodes.
private static readonly IMatcher<MarkdownNode> TitledLinkNodeMatcher = new RegexMatcher<MarkdownNode>(
|
|
|
|
|
// Capture [title](link)
|
|
|
|
|
// Both groups are lazy, so each stops at the first ']' / ')' that lets the overall pattern match.
new Regex(@"\[(.+?)\]\((.+?)\)", DefaultRegexOptions),
|
|
|
|
|
(s, m) => new LinkNode(m.Groups[2].Value, Parse(s.Relocate(m.Groups[1])))
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
private static readonly IMatcher<MarkdownNode> AutoLinkNodeMatcher = new RegexMatcher<MarkdownNode>(
|
|
|
|
|
// Capture any non-whitespace character after http:// or https://
|
|
|
|
|
// until the last punctuation character or whitespace
|
|
|
|
@ -243,32 +235,38 @@ internal static partial class MarkdownParser
|
|
|
|
|
(_, m) => new LinkNode(m.Groups[1].Value)
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
// Matcher for masked links written as [title](link).
// Group 2 (the link target) is stored as a raw string; group 1 (the title) is re-parsed into child nodes.
private static readonly IMatcher<MarkdownNode> MaskedLinkNodeMatcher = new RegexMatcher<MarkdownNode>(
|
|
|
|
|
// Capture [title](link)
|
|
|
|
|
// Both groups are lazy, so each stops at the first ']' / ')' that lets the overall pattern match.
new Regex(@"\[(.+?)\]\((.+?)\)", DefaultRegexOptions),
|
|
|
|
|
(s, m) => new LinkNode(m.Groups[2].Value, Parse(s.Relocate(m.Groups[1])))
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
/* Text */
|
|
|
|
|
|
|
|
|
|
// Matcher for the literal shrug kaomoji, emitted as a plain TextNode.
// Its underscores would otherwise be candidates for the underscore-based formatting matchers.
private static readonly IMatcher<MarkdownNode> ShrugTextNodeMatcher = new StringMatcher<MarkdownNode>(
|
|
|
|
|
// Capture the shrug kaomoji
|
|
|
|
|
// This escapes it from matching for formatting
|
|
|
|
|
// Capture the shrug kaomoji.
|
|
|
|
|
// This escapes it from matching for formatting.
|
|
|
|
|
// Verbatim string literal (@"..."): the backslash is an ordinary character, not an escape.
@"¯\_(ツ)_/¯",
|
|
|
|
|
// NOTE(review): 's' is presumably the matched segment (StringMatcher is defined elsewhere) — confirm.
s => new TextNode(s.ToString())
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
// Matcher that turns a fixed set of single characters into plain TextNodes.
// Matching them here keeps them away from the emoji matcher, per the comments below.
private static readonly IMatcher<MarkdownNode> IgnoredEmojiTextNodeMatcher = new RegexMatcher<MarkdownNode>(
|
|
|
|
|
// Capture some specific emoji that don't get rendered
|
|
|
|
|
// This escapes it from matching for emoji
|
|
|
|
|
// Capture some specific emoji that don't get rendered.
|
|
|
|
|
// This escapes them from matching for emoji.
|
|
|
|
|
// Code points: U+26A7 transgender symbol, U+2640 female sign, U+2642 male sign, U+2695 staff of Aesculapius,
// U+267E permanent paper sign, U+00A9 copyright sign, U+00AE registered sign, U+2122 trade mark sign.
new Regex(@"([\u26A7\u2640\u2642\u2695\u267E\u00A9\u00AE\u2122])", DefaultRegexOptions),
|
|
|
|
|
(_, m) => new TextNode(m.Groups[1].Value)
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
// Matcher for a backslash-escaped symbol; the resulting TextNode contains the symbol without the backslash.
private static readonly IMatcher<MarkdownNode> EscapedSymbolTextNodeMatcher = new RegexMatcher<MarkdownNode>(
|
|
|
|
|
// Capture any "symbol/other" character or surrogate pair preceded by a backslash
|
|
|
|
|
// This escapes it from matching for emoji
|
|
|
|
|
// Capture any "symbol/other" character or surrogate pair preceded by a backslash.
|
|
|
|
|
// This escapes them from matching for emoji.
|
|
|
|
|
// \p{So} is the Unicode "Symbol, Other" category; \p{Cs}{2} is two surrogate code units,
// i.e. one character outside the Basic Multilingual Plane as represented in a UTF-16 string.
new Regex(@"\\(\p{So}|\p{Cs}{2})", DefaultRegexOptions),
|
|
|
|
|
// Only group 1 is kept, so the leading backslash is dropped from the output text.
(_, m) => new TextNode(m.Groups[1].Value)
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
// Matcher for a backslash-escaped punctuation-like character (e.g. \* or \_).
// The resulting TextNode contains the character without the backslash.
private static readonly IMatcher<MarkdownNode> EscapedCharacterTextNodeMatcher = new RegexMatcher<MarkdownNode>(
|
|
|
|
|
// Capture any non-whitespace, non latin alphanumeric character preceded by a backslash
|
|
|
|
|
// This escapes it from matching for formatting or other tokens
|
|
|
|
|
// Capture any non-whitespace, non latin alphanumeric character preceded by a backslash.
|
|
|
|
|
// This escapes them from matching for formatting or other tokens.
|
|
|
|
|
// The class [^a-zA-Z0-9\s] excludes ASCII letters, digits and whitespace, so sequences such as "\n" or "\d"
// written literally in a message are not treated as escapes.
new Regex(@"\\([^a-zA-Z0-9\s])", DefaultRegexOptions),
|
|
|
|
|
// Only group 1 is kept, so the leading backslash is dropped from the output text.
(_, m) => new TextNode(m.Groups[1].Value)
|
|
|
|
|
);
|
|
|
|
@ -310,9 +308,9 @@ internal static partial class MarkdownParser
|
|
|
|
|
}
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
// Combine all matchers into one
|
|
|
|
|
// Matchers that have similar patterns are ordered from most specific to least specific
|
|
|
|
|
private static readonly IMatcher<MarkdownNode> AggregateNodeMatcher = new AggregateMatcher<MarkdownNode>(
|
|
|
|
|
// Combine all matchers into one.
|
|
|
|
|
// Matchers that have similar patterns are ordered from most specific to least specific.
|
|
|
|
|
private static readonly IMatcher<MarkdownNode> NodeMatcher = new AggregateMatcher<MarkdownNode>(
|
|
|
|
|
// Escaped text
|
|
|
|
|
ShrugTextNodeMatcher,
|
|
|
|
|
IgnoredEmojiTextNodeMatcher,
|
|
|
|
@ -344,7 +342,7 @@ internal static partial class MarkdownParser
|
|
|
|
|
RoleMentionNodeMatcher,
|
|
|
|
|
|
|
|
|
|
// Links
|
|
|
|
|
TitledLinkNodeMatcher,
|
|
|
|
|
MaskedLinkNodeMatcher,
|
|
|
|
|
AutoLinkNodeMatcher,
|
|
|
|
|
HiddenLinkNodeMatcher,
|
|
|
|
|
|
|
|
|
@ -358,7 +356,7 @@ internal static partial class MarkdownParser
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
// Minimal set of matchers for non-multimedia formats (e.g. plain text)
|
|
|
|
|
private static readonly IMatcher<MarkdownNode> MinimalAggregateNodeMatcher = new AggregateMatcher<MarkdownNode>(
|
|
|
|
|
private static readonly IMatcher<MarkdownNode> MinimalNodeMatcher = new AggregateMatcher<MarkdownNode>(
|
|
|
|
|
// Mentions
|
|
|
|
|
EveryoneMentionNodeMatcher,
|
|
|
|
|
HereMentionNodeMatcher,
|
|
|
|
@ -383,13 +381,13 @@ internal static partial class MarkdownParser
|
|
|
|
|
internal static partial class MarkdownParser
|
|
|
|
|
{
|
|
|
|
|
// Parses a segment with the combined (full) matcher by delegating to the two-argument Parse overload,
// which is declared outside this view.
private static IReadOnlyList<MarkdownNode> Parse(StringSegment segment) =>
|
|
|
|
|
Parse(segment, AggregateNodeMatcher);
|
|
|
|
|
Parse(segment, NodeMatcher);
|
|
|
|
|
|
|
|
|
|
// Public entry point: wraps the whole markdown string in a StringSegment and parses it with the full matcher set.
public static IReadOnlyList<MarkdownNode> Parse(string markdown) =>
|
|
|
|
|
Parse(new StringSegment(markdown));
|
|
|
|
|
|
|
|
|
|
// Parses a segment with the minimal matcher set by delegating to the two-argument Parse overload,
// which is declared outside this view.
private static IReadOnlyList<MarkdownNode> ParseMinimal(StringSegment segment) =>
|
|
|
|
|
Parse(segment, MinimalAggregateNodeMatcher);
|
|
|
|
|
Parse(segment, MinimalNodeMatcher);
|
|
|
|
|
|
|
|
|
|
// Public entry point: wraps the whole markdown string in a StringSegment and parses it with the minimal matcher set.
public static IReadOnlyList<MarkdownNode> ParseMinimal(string markdown) =>
|
|
|
|
|
ParseMinimal(new StringSegment(markdown));
|
|
|
|
|