|
|
@ -5,6 +5,7 @@ using System.Linq;
|
|
|
|
using System.Text.RegularExpressions;
|
|
|
|
using System.Text.RegularExpressions;
|
|
|
|
using DiscordChatExporter.Core.Discord;
|
|
|
|
using DiscordChatExporter.Core.Discord;
|
|
|
|
using DiscordChatExporter.Core.Utils;
|
|
|
|
using DiscordChatExporter.Core.Utils;
|
|
|
|
|
|
|
|
using DiscordChatExporter.Core.Utils.Extensions;
|
|
|
|
|
|
|
|
|
|
|
|
namespace DiscordChatExporter.Core.Markdown.Parsing;
|
|
|
|
namespace DiscordChatExporter.Core.Markdown.Parsing;
|
|
|
|
|
|
|
|
|
|
|
@ -16,169 +17,206 @@ internal static partial class MarkdownParser
|
|
|
|
{
|
|
|
|
{
|
|
|
|
// Options shared by every regex-based matcher in this parser.
// Because IgnorePatternWhitespace is enabled, unescaped whitespace in a pattern is ignored and
// an unescaped '#' starts a comment, so patterns must escape a literal '#' and spell
// line breaks as \n. Whitespace inside a character class is still matched literally.
private const RegexOptions DefaultRegexOptions =
    RegexOptions.Compiled |
    RegexOptions.IgnorePatternWhitespace |
    RegexOptions.CultureInvariant |
    RegexOptions.Multiline;
|
|
|
|
|
|
|
|
|
|
|
|
/* Formatting */
|
|
|
|
/* Formatting */
|
|
|
|
|
|
|
|
|
|
|
|
// Matches **bold** text and parses the captured content recursively.
private static readonly IMatcher<MarkdownNode> BoldFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
    // Capture any character until the earliest double asterisk not followed by an asterisk
    new Regex(@"\*\*(.+?)\*\*(?!\*)", DefaultRegexOptions | RegexOptions.Singleline),
    (s, m) => new FormattingNode(FormattingKind.Bold, Parse(s.Relocate(m.Groups[1])))
);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Matches *italic* text and parses the captured content recursively.
private static readonly IMatcher<MarkdownNode> ItalicFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
    // Capture any character until the earliest single asterisk not preceded or followed by an asterisk
    // Opening asterisk must not be followed by whitespace
    // Closing asterisk must not be preceded by whitespace
    new Regex(@"\*(?!\s)(.+?)(?<!\s|\*)\*(?!\*)", DefaultRegexOptions | RegexOptions.Singleline),
    (s, m) => new FormattingNode(FormattingKind.Italic, Parse(s.Relocate(m.Groups[1])))
);
|
|
|
|
|
|
|
|
|
|
|
|
// Matches ***italic bold*** text: the outer asterisks produce the italic node, while the
// captured inner **...** part is parsed with the bold matcher only.
private static readonly IMatcher<MarkdownNode> ItalicBoldFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
    // Capture any character until the earliest triple asterisk not followed by an asterisk
    new Regex(@"\*(\*\*.+?\*\*)\*(?!\*)", DefaultRegexOptions | RegexOptions.Singleline),
    (s, m) => new FormattingNode(FormattingKind.Italic, Parse(s.Relocate(m.Groups[1]), BoldFormattingNodeMatcher))
);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Matches _italic_ text (underscore syntax) and parses the captured content recursively.
private static readonly IMatcher<MarkdownNode> ItalicAltFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
    // Capture any character except underscore until an underscore
    // Closing underscore must not be followed by a word character
    new Regex(@"_([^_]+)_(?!\w)", DefaultRegexOptions | RegexOptions.Singleline),
    (s, m) => new FormattingNode(FormattingKind.Italic, Parse(s.Relocate(m.Groups[1])))
);
|
|
|
|
|
|
|
|
|
|
|
|
// Matches __underlined__ text and parses the captured content recursively.
private static readonly IMatcher<MarkdownNode> UnderlineFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
    // Capture any character until the earliest double underscore not followed by an underscore
    new Regex(@"__(.+?)__(?!_)", DefaultRegexOptions | RegexOptions.Singleline),
    (s, m) => new FormattingNode(FormattingKind.Underline, Parse(s.Relocate(m.Groups[1])))
);
|
|
|
|
|
|
|
|
|
|
|
|
// Matches ___italic underlined___ text: the outer underscores produce the italic node, while the
// captured inner __...__ part is parsed with the underline matcher only.
private static readonly IMatcher<MarkdownNode> ItalicUnderlineFormattingNodeMatcher =
    new RegexMatcher<MarkdownNode>(
        // Capture any character until the earliest triple underscore not followed by an underscore
        new Regex(@"_(__.+?__)_(?!_)", DefaultRegexOptions | RegexOptions.Singleline),
        (s, m) => new FormattingNode(
            FormattingKind.Italic,
            Parse(s.Relocate(m.Groups[1]), UnderlineFormattingNodeMatcher)
        )
    );
|
|
|
|
|
|
|
|
|
|
|
|
// Matches ~~strikethrough~~ text and parses the captured content recursively.
private static readonly IMatcher<MarkdownNode> StrikethroughFormattingNodeMatcher =
    new RegexMatcher<MarkdownNode>(
        // Capture any character until the earliest double tilde
        new Regex(@"~~(.+?)~~", DefaultRegexOptions | RegexOptions.Singleline),
        (s, m) => new FormattingNode(FormattingKind.Strikethrough, Parse(s.Relocate(m.Groups[1])))
    );
|
|
|
|
|
|
|
|
|
|
|
|
// Matches ||spoiler|| text and parses the captured content recursively.
private static readonly IMatcher<MarkdownNode> SpoilerFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
    // Capture any character until the earliest double pipe
    new Regex(@"\|\|(.+?)\|\|", DefaultRegexOptions | RegexOptions.Singleline),
    (s, m) => new FormattingNode(FormattingKind.Spoiler, Parse(s.Relocate(m.Groups[1])))
);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Matches a single "> quoted" line and parses the captured content recursively.
private static readonly IMatcher<MarkdownNode> SingleLineQuoteNodeMatcher = new RegexMatcher<MarkdownNode>(
    // Capture any character until the end of the line
    // Opening 'greater than' character must be followed by whitespace
    // Text content is optional
    new Regex(@"^>\s(.*\n?)", DefaultRegexOptions),
    (s, m) => new FormattingNode(FormattingKind.Quote, Parse(s.Relocate(m.Groups[1])))
);
|
|
|
|
|
|
|
|
|
|
|
|
// Matches two or more consecutive "> quoted" lines and folds them into a single quote node.
private static readonly IMatcher<MarkdownNode> RepeatedSingleLineQuoteNodeMatcher =
    new RegexMatcher<MarkdownNode>(
        // Repeatedly capture any character until the end of the line
        // This one is tricky as it ends up producing multiple separate captures which need to be joined
        new Regex(@"(?:^>\s(.*\n?)){2,}", DefaultRegexOptions),
        (_, m) => new FormattingNode(
            FormattingKind.Quote,
            Parse(
                // Combine all captures into a single string
                string.Concat(m.Groups[1].Captures.Select(c => c.Value))
            )
        )
    );
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Matches a ">>> quote" that extends to the end of the input and parses the captured content recursively.
private static readonly IMatcher<MarkdownNode> MultiLineQuoteNodeMatcher = new RegexMatcher<MarkdownNode>(
    // Capture any character until the end of the input
    // Opening 'greater than' characters must be followed by whitespace
    new Regex(@"^>>>\s(.+)", DefaultRegexOptions | RegexOptions.Singleline),
    (s, m) => new FormattingNode(FormattingKind.Quote, Parse(s.Relocate(m.Groups[1])))
);
|
|
|
|
|
|
|
|
|
|
|
|
/* Code blocks */
|
|
|
|
/* Code blocks */
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Matches `inline code` (or ``inline code``); the content is taken verbatim and is not parsed further.
private static readonly IMatcher<MarkdownNode> InlineCodeBlockNodeMatcher = new RegexMatcher<MarkdownNode>(
    // Capture any character except backtick until a backtick
    // Blank lines at the beginning and end of content are trimmed
    // There can be either one or two backticks, but equal number on both sides
    new Regex(@"(`{1,2})([^`]+)\1", DefaultRegexOptions | RegexOptions.Singleline),
    (_, m) => new InlineCodeBlockNode(m.Groups[2].Value.Trim('\r', '\n'))
);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Matches a ```fenced code block```; group 1 is the optional language identifier, group 2 the code.
private static readonly IMatcher<MarkdownNode> MultiLineCodeBlockNodeMatcher = new RegexMatcher<MarkdownNode>(
    // Capture language identifier and then any character until the earliest triple backtick
    // Language identifier is one word immediately after opening backticks, followed immediately by newline
    // Blank lines at the beginning and end of content are trimmed
    new Regex(@"```(?:(\w*)\n)?(.+?)```", DefaultRegexOptions | RegexOptions.Singleline),
    (_, m) => new MultiLineCodeBlockNode(m.Groups[1].Value, m.Groups[2].Value.Trim('\r', '\n'))
);
|
|
|
|
|
|
|
|
|
|
|
|
/* Mentions */
|
|
|
|
/* Mentions */
|
|
|
|
|
|
|
|
|
|
|
|
// Capture @everyone
private static readonly IMatcher<MarkdownNode> EveryoneMentionNodeMatcher = new StringMatcher<MarkdownNode>(
    "@everyone",
    _ => new MentionNode(null, MentionKind.Everyone)
);
|
|
|
|
|
|
|
|
|
|
|
|
// Capture @here
private static readonly IMatcher<MarkdownNode> HereMentionNodeMatcher = new StringMatcher<MarkdownNode>(
    "@here",
    _ => new MentionNode(null, MentionKind.Here)
);
|
|
|
|
|
|
|
|
|
|
|
|
// Matches a user mention; the captured digits are parsed as the mentioned user's snowflake.
private static readonly IMatcher<MarkdownNode> UserMentionNodeMatcher = new RegexMatcher<MarkdownNode>(
    // Capture <@123456> or <@!123456>
    new Regex(@"<@!?(\d+)>", DefaultRegexOptions),
    (_, m) => new MentionNode(Snowflake.TryParse(m.Groups[1].Value), MentionKind.User)
);
|
|
|
|
|
|
|
|
|
|
|
|
// Matches a channel mention; the captured digits are parsed as the mentioned channel's snowflake.
private static readonly IMatcher<MarkdownNode> ChannelMentionNodeMatcher = new RegexMatcher<MarkdownNode>(
    // Capture <#123456> (an optional '!' after the '#' is also accepted)
    // The '#' is escaped because an unescaped '#' starts a comment under IgnorePatternWhitespace
    new Regex(@"<\#!?(\d+)>", DefaultRegexOptions),
    (_, m) => new MentionNode(Snowflake.TryParse(m.Groups[1].Value), MentionKind.Channel)
);
|
|
|
|
|
|
|
|
|
|
|
|
// Matches a role mention; the captured digits are parsed as the mentioned role's snowflake.
private static readonly IMatcher<MarkdownNode> RoleMentionNodeMatcher = new RegexMatcher<MarkdownNode>(
    // Capture <@&123456>
    new Regex(@"<@&(\d+)>", DefaultRegexOptions),
    (_, m) => new MentionNode(Snowflake.TryParse(m.Groups[1].Value), MentionKind.Role)
);
|
|
|
|
|
|
|
|
|
|
|
|
/* Emoji */
|
|
|
|
/* Emoji */
|
|
|
|
|
|
|
|
|
|
|
|
// Matches standard (Unicode) emoji.
// (this does not match all emoji in Discord but it's reasonably accurate enough)
//
// The pattern is assembled from whitespace-free fragments on purpose: whitespace inside a
// character class is matched literally even when IgnorePatternWhitespace is enabled, so laying
// the class out across several indented lines of one literal would make spaces and line breaks
// match as emoji.
private static readonly IMatcher<MarkdownNode> StandardEmojiNodeMatcher = new RegexMatcher<MarkdownNode>(
    new Regex(
        @"(" +
        // Country flag emoji (two regional indicator surrogate pairs)
        @"(?:\uD83C[\uDDE6-\uDDFF]){2}|" +
        // Digit emoji (digit followed by enclosing mark)
        @"\d\p{Me}|" +
        // Surrogate pair
        @"\p{Cs}{2}|" +
        // Miscellaneous characters
        @"[" +
        @"\u2600-\u2604" +
        @"\u260E\u2611" +
        @"\u2614-\u2615" +
        @"\u2618\u261D\u2620" +
        @"\u2622-\u2623" +
        @"\u2626\u262A" +
        @"\u262E-\u262F" +
        @"\u2638-\u263A" +
        @"\u2640\u2642" +
        @"\u2648-\u2653" +
        @"\u265F-\u2660" +
        @"\u2663" +
        @"\u2665-\u2666" +
        @"\u2668\u267B" +
        @"\u267E-\u267F" +
        @"\u2692-\u2697" +
        @"\u2699" +
        @"\u269B-\u269C" +
        @"\u26A0-\u26A1" +
        @"\u26A7" +
        @"\u26AA-\u26AB" +
        @"\u26B0-\u26B1" +
        @"\u26BD-\u26BE" +
        @"\u26C4-\u26C5" +
        @"\u26C8" +
        @"\u26CE-\u26CF" +
        @"\u26D1" +
        @"\u26D3-\u26D4" +
        @"\u26E9-\u26EA" +
        @"\u26F0-\u26F5" +
        @"\u26F7-\u26FA" +
        @"\u26FD" +
        @"]" +
        @")",
        DefaultRegexOptions
    ),
    (_, m) => new EmojiNode(m.Groups[1].Value)
);
|
|
|
|
|
|
|
|
|
|
|
|
// Matches an emoji written as a :code: and resolves it through EmojiIndex.
// No emoji node is produced when EmojiIndex.TryGetName returns null for the code.
private static readonly IMatcher<MarkdownNode> CodedStandardEmojiNodeMatcher = new RegexMatcher<MarkdownNode>(
    // Capture :thinking: for known emoji codes
    new Regex(@":([\w_]+):", DefaultRegexOptions),
    (_, m) => EmojiIndex.TryGetName(m.Groups[1].Value)?.Pipe(n => new EmojiNode(n))
);
|
|
|
|
|
|
|
|
|
|
|
|
// Capture <:lul:123456> or <a:lul:123456>
|
|
|
|
|
|
|
|
private static readonly IMatcher<MarkdownNode> CustomEmojiNodeMatcher = new RegexMatcher<MarkdownNode>(
|
|
|
|
private static readonly IMatcher<MarkdownNode> CustomEmojiNodeMatcher = new RegexMatcher<MarkdownNode>(
|
|
|
|
new Regex("<(a)?:(.+?):(\\d+?)>", DefaultRegexOptions),
|
|
|
|
// Capture <:lul:123456> or <a:lul:123456>
|
|
|
|
|
|
|
|
new Regex(@"<(a)?:(.+?):(\d+?)>", DefaultRegexOptions),
|
|
|
|
(_, m) => new EmojiNode(
|
|
|
|
(_, m) => new EmojiNode(
|
|
|
|
Snowflake.TryParse(m.Groups[3].Value),
|
|
|
|
Snowflake.TryParse(m.Groups[3].Value),
|
|
|
|
m.Groups[2].Value,
|
|
|
|
m.Groups[2].Value,
|
|
|
@ -188,60 +226,60 @@ internal static partial class MarkdownParser
|
|
|
|
|
|
|
|
|
|
|
|
/* Links */
|
|
|
|
/* Links */
|
|
|
|
|
|
|
|
|
|
|
|
// Matches a titled link; group 1 (the title) is parsed recursively, group 2 is used as the URL.
private static readonly IMatcher<MarkdownNode> TitledLinkNodeMatcher = new RegexMatcher<MarkdownNode>(
    // Capture [title](link)
    new Regex(@"\[(.+?)\]\((.+?)\)", DefaultRegexOptions),
    (s, m) => new LinkNode(m.Groups[2].Value, Parse(s.Relocate(m.Groups[1])))
);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Matches a bare http:// or https:// URL appearing in the text.
private static readonly IMatcher<MarkdownNode> AutoLinkNodeMatcher = new RegexMatcher<MarkdownNode>(
    // Capture any non-whitespace character after http:// or https://
    // until the last punctuation character or whitespace
    new Regex(@"(https?://\S*[^\.,:;""'\s])", DefaultRegexOptions),
    (_, m) => new LinkNode(m.Groups[1].Value)
);
|
|
|
|
|
|
|
|
|
|
|
|
// Matches a URL wrapped in angle brackets; only the URL itself is captured.
private static readonly IMatcher<MarkdownNode> HiddenLinkNodeMatcher = new RegexMatcher<MarkdownNode>(
    // Same as auto link but also surrounded by angular brackets
    new Regex(@"<(https?://\S*[^\.,:;""'\s])>", DefaultRegexOptions),
    (_, m) => new LinkNode(m.Groups[1].Value)
);
|
|
|
|
|
|
|
|
|
|
|
|
/* Text */
|
|
|
|
/* Text */
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Emits the shrug kaomoji as plain text.
private static readonly IMatcher<MarkdownNode> ShrugTextNodeMatcher = new StringMatcher<MarkdownNode>(
    // Capture the shrug kaomoji
    // This escapes it from matching for formatting
    @"¯\_(ツ)_/¯",
    s => new TextNode(s.ToString())
);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Emits a fixed set of symbol characters as plain text instead of emoji.
private static readonly IMatcher<MarkdownNode> IgnoredEmojiTextNodeMatcher = new RegexMatcher<MarkdownNode>(
    // Capture some specific emoji that don't get rendered
    // This escapes it from matching for emoji
    new Regex(@"([\u26A7\u2640\u2642\u2695\u267E\u00A9\u00AE\u2122])", DefaultRegexOptions),
    (_, m) => new TextNode(m.Groups[1].Value)
);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Emits a backslash-escaped symbol as plain text, dropping the backslash.
private static readonly IMatcher<MarkdownNode> EscapedSymbolTextNodeMatcher = new RegexMatcher<MarkdownNode>(
    // Capture any "symbol/other" character or surrogate pair preceded by a backslash
    // This escapes it from matching for emoji
    new Regex(@"\\(\p{So}|\p{Cs}{2})", DefaultRegexOptions),
    (_, m) => new TextNode(m.Groups[1].Value)
);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Emits a backslash-escaped character as plain text, dropping the backslash.
private static readonly IMatcher<MarkdownNode> EscapedCharacterTextNodeMatcher = new RegexMatcher<MarkdownNode>(
    // Capture any non-whitespace, non latin alphanumeric character preceded by a backslash
    // This escapes it from matching for formatting or other tokens
    new Regex(@"\\([^a-zA-Z0-9\s])", DefaultRegexOptions),
    (_, m) => new TextNode(m.Groups[1].Value)
);
|
|
|
|
|
|
|
|
|
|
|
|
/* Misc */
|
|
|
|
/* Misc */
|
|
|
|
|
|
|
|
|
|
|
|
// Capture <t:12345678> or <t:12345678:R>
|
|
|
|
|
|
|
|
private static readonly IMatcher<MarkdownNode> UnixTimestampNodeMatcher = new RegexMatcher<MarkdownNode>(
|
|
|
|
private static readonly IMatcher<MarkdownNode> UnixTimestampNodeMatcher = new RegexMatcher<MarkdownNode>(
|
|
|
|
new Regex("<t:(-?\\d+)(?::\\w)?>", DefaultRegexOptions),
|
|
|
|
// Capture <t:12345678> or <t:12345678:R>
|
|
|
|
|
|
|
|
new Regex(@"<t:(-?\d+)(?::\w)?>", DefaultRegexOptions),
|
|
|
|
(_, m) =>
|
|
|
|
(_, m) =>
|
|
|
|
{
|
|
|
|
{
|
|
|
|
// TODO: support formatting parameters
|
|
|
|
// TODO: support formatting parameters
|
|
|
|