Add support for lists in markdown

pull/1037/head
Tyrrrz 1 year ago
parent 469a731892
commit 6bbde4ccdc

@ -25,59 +25,59 @@ internal partial class HtmlMarkdownVisitor : MarkdownVisitor
_isJumbo = isJumbo; _isJumbo = isJumbo;
} }
protected override async ValueTask<MarkdownNode> VisitTextAsync( protected override ValueTask VisitTextAsync(
TextNode text, TextNode text,
CancellationToken cancellationToken = default) CancellationToken cancellationToken = default)
{ {
_buffer.Append(HtmlEncode(text.Text)); _buffer.Append(HtmlEncode(text.Text));
return await base.VisitTextAsync(text, cancellationToken); return default;
} }
protected override async ValueTask<MarkdownNode> VisitFormattingAsync( protected override async ValueTask VisitFormattingAsync(
FormattingNode formatting, FormattingNode formatting,
CancellationToken cancellationToken = default) CancellationToken cancellationToken = default)
{ {
var (openingTag, closingTag) = formatting.Kind switch var (openingTag, closingTag) = formatting.Kind switch
{ {
FormattingKind.Bold => ( FormattingKind.Bold => (
// language=HTML // lang=html
"<strong>", "<strong>",
// language=HTML // lang=html
"</strong>" "</strong>"
), ),
FormattingKind.Italic => ( FormattingKind.Italic => (
// language=HTML // lang=html
"<em>", "<em>",
// language=HTML // lang=html
"</em>" "</em>"
), ),
FormattingKind.Underline => ( FormattingKind.Underline => (
// language=HTML // lang=html
"<u>", "<u>",
// language=HTML // lang=html
"</u>" "</u>"
), ),
FormattingKind.Strikethrough => ( FormattingKind.Strikethrough => (
// language=HTML // lang=html
"<s>", "<s>",
// language=HTML // lang=html
"</s>" "</s>"
), ),
FormattingKind.Spoiler => ( FormattingKind.Spoiler => (
// language=HTML // lang=html
"""<span class="chatlog__markdown-spoiler chatlog__markdown-spoiler--hidden" onclick="showSpoiler(event, this)">""", """<span class="chatlog__markdown-spoiler chatlog__markdown-spoiler--hidden" onclick="showSpoiler(event, this)">""",
// language=HTML // lang=html
"""</span>""" """</span>"""
), ),
FormattingKind.Quote => ( FormattingKind.Quote => (
// language=HTML // lang=html
"""<div class="chatlog__markdown-quote"><div class="chatlog__markdown-quote-border"></div><div class="chatlog__markdown-quote-content">""", """<div class="chatlog__markdown-quote"><div class="chatlog__markdown-quote-border"></div><div class="chatlog__markdown-quote-content">""",
// language=HTML // lang=html
"""</div></div>""" """</div></div>"""
), ),
@ -85,13 +85,11 @@ internal partial class HtmlMarkdownVisitor : MarkdownVisitor
}; };
_buffer.Append(openingTag); _buffer.Append(openingTag);
var result = await base.VisitFormattingAsync(formatting, cancellationToken); await VisitAsync(formatting.Children, cancellationToken);
_buffer.Append(closingTag); _buffer.Append(closingTag);
return result;
} }
protected override async ValueTask<MarkdownNode> VisitHeaderAsync( protected override async ValueTask VisitHeaderAsync(
HeaderNode header, HeaderNode header,
CancellationToken cancellationToken = default) CancellationToken cancellationToken = default)
{ {
@ -100,31 +98,63 @@ internal partial class HtmlMarkdownVisitor : MarkdownVisitor
$"<h{header.Level}>" $"<h{header.Level}>"
); );
var result = await base.VisitHeaderAsync(header, cancellationToken); await VisitAsync(header.Children, cancellationToken);
_buffer.Append( _buffer.Append(
// lang=html // lang=html
$"</h{header.Level}>" $"</h{header.Level}>"
); );
}
/// <summary>
/// Writes a list node to the output buffer as an HTML unordered list:
/// the opening tag, then every item of the list, then the closing tag.
/// </summary>
/// <param name="list">List node whose items are visited between the tags.</param>
/// <param name="cancellationToken">Token forwarded to the visit of the list items.</param>
protected override async ValueTask VisitListAsync(
    ListNode list,
    CancellationToken cancellationToken = default)
{
    // lang=html
    const string openingTag = "<ul>";
    // lang=html
    const string closingTag = "</ul>";

    _buffer.Append(openingTag);

    // The items append their own markup to the buffer while being visited
    await VisitAsync(list.Items, cancellationToken);

    _buffer.Append(closingTag);
}
protected override async ValueTask VisitListItemAsync(
ListItemNode listItem,
CancellationToken cancellationToken = default)
{
_buffer.Append(
// lang=html
"<li>"
);
return result; await VisitAsync(listItem.Children, cancellationToken);
_buffer.Append(
// lang=html
"</li>"
);
} }
protected override async ValueTask<MarkdownNode> VisitInlineCodeBlockAsync( protected override ValueTask VisitInlineCodeBlockAsync(
InlineCodeBlockNode inlineCodeBlock, InlineCodeBlockNode inlineCodeBlock,
CancellationToken cancellationToken = default) CancellationToken cancellationToken = default)
{ {
_buffer.Append( _buffer.Append(
// language=HTML // lang=html
$""" $"""
<code class="chatlog__markdown-pre chatlog__markdown-pre--inline">{HtmlEncode(inlineCodeBlock.Code)}</code> <code class="chatlog__markdown-pre chatlog__markdown-pre--inline">{HtmlEncode(inlineCodeBlock.Code)}</code>
""" """
); );
return await base.VisitInlineCodeBlockAsync(inlineCodeBlock, cancellationToken); return default;
} }
protected override async ValueTask<MarkdownNode> VisitMultiLineCodeBlockAsync( protected override ValueTask VisitMultiLineCodeBlockAsync(
MultiLineCodeBlockNode multiLineCodeBlock, MultiLineCodeBlockNode multiLineCodeBlock,
CancellationToken cancellationToken = default) CancellationToken cancellationToken = default)
{ {
@ -133,16 +163,16 @@ internal partial class HtmlMarkdownVisitor : MarkdownVisitor
: "nohighlight"; : "nohighlight";
_buffer.Append( _buffer.Append(
// language=HTML // lang=html
$""" $"""
<code class="chatlog__markdown-pre chatlog__markdown-pre--multiline {highlightClass}">{HtmlEncode(multiLineCodeBlock.Code)}</code> <code class="chatlog__markdown-pre chatlog__markdown-pre--multiline {highlightClass}">{HtmlEncode(multiLineCodeBlock.Code)}</code>
""" """
); );
return await base.VisitMultiLineCodeBlockAsync(multiLineCodeBlock, cancellationToken); return default;
} }
protected override async ValueTask<MarkdownNode> VisitLinkAsync( protected override async ValueTask VisitLinkAsync(
LinkNode link, LinkNode link,
CancellationToken cancellationToken = default) CancellationToken cancellationToken = default)
{ {
@ -154,21 +184,21 @@ internal partial class HtmlMarkdownVisitor : MarkdownVisitor
_buffer.Append( _buffer.Append(
!string.IsNullOrWhiteSpace(linkedMessageId) !string.IsNullOrWhiteSpace(linkedMessageId)
// language=HTML // lang=html
? $"""<a href="{HtmlEncode(link.Url)}" onclick="scrollToMessage(event, '{linkedMessageId}')">""" ? $"""<a href="{HtmlEncode(link.Url)}" onclick="scrollToMessage(event, '{linkedMessageId}')">"""
// language=HTML // lang=html
: $"""<a href="{HtmlEncode(link.Url)}">""" : $"""<a href="{HtmlEncode(link.Url)}">"""
); );
var result = await base.VisitLinkAsync(link, cancellationToken); await VisitAsync(link.Children, cancellationToken);
// language=HTML _buffer.Append(
_buffer.Append("</a>"); // lang=html
"</a>"
return result; );
} }
protected override async ValueTask<MarkdownNode> VisitEmojiAsync( protected override async ValueTask VisitEmojiAsync(
EmojiNode emoji, EmojiNode emoji,
CancellationToken cancellationToken = default) CancellationToken cancellationToken = default)
{ {
@ -176,7 +206,7 @@ internal partial class HtmlMarkdownVisitor : MarkdownVisitor
var jumboClass = _isJumbo ? "chatlog__emoji--large" : ""; var jumboClass = _isJumbo ? "chatlog__emoji--large" : "";
_buffer.Append( _buffer.Append(
// language=HTML // lang=html
$""" $"""
<img <img
loading="lazy" loading="lazy"
@ -186,18 +216,15 @@ internal partial class HtmlMarkdownVisitor : MarkdownVisitor
src="{await _context.ResolveAssetUrlAsync(emojiImageUrl, cancellationToken)}"> src="{await _context.ResolveAssetUrlAsync(emojiImageUrl, cancellationToken)}">
""" """
); );
return await base.VisitEmojiAsync(emoji, cancellationToken);
} }
protected override async ValueTask<MarkdownNode> VisitMentionAsync( protected override async ValueTask VisitMentionAsync(MentionNode mention,
MentionNode mention,
CancellationToken cancellationToken = default) CancellationToken cancellationToken = default)
{ {
if (mention.Kind == MentionKind.Everyone) if (mention.Kind == MentionKind.Everyone)
{ {
_buffer.Append( _buffer.Append(
// language=HTML // lang=html
""" """
<span class="chatlog__markdown-mention">@everyone</span> <span class="chatlog__markdown-mention">@everyone</span>
""" """
@ -206,7 +233,7 @@ internal partial class HtmlMarkdownVisitor : MarkdownVisitor
else if (mention.Kind == MentionKind.Here) else if (mention.Kind == MentionKind.Here)
{ {
_buffer.Append( _buffer.Append(
// language=HTML // lang=html
""" """
<span class="chatlog__markdown-mention">@here</span> <span class="chatlog__markdown-mention">@here</span>
""" """
@ -225,7 +252,7 @@ internal partial class HtmlMarkdownVisitor : MarkdownVisitor
var nick = member?.Nick ?? member?.User.Name ?? "Unknown"; var nick = member?.Nick ?? member?.User.Name ?? "Unknown";
_buffer.Append( _buffer.Append(
// language=HTML // lang=html
$""" $"""
<span class="chatlog__markdown-mention" title="{HtmlEncode(fullName)}">@{HtmlEncode(nick)}</span> <span class="chatlog__markdown-mention" title="{HtmlEncode(fullName)}">@{HtmlEncode(nick)}</span>
""" """
@ -238,7 +265,7 @@ internal partial class HtmlMarkdownVisitor : MarkdownVisitor
var name = channel?.Name ?? "deleted-channel"; var name = channel?.Name ?? "deleted-channel";
_buffer.Append( _buffer.Append(
// language=HTML // lang=html
$""" $"""
<span class="chatlog__markdown-mention">{symbol}{HtmlEncode(name)}</span> <span class="chatlog__markdown-mention">{symbol}{HtmlEncode(name)}</span>
""" """
@ -254,20 +281,18 @@ internal partial class HtmlMarkdownVisitor : MarkdownVisitor
? $""" ? $"""
color: rgb({color.Value.R}, {color.Value.G}, {color.Value.B}); background-color: rgba({color.Value.R}, {color.Value.G}, {color.Value.B}, 0.1); color: rgb({color.Value.R}, {color.Value.G}, {color.Value.B}); background-color: rgba({color.Value.R}, {color.Value.G}, {color.Value.B}, 0.1);
""" """
: ""; : null;
_buffer.Append( _buffer.Append(
// language=HTML // lang=html
$""" $"""
<span class="chatlog__markdown-mention" style="{style}">@{HtmlEncode(name)}</span> <span class="chatlog__markdown-mention" style="{style}">@{HtmlEncode(name)}</span>
""" """
); );
} }
return await base.VisitMentionAsync(mention, cancellationToken);
} }
protected override async ValueTask<MarkdownNode> VisitTimestampAsync( protected override ValueTask VisitTimestampAsync(
TimestampNode timestamp, TimestampNode timestamp,
CancellationToken cancellationToken = default) CancellationToken cancellationToken = default)
{ {
@ -280,13 +305,13 @@ internal partial class HtmlMarkdownVisitor : MarkdownVisitor
var formattedLong = timestamp.Instant?.ToLocalString("dddd, MMMM d, yyyy h:mm tt") ?? ""; var formattedLong = timestamp.Instant?.ToLocalString("dddd, MMMM d, yyyy h:mm tt") ?? "";
_buffer.Append( _buffer.Append(
// language=HTML // lang=html
$""" $"""
<span class="chatlog__markdown-timestamp" title="{HtmlEncode(formattedLong)}">{HtmlEncode(formatted)}</span> <span class="chatlog__markdown-timestamp" title="{HtmlEncode(formattedLong)}">{HtmlEncode(formatted)}</span>
""" """
); );
return await base.VisitTimestampAsync(timestamp, cancellationToken); return default;
} }
} }

@ -18,15 +18,15 @@ internal partial class PlainTextMarkdownVisitor : MarkdownVisitor
_buffer = buffer; _buffer = buffer;
} }
protected override async ValueTask<MarkdownNode> VisitTextAsync( protected override ValueTask VisitTextAsync(
TextNode text, TextNode text,
CancellationToken cancellationToken = default) CancellationToken cancellationToken = default)
{ {
_buffer.Append(text.Text); _buffer.Append(text.Text);
return await base.VisitTextAsync(text, cancellationToken); return default;
} }
protected override async ValueTask<MarkdownNode> VisitEmojiAsync( protected override ValueTask VisitEmojiAsync(
EmojiNode emoji, EmojiNode emoji,
CancellationToken cancellationToken = default) CancellationToken cancellationToken = default)
{ {
@ -36,11 +36,10 @@ internal partial class PlainTextMarkdownVisitor : MarkdownVisitor
: emoji.Name : emoji.Name
); );
return await base.VisitEmojiAsync(emoji, cancellationToken); return default;
} }
protected override async ValueTask<MarkdownNode> VisitMentionAsync( protected override async ValueTask VisitMentionAsync(MentionNode mention,
MentionNode mention,
CancellationToken cancellationToken = default) CancellationToken cancellationToken = default)
{ {
if (mention.Kind == MentionKind.Everyone) if (mention.Kind == MentionKind.Everyone)
@ -82,11 +81,9 @@ internal partial class PlainTextMarkdownVisitor : MarkdownVisitor
_buffer.Append($"@{name}"); _buffer.Append($"@{name}");
} }
return await base.VisitMentionAsync(mention, cancellationToken);
} }
protected override async ValueTask<MarkdownNode> VisitTimestampAsync( protected override ValueTask VisitTimestampAsync(
TimestampNode timestamp, TimestampNode timestamp,
CancellationToken cancellationToken = default) CancellationToken cancellationToken = default)
{ {
@ -98,7 +95,7 @@ internal partial class PlainTextMarkdownVisitor : MarkdownVisitor
: "Invalid date" : "Invalid date"
); );
return await base.VisitTimestampAsync(timestamp, cancellationToken); return default;
} }
} }

@ -759,27 +759,21 @@
} }
.chatlog__markdown h1 { .chatlog__markdown h1 {
margin-block: 0; margin: 1rem 0 0.5rem;
margin-top: 1rem;
margin-bottom: 0.5rem;
color: @Themed("#f2f3f5", "#060607"); color: @Themed("#f2f3f5", "#060607");
font-size: 1.5rem; font-size: 1.5rem;
line-height: 1; line-height: 1;
} }
.chatlog__markdown h2 { .chatlog__markdown h2 {
margin-block: 0; margin: 1rem 0 0.5rem;
margin-top: 1rem;
margin-bottom: 0.5rem;
color: @Themed("#f2f3f5", "#060607"); color: @Themed("#f2f3f5", "#060607");
font-size: 1.25rem; font-size: 1.25rem;
line-height: 1; line-height: 1;
} }
.chatlog__markdown h3 { .chatlog__markdown h3 {
margin-block: 0; margin: 1rem 0 0.5rem;
margin-top: 1rem;
margin-bottom: 0.5rem;
color: @Themed("#f2f3f5", "#060607"); color: @Themed("#f2f3f5", "#060607");
font-size: 1rem; font-size: 1rem;
line-height: 1; line-height: 1;
@ -789,6 +783,11 @@
margin-top: 0.5rem; margin-top: 0.5rem;
} }
/* Both list types are scoped to markdown content. In a selector list every
   comma-separated entry stands alone, so a bare `ol` would match every
   ordered list in the document rather than only those inside markdown. */
.chatlog__markdown ul,
.chatlog__markdown ol {
    margin: 0 0 0 1rem;
    padding: 0;
}
.chatlog__markdown-preserve { .chatlog__markdown-preserve {
white-space: pre-wrap; white-space: pre-wrap;
} }

@ -0,0 +1,5 @@
using System.Collections.Generic;
namespace DiscordChatExporter.Core.Markdown;
/// <summary>
/// Markdown node for a single list item.
/// </summary>
/// <param name="Children">Nodes that make up the content of the item.</param>
internal record ListItemNode(IReadOnlyList<MarkdownNode> Children) : MarkdownNode, IContainerNode;

@ -0,0 +1,5 @@
using System.Collections.Generic;
namespace DiscordChatExporter.Core.Markdown;
/// <summary>
/// Markdown node for a list.
/// Its entries are exposed through <c>Items</c>; the node does not implement <c>IContainerNode</c>.
/// </summary>
/// <param name="Items">Item nodes that belong to the list.</param>
internal record ListNode(IReadOnlyList<ListItemNode> Items) : MarkdownNode;

@ -24,13 +24,13 @@ internal static partial class MarkdownParser
/* Formatting */ /* Formatting */
private static readonly IMatcher<MarkdownNode> BoldFormattingNodeMatcher = new RegexMatcher<MarkdownNode>( private static readonly IMatcher<MarkdownNode> BoldFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
// Capture any character until the earliest double asterisk not followed by an asterisk. // There must be exactly two closing asterisks.
new Regex(@"\*\*(.+?)\*\*(?!\*)", DefaultRegexOptions | RegexOptions.Singleline), new Regex(@"\*\*(.+?)\*\*(?!\*)", DefaultRegexOptions | RegexOptions.Singleline),
(s, m) => new FormattingNode(FormattingKind.Bold, Parse(s.Relocate(m.Groups[1]))) (s, m) => new FormattingNode(FormattingKind.Bold, Parse(s.Relocate(m.Groups[1])))
); );
private static readonly IMatcher<MarkdownNode> ItalicFormattingNodeMatcher = new RegexMatcher<MarkdownNode>( private static readonly IMatcher<MarkdownNode> ItalicFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
// Capture any character until the earliest single asterisk not preceded or followed by an asterisk. // There must be exactly one closing asterisk.
// Opening asterisk must not be followed by whitespace. // Opening asterisk must not be followed by whitespace.
// Closing asterisk must not be preceded by whitespace. // Closing asterisk must not be preceded by whitespace.
new Regex(@"\*(?!\s)(.+?)(?<!\s|\*)\*(?!\*)", DefaultRegexOptions | RegexOptions.Singleline), new Regex(@"\*(?!\s)(.+?)(?<!\s|\*)\*(?!\*)", DefaultRegexOptions | RegexOptions.Singleline),
@ -38,27 +38,26 @@ internal static partial class MarkdownParser
); );
private static readonly IMatcher<MarkdownNode> ItalicBoldFormattingNodeMatcher = new RegexMatcher<MarkdownNode>( private static readonly IMatcher<MarkdownNode> ItalicBoldFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
// Capture any character until the earliest triple asterisk not followed by an asterisk. // There must be exactly three closing asterisks.
new Regex(@"\*(\*\*.+?\*\*)\*(?!\*)", DefaultRegexOptions | RegexOptions.Singleline), new Regex(@"\*(\*\*.+?\*\*)\*(?!\*)", DefaultRegexOptions | RegexOptions.Singleline),
(s, m) => new FormattingNode(FormattingKind.Italic, Parse(s.Relocate(m.Groups[1]), BoldFormattingNodeMatcher)) (s, m) => new FormattingNode(FormattingKind.Italic, Parse(s.Relocate(m.Groups[1]), BoldFormattingNodeMatcher))
); );
private static readonly IMatcher<MarkdownNode> ItalicAltFormattingNodeMatcher = new RegexMatcher<MarkdownNode>( private static readonly IMatcher<MarkdownNode> ItalicAltFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
// Capture any character except underscore until an underscore.
// Closing underscore must not be followed by a word character. // Closing underscore must not be followed by a word character.
new Regex(@"_([^_]+)_(?!\w)", DefaultRegexOptions | RegexOptions.Singleline), new Regex(@"_(.+?)_(?!\w)", DefaultRegexOptions | RegexOptions.Singleline),
(s, m) => new FormattingNode(FormattingKind.Italic, Parse(s.Relocate(m.Groups[1]))) (s, m) => new FormattingNode(FormattingKind.Italic, Parse(s.Relocate(m.Groups[1])))
); );
private static readonly IMatcher<MarkdownNode> UnderlineFormattingNodeMatcher = new RegexMatcher<MarkdownNode>( private static readonly IMatcher<MarkdownNode> UnderlineFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
// Capture any character until the earliest double underscore not followed by an underscore. // There must be exactly two closing underscores.
new Regex(@"__(.+?)__(?!_)", DefaultRegexOptions | RegexOptions.Singleline), new Regex(@"__(.+?)__(?!_)", DefaultRegexOptions | RegexOptions.Singleline),
(s, m) => new FormattingNode(FormattingKind.Underline, Parse(s.Relocate(m.Groups[1]))) (s, m) => new FormattingNode(FormattingKind.Underline, Parse(s.Relocate(m.Groups[1])))
); );
private static readonly IMatcher<MarkdownNode> ItalicUnderlineFormattingNodeMatcher = private static readonly IMatcher<MarkdownNode> ItalicUnderlineFormattingNodeMatcher =
new RegexMatcher<MarkdownNode>( new RegexMatcher<MarkdownNode>(
// Capture any character until the earliest triple underscore not followed by an underscore. // There must be exactly three closing underscores.
new Regex(@"_(__.+?__)_(?!_)", DefaultRegexOptions | RegexOptions.Singleline), new Regex(@"_(__.+?__)_(?!_)", DefaultRegexOptions | RegexOptions.Singleline),
(s, m) => new FormattingNode( (s, m) => new FormattingNode(
FormattingKind.Italic, FormattingKind.Italic,
@ -67,68 +66,61 @@ internal static partial class MarkdownParser
); );
private static readonly IMatcher<MarkdownNode> StrikethroughFormattingNodeMatcher = new RegexMatcher<MarkdownNode>( private static readonly IMatcher<MarkdownNode> StrikethroughFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
// Capture any character until the earliest double tilde.
new Regex(@"~~(.+?)~~", DefaultRegexOptions | RegexOptions.Singleline), new Regex(@"~~(.+?)~~", DefaultRegexOptions | RegexOptions.Singleline),
(s, m) => new FormattingNode(FormattingKind.Strikethrough, Parse(s.Relocate(m.Groups[1]))) (s, m) => new FormattingNode(FormattingKind.Strikethrough, Parse(s.Relocate(m.Groups[1])))
); );
private static readonly IMatcher<MarkdownNode> SpoilerFormattingNodeMatcher = new RegexMatcher<MarkdownNode>( private static readonly IMatcher<MarkdownNode> SpoilerFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
// Capture any character until the earliest double pipe.
new Regex(@"\|\|(.+?)\|\|", DefaultRegexOptions | RegexOptions.Singleline), new Regex(@"\|\|(.+?)\|\|", DefaultRegexOptions | RegexOptions.Singleline),
(s, m) => new FormattingNode(FormattingKind.Spoiler, Parse(s.Relocate(m.Groups[1]))) (s, m) => new FormattingNode(FormattingKind.Spoiler, Parse(s.Relocate(m.Groups[1])))
); );
private static readonly IMatcher<MarkdownNode> SingleLineQuoteNodeMatcher = new RegexMatcher<MarkdownNode>( private static readonly IMatcher<MarkdownNode> SingleLineQuoteNodeMatcher = new RegexMatcher<MarkdownNode>(
// Capture any character until the end of the line. // Include the linebreak in the content so that the lines are preserved in quotes.
// Opening 'greater than' character must be followed by whitespace.
// Consume the newline character so that it's not included in the content.
new Regex(@"^>\s(.+\n?)", DefaultRegexOptions), new Regex(@"^>\s(.+\n?)", DefaultRegexOptions),
(s, m) => new FormattingNode(FormattingKind.Quote, Parse(s.Relocate(m.Groups[1]))) (s, m) => new FormattingNode(FormattingKind.Quote, Parse(s.Relocate(m.Groups[1])))
); );
private static readonly IMatcher<MarkdownNode> RepeatedSingleLineQuoteNodeMatcher = new RegexMatcher<MarkdownNode>( private static readonly IMatcher<MarkdownNode> RepeatedSingleLineQuoteNodeMatcher = new RegexMatcher<MarkdownNode>(
// Repeatedly capture any character until the end of the line. // Include the linebreaks in the content, so that the lines are preserved in quotes.
// Consume the newline character so that it's not included in the content.
new Regex(@"(?:^>\s(.+\n?)){2,}", DefaultRegexOptions), new Regex(@"(?:^>\s(.+\n?)){2,}", DefaultRegexOptions),
(_, m) => new FormattingNode( (s, m) => new FormattingNode(
FormattingKind.Quote, FormattingKind.Quote,
Parse( m.Groups[1].Captures.SelectMany(c => Parse(s.Relocate(c))).ToArray()
// Combine all captures into a single string
string.Concat(m.Groups[1].Captures.Select(c => c.Value))
)
) )
); );
private static readonly IMatcher<MarkdownNode> MultiLineQuoteNodeMatcher = new RegexMatcher<MarkdownNode>( private static readonly IMatcher<MarkdownNode> MultiLineQuoteNodeMatcher = new RegexMatcher<MarkdownNode>(
// Capture any character until the end of the input.
// Opening 'greater than' characters must be followed by whitespace.
new Regex(@"^>>>\s(.+)", DefaultRegexOptions | RegexOptions.Singleline), new Regex(@"^>>>\s(.+)", DefaultRegexOptions | RegexOptions.Singleline),
(s, m) => new FormattingNode(FormattingKind.Quote, Parse(s.Relocate(m.Groups[1]))) (s, m) => new FormattingNode(FormattingKind.Quote, Parse(s.Relocate(m.Groups[1])))
); );
/* Headers */
private static readonly IMatcher<MarkdownNode> HeaderNodeMatcher = new RegexMatcher<MarkdownNode>( private static readonly IMatcher<MarkdownNode> HeaderNodeMatcher = new RegexMatcher<MarkdownNode>(
// Capture any character until the end of the line. // Consume the linebreak so that it's not attached to following nodes.
// Opening 'hash' character(s) must be followed by whitespace. new Regex(@"^(\#{1,3})\s(.+)\n", DefaultRegexOptions),
// Consume the newline character so that it's not included in the content.
new Regex(@"^(\#{1,3})\s(.+\n?)", DefaultRegexOptions),
(s, m) => new HeaderNode(m.Groups[1].Length, Parse(s.Relocate(m.Groups[2]))) (s, m) => new HeaderNode(m.Groups[1].Length, Parse(s.Relocate(m.Groups[2])))
); );
private static readonly IMatcher<MarkdownNode> ListNodeMatcher = new RegexMatcher<MarkdownNode>(
// Matches a run of list items, each introduced by '-' or '*' followed by a whitespace character.
// Group 1: whitespace before the first marker, which specifies the list's nesting level.
// Group 2: the content of an item, captured once per item. A following line is considered
// part of the same item when it starts with one whitespace character plus the group 1 whitespace
// (i.e. level+1 whitespace).
// The linebreak after each item is consumed so that it's not attached to following nodes.
// NOTE(review): the content group is optional, so an item with no content adds no capture
// and therefore produces no ListItemNode — confirm this is intended.
new Regex(@"^(\s*)(?:[\-\*]\s(.+(?:\n\s\1.*)*)?\n?)+", DefaultRegexOptions),
// Every capture of group 2 is relocated into a segment, parsed, and wrapped in its own ListItemNode.
(s, m) => new ListNode(
m.Groups[2].Captures.Select(c => new ListItemNode(Parse(s.Relocate(c)))).ToArray()
)
);
/* Code blocks */ /* Code blocks */
private static readonly IMatcher<MarkdownNode> InlineCodeBlockNodeMatcher = new RegexMatcher<MarkdownNode>( private static readonly IMatcher<MarkdownNode> InlineCodeBlockNodeMatcher = new RegexMatcher<MarkdownNode>(
// Capture any character except backtick until a backtick. // One or two backticks are allowed, but they must match on both sides.
// Blank lines at the beginning and at the end of content are trimmed.
// There can be either one or two backticks, but equal number on both sides.
new Regex(@"(`{1,2})([^`]+)\1", DefaultRegexOptions | RegexOptions.Singleline), new Regex(@"(`{1,2})([^`]+)\1", DefaultRegexOptions | RegexOptions.Singleline),
(_, m) => new InlineCodeBlockNode(m.Groups[2].Value.Trim('\r', '\n')) (_, m) => new InlineCodeBlockNode(m.Groups[2].Value)
); );
private static readonly IMatcher<MarkdownNode> MultiLineCodeBlockNodeMatcher = new RegexMatcher<MarkdownNode>( private static readonly IMatcher<MarkdownNode> MultiLineCodeBlockNodeMatcher = new RegexMatcher<MarkdownNode>(
// Capture language identifier and then any character until the earliest triple backtick. // Language identifier is one word immediately after opening backticks, followed immediately by a linebreak.
// Language identifier is one word immediately after opening backticks, followed immediately by newline.
// Blank lines at the beginning and at the end of content are trimmed. // Blank lines at the beginning and at the end of content are trimmed.
new Regex(@"```(?:(\w*)\n)?(.+?)```", DefaultRegexOptions | RegexOptions.Singleline), new Regex(@"```(?:(\w*)\n)?(.+?)```", DefaultRegexOptions | RegexOptions.Singleline),
(_, m) => new MultiLineCodeBlockNode(m.Groups[1].Value, m.Groups[2].Value.Trim('\r', '\n')) (_, m) => new MultiLineCodeBlockNode(m.Groups[1].Value, m.Groups[2].Value.Trim('\r', '\n'))
@ -215,7 +207,7 @@ internal static partial class MarkdownParser
); );
private static readonly IMatcher<MarkdownNode> CodedStandardEmojiNodeMatcher = new RegexMatcher<MarkdownNode>( private static readonly IMatcher<MarkdownNode> CodedStandardEmojiNodeMatcher = new RegexMatcher<MarkdownNode>(
// Capture :thinking: for known emoji codes // Capture :thinking:
new Regex(@":([\w_]+):", DefaultRegexOptions), new Regex(@":([\w_]+):", DefaultRegexOptions),
(_, m) => EmojiIndex.TryGetName(m.Groups[1].Value)?.Pipe(n => new EmojiNode(n)) (_, m) => EmojiIndex.TryGetName(m.Groups[1].Value)?.Pipe(n => new EmojiNode(n))
); );
@ -233,8 +225,8 @@ internal static partial class MarkdownParser
/* Links */ /* Links */
private static readonly IMatcher<MarkdownNode> AutoLinkNodeMatcher = new RegexMatcher<MarkdownNode>( private static readonly IMatcher<MarkdownNode> AutoLinkNodeMatcher = new RegexMatcher<MarkdownNode>(
// Capture any non-whitespace character after http:// or https:// // Any non-whitespace character after http:// or https://
// until the last punctuation character or whitespace // until the last punctuation character or whitespace.
new Regex(@"(https?://\S*[^\.,:;""'\s])", DefaultRegexOptions), new Regex(@"(https?://\S*[^\.,:;""'\s])", DefaultRegexOptions),
(_, m) => new LinkNode(m.Groups[1].Value) (_, m) => new LinkNode(m.Groups[1].Value)
); );
@ -318,8 +310,7 @@ internal static partial class MarkdownParser
} }
); );
// Combine all matchers into one. // Matchers that have similar patterns are ordered from most specific to least specific
// Matchers that have similar patterns are ordered from most specific to least specific.
private static readonly IMatcher<MarkdownNode> NodeMatcher = new AggregateMatcher<MarkdownNode>( private static readonly IMatcher<MarkdownNode> NodeMatcher = new AggregateMatcher<MarkdownNode>(
// Escaped text // Escaped text
ShrugTextNodeMatcher, ShrugTextNodeMatcher,
@ -339,9 +330,8 @@ internal static partial class MarkdownParser
MultiLineQuoteNodeMatcher, MultiLineQuoteNodeMatcher,
RepeatedSingleLineQuoteNodeMatcher, RepeatedSingleLineQuoteNodeMatcher,
SingleLineQuoteNodeMatcher, SingleLineQuoteNodeMatcher,
// Headers
HeaderNodeMatcher, HeaderNodeMatcher,
ListNodeMatcher,
// Code blocks // Code blocks
MultiLineCodeBlockNodeMatcher, MultiLineCodeBlockNodeMatcher,

@ -7,93 +7,127 @@ namespace DiscordChatExporter.Core.Markdown.Parsing;
internal abstract class MarkdownVisitor internal abstract class MarkdownVisitor
{ {
protected virtual ValueTask<MarkdownNode> VisitTextAsync( protected virtual ValueTask VisitTextAsync(
TextNode text, TextNode text,
CancellationToken cancellationToken = default) => CancellationToken cancellationToken = default) => default;
new(text);
protected virtual async ValueTask<MarkdownNode> VisitFormattingAsync( protected virtual async ValueTask VisitFormattingAsync(
FormattingNode formatting, FormattingNode formatting,
CancellationToken cancellationToken = default) CancellationToken cancellationToken = default) =>
{
await VisitAsync(formatting.Children, cancellationToken); await VisitAsync(formatting.Children, cancellationToken);
return formatting;
}
protected virtual async ValueTask<MarkdownNode> VisitHeaderAsync( protected virtual async ValueTask VisitHeaderAsync(
HeaderNode header, HeaderNode header,
CancellationToken cancellationToken = default) CancellationToken cancellationToken = default) =>
{
await VisitAsync(header.Children, cancellationToken); await VisitAsync(header.Children, cancellationToken);
return header;
}
protected virtual ValueTask<MarkdownNode> VisitInlineCodeBlockAsync( protected virtual async ValueTask VisitListAsync(
InlineCodeBlockNode inlineCodeBlock, ListNode list,
CancellationToken cancellationToken = default) => CancellationToken cancellationToken = default) =>
new(inlineCodeBlock); await VisitAsync(list.Items, cancellationToken);
protected virtual ValueTask<MarkdownNode> VisitMultiLineCodeBlockAsync( protected virtual async ValueTask VisitListItemAsync(
MultiLineCodeBlockNode multiLineCodeBlock, ListItemNode listItem,
CancellationToken cancellationToken = default) => CancellationToken cancellationToken = default) =>
new(multiLineCodeBlock); await VisitAsync(listItem.Children, cancellationToken);
/// <summary>
/// Visits an inline code block node.
/// The base implementation does nothing and returns a default (already completed) <see cref="ValueTask"/>;
/// derived visitors override it to handle the node.
/// </summary>
protected virtual ValueTask VisitInlineCodeBlockAsync(
InlineCodeBlockNode inlineCodeBlock,
CancellationToken cancellationToken = default) => default;
/// <summary>
/// Visits a multi-line code block node.
/// The base implementation does nothing and returns a default (already completed) <see cref="ValueTask"/>;
/// derived visitors override it to handle the node.
/// </summary>
protected virtual ValueTask VisitMultiLineCodeBlockAsync(
MultiLineCodeBlockNode multiLineCodeBlock,
CancellationToken cancellationToken = default) => default;
protected virtual async ValueTask<MarkdownNode> VisitLinkAsync( protected virtual async ValueTask VisitLinkAsync(
LinkNode link, LinkNode link,
CancellationToken cancellationToken = default) CancellationToken cancellationToken = default) =>
{
await VisitAsync(link.Children, cancellationToken); await VisitAsync(link.Children, cancellationToken);
return link;
}
protected virtual ValueTask<MarkdownNode> VisitEmojiAsync( protected virtual ValueTask VisitEmojiAsync(
EmojiNode emoji, EmojiNode emoji,
CancellationToken cancellationToken = default) => CancellationToken cancellationToken = default) => default;
new(emoji);
protected virtual ValueTask<MarkdownNode> VisitMentionAsync( protected virtual ValueTask VisitMentionAsync(
MentionNode mention, MentionNode mention,
CancellationToken cancellationToken = default) => CancellationToken cancellationToken = default) => default;
new(mention);
protected virtual ValueTask<MarkdownNode> VisitTimestampAsync( protected virtual ValueTask VisitTimestampAsync(
TimestampNode timestamp, TimestampNode timestamp,
CancellationToken cancellationToken = default) => CancellationToken cancellationToken = default) => default;
new(timestamp);
public async ValueTask<MarkdownNode> VisitAsync( public async ValueTask VisitAsync(
MarkdownNode node, MarkdownNode node,
CancellationToken cancellationToken = default) => node switch CancellationToken cancellationToken = default)
{
if (node is TextNode text)
{ {
TextNode text => await VisitTextAsync(text, cancellationToken);
await VisitTextAsync(text, cancellationToken), return;
}
FormattingNode formatting => if (node is FormattingNode formatting)
await VisitFormattingAsync(formatting, cancellationToken), {
await VisitFormattingAsync(formatting, cancellationToken);
return;
}
HeaderNode header => if (node is HeaderNode header)
await VisitHeaderAsync(header, cancellationToken), {
await VisitHeaderAsync(header, cancellationToken);
return;
}
InlineCodeBlockNode inlineCodeBlock => if (node is ListNode list)
await VisitInlineCodeBlockAsync(inlineCodeBlock, cancellationToken), {
await VisitListAsync(list, cancellationToken);
return;
}
MultiLineCodeBlockNode multiLineCodeBlock => if (node is ListItemNode listItem)
await VisitMultiLineCodeBlockAsync(multiLineCodeBlock, cancellationToken), {
await VisitListItemAsync(listItem, cancellationToken);
return;
}
LinkNode link => if (node is InlineCodeBlockNode inlineCodeBlock)
await VisitLinkAsync(link, cancellationToken), {
await VisitInlineCodeBlockAsync(inlineCodeBlock, cancellationToken);
return;
}
EmojiNode emoji => if (node is MultiLineCodeBlockNode multiLineCodeBlock)
await VisitEmojiAsync(emoji, cancellationToken), {
await VisitMultiLineCodeBlockAsync(multiLineCodeBlock, cancellationToken);
return;
}
MentionNode mention => if (node is LinkNode link)
await VisitMentionAsync(mention, cancellationToken), {
await VisitLinkAsync(link, cancellationToken);
return;
}
TimestampNode timestamp => if (node is EmojiNode emoji)
await VisitTimestampAsync(timestamp, cancellationToken), {
await VisitEmojiAsync(emoji, cancellationToken);
return;
}
_ => throw new ArgumentOutOfRangeException(nameof(node)) if (node is MentionNode mention)
}; {
await VisitMentionAsync(mention, cancellationToken);
return;
}
if (node is TimestampNode timestamp)
{
await VisitTimestampAsync(timestamp, cancellationToken);
return;
}
throw new ArgumentOutOfRangeException(nameof(node));
}
public async ValueTask VisitAsync( public async ValueTask VisitAsync(
IEnumerable<MarkdownNode> nodes, IEnumerable<MarkdownNode> nodes,

Loading…
Cancel
Save