Allow link nodes to have markdown children instead of just text

Closes #640
pull/678/head
Tyrrrz 3 years ago
parent aae3790a5f
commit 6fa7cbe568

@ -30,22 +30,22 @@ namespace DiscordChatExporter.Core.Exporting.Writers.MarkdownVisitors
return base.VisitText(text); return base.VisitText(text);
} }
protected override MarkdownNode VisitFormatted(FormattedNode formatted) protected override MarkdownNode VisitFormatting(FormattingNode formatting)
{ {
var (tagOpen, tagClose) = formatted.Formatting switch var (tagOpen, tagClose) = formatting.Kind switch
{ {
TextFormatting.Bold => ("<strong>", "</strong>"), FormattingKind.Bold => ("<strong>", "</strong>"),
TextFormatting.Italic => ("<em>", "</em>"), FormattingKind.Italic => ("<em>", "</em>"),
TextFormatting.Underline => ("<u>", "</u>"), FormattingKind.Underline => ("<u>", "</u>"),
TextFormatting.Strikethrough => ("<s>", "</s>"), FormattingKind.Strikethrough => ("<s>", "</s>"),
TextFormatting.Spoiler => ( FormattingKind.Spoiler => (
"<span class=\"spoiler-text spoiler-text--hidden\" onclick=\"showSpoiler(event, this)\">", "</span>"), "<span class=\"spoiler-text spoiler-text--hidden\" onclick=\"showSpoiler(event, this)\">", "</span>"),
TextFormatting.Quote => ("<div class=\"quote\">", "</div>"), FormattingKind.Quote => ("<div class=\"quote\">", "</div>"),
_ => throw new ArgumentOutOfRangeException(nameof(formatted.Formatting)) _ => throw new ArgumentOutOfRangeException(nameof(formatting.Kind))
}; };
_buffer.Append(tagOpen); _buffer.Append(tagOpen);
var result = base.VisitFormatted(formatted); var result = base.VisitFormatting(formatting);
_buffer.Append(tagClose); _buffer.Append(tagClose);
return result; return result;
@ -77,25 +77,22 @@ namespace DiscordChatExporter.Core.Exporting.Writers.MarkdownVisitors
protected override MarkdownNode VisitLink(LinkNode link) protected override MarkdownNode VisitLink(LinkNode link)
{ {
// Extract message ID if the link points to a Discord message // Try to extract message ID if the link refers to a Discord message
var linkedMessageId = Regex.Match(link.Url, "^https?://(?:discord|discordapp).com/channels/.*?/(\\d+)/?$").Groups[1].Value; var linkedMessageId = Regex.Match(
link.Url,
"^https?://(?:discord|discordapp).com/channels/.*?/(\\d+)/?$"
).Groups[1].Value;
if (!string.IsNullOrWhiteSpace(linkedMessageId)) _buffer.Append(
{ !string.IsNullOrWhiteSpace(linkedMessageId)
_buffer ? $"<a href=\"{Uri.EscapeUriString(link.Url)}\" onclick=\"scrollToMessage(event, '{linkedMessageId}')\">"
.Append($"<a href=\"{Uri.EscapeUriString(link.Url)}\" onclick=\"scrollToMessage(event, '{linkedMessageId}')\">") : $"<a href=\"{Uri.EscapeUriString(link.Url)}\">"
.Append(HtmlEncode(link.Title)) );
.Append("</a>");
}
else
{
_buffer
.Append($"<a href=\"{Uri.EscapeUriString(link.Url)}\">")
.Append(HtmlEncode(link.Title))
.Append("</a>");
}
return base.VisitLink(link); var result = base.VisitLink(link);
_buffer.Append("</a>");
return result;
} }
protected override MarkdownNode VisitEmoji(EmojiNode emoji) protected override MarkdownNode VisitEmoji(EmojiNode emoji)

@ -1,21 +0,0 @@
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
namespace DiscordChatExporter.Core.Markdown
{
    /// <summary>
    /// Markdown node that pairs a <see cref="TextFormatting"/> value with the child nodes it contains.
    /// </summary>
    internal class FormattedNode : MarkdownNode
    {
        /// <summary>Formatting value supplied at construction.</summary>
        public TextFormatting Formatting { get; }

        /// <summary>Child nodes supplied at construction.</summary>
        public IReadOnlyList<MarkdownNode> Children { get; }

        /// <summary>
        /// Creates a node from a formatting value and its children.
        /// </summary>
        /// <param name="formatting">Value stored in <see cref="Formatting"/>.</param>
        /// <param name="children">List stored in <see cref="Children"/>.</param>
        public FormattedNode(TextFormatting formatting, IReadOnlyList<MarkdownNode> children) =>
            (Formatting, Children) = (formatting, children);

        /// <summary>
        /// Returns a short representation: the formatting value in angle brackets
        /// followed by the number of children, e.g. <c>&lt;Bold&gt; (+2)</c>.
        /// </summary>
        [ExcludeFromCodeCoverage]
        public override string ToString()
        {
            var childCount = Children.Count;
            return $"<{Formatting}> (+{childCount})";
        }
    }
}

@ -1,6 +1,6 @@
namespace DiscordChatExporter.Core.Markdown namespace DiscordChatExporter.Core.Markdown
{ {
internal enum TextFormatting internal enum FormattingKind
{ {
Bold, Bold,
Italic, Italic,

@ -0,0 +1,29 @@
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using System.Linq;
namespace DiscordChatExporter.Core.Markdown
{
    /// <summary>
    /// Markdown node that pairs a <see cref="FormattingKind"/> value with the child nodes it contains.
    /// </summary>
    internal class FormattingNode : MarkdownNode
    {
        /// <summary>Formatting kind supplied at construction.</summary>
        public FormattingKind Kind { get; }

        /// <summary>Child nodes supplied at construction.</summary>
        public IReadOnlyList<MarkdownNode> Children { get; }

        /// <summary>
        /// Creates a node from a formatting kind and its children.
        /// </summary>
        /// <param name="kind">Value stored in <see cref="Kind"/>.</param>
        /// <param name="children">List stored in <see cref="Children"/>.</param>
        public FormattingNode(FormattingKind kind, IReadOnlyList<MarkdownNode> children) =>
            (Kind, Children) = (kind, children);

        /// <summary>
        /// Returns a short representation: the kind in angle brackets followed by either
        /// the only child's own string form or, for any other child count, <c>+N</c>.
        /// </summary>
        [ExcludeFromCodeCoverage]
        public override string ToString()
        {
            // A lone child is rendered in full; any other count is summarized as "+N"
            if (Children.Count == 1)
            {
                return $"<{Kind}> ({Children[0].ToString()})";
            }

            return $"<{Kind}> (+{Children.Count})";
        }
    }
}

@ -1,4 +1,6 @@
using System.Diagnostics.CodeAnalysis; using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using System.Linq;
namespace DiscordChatExporter.Core.Markdown namespace DiscordChatExporter.Core.Markdown
{ {
@ -6,20 +8,27 @@ namespace DiscordChatExporter.Core.Markdown
{ {
public string Url { get; } public string Url { get; }
public string Title { get; } public IReadOnlyList<MarkdownNode> Children { get; }
public LinkNode(string url, string title) public LinkNode(string url, IReadOnlyList<MarkdownNode> children)
{ {
Url = url; Url = url;
Title = title; Children = children;
} }
public LinkNode(string url) public LinkNode(string url)
: this(url, url) : this(url, new[] {new TextNode(url)})
{ {
} }
[ExcludeFromCodeCoverage] [ExcludeFromCodeCoverage]
public override string ToString() => $"<Link> {Title}"; public override string ToString()
{
var childrenFormatted = Children.Count == 1
? Children.Single().ToString()
: "+" + Children.Count;
return $"<Link> ({childrenFormatted})";
}
} }
} }

@ -7,7 +7,10 @@ using DiscordChatExporter.Core.Utils;
namespace DiscordChatExporter.Core.Markdown.Parsing namespace DiscordChatExporter.Core.Markdown.Parsing
{ {
// The following parsing logic is meant to replicate Discord's markdown grammar as close as possible // Discord does NOT use a recursive-descent parser for markdown which becomes evident in some
// scenarios, like when multiple formatting nodes are nested together.
// To replicate Discord's behavior, we're employing a special parser that uses a set of regular
// expressions that are executed sequentially in a first-match-first-serve manner.
internal static partial class MarkdownParser internal static partial class MarkdownParser
{ {
private const RegexOptions DefaultRegexOptions = private const RegexOptions DefaultRegexOptions =
@ -18,64 +21,64 @@ namespace DiscordChatExporter.Core.Markdown.Parsing
/* Formatting */ /* Formatting */
// Capture any character until the earliest double asterisk not followed by an asterisk // Capture any character until the earliest double asterisk not followed by an asterisk
private static readonly IMatcher<MarkdownNode> BoldFormattedNodeMatcher = new RegexMatcher<MarkdownNode>( private static readonly IMatcher<MarkdownNode> BoldFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
new Regex("\\*\\*(.+?)\\*\\*(?!\\*)", DefaultRegexOptions | RegexOptions.Singleline), new Regex("\\*\\*(.+?)\\*\\*(?!\\*)", DefaultRegexOptions | RegexOptions.Singleline),
(p, m) => new FormattedNode(TextFormatting.Bold, Parse(p.Slice(m.Groups[1]))) (p, m) => new FormattingNode(FormattingKind.Bold, Parse(p.Slice(m.Groups[1])))
); );
// Capture any character until the earliest single asterisk not preceded or followed by an asterisk // Capture any character until the earliest single asterisk not preceded or followed by an asterisk
// Opening asterisk must not be followed by whitespace // Opening asterisk must not be followed by whitespace
// Closing asterisk must not be preceded by whitespace // Closing asterisk must not be preceded by whitespace
private static readonly IMatcher<MarkdownNode> ItalicFormattedNodeMatcher = new RegexMatcher<MarkdownNode>( private static readonly IMatcher<MarkdownNode> ItalicFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
new Regex("\\*(?!\\s)(.+?)(?<!\\s|\\*)\\*(?!\\*)", DefaultRegexOptions | RegexOptions.Singleline), new Regex("\\*(?!\\s)(.+?)(?<!\\s|\\*)\\*(?!\\*)", DefaultRegexOptions | RegexOptions.Singleline),
(p, m) => new FormattedNode(TextFormatting.Italic, Parse(p.Slice(m.Groups[1]))) (p, m) => new FormattingNode(FormattingKind.Italic, Parse(p.Slice(m.Groups[1])))
); );
// Capture any character until the earliest triple asterisk not followed by an asterisk // Capture any character until the earliest triple asterisk not followed by an asterisk
private static readonly IMatcher<MarkdownNode> ItalicBoldFormattedNodeMatcher = new RegexMatcher<MarkdownNode>( private static readonly IMatcher<MarkdownNode> ItalicBoldFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
new Regex("\\*(\\*\\*.+?\\*\\*)\\*(?!\\*)", DefaultRegexOptions | RegexOptions.Singleline), new Regex("\\*(\\*\\*.+?\\*\\*)\\*(?!\\*)", DefaultRegexOptions | RegexOptions.Singleline),
(p, m) => new FormattedNode(TextFormatting.Italic, Parse(p.Slice(m.Groups[1]), BoldFormattedNodeMatcher)) (p, m) => new FormattingNode(FormattingKind.Italic, Parse(p.Slice(m.Groups[1]), BoldFormattingNodeMatcher))
); );
// Capture any character except underscore until an underscore // Capture any character except underscore until an underscore
// Closing underscore must not be followed by a word character // Closing underscore must not be followed by a word character
private static readonly IMatcher<MarkdownNode> ItalicAltFormattedNodeMatcher = new RegexMatcher<MarkdownNode>( private static readonly IMatcher<MarkdownNode> ItalicAltFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
new Regex("_([^_]+)_(?!\\w)", DefaultRegexOptions | RegexOptions.Singleline), new Regex("_([^_]+)_(?!\\w)", DefaultRegexOptions | RegexOptions.Singleline),
(p, m) => new FormattedNode(TextFormatting.Italic, Parse(p.Slice(m.Groups[1]))) (p, m) => new FormattingNode(FormattingKind.Italic, Parse(p.Slice(m.Groups[1])))
); );
// Capture any character until the earliest double underscore not followed by an underscore // Capture any character until the earliest double underscore not followed by an underscore
private static readonly IMatcher<MarkdownNode> UnderlineFormattedNodeMatcher = new RegexMatcher<MarkdownNode>( private static readonly IMatcher<MarkdownNode> UnderlineFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
new Regex("__(.+?)__(?!_)", DefaultRegexOptions | RegexOptions.Singleline), new Regex("__(.+?)__(?!_)", DefaultRegexOptions | RegexOptions.Singleline),
(p, m) => new FormattedNode(TextFormatting.Underline, Parse(p.Slice(m.Groups[1]))) (p, m) => new FormattingNode(FormattingKind.Underline, Parse(p.Slice(m.Groups[1])))
); );
// Capture any character until the earliest triple underscore not followed by an underscore // Capture any character until the earliest triple underscore not followed by an underscore
private static readonly IMatcher<MarkdownNode> ItalicUnderlineFormattedNodeMatcher = private static readonly IMatcher<MarkdownNode> ItalicUnderlineFormattingNodeMatcher =
new RegexMatcher<MarkdownNode>( new RegexMatcher<MarkdownNode>(
new Regex("_(__.+?__)_(?!_)", DefaultRegexOptions | RegexOptions.Singleline), new Regex("_(__.+?__)_(?!_)", DefaultRegexOptions | RegexOptions.Singleline),
(p, m) => new FormattedNode(TextFormatting.Italic, (p, m) => new FormattingNode(FormattingKind.Italic,
Parse(p.Slice(m.Groups[1]), UnderlineFormattedNodeMatcher)) Parse(p.Slice(m.Groups[1]), UnderlineFormattingNodeMatcher))
); );
// Capture any character until the earliest double tilde // Capture any character until the earliest double tilde
private static readonly IMatcher<MarkdownNode> StrikethroughFormattedNodeMatcher = private static readonly IMatcher<MarkdownNode> StrikethroughFormattingNodeMatcher =
new RegexMatcher<MarkdownNode>( new RegexMatcher<MarkdownNode>(
new Regex("~~(.+?)~~", DefaultRegexOptions | RegexOptions.Singleline), new Regex("~~(.+?)~~", DefaultRegexOptions | RegexOptions.Singleline),
(p, m) => new FormattedNode(TextFormatting.Strikethrough, Parse(p.Slice(m.Groups[1]))) (p, m) => new FormattingNode(FormattingKind.Strikethrough, Parse(p.Slice(m.Groups[1])))
); );
// Capture any character until the earliest double pipe // Capture any character until the earliest double pipe
private static readonly IMatcher<MarkdownNode> SpoilerFormattedNodeMatcher = new RegexMatcher<MarkdownNode>( private static readonly IMatcher<MarkdownNode> SpoilerFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
new Regex("\\|\\|(.+?)\\|\\|", DefaultRegexOptions | RegexOptions.Singleline), new Regex("\\|\\|(.+?)\\|\\|", DefaultRegexOptions | RegexOptions.Singleline),
(p, m) => new FormattedNode(TextFormatting.Spoiler, Parse(p.Slice(m.Groups[1]))) (p, m) => new FormattingNode(FormattingKind.Spoiler, Parse(p.Slice(m.Groups[1])))
); );
// Capture any character until the end of the line // Capture any character until the end of the line
// Opening 'greater than' character must be followed by whitespace // Opening 'greater than' character must be followed by whitespace
private static readonly IMatcher<MarkdownNode> SingleLineQuoteNodeMatcher = new RegexMatcher<MarkdownNode>( private static readonly IMatcher<MarkdownNode> SingleLineQuoteNodeMatcher = new RegexMatcher<MarkdownNode>(
new Regex("^>\\s(.+\n?)", DefaultRegexOptions), new Regex("^>\\s(.+\n?)", DefaultRegexOptions),
(p, m) => new FormattedNode(TextFormatting.Quote, Parse(p.Slice(m.Groups[1]))) (p, m) => new FormattingNode(FormattingKind.Quote, Parse(p.Slice(m.Groups[1])))
); );
// Repeatedly capture any character until the end of the line // Repeatedly capture any character until the end of the line
@ -86,7 +89,7 @@ namespace DiscordChatExporter.Core.Markdown.Parsing
(_, m) => (_, m) =>
{ {
var content = string.Concat(m.Groups[1].Captures.Select(c => c.Value)); var content = string.Concat(m.Groups[1].Captures.Select(c => c.Value));
return new FormattedNode(TextFormatting.Quote, Parse(content)); return new FormattingNode(FormattingKind.Quote, Parse(content));
} }
); );
@ -94,7 +97,7 @@ namespace DiscordChatExporter.Core.Markdown.Parsing
// Opening 'greater than' characters must be followed by whitespace // Opening 'greater than' characters must be followed by whitespace
private static readonly IMatcher<MarkdownNode> MultiLineQuoteNodeMatcher = new RegexMatcher<MarkdownNode>( private static readonly IMatcher<MarkdownNode> MultiLineQuoteNodeMatcher = new RegexMatcher<MarkdownNode>(
new Regex("^>>>\\s(.+)", DefaultRegexOptions | RegexOptions.Singleline), new Regex("^>>>\\s(.+)", DefaultRegexOptions | RegexOptions.Singleline),
(p, m) => new FormattedNode(TextFormatting.Quote, Parse(p.Slice(m.Groups[1]))) (p, m) => new FormattingNode(FormattingKind.Quote, Parse(p.Slice(m.Groups[1])))
); );
/* Code blocks */ /* Code blocks */
@ -147,7 +150,7 @@ namespace DiscordChatExporter.Core.Markdown.Parsing
(_, m) => new MentionNode(m.Groups[1].Value, MentionKind.Role) (_, m) => new MentionNode(m.Groups[1].Value, MentionKind.Role)
); );
/* Emojis */ /* Emoji */
// Capture any country flag emoji (two regional indicator surrogate pairs) // Capture any country flag emoji (two regional indicator surrogate pairs)
// ... or "miscellaneous symbol" character // ... or "miscellaneous symbol" character
@ -165,7 +168,7 @@ namespace DiscordChatExporter.Core.Markdown.Parsing
(_, m) => (_, m) =>
{ {
var name = EmojiIndex.TryGetName(m.Groups[1].Value); var name = EmojiIndex.TryGetName(m.Groups[1].Value);
return name is not null return !string.IsNullOrWhiteSpace(name)
? new EmojiNode(name) ? new EmojiNode(name)
: null; : null;
} }
@ -182,10 +185,11 @@ namespace DiscordChatExporter.Core.Markdown.Parsing
// Capture [title](link) // Capture [title](link)
private static readonly IMatcher<MarkdownNode> TitledLinkNodeMatcher = new RegexMatcher<MarkdownNode>( private static readonly IMatcher<MarkdownNode> TitledLinkNodeMatcher = new RegexMatcher<MarkdownNode>(
new Regex("\\[(.+?)\\]\\((.+?)\\)", DefaultRegexOptions), new Regex("\\[(.+?)\\]\\((.+?)\\)", DefaultRegexOptions),
(_, m) => new LinkNode(m.Groups[2].Value, m.Groups[1].Value) (p, m) => new LinkNode(m.Groups[2].Value, Parse(p.Slice(m.Groups[1])))
); );
// Capture any non-whitespace character after http:// or https:// until the last punctuation character or whitespace // Capture any non-whitespace character after http:// or https://
// until the last punctuation character or whitespace
private static readonly IMatcher<MarkdownNode> AutoLinkNodeMatcher = new RegexMatcher<MarkdownNode>( private static readonly IMatcher<MarkdownNode> AutoLinkNodeMatcher = new RegexMatcher<MarkdownNode>(
new Regex("(https?://\\S*[^\\.,:;\"\'\\s])", DefaultRegexOptions), new Regex("(https?://\\S*[^\\.,:;\"\'\\s])", DefaultRegexOptions),
(_, m) => new LinkNode(m.Groups[1].Value) (_, m) => new LinkNode(m.Groups[1].Value)
@ -199,14 +203,14 @@ namespace DiscordChatExporter.Core.Markdown.Parsing
/* Text */ /* Text */
// Capture the shrug emoticon // Capture the shrug kaomoji
// This escapes it from matching for formatting // This escapes it from matching for formatting
private static readonly IMatcher<MarkdownNode> ShrugTextNodeMatcher = new StringMatcher<MarkdownNode>( private static readonly IMatcher<MarkdownNode> ShrugTextNodeMatcher = new StringMatcher<MarkdownNode>(
@"¯\_(ツ)_/¯", @"¯\_(ツ)_/¯",
p => new TextNode(p.ToString()) p => new TextNode(p.ToString())
); );
// Capture some specific emojis that don't get rendered // Capture some specific emoji that don't get rendered
// This escapes it from matching for emoji // This escapes it from matching for emoji
private static readonly IMatcher<MarkdownNode> IgnoredEmojiTextNodeMatcher = new RegexMatcher<MarkdownNode>( private static readonly IMatcher<MarkdownNode> IgnoredEmojiTextNodeMatcher = new RegexMatcher<MarkdownNode>(
new Regex("(\\u26A7|\\u2640|\\u2642|\\u2695|\\u267E|\\u00A9|\\u00AE|\\u2122)", DefaultRegexOptions), new Regex("(\\u26A7|\\u2640|\\u2642|\\u2695|\\u267E|\\u00A9|\\u00AE|\\u2122)", DefaultRegexOptions),
@ -257,14 +261,14 @@ namespace DiscordChatExporter.Core.Markdown.Parsing
EscapedCharacterTextNodeMatcher, EscapedCharacterTextNodeMatcher,
// Formatting // Formatting
ItalicBoldFormattedNodeMatcher, ItalicBoldFormattingNodeMatcher,
ItalicUnderlineFormattedNodeMatcher, ItalicUnderlineFormattingNodeMatcher,
BoldFormattedNodeMatcher, BoldFormattingNodeMatcher,
ItalicFormattedNodeMatcher, ItalicFormattingNodeMatcher,
UnderlineFormattedNodeMatcher, UnderlineFormattingNodeMatcher,
ItalicAltFormattedNodeMatcher, ItalicAltFormattingNodeMatcher,
StrikethroughFormattedNodeMatcher, StrikethroughFormattingNodeMatcher,
SpoilerFormattedNodeMatcher, SpoilerFormattingNodeMatcher,
MultiLineQuoteNodeMatcher, MultiLineQuoteNodeMatcher,
RepeatedSingleLineQuoteNodeMatcher, RepeatedSingleLineQuoteNodeMatcher,
SingleLineQuoteNodeMatcher, SingleLineQuoteNodeMatcher,

@ -8,10 +8,10 @@ namespace DiscordChatExporter.Core.Markdown.Parsing
protected virtual MarkdownNode VisitText(TextNode text) => protected virtual MarkdownNode VisitText(TextNode text) =>
text; text;
protected virtual MarkdownNode VisitFormatted(FormattedNode formatted) protected virtual MarkdownNode VisitFormatting(FormattingNode formatting)
{ {
Visit(formatted.Children); Visit(formatting.Children);
return formatted; return formatting;
} }
protected virtual MarkdownNode VisitInlineCodeBlock(InlineCodeBlockNode inlineCodeBlock) => protected virtual MarkdownNode VisitInlineCodeBlock(InlineCodeBlockNode inlineCodeBlock) =>
@ -20,8 +20,11 @@ namespace DiscordChatExporter.Core.Markdown.Parsing
protected virtual MarkdownNode VisitMultiLineCodeBlock(MultiLineCodeBlockNode multiLineCodeBlock) => protected virtual MarkdownNode VisitMultiLineCodeBlock(MultiLineCodeBlockNode multiLineCodeBlock) =>
multiLineCodeBlock; multiLineCodeBlock;
protected virtual MarkdownNode VisitLink(LinkNode link) => protected virtual MarkdownNode VisitLink(LinkNode link)
link; {
Visit(link.Children);
return link;
}
protected virtual MarkdownNode VisitEmoji(EmojiNode emoji) => protected virtual MarkdownNode VisitEmoji(EmojiNode emoji) =>
emoji; emoji;
@ -35,7 +38,7 @@ namespace DiscordChatExporter.Core.Markdown.Parsing
public MarkdownNode Visit(MarkdownNode node) => node switch public MarkdownNode Visit(MarkdownNode node) => node switch
{ {
TextNode text => VisitText(text), TextNode text => VisitText(text),
FormattedNode formatted => VisitFormatted(formatted), FormattingNode formatting => VisitFormatting(formatting),
InlineCodeBlockNode inlineCodeBlock => VisitInlineCodeBlock(inlineCodeBlock), InlineCodeBlockNode inlineCodeBlock => VisitInlineCodeBlock(inlineCodeBlock),
MultiLineCodeBlockNode multiLineCodeBlock => VisitMultiLineCodeBlock(multiLineCodeBlock), MultiLineCodeBlockNode multiLineCodeBlock => VisitMultiLineCodeBlock(multiLineCodeBlock),
LinkNode link => VisitLink(link), LinkNode link => VisitLink(link),

Loading…
Cancel
Save