Allow link nodes to have markdown children instead of just text

Closes #640
pull/678/head
Tyrrrz 3 years ago
parent aae3790a5f
commit 6fa7cbe568

@ -30,22 +30,22 @@ namespace DiscordChatExporter.Core.Exporting.Writers.MarkdownVisitors
return base.VisitText(text);
}
protected override MarkdownNode VisitFormatted(FormattedNode formatted)
protected override MarkdownNode VisitFormatting(FormattingNode formatting)
{
var (tagOpen, tagClose) = formatted.Formatting switch
var (tagOpen, tagClose) = formatting.Kind switch
{
TextFormatting.Bold => ("<strong>", "</strong>"),
TextFormatting.Italic => ("<em>", "</em>"),
TextFormatting.Underline => ("<u>", "</u>"),
TextFormatting.Strikethrough => ("<s>", "</s>"),
TextFormatting.Spoiler => (
FormattingKind.Bold => ("<strong>", "</strong>"),
FormattingKind.Italic => ("<em>", "</em>"),
FormattingKind.Underline => ("<u>", "</u>"),
FormattingKind.Strikethrough => ("<s>", "</s>"),
FormattingKind.Spoiler => (
"<span class=\"spoiler-text spoiler-text--hidden\" onclick=\"showSpoiler(event, this)\">", "</span>"),
TextFormatting.Quote => ("<div class=\"quote\">", "</div>"),
_ => throw new ArgumentOutOfRangeException(nameof(formatted.Formatting))
FormattingKind.Quote => ("<div class=\"quote\">", "</div>"),
_ => throw new ArgumentOutOfRangeException(nameof(formatting.Kind))
};
_buffer.Append(tagOpen);
var result = base.VisitFormatted(formatted);
var result = base.VisitFormatting(formatting);
_buffer.Append(tagClose);
return result;
@ -77,25 +77,22 @@ namespace DiscordChatExporter.Core.Exporting.Writers.MarkdownVisitors
protected override MarkdownNode VisitLink(LinkNode link)
{
// Extract message ID if the link points to a Discord message
var linkedMessageId = Regex.Match(link.Url, "^https?://(?:discord|discordapp).com/channels/.*?/(\\d+)/?$").Groups[1].Value;
// Try to extract message ID if the link refers to a Discord message
var linkedMessageId = Regex.Match(
link.Url,
"^https?://(?:discord|discordapp).com/channels/.*?/(\\d+)/?$"
).Groups[1].Value;
if (!string.IsNullOrWhiteSpace(linkedMessageId))
{
_buffer
.Append($"<a href=\"{Uri.EscapeUriString(link.Url)}\" onclick=\"scrollToMessage(event, '{linkedMessageId}')\">")
.Append(HtmlEncode(link.Title))
.Append("</a>");
}
else
{
_buffer
.Append($"<a href=\"{Uri.EscapeUriString(link.Url)}\">")
.Append(HtmlEncode(link.Title))
.Append("</a>");
}
_buffer.Append(
!string.IsNullOrWhiteSpace(linkedMessageId)
? $"<a href=\"{Uri.EscapeUriString(link.Url)}\" onclick=\"scrollToMessage(event, '{linkedMessageId}')\">"
: $"<a href=\"{Uri.EscapeUriString(link.Url)}\">"
);
return base.VisitLink(link);
var result = base.VisitLink(link);
_buffer.Append("</a>");
return result;
}
protected override MarkdownNode VisitEmoji(EmojiNode emoji)

@ -1,21 +0,0 @@
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
namespace DiscordChatExporter.Core.Markdown
{
/// <summary>
/// Markdown node that pairs a <see cref="TextFormatting"/> value with a list of child nodes.
/// </summary>
internal class FormattedNode : MarkdownNode
{
    /// <summary>Formatting value supplied at construction.</summary>
    public TextFormatting Formatting { get; }

    /// <summary>Child nodes supplied at construction.</summary>
    public IReadOnlyList<MarkdownNode> Children { get; }

    /// <summary>Stores the given formatting value and child list as-is.</summary>
    public FormattedNode(TextFormatting formatting, IReadOnlyList<MarkdownNode> children)
    {
        Children = children;
        Formatting = formatting;
    }

    /// <summary>
    /// Builds a short textual form: the formatting value in angle brackets,
    /// followed by the number of children prefixed with a plus sign.
    /// </summary>
    [ExcludeFromCodeCoverage]
    public override string ToString()
    {
        var childCount = Children.Count;
        return $"<{Formatting}> (+{childCount})";
    }
}
}

@ -1,6 +1,6 @@
namespace DiscordChatExporter.Core.Markdown
{
internal enum TextFormatting
internal enum FormattingKind
{
Bold,
Italic,

@ -0,0 +1,29 @@
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using System.Linq;
namespace DiscordChatExporter.Core.Markdown
{
/// <summary>
/// Markdown node that pairs a <see cref="FormattingKind"/> with a list of child nodes.
/// </summary>
internal class FormattingNode : MarkdownNode
{
    /// <summary>Formatting kind supplied at construction.</summary>
    public FormattingKind Kind { get; }

    /// <summary>Child nodes supplied at construction.</summary>
    public IReadOnlyList<MarkdownNode> Children { get; }

    /// <summary>Stores the given kind and child list as-is.</summary>
    public FormattingNode(FormattingKind kind, IReadOnlyList<MarkdownNode> children)
    {
        Children = children;
        Kind = kind;
    }

    /// <summary>
    /// Builds a short textual form: the kind in angle brackets, followed in parentheses
    /// by either the only child's own string form or "+N" where N is the child count.
    /// </summary>
    [ExcludeFromCodeCoverage]
    public override string ToString()
    {
        // Exactly one child: show that child inline instead of a count
        if (Children.Count == 1)
        {
            return $"<{Kind}> ({Children[0].ToString()})";
        }

        return $"<{Kind}> (+{Children.Count})";
    }
}
}

@ -1,4 +1,6 @@
using System.Diagnostics.CodeAnalysis;
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using System.Linq;
namespace DiscordChatExporter.Core.Markdown
{
@ -6,20 +8,27 @@ namespace DiscordChatExporter.Core.Markdown
{
public string Url { get; }
public string Title { get; }
public IReadOnlyList<MarkdownNode> Children { get; }
public LinkNode(string url, string title)
public LinkNode(string url, IReadOnlyList<MarkdownNode> children)
{
Url = url;
Title = title;
Children = children;
}
public LinkNode(string url)
: this(url, url)
: this(url, new[] {new TextNode(url)})
{
}
[ExcludeFromCodeCoverage]
public override string ToString() => $"<Link> {Title}";
/// <summary>
/// Builds a short textual form: the literal "&lt;Link&gt;" followed in parentheses
/// by either the only child's own string form or "+N" where N is the child count.
/// </summary>
public override string ToString()
{
    // Exactly one child: show that child inline instead of a count
    if (Children.Count == 1)
    {
        return $"<Link> ({Children[0].ToString()})";
    }

    return $"<Link> (+{Children.Count})";
}
}
}

@ -7,7 +7,10 @@ using DiscordChatExporter.Core.Utils;
namespace DiscordChatExporter.Core.Markdown.Parsing
{
// The following parsing logic is meant to replicate Discord's markdown grammar as closely as possible
// Discord does NOT use a recursive-descent parser for markdown which becomes evident in some
// scenarios, like when multiple formatting nodes are nested together.
// To replicate Discord's behavior, we're employing a special parser that uses a set of regular
// expressions that are executed sequentially in a first-match-first-serve manner.
internal static partial class MarkdownParser
{
private const RegexOptions DefaultRegexOptions =
@ -18,64 +21,64 @@ namespace DiscordChatExporter.Core.Markdown.Parsing
/* Formatting */
// Capture any character until the earliest double asterisk not followed by an asterisk
private static readonly IMatcher<MarkdownNode> BoldFormattedNodeMatcher = new RegexMatcher<MarkdownNode>(
private static readonly IMatcher<MarkdownNode> BoldFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
new Regex("\\*\\*(.+?)\\*\\*(?!\\*)", DefaultRegexOptions | RegexOptions.Singleline),
(p, m) => new FormattedNode(TextFormatting.Bold, Parse(p.Slice(m.Groups[1])))
(p, m) => new FormattingNode(FormattingKind.Bold, Parse(p.Slice(m.Groups[1])))
);
// Capture any character until the earliest single asterisk not preceded or followed by an asterisk
// Opening asterisk must not be followed by whitespace
// Closing asterisk must not be preceded by whitespace
private static readonly IMatcher<MarkdownNode> ItalicFormattedNodeMatcher = new RegexMatcher<MarkdownNode>(
private static readonly IMatcher<MarkdownNode> ItalicFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
new Regex("\\*(?!\\s)(.+?)(?<!\\s|\\*)\\*(?!\\*)", DefaultRegexOptions | RegexOptions.Singleline),
(p, m) => new FormattedNode(TextFormatting.Italic, Parse(p.Slice(m.Groups[1])))
(p, m) => new FormattingNode(FormattingKind.Italic, Parse(p.Slice(m.Groups[1])))
);
// Capture any character until the earliest triple asterisk not followed by an asterisk
private static readonly IMatcher<MarkdownNode> ItalicBoldFormattedNodeMatcher = new RegexMatcher<MarkdownNode>(
private static readonly IMatcher<MarkdownNode> ItalicBoldFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
new Regex("\\*(\\*\\*.+?\\*\\*)\\*(?!\\*)", DefaultRegexOptions | RegexOptions.Singleline),
(p, m) => new FormattedNode(TextFormatting.Italic, Parse(p.Slice(m.Groups[1]), BoldFormattedNodeMatcher))
(p, m) => new FormattingNode(FormattingKind.Italic, Parse(p.Slice(m.Groups[1]), BoldFormattingNodeMatcher))
);
// Capture any character except underscore until an underscore
// Closing underscore must not be followed by a word character
private static readonly IMatcher<MarkdownNode> ItalicAltFormattedNodeMatcher = new RegexMatcher<MarkdownNode>(
private static readonly IMatcher<MarkdownNode> ItalicAltFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
new Regex("_([^_]+)_(?!\\w)", DefaultRegexOptions | RegexOptions.Singleline),
(p, m) => new FormattedNode(TextFormatting.Italic, Parse(p.Slice(m.Groups[1])))
(p, m) => new FormattingNode(FormattingKind.Italic, Parse(p.Slice(m.Groups[1])))
);
// Capture any character until the earliest double underscore not followed by an underscore
private static readonly IMatcher<MarkdownNode> UnderlineFormattedNodeMatcher = new RegexMatcher<MarkdownNode>(
private static readonly IMatcher<MarkdownNode> UnderlineFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
new Regex("__(.+?)__(?!_)", DefaultRegexOptions | RegexOptions.Singleline),
(p, m) => new FormattedNode(TextFormatting.Underline, Parse(p.Slice(m.Groups[1])))
(p, m) => new FormattingNode(FormattingKind.Underline, Parse(p.Slice(m.Groups[1])))
);
// Capture any character until the earliest triple underscore not followed by an underscore
private static readonly IMatcher<MarkdownNode> ItalicUnderlineFormattedNodeMatcher =
private static readonly IMatcher<MarkdownNode> ItalicUnderlineFormattingNodeMatcher =
new RegexMatcher<MarkdownNode>(
new Regex("_(__.+?__)_(?!_)", DefaultRegexOptions | RegexOptions.Singleline),
(p, m) => new FormattedNode(TextFormatting.Italic,
Parse(p.Slice(m.Groups[1]), UnderlineFormattedNodeMatcher))
(p, m) => new FormattingNode(FormattingKind.Italic,
Parse(p.Slice(m.Groups[1]), UnderlineFormattingNodeMatcher))
);
// Capture any character until the earliest double tilde
private static readonly IMatcher<MarkdownNode> StrikethroughFormattedNodeMatcher =
private static readonly IMatcher<MarkdownNode> StrikethroughFormattingNodeMatcher =
new RegexMatcher<MarkdownNode>(
new Regex("~~(.+?)~~", DefaultRegexOptions | RegexOptions.Singleline),
(p, m) => new FormattedNode(TextFormatting.Strikethrough, Parse(p.Slice(m.Groups[1])))
(p, m) => new FormattingNode(FormattingKind.Strikethrough, Parse(p.Slice(m.Groups[1])))
);
// Capture any character until the earliest double pipe
private static readonly IMatcher<MarkdownNode> SpoilerFormattedNodeMatcher = new RegexMatcher<MarkdownNode>(
private static readonly IMatcher<MarkdownNode> SpoilerFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
new Regex("\\|\\|(.+?)\\|\\|", DefaultRegexOptions | RegexOptions.Singleline),
(p, m) => new FormattedNode(TextFormatting.Spoiler, Parse(p.Slice(m.Groups[1])))
(p, m) => new FormattingNode(FormattingKind.Spoiler, Parse(p.Slice(m.Groups[1])))
);
// Capture any character until the end of the line
// Opening 'greater than' character must be followed by whitespace
private static readonly IMatcher<MarkdownNode> SingleLineQuoteNodeMatcher = new RegexMatcher<MarkdownNode>(
new Regex("^>\\s(.+\n?)", DefaultRegexOptions),
(p, m) => new FormattedNode(TextFormatting.Quote, Parse(p.Slice(m.Groups[1])))
(p, m) => new FormattingNode(FormattingKind.Quote, Parse(p.Slice(m.Groups[1])))
);
// Repeatedly capture any character until the end of the line
@ -86,7 +89,7 @@ namespace DiscordChatExporter.Core.Markdown.Parsing
(_, m) =>
{
var content = string.Concat(m.Groups[1].Captures.Select(c => c.Value));
return new FormattedNode(TextFormatting.Quote, Parse(content));
return new FormattingNode(FormattingKind.Quote, Parse(content));
}
);
@ -94,7 +97,7 @@ namespace DiscordChatExporter.Core.Markdown.Parsing
// Opening 'greater than' characters must be followed by whitespace
private static readonly IMatcher<MarkdownNode> MultiLineQuoteNodeMatcher = new RegexMatcher<MarkdownNode>(
new Regex("^>>>\\s(.+)", DefaultRegexOptions | RegexOptions.Singleline),
(p, m) => new FormattedNode(TextFormatting.Quote, Parse(p.Slice(m.Groups[1])))
(p, m) => new FormattingNode(FormattingKind.Quote, Parse(p.Slice(m.Groups[1])))
);
/* Code blocks */
@ -147,7 +150,7 @@ namespace DiscordChatExporter.Core.Markdown.Parsing
(_, m) => new MentionNode(m.Groups[1].Value, MentionKind.Role)
);
/* Emojis */
/* Emoji */
// Capture any country flag emoji (two regional indicator surrogate pairs)
// ... or "miscellaneous symbol" character
@ -165,7 +168,7 @@ namespace DiscordChatExporter.Core.Markdown.Parsing
(_, m) =>
{
var name = EmojiIndex.TryGetName(m.Groups[1].Value);
return name is not null
return !string.IsNullOrWhiteSpace(name)
? new EmojiNode(name)
: null;
}
@ -182,10 +185,11 @@ namespace DiscordChatExporter.Core.Markdown.Parsing
// Capture [title](link)
private static readonly IMatcher<MarkdownNode> TitledLinkNodeMatcher = new RegexMatcher<MarkdownNode>(
new Regex("\\[(.+?)\\]\\((.+?)\\)", DefaultRegexOptions),
(_, m) => new LinkNode(m.Groups[2].Value, m.Groups[1].Value)
(p, m) => new LinkNode(m.Groups[2].Value, Parse(p.Slice(m.Groups[1])))
);
// Capture any non-whitespace character after http:// or https:// until the last punctuation character or whitespace
// Capture any non-whitespace character after http:// or https://
// until the last punctuation character or whitespace
private static readonly IMatcher<MarkdownNode> AutoLinkNodeMatcher = new RegexMatcher<MarkdownNode>(
new Regex("(https?://\\S*[^\\.,:;\"\'\\s])", DefaultRegexOptions),
(_, m) => new LinkNode(m.Groups[1].Value)
@ -199,14 +203,14 @@ namespace DiscordChatExporter.Core.Markdown.Parsing
/* Text */
// Capture the shrug emoticon
// Capture the shrug kaomoji
// This escapes it from matching for formatting
// Matches the literal shrug string and wraps the matched part in a plain text node
private static readonly IMatcher<MarkdownNode> ShrugTextNodeMatcher =
    new StringMatcher<MarkdownNode>(@"¯\_(ツ)_/¯", part => new TextNode(part.ToString()));
// Capture some specific emojis that don't get rendered
// Capture some specific emoji that don't get rendered
// This escapes it from matching for emoji
private static readonly IMatcher<MarkdownNode> IgnoredEmojiTextNodeMatcher = new RegexMatcher<MarkdownNode>(
new Regex("(\\u26A7|\\u2640|\\u2642|\\u2695|\\u267E|\\u00A9|\\u00AE|\\u2122)", DefaultRegexOptions),
@ -257,14 +261,14 @@ namespace DiscordChatExporter.Core.Markdown.Parsing
EscapedCharacterTextNodeMatcher,
// Formatting
ItalicBoldFormattedNodeMatcher,
ItalicUnderlineFormattedNodeMatcher,
BoldFormattedNodeMatcher,
ItalicFormattedNodeMatcher,
UnderlineFormattedNodeMatcher,
ItalicAltFormattedNodeMatcher,
StrikethroughFormattedNodeMatcher,
SpoilerFormattedNodeMatcher,
ItalicBoldFormattingNodeMatcher,
ItalicUnderlineFormattingNodeMatcher,
BoldFormattingNodeMatcher,
ItalicFormattingNodeMatcher,
UnderlineFormattingNodeMatcher,
ItalicAltFormattingNodeMatcher,
StrikethroughFormattingNodeMatcher,
SpoilerFormattingNodeMatcher,
MultiLineQuoteNodeMatcher,
RepeatedSingleLineQuoteNodeMatcher,
SingleLineQuoteNodeMatcher,

@ -8,10 +8,10 @@ namespace DiscordChatExporter.Core.Markdown.Parsing
protected virtual MarkdownNode VisitText(TextNode text) =>
text;
protected virtual MarkdownNode VisitFormatted(FormattedNode formatted)
protected virtual MarkdownNode VisitFormatting(FormattingNode formatting)
{
Visit(formatted.Children);
return formatted;
Visit(formatting.Children);
return formatting;
}
protected virtual MarkdownNode VisitInlineCodeBlock(InlineCodeBlockNode inlineCodeBlock) =>
@ -20,8 +20,11 @@ namespace DiscordChatExporter.Core.Markdown.Parsing
protected virtual MarkdownNode VisitMultiLineCodeBlock(MultiLineCodeBlockNode multiLineCodeBlock) =>
multiLineCodeBlock;
protected virtual MarkdownNode VisitLink(LinkNode link) =>
link;
/// <summary>
/// Visits the children of the given link node and then returns the node itself.
/// </summary>
/// <param name="link">Link node whose children are passed to <c>Visit</c>.</param>
/// <returns>The same <paramref name="link"/> instance that was passed in.</returns>
protected virtual MarkdownNode VisitLink(LinkNode link)
{
    var children = link.Children;
    Visit(children);

    return link;
}
protected virtual MarkdownNode VisitEmoji(EmojiNode emoji) =>
emoji;
@ -35,7 +38,7 @@ namespace DiscordChatExporter.Core.Markdown.Parsing
public MarkdownNode Visit(MarkdownNode node) => node switch
{
TextNode text => VisitText(text),
FormattedNode formatted => VisitFormatted(formatted),
FormattingNode formatting => VisitFormatting(formatting),
InlineCodeBlockNode inlineCodeBlock => VisitInlineCodeBlock(inlineCodeBlock),
MultiLineCodeBlockNode multiLineCodeBlock => VisitMultiLineCodeBlock(multiLineCodeBlock),
LinkNode link => VisitLink(link),

Loading…
Cancel
Save