Implement a more sophisticated markdown parsing engine (#145)

pull/162/head
Alexey Golub 6 years ago committed by GitHub
parent 88727a1fe6
commit f09f30c7bd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -0,0 +1,12 @@
<!-- NOTE(review): this appears to be the project file of the Markdown parsing library (it references Sprache, which the Grammar class uses) - confirm against the file name. -->
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<!-- Target framework moniker: .NET Framework 4.6.1 -->
<TargetFramework>net461</TargetFramework>
</PropertyGroup>
<ItemGroup>
<!-- Parser library; the grammar builds its parsers through Sprache's Parse.* helpers -->
<PackageReference Include="Sprache" Version="2.2.0" />
<!-- Imported by the grammar; presumably supplies the IsNotBlank string extension - verify -->
<PackageReference Include="Tyrrrz.Extensions" Version="1.5.1" />
</ItemGroup>
</Project>

@ -0,0 +1,21 @@
namespace DiscordChatExporter.Core.Markdown
{
    /// <summary>
    /// Markdown node describing a custom emoji.
    /// </summary>
    public class EmojiNode : Node
    {
        /// <summary>Identifier of the emoji, exactly as supplied to the constructor.</summary>
        public string Id { get; }

        /// <summary>Name of the emoji, exactly as supplied to the constructor.</summary>
        public string Name { get; }

        /// <summary>Whether the emoji was flagged as animated by the creator of the node.</summary>
        public bool IsAnimated { get; }

        /// <summary>
        /// Creates an emoji node.
        /// </summary>
        /// <param name="lexeme">Source text this node was created from; stored by the base class.</param>
        /// <param name="id">Emoji identifier.</param>
        /// <param name="name">Emoji name.</param>
        /// <param name="isAnimated">Animated flag.</param>
        public EmojiNode(string lexeme, string id, string name, bool isAnimated)
            : base(lexeme)
        {
            this.IsAnimated = isAnimated;
            this.Name = name;
            this.Id = id;
        }

        /// <summary>Returns a short description containing the emoji name.</summary>
        public override string ToString()
        {
            return $"<Emoji> {Name}";
        }
    }
}

@ -0,0 +1,23 @@
using System.Collections.Generic;
namespace DiscordChatExporter.Core.Markdown
{
    /// <summary>
    /// Markdown node that applies a single kind of text formatting to a list of child nodes.
    /// </summary>
    public class FormattedNode : Node
    {
        /// <summary>Markup token associated with the formatting, as supplied to the constructor.</summary>
        public string Token { get; }

        /// <summary>Kind of formatting this node represents.</summary>
        public TextFormatting Formatting { get; }

        /// <summary>Direct child nodes contained in the formatted span.</summary>
        public IReadOnlyList<Node> Children { get; }

        /// <summary>
        /// Creates a formatted node.
        /// </summary>
        /// <param name="lexeme">Source text this node was created from; stored by the base class.</param>
        /// <param name="token">Markup token for the formatting.</param>
        /// <param name="formatting">Kind of formatting.</param>
        /// <param name="children">Direct child nodes.</param>
        public FormattedNode(string lexeme, string token, TextFormatting formatting, IReadOnlyList<Node> children)
            : base(lexeme)
        {
            this.Children = children;
            this.Formatting = formatting;
            this.Token = token;
        }

        /// <summary>Returns the formatting kind together with the number of direct children.</summary>
        public override string ToString()
        {
            return $"<{Formatting}> ({Children.Count} direct children)";
        }
    }
}

@ -0,0 +1,15 @@
namespace DiscordChatExporter.Core.Markdown
{
    /// <summary>
    /// Markdown node for a code span that sits inline with surrounding text.
    /// </summary>
    public class InlineCodeBlockNode : Node
    {
        /// <summary>Code carried by this node, exactly as supplied to the constructor.</summary>
        public string Code { get; }

        /// <summary>
        /// Creates an inline code node.
        /// </summary>
        /// <param name="lexeme">Source text this node was created from; stored by the base class.</param>
        /// <param name="code">Code content.</param>
        public InlineCodeBlockNode(string lexeme, string code)
            : base(lexeme)
        {
            this.Code = code;
        }

        /// <summary>Returns a short description containing the code.</summary>
        public override string ToString()
        {
            return $"<Code> {Code}";
        }
    }
}

@ -0,0 +1,157 @@
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;
using Sprache;
using Tyrrrz.Extensions;
namespace DiscordChatExporter.Core.Markdown.Internal
{
    // The following parsing logic is meant to replicate Discord's markdown grammar as close as possible.
    // Each field below is a parser producing a single Node; the "Any*" fields combine them with Or(),
    // so the order in which alternatives are listed decides which pattern wins.
    internal static class Grammar
    {
        /* Formatting */

        // Capture until the earliest double asterisk not followed by an asterisk
        private static readonly Parser<Node> BoldFormattedNode =
            Parse.RegexMatch(new Regex("\\*\\*(.+?)\\*\\*(?!\\*)", RegexOptions.Singleline))
                .Select(m => new FormattedNode(m.Value, "**", TextFormatting.Bold, BuildTree(m.Groups[1].Value)));

        // Capture until the earliest single asterisk not preceded or followed by an asterisk
        // Can't have whitespace right after opening or right before closing asterisk
        private static readonly Parser<Node> ItalicFormattedNode =
            Parse.RegexMatch(new Regex("\\*(?!\\s)(.+?)(?<!\\s|\\*)\\*(?!\\*)", RegexOptions.Singleline))
                .Select(m => new FormattedNode(m.Value, "*", TextFormatting.Italic, BuildTree(m.Groups[1].Value)));

        // Can't have underscores inside
        // Can't have word characters right after closing underscore
        private static readonly Parser<Node> ItalicAltFormattedNode =
            Parse.RegexMatch(new Regex("_([^_]+?)_(?!\\w)", RegexOptions.Singleline))
                .Select(m => new FormattedNode(m.Value, "_", TextFormatting.Italic, BuildTree(m.Groups[1].Value)));

        // Treated as a separate entity for simplicity
        // Capture until the earliest triple asterisk not preceded or followed by an asterisk
        // The captured group keeps the inner "**...**", so the recursive parse yields a bold child
        private static readonly Parser<Node> ItalicBoldFormattedNode =
            Parse.RegexMatch(new Regex("\\*(\\*\\*(?:.+?)\\*\\*)\\*(?!\\*)", RegexOptions.Singleline))
                .Select(m => new FormattedNode(m.Value, "*", TextFormatting.Italic, BuildTree(m.Groups[1].Value)));

        // Capture until the earliest double underscore not followed by an underscore
        private static readonly Parser<Node> UnderlineFormattedNode =
            Parse.RegexMatch(new Regex("__(.+?)__(?!_)", RegexOptions.Singleline))
                .Select(m => new FormattedNode(m.Value, "__", TextFormatting.Underline, BuildTree(m.Groups[1].Value)));

        // Treated as a separate entity for simplicity
        // Capture until the earliest triple underscore not preceded or followed by an underscore
        // The captured group keeps the inner "__...__", so the recursive parse yields an underline child
        private static readonly Parser<Node> ItalicUnderlineFormattedNode =
            Parse.RegexMatch(new Regex("_(__(?:.+?)__)_(?!_)", RegexOptions.Singleline))
                .Select(m => new FormattedNode(m.Value, "_", TextFormatting.Italic, BuildTree(m.Groups[1].Value)));

        // Strikethrough is safe
        private static readonly Parser<Node> StrikethroughFormattedNode =
            Parse.RegexMatch(new Regex("~~(.+?)~~", RegexOptions.Singleline))
                .Select(m => new FormattedNode(m.Value, "~~", TextFormatting.Strikethrough, BuildTree(m.Groups[1].Value)));

        // Spoiler is safe
        private static readonly Parser<Node> SpoilerFormattedNode =
            Parse.RegexMatch(new Regex("\\|\\|(.+?)\\|\\|", RegexOptions.Singleline))
                .Select(m => new FormattedNode(m.Value, "||", TextFormatting.Spoiler, BuildTree(m.Groups[1].Value)));

        // Aggregator, order matters
        private static readonly Parser<Node> AnyFormattedNode =
            ItalicBoldFormattedNode.Or(ItalicUnderlineFormattedNode)
                .Or(BoldFormattedNode).Or(ItalicFormattedNode)
                .Or(UnderlineFormattedNode).Or(ItalicAltFormattedNode)
                .Or(StrikethroughFormattedNode).Or(SpoilerFormattedNode);

        /* Code blocks */

        // Can't have backticks inside and surrounding whitespace is trimmed
        private static readonly Parser<Node> InlineCodeBlockNode =
            Parse.RegexMatch(new Regex("`\\s*([^`]+?)\\s*`", RegexOptions.Singleline))
                .Select(m => new InlineCodeBlockNode(m.Value, m.Groups[1].Value));

        // The first word is a language identifier if it's the only word followed by a newline, the rest is code
        // The code is captured lazily so the block ends at the first closing fence;
        // a greedy capture would merge two separate code blocks in the same message into one
        private static readonly Parser<Node> MultilineCodeBlockNode =
            Parse.RegexMatch(new Regex("```(?:(\\w*?)?(?:\\s*?\\n))?(.+?)```", RegexOptions.Singleline))
                .Select(m => new MultilineCodeBlockNode(m.Value, m.Groups[1].Value, m.Groups[2].Value));

        // Aggregator, order matters
        private static readonly Parser<Node> AnyCodeBlockNode = MultilineCodeBlockNode.Or(InlineCodeBlockNode);

        /* Mentions */

        // @everyone or @here
        private static readonly Parser<Node> MetaMentionNode = Parse.RegexMatch("@(everyone|here)")
            .Select(m => new MentionNode(m.Value, m.Groups[1].Value, MentionType.Meta));

        // <@123456> or <@!123456>
        private static readonly Parser<Node> UserMentionNode = Parse.RegexMatch("<@!?(\\d+)>")
            .Select(m => new MentionNode(m.Value, m.Groups[1].Value, MentionType.User));

        // <#123456>
        private static readonly Parser<Node> ChannelMentionNode = Parse.RegexMatch("<#(\\d+)>")
            .Select(m => new MentionNode(m.Value, m.Groups[1].Value, MentionType.Channel));

        // <@&123456>
        private static readonly Parser<Node> RoleMentionNode = Parse.RegexMatch("<@&(\\d+)>")
            .Select(m => new MentionNode(m.Value, m.Groups[1].Value, MentionType.Role));

        // Aggregator, order matters
        private static readonly Parser<Node> AnyMentionNode =
            MetaMentionNode.Or(UserMentionNode).Or(ChannelMentionNode).Or(RoleMentionNode);

        /* Emojis */

        // <:lul:123456> or <a:lul:123456>
        // The name is captured lazily so that two emojis on the same line are not merged into one node
        private static readonly Parser<Node> EmojiNode = Parse.RegexMatch("<(a)?:(.+?):(\\d+)>")
            .Select(m => new EmojiNode(m.Value, m.Groups[3].Value, m.Groups[2].Value, m.Groups[1].Value.IsNotBlank()));

        // Aggregator, order matters
        private static readonly Parser<Node> AnyEmojiNode = EmojiNode;

        /* Links */

        // [title](link)
        // Title and link are captured lazily so that two titled links on the same line stay separate
        private static readonly Parser<Node> TitledLinkNode = Parse.RegexMatch("\\[(.+?)\\]\\((.+?)\\)")
            .Select(m => new LinkNode(m.Value, m.Groups[2].Value, m.Groups[1].Value));

        // Starts with http:// or https://, stops at the last non-whitespace character followed by whitespace or punctuation character
        private static readonly Parser<Node> AutoLinkNode = Parse.RegexMatch("(https?://\\S*[^\\.,:;\"\'\\s])")
            .Select(m => new LinkNode(m.Value, m.Groups[1].Value));

        // Autolink surrounded by angular brackets
        private static readonly Parser<Node> HiddenLinkNode = Parse.RegexMatch("<(https?://\\S*[^\\.,:;\"\'\\s])>")
            .Select(m => new LinkNode(m.Value, m.Groups[1].Value));

        // Aggregator, order matters
        private static readonly Parser<Node> AnyLinkNode = TitledLinkNode.Or(HiddenLinkNode).Or(AutoLinkNode);

        /* Text */

        // Shrug is an exception and needs to be exempt from formatting
        private static readonly Parser<Node> ShrugTextNode =
            Parse.String("¯\\_(ツ)_/¯").Text().Select(s => new TextNode(s));

        // Backslash escapes any following non-whitespace character except for digits and latin letters
        // The lexeme keeps the backslash, the text is only the escaped character
        private static readonly Parser<Node> EscapedTextNode =
            Parse.RegexMatch("\\\\([^a-zA-Z0-9\\s])").Select(m => new TextNode(m.Value, m.Groups[1].Value));

        // Aggregator, order matters
        private static readonly Parser<Node> AnyTextNode = ShrugTextNode.Or(EscapedTextNode);

        /* Aggregator and fallback */

        // Any node recognized by above patterns
        private static readonly Parser<Node> AnyRecognizedNode = AnyFormattedNode.Or(AnyCodeBlockNode)
            .Or(AnyMentionNode).Or(AnyEmojiNode).Or(AnyLinkNode).Or(AnyTextNode);

        // Any node not recognized by above patterns (treated as plain text)
        // Consumes characters one at a time for as long as no recognized node starts at the current position
        private static readonly Parser<Node> FallbackNode =
            Parse.AnyChar.Except(AnyRecognizedNode).AtLeastOnce().Text().Select(s => new TextNode(s));

        // Any node
        private static readonly Parser<Node> AnyNode = AnyRecognizedNode.Or(FallbackNode);

        // Entry point: parses the input into a flat list of top-level nodes.
        // Formatted nodes call back into this method to parse their inner text into children.
        public static IReadOnlyList<Node> BuildTree(string input) => AnyNode.Many().Parse(input).ToArray();
    }
}

@ -0,0 +1,22 @@
namespace DiscordChatExporter.Core.Markdown
{
    /// <summary>
    /// Markdown node for a hyperlink, with a target URL and a display title.
    /// </summary>
    public class LinkNode : Node
    {
        /// <summary>Target of the link, exactly as supplied to the constructor.</summary>
        public string Url { get; }

        /// <summary>Display title of the link; equals <see cref="Url"/> when no title was supplied.</summary>
        public string Title { get; }

        /// <summary>
        /// Creates a link node whose title is the URL itself.
        /// </summary>
        /// <param name="lexeme">Source text this node was created from; stored by the base class.</param>
        /// <param name="url">Target of the link, also used as the title.</param>
        public LinkNode(string lexeme, string url)
            : this(lexeme, url, url)
        {
        }

        /// <summary>
        /// Creates a link node with an explicit title.
        /// </summary>
        /// <param name="lexeme">Source text this node was created from; stored by the base class.</param>
        /// <param name="url">Target of the link.</param>
        /// <param name="title">Display title of the link.</param>
        public LinkNode(string lexeme, string url, string title)
            : base(lexeme)
        {
            this.Title = title;
            this.Url = url;
        }

        /// <summary>Returns a short description containing the link title.</summary>
        public override string ToString()
        {
            return $"<Link> {Title}";
        }
    }
}

@ -0,0 +1,10 @@
using System.Collections.Generic;
using DiscordChatExporter.Core.Markdown.Internal;
namespace DiscordChatExporter.Core.Markdown
{
    /// <summary>
    /// Public entry point of the markdown parser.
    /// </summary>
    public static class MarkdownParser
    {
        /// <summary>
        /// Parses the given text into a list of markdown nodes by delegating to <c>Grammar.BuildTree</c>.
        /// </summary>
        /// <param name="input">Text to parse.</param>
        /// <returns>The nodes produced by the grammar for the input.</returns>
        public static IReadOnlyList<Node> Parse(string input)
        {
            var nodes = Grammar.BuildTree(input);
            return nodes;
        }
    }
}

@ -0,0 +1,18 @@
namespace DiscordChatExporter.Core.Markdown
{
    /// <summary>
    /// Markdown node for a mention; <see cref="Type"/> tells what kind of entity is mentioned.
    /// </summary>
    public class MentionNode : Node
    {
        /// <summary>Identifier of the mentioned entity, exactly as supplied to the constructor.</summary>
        public string Id { get; }

        /// <summary>Kind of mention.</summary>
        public MentionType Type { get; }

        /// <summary>
        /// Creates a mention node.
        /// </summary>
        /// <param name="lexeme">Source text this node was created from; stored by the base class.</param>
        /// <param name="id">Identifier of the mentioned entity.</param>
        /// <param name="type">Kind of mention.</param>
        public MentionNode(string lexeme, string id, MentionType type)
            : base(lexeme)
        {
            this.Type = type;
            this.Id = id;
        }

        /// <summary>Returns a short description containing the mention type and the identifier.</summary>
        public override string ToString()
        {
            return $"<{Type} mention> {Id}";
        }
    }
}

@ -0,0 +1,10 @@
namespace DiscordChatExporter.Core.Markdown
{
    /// <summary>
    /// Kinds of mention a <c>MentionNode</c> can carry.
    /// </summary>
    public enum MentionType
    {
        /// <summary>Meta mention.</summary>
        Meta = 0,

        /// <summary>User mention.</summary>
        User = 1,

        /// <summary>Channel mention.</summary>
        Channel = 2,

        /// <summary>Role mention.</summary>
        Role = 3
    }
}

@ -0,0 +1,18 @@
namespace DiscordChatExporter.Core.Markdown
{
    /// <summary>
    /// Markdown node for a code block, carrying the code and a language value.
    /// </summary>
    public class MultilineCodeBlockNode : Node
    {
        /// <summary>Language value, exactly as supplied to the constructor.</summary>
        public string Language { get; }

        /// <summary>Code carried by this node, exactly as supplied to the constructor.</summary>
        public string Code { get; }

        /// <summary>
        /// Creates a multiline code block node.
        /// </summary>
        /// <param name="lexeme">Source text this node was created from; stored by the base class.</param>
        /// <param name="language">Language value of the block.</param>
        /// <param name="code">Code content of the block.</param>
        public MultilineCodeBlockNode(string lexeme, string language, string code)
            : base(lexeme)
        {
            this.Code = code;
            this.Language = language;
        }

        /// <summary>Returns a short description containing the language and the code.</summary>
        public override string ToString()
        {
            return $"<Code [{Language}]> {Code}";
        }
    }
}

@ -0,0 +1,12 @@
namespace DiscordChatExporter.Core.Markdown
{
    /// <summary>
    /// Base type of all markdown nodes; it only stores the lexeme handed to it by the derived node.
    /// </summary>
    public abstract class Node
    {
        /// <summary>Lexeme of the node, exactly as supplied to the constructor.</summary>
        public string Lexeme { get; }

        /// <summary>
        /// Initializes the node with its lexeme.
        /// </summary>
        /// <param name="lexeme">Lexeme to store.</param>
        protected Node(string lexeme) => Lexeme = lexeme;
    }
}

@ -0,0 +1,11 @@
namespace DiscordChatExporter.Core.Markdown
{
    /// <summary>
    /// Kinds of text formatting a <c>FormattedNode</c> can represent.
    /// </summary>
    public enum TextFormatting
    {
        /// <summary>Bold text.</summary>
        Bold = 0,

        /// <summary>Italic text.</summary>
        Italic = 1,

        /// <summary>Underlined text.</summary>
        Underline = 2,

        /// <summary>Struck-through text.</summary>
        Strikethrough = 3,

        /// <summary>Spoiler text.</summary>
        Spoiler = 4
    }
}

@ -0,0 +1,19 @@
namespace DiscordChatExporter.Core.Markdown
{
    /// <summary>
    /// Markdown node for plain text. The text may differ from the lexeme it was created from.
    /// </summary>
    public class TextNode : Node
    {
        /// <summary>Text carried by this node, exactly as supplied to the constructor.</summary>
        public string Text { get; }

        /// <summary>
        /// Creates a text node whose lexeme and text are the same string.
        /// </summary>
        /// <param name="text">Value used both as lexeme and as text.</param>
        public TextNode(string text)
            : this(text, text)
        {
        }

        /// <summary>
        /// Creates a text node with separate lexeme and text.
        /// </summary>
        /// <param name="lexeme">Source text this node was created from; stored by the base class.</param>
        /// <param name="text">Text carried by the node.</param>
        public TextNode(string lexeme, string text)
            : base(lexeme)
        {
            this.Text = text;
        }

        /// <summary>Returns the text of the node.</summary>
        public override string ToString()
        {
            return Text;
        }
    }
}

@ -2,18 +2,17 @@
<PropertyGroup>
<TargetFramework>net461</TargetFramework>
<Version>2.9.1</Version>
</PropertyGroup>
<ItemGroup>
<EmbeddedResource Include="Resources\ExportTemplates\PlainText.txt" />
<EmbeddedResource Include="Resources\ExportTemplates\HtmlDark.html" />
<EmbeddedResource Include="Resources\ExportTemplates\HtmlLight.html" />
<EmbeddedResource Include="Resources\ExportTemplates\Html\Core.html" />
<EmbeddedResource Include="Resources\ExportTemplates\Html\Shared.css" />
<EmbeddedResource Include="Resources\ExportTemplates\Html\DarkTheme.css" />
<EmbeddedResource Include="Resources\ExportTemplates\Html\LightTheme.css" />
<EmbeddedResource Include="Resources\ExportTemplates\Csv.csv" />
<EmbeddedResource Include="Resources\ExportTemplates\PlainText\Template.txt" />
<EmbeddedResource Include="Resources\ExportTemplates\HtmlDark\Template.html" />
<EmbeddedResource Include="Resources\ExportTemplates\HtmlLight\Template.html" />
<EmbeddedResource Include="Resources\ExportTemplates\HtmlShared\Main.html" />
<EmbeddedResource Include="Resources\ExportTemplates\HtmlShared\Main.css" />
<EmbeddedResource Include="Resources\ExportTemplates\HtmlDark\Theme.css" />
<EmbeddedResource Include="Resources\ExportTemplates\HtmlLight\Theme.css" />
<EmbeddedResource Include="Resources\ExportTemplates\Csv\Template.csv" />
</ItemGroup>
<ItemGroup>
@ -29,4 +28,8 @@
<PackageReference Include="Tyrrrz.Settings" Version="1.3.3" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\DiscordChatExporter.Core.Markdown\DiscordChatExporter.Core.Markdown.csproj" />
</ItemGroup>
</Project>

@ -1,6 +1,6 @@
using System;
using System.Drawing;
using Tyrrrz.Extensions;
using System.Net;
namespace DiscordChatExporter.Core.Internal
{
@ -14,10 +14,8 @@ namespace DiscordChatExporter.Core.Internal
return value.ToString();
}
public static string Base64Encode(this string str) => str.GetBytes().ToBase64();
public static string Base64Decode(this string str) => str.FromBase64().GetString();
public static Color ResetAlpha(this Color color) => Color.FromArgb(1, color);
public static string HtmlEncode(this string value) => WebUtility.HtmlEncode(value);
}
}

@ -4,7 +4,7 @@
{{- }}"{{ message.Timestamp | FormatDate }}";
{{- }}"{{ message.Content | FormatContent }}";
{{- }}"{{ message.Content | FormatMarkdown | string.replace "\"" "\"\"" }}";
{{- }}"{{ message.Attachments | array.map "Url" | array.join "," }}";
{{~ end -}}
Can't render this file because it has a wrong number of fields in line 2.

@ -1,7 +0,0 @@
{{
$SharedStyleSheet = include "Html.Shared.css"
$ThemeStyleSheet = include "Html.DarkTheme.css"
StyleSheet = $SharedStyleSheet + "\n" + $ThemeStyleSheet
}}
{{ include "Html.Core.html" }}

@ -0,0 +1,2 @@
{{~ ThemeStyleSheet = include "HtmlDark.Theme.css" ~}}
{{~ include "HtmlShared.Main.html" ~}}

@ -2,13 +2,17 @@
body {
background-color: #36393e;
color: #ffffffb3;
color: #dcddde;
}
a {
color: #0096cf;
}
.spoiler {
background-color: rgba(255, 255, 255, 0.1);
}
.pre {
background-color: #2f3136;
}
@ -19,7 +23,6 @@ a {
}
.mention {
background-color: #738bd71a;
color: #7289da;
}
@ -40,7 +43,7 @@ a {
/* === CHATLOG === */
.chatlog__message-group {
border-color: #ffffff0a;
border-color: rgba(255, 255, 255, 0.1);
}
.chatlog__author-name {
@ -48,16 +51,16 @@ a {
}
.chatlog__timestamp {
color: #ffffff33;
color: rgba(255, 255, 255, 0.2);
}
.chatlog__edited-timestamp {
color: #ffffff33;
color: rgba(255, 255, 255, 0.2);
}
.chatlog__embed-content-container {
background-color: #2e30364d;
border-color: #2e303699;
background-color: rgba(46, 48, 54, 0.3);
border-color: rgba(46, 48, 54, 0.6);
}
.chatlog__embed-author-name {
@ -73,7 +76,7 @@ a {
}
.chatlog__embed-description {
color: #ffffff99;
color: rgba(255, 255, 255, 0.6);
}
.chatlog__embed-field-name {
@ -81,17 +84,17 @@ a {
}
.chatlog__embed-field-value {
color: #ffffff99;
color: rgba(255, 255, 255, 0.6);
}
.chatlog__embed-footer {
color: #ffffff99;
color: rgba(255, 255, 255, 0.6);
}
.chatlog__reaction {
background-color: #ffffff0a;
background-color: rgba(255, 255, 255, 0.05);
}
.chatlog__reaction-count {
color: #ffffff4d;
color: rgba(255, 255, 255, 0.3);
}

@ -1,7 +0,0 @@
{{
$SharedStyleSheet = include "Html.Shared.css"
$ThemeStyleSheet = include "Html.LightTheme.css"
StyleSheet = $SharedStyleSheet + "\n" + $ThemeStyleSheet
}}
{{ include "Html.Core.html" }}

@ -0,0 +1,2 @@
{{~ ThemeStyleSheet = include "HtmlLight.Theme.css" ~}}
{{~ include "HtmlShared.Main.html" ~}}

@ -2,13 +2,17 @@
body {
background-color: #ffffff;
color: #737f8d;
color: #747f8d;
}
a {
color: #00b0f4;
}
.spoiler {
background-color: rgba(0, 0, 0, 0.1);
}
.pre {
background-color: #f9f9f9;
}
@ -56,8 +60,8 @@ a {
}
.chatlog__embed-content-container {
background-color: #f9f9f94d;
border-color: #cccccc4d;
background-color: rgba(249, 249, 249, 0.3);
border-color: rgba(204, 204, 204, 0.3);
}
.chatlog__embed-author-name {
@ -85,11 +89,11 @@ a {
}
.chatlog__embed-footer {
color: #4f535b99;
color: rgba(79, 83, 91, 0.4);
}
.chatlog__reaction {
background-color: #4f545c0f;
background-color: rgba(79, 84, 92, 0.06);
}
.chatlog__reaction-count {

@ -17,9 +17,17 @@ img {
object-fit: contain;
}
.markdown {
white-space: pre-wrap;
line-height: 1.3;
}
.spoiler {
border-radius: 3px;
}
.pre {
font-family: "Consolas", "Courier New", Courier, Monospace;
white-space: pre-wrap;
}
.pre--multiline {
@ -34,6 +42,10 @@ img {
border-radius: 3px;
}
.mention {
font-weight: 500;
}
.emoji {
width: 24px;
height: 24px;
@ -51,10 +63,6 @@ img {
height: 32px;
}
.mention {
font-weight: 600;
}
/* === INFO === */
.info {
@ -130,6 +138,7 @@ img {
.chatlog__author-name {
font-size: 1em;
font-weight: 500;
}
.chatlog__timestamp {
@ -144,7 +153,7 @@ img {
}
.chatlog__edited-timestamp {
margin-left: 5px;
margin-left: 3px;
font-size: .8em;
}

@ -6,7 +6,10 @@
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width" />
<style>
{{ StyleSheet }}
{{ include "HtmlShared.Main.css" }}
</style>
<style>
{{ ThemeStyleSheet }}
</style>
</head>
<body>
@ -58,7 +61,7 @@
{{~ # Content ~}}
{{~ if message.Content ~}}
<div class="chatlog__content">
{{ message.Content | FormatContent }}
<span class="markdown">{{ message.Content | FormatMarkdown }}</span>
{{~ # Edited timestamp ~}}
{{~ if message.EditedTimestamp ~}}
@ -85,7 +88,7 @@
{{~ # Embeds ~}}
{{~ for embed in message.Embeds ~}}
<div class="chatlog__embed">
<div class="chatlog__embed-color-pill" style="background-color: rgba({{ embed.Color | FormatColor }})"></div>
<div class="chatlog__embed-color-pill" style="background-color: rgba({{ embed.Color.R }},{{ embed.Color.G }},{{ embed.Color.B }},{{ embed.Color.A }})"></div>
<div class="chatlog__embed-content-container">
<div class="chatlog__embed-content">
<div class="chatlog__embed-text">
@ -112,16 +115,16 @@
{{~ if embed.Title ~}}
<div class="chatlog__embed-title">
{{~ if embed.Url ~}}
<a class="chatlog__embed-title-link" href="{{ embed.Url }}">{{ embed.Title | FormatContent }}</a>
<a class="chatlog__embed-title-link" href="{{ embed.Url }}"><span class="markdown">{{ embed.Title | FormatMarkdown }}</span></a>
{{~ else ~}}
{{ embed.Title | FormatContent }}
<span class="markdown">{{ embed.Title | FormatMarkdown }}</span>
{{~ end ~}}
</div>
{{~ end ~}}
{{~ # Description ~}}
{{~ if embed.Description ~}}
<div class="chatlog__embed-description">{{ embed.Description | FormatContent true }}</div>
<div class="chatlog__embed-description"><span class="markdown">{{ embed.Description | FormatMarkdown }}</span></div>
{{~ end ~}}
{{~ # Fields ~}}
@ -130,10 +133,10 @@
{{~ for field in embed.Fields ~}}
<div class="chatlog__embed-field {{ if field.IsInline }} chatlog__embed-field--inline {{ end }}">
{{~ if field.Name ~}}
<div class="chatlog__embed-field-name">{{ field.Name | FormatContent }}</div>
<div class="chatlog__embed-field-name"><span class="markdown">{{ field.Name | FormatMarkdown }}</span></div>
{{~ end ~}}
{{~ if field.Value ~}}
<div class="chatlog__embed-field-value">{{ field.Value | FormatContent true }}</div>
<div class="chatlog__embed-field-value"><span class="markdown">{{ field.Value | FormatMarkdown }}</span></div>
{{~ end ~}}
</div>
{{~ end ~}}

@ -12,7 +12,7 @@ Range: {{ if Model.From }}{{ Model.From | FormatDate }} {{ end }}{{ if Model.
{{~ # Author name and timestamp ~}}
{{~ }}[{{ message.Timestamp | FormatDate }}] {{ message.Author.FullName }}
{{~ # Content ~}}
{{~ message.Content | FormatContent }}
{{~ message.Content | FormatMarkdown }}
{{~ # Attachments ~}}
{{~ for attachment in message.Attachments ~}}
{{~ attachment.Url }}

@ -20,7 +20,7 @@ namespace DiscordChatExporter.Core.Services
public string GetPath(ExportFormat format)
{
return $"{ResourceRootNamespace}.{format}.{format.GetFileExtension()}";
return $"{ResourceRootNamespace}.{format}.Template.{format.GetFileExtension()}";
}
public string Load(TemplateContext context, SourceSpan callerSpan, string templatePath)

@ -1,11 +1,10 @@
using System;
using System.Collections.Generic;
using System.Drawing;
using System.Globalization;
using System.Linq;
using System.Net;
using System.Text.RegularExpressions;
using System.Text;
using DiscordChatExporter.Core.Internal;
using DiscordChatExporter.Core.Markdown;
using DiscordChatExporter.Core.Models;
using Scriban.Runtime;
using Tyrrrz.Extensions;
@ -73,8 +72,6 @@ namespace DiscordChatExporter.Core.Services
}
}
private string HtmlEncode(string str) => WebUtility.HtmlEncode(str);
private string Format(IFormattable obj, string format) =>
obj.ToString(format, CultureInfo.InvariantCulture);
@ -95,254 +92,150 @@ namespace DiscordChatExporter.Core.Services
return $"{size:0.#} {units[unit]}";
}
private string FormatColor(Color color)
private string FormatMarkdownPlainText(IEnumerable<Node> nodes)
{
return $"{color.R},{color.G},{color.B},{color.A}";
}
private string FormatContentPlainText(string content)
{
// New lines
content = content.Replace("\n", Environment.NewLine);
// User mentions (<@id> and <@!id>)
var mentionedUserIds = Regex.Matches(content, "<@!?(\\d+)>")
.Cast<Match>()
.Select(m => m.Groups[1].Value)
.ExceptBlank()
.ToArray();
var buffer = new StringBuilder();
foreach (var mentionedUserId in mentionedUserIds)
foreach (var node in nodes)
{
var mentionedUser = _log.Mentionables.GetUser(mentionedUserId);
content = Regex.Replace(content, $"<@!?{mentionedUserId}>", $"@{mentionedUser.FullName}");
}
// Channel mentions (<#id>)
var mentionedChannelIds = Regex.Matches(content, "<#(\\d+)>")
.Cast<Match>()
.Select(m => m.Groups[1].Value)
.ExceptBlank()
.ToArray();
if (node is FormattedNode formattedNode)
{
var innerText = FormatMarkdownPlainText(formattedNode.Children);
buffer.Append($"{formattedNode.Token}{innerText}{formattedNode.Token}");
}
foreach (var mentionedChannelId in mentionedChannelIds)
{
var mentionedChannel = _log.Mentionables.GetChannel(mentionedChannelId);
content = content.Replace($"<#{mentionedChannelId}>", $"#{mentionedChannel.Name}");
}
else if (node is MentionNode mentionNode && mentionNode.Type != MentionType.Meta)
{
if (mentionNode.Type == MentionType.User)
{
var user = _log.Mentionables.GetUser(mentionNode.Id);
buffer.Append($"@{user.Name}");
}
else if (mentionNode.Type == MentionType.Channel)
{
var channel = _log.Mentionables.GetChannel(mentionNode.Id);
buffer.Append($"#{channel.Name}");
}
else if (mentionNode.Type == MentionType.Role)
{
var role = _log.Mentionables.GetRole(mentionNode.Id);
buffer.Append($"@{role.Name}");
}
}
// Role mentions (<@&id>)
var mentionedRoleIds = Regex.Matches(content, "<@&(\\d+)>")
.Cast<Match>()
.Select(m => m.Groups[1].Value)
.ExceptBlank()
.ToArray();
else if (node is EmojiNode emojiNode)
{
buffer.Append($":{emojiNode.Name}:");
}
foreach (var mentionedRoleId in mentionedRoleIds)
{
var mentionedRole = _log.Mentionables.GetRole(mentionedRoleId);
content = content.Replace($"<@&{mentionedRoleId}>", $"@{mentionedRole.Name}");
else
{
buffer.Append(node.Lexeme);
}
}
// Custom emojis (<:name:id>)
content = Regex.Replace(content, "<(:.*?:)\\d*>", "$1");
return content;
return buffer.ToString();
}
private string FormatContentHtml(string content, bool allowLinks = false)
{
// HTML-encode content
content = HtmlEncode(content);
// Encode multiline codeblocks (```text```)
content = Regex.Replace(content,
@"```+(?:[^`]*?\n)?([^`]+)\n?```+",
m => $"\x1AM{m.Groups[1].Value.Base64Encode()}\x1AM");
private string FormatMarkdownPlainText(string input)
=> FormatMarkdownPlainText(MarkdownParser.Parse(input));
// Encode inline codeblocks (`text`)
content = Regex.Replace(content,
@"`([^`]+)`",
m => $"\x1AI{m.Groups[1].Value.Base64Encode()}\x1AI");
private string FormatMarkdownHtml(IEnumerable<Node> nodes)
{
var buffer = new StringBuilder();
// Encode links
if (allowLinks)
foreach (var node in nodes)
{
content = Regex.Replace(content, @"\[(.*?)\]\((.*?)\)",
m => $"\x1AL{m.Groups[1].Value.Base64Encode()}|{m.Groups[2].Value.Base64Encode()}\x1AL");
}
// Encode URLs
content = Regex.Replace(content,
@"(\b(?:(?:https?|ftp|file)://|www\.|ftp\.)(?:\([-a-zA-Z0-9+&@#/%?=~_|!:,\.\[\];]*\)|[-a-zA-Z0-9+&@#/%?=~_|!:,\.\[\];])*(?:\([-a-zA-Z0-9+&@#/%?=~_|!:,\.\[\];]*\)|[-a-zA-Z0-9+&@#/%=~_|$]))",
m => $"\x1AU{m.Groups[1].Value.Base64Encode()}\x1AU");
// Process bold (**text**)
content = Regex.Replace(content, @"(\*\*)(?=\S)(.+?[*_]*)(?<=\S)\1", "<b>$2</b>");
// Process underline (__text__)
content = Regex.Replace(content, @"(__)(?=\S)(.+?)(?<=\S)\1", "<u>$2</u>");
// Process italic (*text* or _text_)
content = Regex.Replace(content, @"(\*|_)(?=\S)(.+?)(?<=\S)\1", "<i>$2</i>");
if (node is TextNode textNode)
{
buffer.Append(textNode.Text.HtmlEncode());
}
// Process strike through (~~text~~)
content = Regex.Replace(content, @"(~~)(?=\S)(.+?)(?<=\S)\1", "<s>$2</s>");
else if (node is FormattedNode formattedNode)
{
var innerHtml = FormatMarkdownHtml(formattedNode.Children);
// Decode and process multiline codeblocks
content = Regex.Replace(content, "\x1AM(.*?)\x1AM",
m => $"<div class=\"pre pre--multiline\">{m.Groups[1].Value.Base64Decode()}</div>");
if (formattedNode.Formatting == TextFormatting.Bold)
buffer.Append($"<strong>{innerHtml}</strong>");
// Decode and process inline codeblocks
content = Regex.Replace(content, "\x1AI(.*?)\x1AI",
m => $"<span class=\"pre pre--inline\">{m.Groups[1].Value.Base64Decode()}</span>");
else if (formattedNode.Formatting == TextFormatting.Italic)
buffer.Append($"<em>{innerHtml}</em>");
// Decode and process links
if (allowLinks)
{
content = Regex.Replace(content, "\x1AL(.*?)\\|(.*?)\x1AL",
m => $"<a href=\"{m.Groups[2].Value.Base64Decode()}\">{m.Groups[1].Value.Base64Decode()}</a>");
}
else if (formattedNode.Formatting == TextFormatting.Underline)
buffer.Append($"<u>{innerHtml}</u>");
// Decode and process URLs
content = Regex.Replace(content, "\x1AU(.*?)\x1AU",
m => $"<a href=\"{m.Groups[1].Value.Base64Decode()}\">{m.Groups[1].Value.Base64Decode()}</a>");
else if (formattedNode.Formatting == TextFormatting.Strikethrough)
buffer.Append($"<s>{innerHtml}</s>");
// Process new lines
content = content.Replace("\n", "<br />");
// Meta mentions (@everyone)
content = content.Replace("@everyone", "<span class=\"mention\">@everyone</span>");
// Meta mentions (@here)
content = content.Replace("@here", "<span class=\"mention\">@here</span>");
else if (formattedNode.Formatting == TextFormatting.Spoiler)
buffer.Append($"<span class=\"spoiler\">{innerHtml}</span>");
}
// User mentions (<@id> and <@!id>)
var mentionedUserIds = Regex.Matches(content, "&lt;@!?(\\d+)&gt;")
.Cast<Match>()
.Select(m => m.Groups[1].Value)
.ExceptBlank()
.ToArray();
else if (node is InlineCodeBlockNode inlineCodeBlockNode)
{
buffer.Append($"<span class=\"pre pre--inline\">{inlineCodeBlockNode.Code.HtmlEncode()}</span>");
}
foreach (var mentionedUserId in mentionedUserIds)
{
var mentionedUser = _log.Mentionables.GetUser(mentionedUserId);
content = Regex.Replace(content, $"&lt;@!?{mentionedUserId}&gt;",
$"<span class=\"mention\" title=\"{HtmlEncode(mentionedUser.FullName)}\">" +
$"@{HtmlEncode(mentionedUser.Name)}" +
"</span>");
}
else if (node is MultilineCodeBlockNode multilineCodeBlockNode)
{
var languageCssClass = multilineCodeBlockNode.Language.IsNotBlank()
? "language-" + multilineCodeBlockNode.Language
: null;
// Channel mentions (<#id>)
var mentionedChannelIds = Regex.Matches(content, "&lt;#(\\d+)&gt;")
.Cast<Match>()
.Select(m => m.Groups[1].Value)
.ExceptBlank()
.ToArray();
buffer.Append(
$"<div class=\"pre pre--multiline {languageCssClass}\">{multilineCodeBlockNode.Code.HtmlEncode()}</div>");
}
foreach (var mentionedChannelId in mentionedChannelIds)
{
var mentionedChannel = _log.Mentionables.GetChannel(mentionedChannelId);
content = content.Replace($"&lt;#{mentionedChannelId}&gt;",
"<span class=\"mention\">" +
$"#{HtmlEncode(mentionedChannel.Name)}" +
"</span>");
}
else if (node is MentionNode mentionNode)
{
if (mentionNode.Type == MentionType.Meta)
{
buffer.Append($"<span class=\"mention\">@{mentionNode.Id.HtmlEncode()}</span>");
}
else if (mentionNode.Type == MentionType.User)
{
var user = _log.Mentionables.GetUser(mentionNode.Id);
buffer.Append($"<span class=\"mention\" title=\"{user.FullName}\">@{user.Name.HtmlEncode()}</span>");
}
else if (mentionNode.Type == MentionType.Channel)
{
var channel = _log.Mentionables.GetChannel(mentionNode.Id);
buffer.Append($"<span class=\"mention\">#{channel.Name.HtmlEncode()}</span>");
}
else if (mentionNode.Type == MentionType.Role)
{
var role = _log.Mentionables.GetRole(mentionNode.Id);
buffer.Append($"<span class=\"mention\">@{role.Name.HtmlEncode()}</span>");
}
}
// Role mentions (<@&id>)
var mentionedRoleIds = Regex.Matches(content, "&lt;@&amp;(\\d+)&gt;")
.Cast<Match>()
.Select(m => m.Groups[1].Value)
.ExceptBlank()
.ToArray();
else if (node is EmojiNode emojiNode)
{
buffer.Append($"<img class=\"emoji\" title=\"{emojiNode.Name}\" src=\"https://cdn.discordapp.com/emojis/{emojiNode.Id}.png\" />");
}
foreach (var mentionedRoleId in mentionedRoleIds)
{
var mentionedRole = _log.Mentionables.GetRole(mentionedRoleId);
content = content.Replace($"&lt;@&amp;{mentionedRoleId}&gt;",
"<span class=\"mention\">" +
$"@{HtmlEncode(mentionedRole.Name)}" +
"</span>");
else if (node is LinkNode linkNode)
{
buffer.Append($"<a href=\"{Uri.EscapeUriString(linkNode.Url)}\">{linkNode.Title.HtmlEncode()}</a>");
}
}
// Custom emojis (<:name:id>)
var isJumboable = Regex.Replace(content, "&lt;(:.*?:)(\\d*)&gt;", "").IsBlank();
var emojiClass = isJumboable ? "emoji emoji--large" : "emoji";
content = Regex.Replace(content, "&lt;(:.*?:)(\\d*)&gt;",
$"<img class=\"{emojiClass}\" title=\"$1\" src=\"https://cdn.discordapp.com/emojis/$2.png\" />");
return content;
return buffer.ToString();
}
private string FormatContentCsv(string content)
{
// Escape quotes
content = content.Replace("\"", "\"\"");
// Escape commas and semicolons
if (content.Contains(",") || content.Contains(";"))
content = $"\"{content}\"";
// User mentions (<@id> and <@!id>)
var mentionedUserIds = Regex.Matches(content, "<@!?(\\d+)>")
.Cast<Match>()
.Select(m => m.Groups[1].Value)
.ExceptBlank()
.ToArray();
private string FormatMarkdownHtml(string input)
=> FormatMarkdownHtml(MarkdownParser.Parse(input));
foreach (var mentionedUserId in mentionedUserIds)
{
var mentionedUser = _log.Mentionables.GetUser(mentionedUserId);
content = Regex.Replace(content, $"<@!?{mentionedUserId}>", $"@{mentionedUser.FullName}");
}
// Channel mentions (<#id>)
var mentionedChannelIds = Regex.Matches(content, "<#(\\d+)>")
.Cast<Match>()
.Select(m => m.Groups[1].Value)
.ExceptBlank()
.ToArray();
foreach (var mentionedChannelId in mentionedChannelIds)
{
var mentionedChannel = _log.Mentionables.GetChannel(mentionedChannelId);
content = content.Replace($"<#{mentionedChannelId}>", $"#{mentionedChannel.Name}");
}
// Role mentions (<@&id>)
var mentionedRoleIds = Regex.Matches(content, "<@&(\\d+)>")
.Cast<Match>()
.Select(m => m.Groups[1].Value)
.ExceptBlank()
.ToArray();
foreach (var mentionedRoleId in mentionedRoleIds)
{
var mentionedRole = _log.Mentionables.GetRole(mentionedRoleId);
content = content.Replace($"<@&{mentionedRoleId}>", $"@{mentionedRole.Name}");
}
// Custom emojis (<:name:id>)
content = Regex.Replace(content, "<(:.*?:)\\d*>", "$1");
return content;
}
private string FormatContent(string content, bool allowLinks = false)
private string FormatMarkdown(string input)
{
if (_format == ExportFormat.PlainText)
return FormatContentPlainText(content);
if (_format == ExportFormat.HtmlDark)
return FormatContentHtml(content, allowLinks);
if (_format == ExportFormat.HtmlLight)
return FormatContentHtml(content, allowLinks);
if (_format == ExportFormat.Csv)
return FormatContentCsv(content);
throw new ArgumentOutOfRangeException(nameof(_format));
return _format == ExportFormat.HtmlDark || _format == ExportFormat.HtmlLight
? FormatMarkdownHtml(input)
: FormatMarkdownPlainText(input);
}
public ScriptObject GetScriptObject()
@ -350,7 +243,7 @@ namespace DiscordChatExporter.Core.Services
// Create instance
var scriptObject = new ScriptObject();
// Import chat log
// Import model
scriptObject.SetValue("Model", _log, true);
// Import functions
@ -358,8 +251,7 @@ namespace DiscordChatExporter.Core.Services
scriptObject.Import(nameof(Format), new Func<IFormattable, string, string>(Format));
scriptObject.Import(nameof(FormatDate), new Func<DateTime, string>(FormatDate));
scriptObject.Import(nameof(FormatFileSize), new Func<long, string>(FormatFileSize));
scriptObject.Import(nameof(FormatColor), new Func<Color, string>(FormatColor));
scriptObject.Import(nameof(FormatContent), new Func<string, bool, string>(FormatContent));
scriptObject.Import(nameof(FormatMarkdown), new Func<string, string>(FormatMarkdown));
return scriptObject;
}

@ -16,6 +16,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DiscordChatExporter.Core",
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DiscordChatExporter.Cli", "DiscordChatExporter.Cli\DiscordChatExporter.Cli.csproj", "{D08624B6-3081-4BCB-91F8-E9832FACC6CE}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DiscordChatExporter.Core.Markdown", "DiscordChatExporter.Core.Markdown\DiscordChatExporter.Core.Markdown.csproj", "{14D02A08-E820-4012-B805-663B9A3D73E9}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
@ -34,6 +36,10 @@ Global
{D08624B6-3081-4BCB-91F8-E9832FACC6CE}.Debug|Any CPU.Build.0 = Debug|Any CPU
{D08624B6-3081-4BCB-91F8-E9832FACC6CE}.Release|Any CPU.ActiveCfg = Release|Any CPU
{D08624B6-3081-4BCB-91F8-E9832FACC6CE}.Release|Any CPU.Build.0 = Release|Any CPU
{14D02A08-E820-4012-B805-663B9A3D73E9}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{14D02A08-E820-4012-B805-663B9A3D73E9}.Debug|Any CPU.Build.0 = Debug|Any CPU
{14D02A08-E820-4012-B805-663B9A3D73E9}.Release|Any CPU.ActiveCfg = Release|Any CPU
{14D02A08-E820-4012-B805-663B9A3D73E9}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE

Loading…
Cancel
Save