Implement a more sophisticated markdown parsing engine (#145)

pull/162/head
Alexey Golub 6 years ago committed by GitHub
parent 88727a1fe6
commit f09f30c7bd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -0,0 +1,12 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Project file for the markdown parsing library. -->
<PropertyGroup>
<!-- Built against .NET Framework 4.6.1. -->
<TargetFramework>net461</TargetFramework>
</PropertyGroup>
<ItemGroup>
<!-- Sprache supplies the Parse/Parser<T> combinators used by the grammar. -->
<PackageReference Include="Sprache" Version="2.2.0" />
<!-- Tyrrrz.Extensions supplies string helpers such as IsNotBlank(). -->
<PackageReference Include="Tyrrrz.Extensions" Version="1.5.1" />
</ItemGroup>
</Project>

@ -0,0 +1,21 @@
namespace DiscordChatExporter.Core.Markdown
{
    /// <summary>
    /// Markdown node describing a custom emoji.
    /// </summary>
    public class EmojiNode : Node
    {
        /// <summary>Identifier of the emoji.</summary>
        public string Id { get; }

        /// <summary>Name of the emoji.</summary>
        public string Name { get; }

        /// <summary>Whether the emoji is animated.</summary>
        public bool IsAnimated { get; }

        /// <summary>
        /// Creates an emoji node.
        /// </summary>
        /// <param name="lexeme">Source text this node was produced from.</param>
        /// <param name="id">Identifier of the emoji.</param>
        /// <param name="name">Name of the emoji.</param>
        /// <param name="isAnimated">Whether the emoji is animated.</param>
        public EmojiNode(string lexeme, string id, string name, bool isAnimated)
            : base(lexeme)
        {
            IsAnimated = isAnimated;
            Name = name;
            Id = id;
        }

        /// <summary>Returns a short diagnostic description of the node.</summary>
        public override string ToString()
        {
            return "<Emoji> " + Name;
        }
    }
}

@ -0,0 +1,23 @@
using System.Collections.Generic;
namespace DiscordChatExporter.Core.Markdown
{
    /// <summary>
    /// Markdown node that applies a text formatting to a list of child nodes.
    /// </summary>
    public class FormattedNode : Node
    {
        /// <summary>Markup token associated with this formatting (for example <c>**</c>).</summary>
        public string Token { get; }

        /// <summary>Kind of formatting applied to the children.</summary>
        public TextFormatting Formatting { get; }

        /// <summary>Nodes nested directly inside this formatting.</summary>
        public IReadOnlyList<Node> Children { get; }

        /// <summary>
        /// Creates a formatted node.
        /// </summary>
        /// <param name="lexeme">Source text this node was produced from.</param>
        /// <param name="token">Markup token associated with the formatting.</param>
        /// <param name="formatting">Kind of formatting.</param>
        /// <param name="children">Nodes nested directly inside the formatting.</param>
        public FormattedNode(string lexeme, string token, TextFormatting formatting, IReadOnlyList<Node> children)
            : base(lexeme)
        {
            Children = children;
            Formatting = formatting;
            Token = token;
        }

        /// <summary>Returns a short diagnostic description of the node.</summary>
        public override string ToString()
        {
            var directChildCount = Children.Count;
            return $"<{Formatting}> ({directChildCount} direct children)";
        }
    }
}

@ -0,0 +1,15 @@
namespace DiscordChatExporter.Core.Markdown
{
    /// <summary>
    /// Markdown node describing an inline code span.
    /// </summary>
    public class InlineCodeBlockNode : Node
    {
        /// <summary>Code carried by the span.</summary>
        public string Code { get; }

        /// <summary>
        /// Creates an inline code node.
        /// </summary>
        /// <param name="lexeme">Source text this node was produced from.</param>
        /// <param name="code">Code carried by the span.</param>
        public InlineCodeBlockNode(string lexeme, string code)
            : base(lexeme)
        {
            Code = code;
        }

        /// <summary>Returns a short diagnostic description of the node.</summary>
        public override string ToString()
        {
            return "<Code> " + Code;
        }
    }
}

@ -0,0 +1,157 @@
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;
using Sprache;
using Tyrrrz.Extensions;
namespace DiscordChatExporter.Core.Markdown.Internal
{
    /// <summary>
    /// Parser definitions that turn a markdown string into a tree of <see cref="Node"/> instances.
    /// Every parser is tried at the current input position; formatted nodes parse their
    /// inner text recursively through <see cref="BuildTree"/>.
    /// </summary>
    // The following parsing logic is meant to replicate Discord's markdown grammar as close as possible
    internal static class Grammar
    {
        /* Formatting */

        // Capture until the earliest double asterisk not followed by an asterisk
        private static readonly Parser<Node> BoldFormattedNode =
            Parse.RegexMatch(new Regex("\\*\\*(.+?)\\*\\*(?!\\*)", RegexOptions.Singleline))
                .Select(m => new FormattedNode(m.Value, "**", TextFormatting.Bold, BuildTree(m.Groups[1].Value)));

        // Capture until the earliest single asterisk not preceded or followed by an asterisk
        // Can't have whitespace right after opening or right before closing asterisk
        private static readonly Parser<Node> ItalicFormattedNode =
            Parse.RegexMatch(new Regex("\\*(?!\\s)(.+?)(?<!\\s|\\*)\\*(?!\\*)", RegexOptions.Singleline))
                .Select(m => new FormattedNode(m.Value, "*", TextFormatting.Italic, BuildTree(m.Groups[1].Value)));

        // Can't have underscores inside
        // Can't have word characters right after closing underscore
        private static readonly Parser<Node> ItalicAltFormattedNode =
            Parse.RegexMatch(new Regex("_([^_]+?)_(?!\\w)", RegexOptions.Singleline))
                .Select(m => new FormattedNode(m.Value, "_", TextFormatting.Italic, BuildTree(m.Groups[1].Value)));

        // Treated as a separate entity for simplicity
        // Capture until the earliest triple asterisk not followed by an asterisk
        // The captured group keeps the inner "**...**" so the recursive parse yields a bold child
        private static readonly Parser<Node> ItalicBoldFormattedNode =
            Parse.RegexMatch(new Regex("\\*(\\*\\*(?:.+?)\\*\\*)\\*(?!\\*)", RegexOptions.Singleline))
                .Select(m => new FormattedNode(m.Value, "*", TextFormatting.Italic, BuildTree(m.Groups[1].Value)));

        // Capture until the earliest double underscore not followed by an underscore
        private static readonly Parser<Node> UnderlineFormattedNode =
            Parse.RegexMatch(new Regex("__(.+?)__(?!_)", RegexOptions.Singleline))
                .Select(m => new FormattedNode(m.Value, "__", TextFormatting.Underline, BuildTree(m.Groups[1].Value)));

        // Treated as a separate entity for simplicity
        // Capture until the earliest triple underscore not followed by an underscore
        // The captured group keeps the inner "__...__" so the recursive parse yields an underline child
        private static readonly Parser<Node> ItalicUnderlineFormattedNode =
            Parse.RegexMatch(new Regex("_(__(?:.+?)__)_(?!_)", RegexOptions.Singleline))
                .Select(m => new FormattedNode(m.Value, "_", TextFormatting.Italic, BuildTree(m.Groups[1].Value)));

        // Strikethrough is safe
        private static readonly Parser<Node> StrikethroughFormattedNode =
            Parse.RegexMatch(new Regex("~~(.+?)~~", RegexOptions.Singleline))
                .Select(m => new FormattedNode(m.Value, "~~", TextFormatting.Strikethrough, BuildTree(m.Groups[1].Value)));

        // Spoiler is safe
        private static readonly Parser<Node> SpoilerFormattedNode =
            Parse.RegexMatch(new Regex("\\|\\|(.+?)\\|\\|", RegexOptions.Singleline))
                .Select(m => new FormattedNode(m.Value, "||", TextFormatting.Spoiler, BuildTree(m.Groups[1].Value)));

        // Aggregator, order matters
        private static readonly Parser<Node> AnyFormattedNode =
            ItalicBoldFormattedNode.Or(ItalicUnderlineFormattedNode)
                .Or(BoldFormattedNode).Or(ItalicFormattedNode)
                .Or(UnderlineFormattedNode).Or(ItalicAltFormattedNode)
                .Or(StrikethroughFormattedNode).Or(SpoilerFormattedNode);

        /* Code blocks */

        // Can't have backticks inside and surrounding whitespace is trimmed
        private static readonly Parser<Node> InlineCodeBlockNode =
            Parse.RegexMatch(new Regex("`\\s*([^`]+?)\\s*`", RegexOptions.Singleline))
                .Select(m => new InlineCodeBlockNode(m.Value, m.Groups[1].Value));

        // The first word is a language identifier if it's the only word followed by a newline, the rest is code
        // The code group is lazy so the block ends at the nearest closing fence; a greedy group
        // would merge several code blocks (and any text between them) into a single node
        private static readonly Parser<Node> MultilineCodeBlockNode =
            Parse.RegexMatch(new Regex("```(?:(\\w*?)?(?:\\s*?\\n))?(.+?)```", RegexOptions.Singleline))
                .Select(m => new MultilineCodeBlockNode(m.Value, m.Groups[1].Value, m.Groups[2].Value));

        // Aggregator, order matters
        private static readonly Parser<Node> AnyCodeBlockNode = MultilineCodeBlockNode.Or(InlineCodeBlockNode);

        /* Mentions */

        // @everyone or @here
        private static readonly Parser<Node> MetaMentionNode = Parse.RegexMatch("@(everyone|here)")
            .Select(m => new MentionNode(m.Value, m.Groups[1].Value, MentionType.Meta));

        // <@123456> or <@!123456>
        private static readonly Parser<Node> UserMentionNode = Parse.RegexMatch("<@!?(\\d+)>")
            .Select(m => new MentionNode(m.Value, m.Groups[1].Value, MentionType.User));

        // <#123456>
        private static readonly Parser<Node> ChannelMentionNode = Parse.RegexMatch("<#(\\d+)>")
            .Select(m => new MentionNode(m.Value, m.Groups[1].Value, MentionType.Channel));

        // <@&123456>
        private static readonly Parser<Node> RoleMentionNode = Parse.RegexMatch("<@&(\\d+)>")
            .Select(m => new MentionNode(m.Value, m.Groups[1].Value, MentionType.Role));

        // Aggregator, order matters
        private static readonly Parser<Node> AnyMentionNode =
            MetaMentionNode.Or(UserMentionNode).Or(ChannelMentionNode).Or(RoleMentionNode);

        /* Emojis */

        // <:lul:123456> or <a:lul:123456>
        // The name group is lazy so that several emojis on one line are not merged into one node
        private static readonly Parser<Node> EmojiNode = Parse.RegexMatch("<(a)?:(.+?):(\\d+)>")
            .Select(m => new EmojiNode(m.Value, m.Groups[3].Value, m.Groups[2].Value, m.Groups[1].Value.IsNotBlank()));

        // Aggregator, order matters
        private static readonly Parser<Node> AnyEmojiNode = EmojiNode;

        /* Links */

        // [title](link)
        // Both groups are lazy so that several titled links on one line are not merged into one node
        private static readonly Parser<Node> TitledLinkNode = Parse.RegexMatch("\\[(.+?)\\]\\((.+?)\\)")
            .Select(m => new LinkNode(m.Value, m.Groups[2].Value, m.Groups[1].Value));

        // Starts with http:// or https://, stops at the last non-whitespace character followed by whitespace or punctuation character
        private static readonly Parser<Node> AutoLinkNode = Parse.RegexMatch("(https?://\\S*[^\\.,:;\"\'\\s])")
            .Select(m => new LinkNode(m.Value, m.Groups[1].Value));

        // Autolink surrounded by angular brackets
        private static readonly Parser<Node> HiddenLinkNode = Parse.RegexMatch("<(https?://\\S*[^\\.,:;\"\'\\s])>")
            .Select(m => new LinkNode(m.Value, m.Groups[1].Value));

        // Aggregator, order matters
        private static readonly Parser<Node> AnyLinkNode = TitledLinkNode.Or(HiddenLinkNode).Or(AutoLinkNode);

        /* Text */

        // Shrug is an exception and needs to be exempt from formatting
        private static readonly Parser<Node> ShrugTextNode =
            Parse.String("¯\\_(ツ)_/¯").Text().Select(s => new TextNode(s));

        // Backslash escapes any following non-whitespace character except for digits and latin letters
        private static readonly Parser<Node> EscapedTextNode =
            Parse.RegexMatch("\\\\([^a-zA-Z0-9\\s])").Select(m => new TextNode(m.Value, m.Groups[1].Value));

        // Aggregator, order matters
        private static readonly Parser<Node> AnyTextNode = ShrugTextNode.Or(EscapedTextNode);

        /* Aggregator and fallback */

        // Any node recognized by above patterns
        private static readonly Parser<Node> AnyRecognizedNode = AnyFormattedNode.Or(AnyCodeBlockNode)
            .Or(AnyMentionNode).Or(AnyEmojiNode).Or(AnyLinkNode).Or(AnyTextNode);

        // Any node not recognized by above patterns (treated as plain text)
        // Consumes characters one at a time until a recognized node could start
        private static readonly Parser<Node> FallbackNode =
            Parse.AnyChar.Except(AnyRecognizedNode).AtLeastOnce().Text().Select(s => new TextNode(s));

        // Any node
        private static readonly Parser<Node> AnyNode = AnyRecognizedNode.Or(FallbackNode);

        /// <summary>
        /// Entry point: parses <paramref name="input"/> into a flat list of top-level nodes.
        /// Formatted nodes carry their own children, produced by recursive calls to this method.
        /// </summary>
        /// <param name="input">Markdown text to parse.</param>
        /// <returns>Top-level nodes in the order they appear in the input.</returns>
        public static IReadOnlyList<Node> BuildTree(string input) => AnyNode.Many().Parse(input).ToArray();
    }
}

@ -0,0 +1,22 @@
namespace DiscordChatExporter.Core.Markdown
{
    /// <summary>
    /// Markdown node describing a hyperlink.
    /// </summary>
    public class LinkNode : Node
    {
        /// <summary>Target of the link.</summary>
        public string Url { get; }

        /// <summary>Text displayed for the link.</summary>
        public string Title { get; }

        /// <summary>
        /// Creates a link node with an explicit title.
        /// </summary>
        /// <param name="lexeme">Source text this node was produced from.</param>
        /// <param name="url">Target of the link.</param>
        /// <param name="title">Text displayed for the link.</param>
        public LinkNode(string lexeme, string url, string title)
            : base(lexeme)
        {
            Title = title;
            Url = url;
        }

        /// <summary>
        /// Creates a link node whose title is the URL itself.
        /// </summary>
        /// <param name="lexeme">Source text this node was produced from.</param>
        /// <param name="url">Target of the link, also used as the title.</param>
        public LinkNode(string lexeme, string url)
            : this(lexeme, url, url)
        {
        }

        /// <summary>Returns a short diagnostic description of the node.</summary>
        public override string ToString()
        {
            return "<Link> " + Title;
        }
    }
}

@ -0,0 +1,10 @@
using System.Collections.Generic;
using DiscordChatExporter.Core.Markdown.Internal;
namespace DiscordChatExporter.Core.Markdown
{
    /// <summary>
    /// Public facade over the internal markdown grammar.
    /// </summary>
    public static class MarkdownParser
    {
        /// <summary>
        /// Parses markdown text into a list of nodes by delegating to <c>Grammar.BuildTree</c>.
        /// </summary>
        /// <param name="input">Markdown text to parse.</param>
        /// <returns>The nodes produced by the grammar.</returns>
        public static IReadOnlyList<Node> Parse(string input)
        {
            var nodes = Grammar.BuildTree(input);
            return nodes;
        }
    }
}

@ -0,0 +1,18 @@
namespace DiscordChatExporter.Core.Markdown
{
    /// <summary>
    /// Markdown node describing a mention.
    /// </summary>
    public class MentionNode : Node
    {
        /// <summary>Identifier of the mentioned entity.</summary>
        public string Id { get; }

        /// <summary>Kind of entity being mentioned.</summary>
        public MentionType Type { get; }

        /// <summary>
        /// Creates a mention node.
        /// </summary>
        /// <param name="lexeme">Source text this node was produced from.</param>
        /// <param name="id">Identifier of the mentioned entity.</param>
        /// <param name="type">Kind of entity being mentioned.</param>
        public MentionNode(string lexeme, string id, MentionType type)
            : base(lexeme)
        {
            Type = type;
            Id = id;
        }

        /// <summary>Returns a short diagnostic description of the node.</summary>
        public override string ToString()
        {
            return $"<{Type} mention> {Id}";
        }
    }
}

@ -0,0 +1,10 @@
namespace DiscordChatExporter.Core.Markdown
{
    /// <summary>
    /// Kinds of mention a <c>MentionNode</c> can represent.
    /// </summary>
    public enum MentionType
    {
        /// <summary>Meta mention.</summary>
        Meta = 0,

        /// <summary>Mention of a user.</summary>
        User = 1,

        /// <summary>Mention of a channel.</summary>
        Channel = 2,

        /// <summary>Mention of a role.</summary>
        Role = 3
    }
}

@ -0,0 +1,18 @@
namespace DiscordChatExporter.Core.Markdown
{
    /// <summary>
    /// Markdown node describing a multiline code block.
    /// </summary>
    public class MultilineCodeBlockNode : Node
    {
        /// <summary>Language identifier attached to the block.</summary>
        public string Language { get; }

        /// <summary>Code carried by the block.</summary>
        public string Code { get; }

        /// <summary>
        /// Creates a multiline code block node.
        /// </summary>
        /// <param name="lexeme">Source text this node was produced from.</param>
        /// <param name="language">Language identifier attached to the block.</param>
        /// <param name="code">Code carried by the block.</param>
        public MultilineCodeBlockNode(string lexeme, string language, string code)
            : base(lexeme)
        {
            Code = code;
            Language = language;
        }

        /// <summary>Returns a short diagnostic description of the node.</summary>
        public override string ToString()
        {
            return $"<Code [{Language}]> {Code}";
        }
    }
}

@ -0,0 +1,12 @@
namespace DiscordChatExporter.Core.Markdown
{
    /// <summary>
    /// Base type for every node of the parsed markdown tree.
    /// </summary>
    public abstract class Node
    {
        /// <summary>Source text this node was produced from.</summary>
        public string Lexeme { get; }

        /// <summary>
        /// Stores the lexeme for the derived node.
        /// </summary>
        /// <param name="lexeme">Source text this node was produced from.</param>
        protected Node(string lexeme)
        {
            this.Lexeme = lexeme;
        }
    }
}

@ -0,0 +1,11 @@
namespace DiscordChatExporter.Core.Markdown
{
    /// <summary>
    /// Kinds of formatting a <c>FormattedNode</c> can apply to its children.
    /// </summary>
    public enum TextFormatting
    {
        /// <summary>Bold text.</summary>
        Bold = 0,

        /// <summary>Italic text.</summary>
        Italic = 1,

        /// <summary>Underlined text.</summary>
        Underline = 2,

        /// <summary>Struck-through text.</summary>
        Strikethrough = 3,

        /// <summary>Spoiler text.</summary>
        Spoiler = 4
    }
}

@ -0,0 +1,19 @@
namespace DiscordChatExporter.Core.Markdown
{
    /// <summary>
    /// Markdown node describing plain text.
    /// </summary>
    public class TextNode : Node
    {
        /// <summary>Text carried by the node.</summary>
        public string Text { get; }

        /// <summary>
        /// Creates a text node whose text differs from its lexeme.
        /// </summary>
        /// <param name="lexeme">Source text this node was produced from.</param>
        /// <param name="text">Text carried by the node.</param>
        public TextNode(string lexeme, string text)
            : base(lexeme)
        {
            Text = text;
        }

        /// <summary>
        /// Creates a text node whose lexeme and text are the same string.
        /// </summary>
        /// <param name="text">Text carried by the node, also used as the lexeme.</param>
        public TextNode(string text)
            : this(text, text)
        {
        }

        /// <summary>Returns the text carried by the node.</summary>
        public override string ToString()
        {
            return Text;
        }
    }
}

@ -2,18 +2,17 @@
<PropertyGroup> <PropertyGroup>
<TargetFramework>net461</TargetFramework> <TargetFramework>net461</TargetFramework>
<Version>2.9.1</Version>
</PropertyGroup> </PropertyGroup>
<ItemGroup> <ItemGroup>
<EmbeddedResource Include="Resources\ExportTemplates\PlainText.txt" /> <EmbeddedResource Include="Resources\ExportTemplates\PlainText\Template.txt" />
<EmbeddedResource Include="Resources\ExportTemplates\HtmlDark.html" /> <EmbeddedResource Include="Resources\ExportTemplates\HtmlDark\Template.html" />
<EmbeddedResource Include="Resources\ExportTemplates\HtmlLight.html" /> <EmbeddedResource Include="Resources\ExportTemplates\HtmlLight\Template.html" />
<EmbeddedResource Include="Resources\ExportTemplates\Html\Core.html" /> <EmbeddedResource Include="Resources\ExportTemplates\HtmlShared\Main.html" />
<EmbeddedResource Include="Resources\ExportTemplates\Html\Shared.css" /> <EmbeddedResource Include="Resources\ExportTemplates\HtmlShared\Main.css" />
<EmbeddedResource Include="Resources\ExportTemplates\Html\DarkTheme.css" /> <EmbeddedResource Include="Resources\ExportTemplates\HtmlDark\Theme.css" />
<EmbeddedResource Include="Resources\ExportTemplates\Html\LightTheme.css" /> <EmbeddedResource Include="Resources\ExportTemplates\HtmlLight\Theme.css" />
<EmbeddedResource Include="Resources\ExportTemplates\Csv.csv" /> <EmbeddedResource Include="Resources\ExportTemplates\Csv\Template.csv" />
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
@ -29,4 +28,8 @@
<PackageReference Include="Tyrrrz.Settings" Version="1.3.3" /> <PackageReference Include="Tyrrrz.Settings" Version="1.3.3" />
</ItemGroup> </ItemGroup>
<ItemGroup>
<ProjectReference Include="..\DiscordChatExporter.Core.Markdown\DiscordChatExporter.Core.Markdown.csproj" />
</ItemGroup>
</Project> </Project>

@ -1,6 +1,6 @@
using System; using System;
using System.Drawing; using System.Drawing;
using Tyrrrz.Extensions; using System.Net;
namespace DiscordChatExporter.Core.Internal namespace DiscordChatExporter.Core.Internal
{ {
@ -14,10 +14,8 @@ namespace DiscordChatExporter.Core.Internal
return value.ToString(); return value.ToString();
} }
public static string Base64Encode(this string str) => str.GetBytes().ToBase64();
public static string Base64Decode(this string str) => str.FromBase64().GetString();
public static Color ResetAlpha(this Color color) => Color.FromArgb(1, color); public static Color ResetAlpha(this Color color) => Color.FromArgb(1, color);
public static string HtmlEncode(this string value) => WebUtility.HtmlEncode(value);
} }
} }

@ -4,7 +4,7 @@
{{- }}"{{ message.Timestamp | FormatDate }}"; {{- }}"{{ message.Timestamp | FormatDate }}";
{{- }}"{{ message.Content | FormatContent }}"; {{- }}"{{ message.Content | FormatMarkdown | string.replace "\"" "\"\"" }}";
{{- }}"{{ message.Attachments | array.map "Url" | array.join "," }}"; {{- }}"{{ message.Attachments | array.map "Url" | array.join "," }}";
{{~ end -}} {{~ end -}}
Can't render this file because it has a wrong number of fields in line 2.

@ -1,7 +0,0 @@
{{
$SharedStyleSheet = include "Html.Shared.css"
$ThemeStyleSheet = include "Html.DarkTheme.css"
StyleSheet = $SharedStyleSheet + "\n" + $ThemeStyleSheet
}}
{{ include "Html.Core.html" }}

@ -0,0 +1,2 @@
{{~ ThemeStyleSheet = include "HtmlDark.Theme.css" ~}}
{{~ include "HtmlShared.Main.html" ~}}

@ -2,13 +2,17 @@
body { body {
background-color: #36393e; background-color: #36393e;
color: #ffffffb3; color: #dcddde;
} }
a { a {
color: #0096cf; color: #0096cf;
} }
.spoiler {
background-color: rgba(255, 255, 255, 0.1);
}
.pre { .pre {
background-color: #2f3136; background-color: #2f3136;
} }
@ -19,7 +23,6 @@ a {
} }
.mention { .mention {
background-color: #738bd71a;
color: #7289da; color: #7289da;
} }
@ -40,7 +43,7 @@ a {
/* === CHATLOG === */ /* === CHATLOG === */
.chatlog__message-group { .chatlog__message-group {
border-color: #ffffff0a; border-color: rgba(255, 255, 255, 0.1);
} }
.chatlog__author-name { .chatlog__author-name {
@ -48,16 +51,16 @@ a {
} }
.chatlog__timestamp { .chatlog__timestamp {
color: #ffffff33; color: rgba(255, 255, 255, 0.2);
} }
.chatlog__edited-timestamp { .chatlog__edited-timestamp {
color: #ffffff33; color: rgba(255, 255, 255, 0.2);
} }
.chatlog__embed-content-container { .chatlog__embed-content-container {
background-color: #2e30364d; background-color: rgba(46, 48, 54, 0.3);
border-color: #2e303699; border-color: rgba(46, 48, 54, 0.6);
} }
.chatlog__embed-author-name { .chatlog__embed-author-name {
@ -73,7 +76,7 @@ a {
} }
.chatlog__embed-description { .chatlog__embed-description {
color: #ffffff99; color: rgba(255, 255, 255, 0.6);
} }
.chatlog__embed-field-name { .chatlog__embed-field-name {
@ -81,17 +84,17 @@ a {
} }
.chatlog__embed-field-value { .chatlog__embed-field-value {
color: #ffffff99; color: rgba(255, 255, 255, 0.6);
} }
.chatlog__embed-footer { .chatlog__embed-footer {
color: #ffffff99; color: rgba(255, 255, 255, 0.6);
} }
.chatlog__reaction { .chatlog__reaction {
background-color: #ffffff0a; background-color: rgba(255, 255, 255, 0.05);
} }
.chatlog__reaction-count { .chatlog__reaction-count {
color: #ffffff4d; color: rgba(255, 255, 255, 0.3);
} }

@ -1,7 +0,0 @@
{{
$SharedStyleSheet = include "Html.Shared.css"
$ThemeStyleSheet = include "Html.LightTheme.css"
StyleSheet = $SharedStyleSheet + "\n" + $ThemeStyleSheet
}}
{{ include "Html.Core.html" }}

@ -0,0 +1,2 @@
{{~ ThemeStyleSheet = include "HtmlLight.Theme.css" ~}}
{{~ include "HtmlShared.Main.html" ~}}

@ -2,13 +2,17 @@
body { body {
background-color: #ffffff; background-color: #ffffff;
color: #737f8d; color: #747f8d;
} }
a { a {
color: #00b0f4; color: #00b0f4;
} }
.spoiler {
background-color: rgba(0, 0, 0, 0.1);
}
.pre { .pre {
background-color: #f9f9f9; background-color: #f9f9f9;
} }
@ -56,8 +60,8 @@ a {
} }
.chatlog__embed-content-container { .chatlog__embed-content-container {
background-color: #f9f9f94d; background-color: rgba(249, 249, 249, 0.3);
border-color: #cccccc4d; border-color: rgba(204, 204, 204, 0.3);
} }
.chatlog__embed-author-name { .chatlog__embed-author-name {
@ -85,11 +89,11 @@ a {
} }
.chatlog__embed-footer { .chatlog__embed-footer {
color: #4f535b99; color: rgba(79, 83, 91, 0.4);
} }
.chatlog__reaction { .chatlog__reaction {
background-color: #4f545c0f; background-color: rgba(79, 84, 92, 0.06);
} }
.chatlog__reaction-count { .chatlog__reaction-count {

@ -17,9 +17,17 @@ img {
object-fit: contain; object-fit: contain;
} }
.markdown {
white-space: pre-wrap;
line-height: 1.3;
}
.spoiler {
border-radius: 3px;
}
.pre { .pre {
font-family: "Consolas", "Courier New", Courier, Monospace; font-family: "Consolas", "Courier New", Courier, Monospace;
white-space: pre-wrap;
} }
.pre--multiline { .pre--multiline {
@ -34,6 +42,10 @@ img {
border-radius: 3px; border-radius: 3px;
} }
.mention {
font-weight: 500;
}
.emoji { .emoji {
width: 24px; width: 24px;
height: 24px; height: 24px;
@ -51,10 +63,6 @@ img {
height: 32px; height: 32px;
} }
.mention {
font-weight: 600;
}
/* === INFO === */ /* === INFO === */
.info { .info {
@ -130,6 +138,7 @@ img {
.chatlog__author-name { .chatlog__author-name {
font-size: 1em; font-size: 1em;
font-weight: 500;
} }
.chatlog__timestamp { .chatlog__timestamp {
@ -144,7 +153,7 @@ img {
} }
.chatlog__edited-timestamp { .chatlog__edited-timestamp {
margin-left: 5px; margin-left: 3px;
font-size: .8em; font-size: .8em;
} }

@ -6,7 +6,10 @@
<meta charset="utf-8" /> <meta charset="utf-8" />
<meta name="viewport" content="width=device-width" /> <meta name="viewport" content="width=device-width" />
<style> <style>
{{ StyleSheet }} {{ include "HtmlShared.Main.css" }}
</style>
<style>
{{ ThemeStyleSheet }}
</style> </style>
</head> </head>
<body> <body>
@ -58,7 +61,7 @@
{{~ # Content ~}} {{~ # Content ~}}
{{~ if message.Content ~}} {{~ if message.Content ~}}
<div class="chatlog__content"> <div class="chatlog__content">
{{ message.Content | FormatContent }} <span class="markdown">{{ message.Content | FormatMarkdown }}</span>
{{~ # Edited timestamp ~}} {{~ # Edited timestamp ~}}
{{~ if message.EditedTimestamp ~}} {{~ if message.EditedTimestamp ~}}
@ -85,7 +88,7 @@
{{~ # Embeds ~}} {{~ # Embeds ~}}
{{~ for embed in message.Embeds ~}} {{~ for embed in message.Embeds ~}}
<div class="chatlog__embed"> <div class="chatlog__embed">
<div class="chatlog__embed-color-pill" style="background-color: rgba({{ embed.Color | FormatColor }})"></div> <div class="chatlog__embed-color-pill" style="background-color: rgba({{ embed.Color.R }},{{ embed.Color.G }},{{ embed.Color.B }},{{ embed.Color.A }})"></div>
<div class="chatlog__embed-content-container"> <div class="chatlog__embed-content-container">
<div class="chatlog__embed-content"> <div class="chatlog__embed-content">
<div class="chatlog__embed-text"> <div class="chatlog__embed-text">
@ -112,16 +115,16 @@
{{~ if embed.Title ~}} {{~ if embed.Title ~}}
<div class="chatlog__embed-title"> <div class="chatlog__embed-title">
{{~ if embed.Url ~}} {{~ if embed.Url ~}}
<a class="chatlog__embed-title-link" href="{{ embed.Url }}">{{ embed.Title | FormatContent }}</a> <a class="chatlog__embed-title-link" href="{{ embed.Url }}"><span class="markdown">{{ embed.Title | FormatMarkdown }}</span></a>
{{~ else ~}} {{~ else ~}}
{{ embed.Title | FormatContent }} <span class="markdown">{{ embed.Title | FormatMarkdown }}</span>
{{~ end ~}} {{~ end ~}}
</div> </div>
{{~ end ~}} {{~ end ~}}
{{~ # Description ~}} {{~ # Description ~}}
{{~ if embed.Description ~}} {{~ if embed.Description ~}}
<div class="chatlog__embed-description">{{ embed.Description | FormatContent true }}</div> <div class="chatlog__embed-description"><span class="markdown">{{ embed.Description | FormatMarkdown }}</span></div>
{{~ end ~}} {{~ end ~}}
{{~ # Fields ~}} {{~ # Fields ~}}
@ -130,10 +133,10 @@
{{~ for field in embed.Fields ~}} {{~ for field in embed.Fields ~}}
<div class="chatlog__embed-field {{ if field.IsInline }} chatlog__embed-field--inline {{ end }}"> <div class="chatlog__embed-field {{ if field.IsInline }} chatlog__embed-field--inline {{ end }}">
{{~ if field.Name ~}} {{~ if field.Name ~}}
<div class="chatlog__embed-field-name">{{ field.Name | FormatContent }}</div> <div class="chatlog__embed-field-name"><span class="markdown">{{ field.Name | FormatMarkdown }}</span></div>
{{~ end ~}} {{~ end ~}}
{{~ if field.Value ~}} {{~ if field.Value ~}}
<div class="chatlog__embed-field-value">{{ field.Value | FormatContent true }}</div> <div class="chatlog__embed-field-value"><span class="markdown">{{ field.Value | FormatMarkdown }}</span></div>
{{~ end ~}} {{~ end ~}}
</div> </div>
{{~ end ~}} {{~ end ~}}

@ -12,7 +12,7 @@ Range: {{ if Model.From }}{{ Model.From | FormatDate }} {{ end }}{{ if Model.
{{~ # Author name and timestamp ~}} {{~ # Author name and timestamp ~}}
{{~ }}[{{ message.Timestamp | FormatDate }}] {{ message.Author.FullName }} {{~ }}[{{ message.Timestamp | FormatDate }}] {{ message.Author.FullName }}
{{~ # Content ~}} {{~ # Content ~}}
{{~ message.Content | FormatContent }} {{~ message.Content | FormatMarkdown }}
{{~ # Attachments ~}} {{~ # Attachments ~}}
{{~ for attachment in message.Attachments ~}} {{~ for attachment in message.Attachments ~}}
{{~ attachment.Url }} {{~ attachment.Url }}

@ -20,7 +20,7 @@ namespace DiscordChatExporter.Core.Services
public string GetPath(ExportFormat format) public string GetPath(ExportFormat format)
{ {
return $"{ResourceRootNamespace}.{format}.{format.GetFileExtension()}"; return $"{ResourceRootNamespace}.{format}.Template.{format.GetFileExtension()}";
} }
public string Load(TemplateContext context, SourceSpan callerSpan, string templatePath) public string Load(TemplateContext context, SourceSpan callerSpan, string templatePath)

@ -1,11 +1,10 @@
using System; using System;
using System.Collections.Generic; using System.Collections.Generic;
using System.Drawing;
using System.Globalization; using System.Globalization;
using System.Linq; using System.Linq;
using System.Net; using System.Text;
using System.Text.RegularExpressions;
using DiscordChatExporter.Core.Internal; using DiscordChatExporter.Core.Internal;
using DiscordChatExporter.Core.Markdown;
using DiscordChatExporter.Core.Models; using DiscordChatExporter.Core.Models;
using Scriban.Runtime; using Scriban.Runtime;
using Tyrrrz.Extensions; using Tyrrrz.Extensions;
@ -73,8 +72,6 @@ namespace DiscordChatExporter.Core.Services
} }
} }
private string HtmlEncode(string str) => WebUtility.HtmlEncode(str);
private string Format(IFormattable obj, string format) => private string Format(IFormattable obj, string format) =>
obj.ToString(format, CultureInfo.InvariantCulture); obj.ToString(format, CultureInfo.InvariantCulture);
@ -95,254 +92,150 @@ namespace DiscordChatExporter.Core.Services
return $"{size:0.#} {units[unit]}"; return $"{size:0.#} {units[unit]}";
} }
private string FormatColor(Color color) private string FormatMarkdownPlainText(IEnumerable<Node> nodes)
{ {
return $"{color.R},{color.G},{color.B},{color.A}"; var buffer = new StringBuilder();
}
private string FormatContentPlainText(string content)
{
// New lines
content = content.Replace("\n", Environment.NewLine);
// User mentions (<@id> and <@!id>)
var mentionedUserIds = Regex.Matches(content, "<@!?(\\d+)>")
.Cast<Match>()
.Select(m => m.Groups[1].Value)
.ExceptBlank()
.ToArray();
foreach (var mentionedUserId in mentionedUserIds) foreach (var node in nodes)
{ {
var mentionedUser = _log.Mentionables.GetUser(mentionedUserId); if (node is FormattedNode formattedNode)
content = Regex.Replace(content, $"<@!?{mentionedUserId}>", $"@{mentionedUser.FullName}"); {
} var innerText = FormatMarkdownPlainText(formattedNode.Children);
buffer.Append($"{formattedNode.Token}{innerText}{formattedNode.Token}");
// Channel mentions (<#id>) }
var mentionedChannelIds = Regex.Matches(content, "<#(\\d+)>")
.Cast<Match>()
.Select(m => m.Groups[1].Value)
.ExceptBlank()
.ToArray();
foreach (var mentionedChannelId in mentionedChannelIds) else if (node is MentionNode mentionNode && mentionNode.Type != MentionType.Meta)
{ {
var mentionedChannel = _log.Mentionables.GetChannel(mentionedChannelId); if (mentionNode.Type == MentionType.User)
content = content.Replace($"<#{mentionedChannelId}>", $"#{mentionedChannel.Name}"); {
} var user = _log.Mentionables.GetUser(mentionNode.Id);
buffer.Append($"@{user.Name}");
}
else if (mentionNode.Type == MentionType.Channel)
{
var channel = _log.Mentionables.GetChannel(mentionNode.Id);
buffer.Append($"#{channel.Name}");
}
else if (mentionNode.Type == MentionType.Role)
{
var role = _log.Mentionables.GetRole(mentionNode.Id);
buffer.Append($"@{role.Name}");
}
}
// Role mentions (<@&id>) else if (node is EmojiNode emojiNode)
var mentionedRoleIds = Regex.Matches(content, "<@&(\\d+)>") {
.Cast<Match>() buffer.Append($":{emojiNode.Name}:");
.Select(m => m.Groups[1].Value) }
.ExceptBlank()
.ToArray();
foreach (var mentionedRoleId in mentionedRoleIds) else
{ {
var mentionedRole = _log.Mentionables.GetRole(mentionedRoleId); buffer.Append(node.Lexeme);
content = content.Replace($"<@&{mentionedRoleId}>", $"@{mentionedRole.Name}"); }
} }
// Custom emojis (<:name:id>) return buffer.ToString();
content = Regex.Replace(content, "<(:.*?:)\\d*>", "$1");
return content;
} }
private string FormatContentHtml(string content, bool allowLinks = false) private string FormatMarkdownPlainText(string input)
{ => FormatMarkdownPlainText(MarkdownParser.Parse(input));
// HTML-encode content
content = HtmlEncode(content);
// Encode multiline codeblocks (```text```)
content = Regex.Replace(content,
@"```+(?:[^`]*?\n)?([^`]+)\n?```+",
m => $"\x1AM{m.Groups[1].Value.Base64Encode()}\x1AM");
// Encode inline codeblocks (`text`) private string FormatMarkdownHtml(IEnumerable<Node> nodes)
content = Regex.Replace(content, {
@"`([^`]+)`", var buffer = new StringBuilder();
m => $"\x1AI{m.Groups[1].Value.Base64Encode()}\x1AI");
// Encode links foreach (var node in nodes)
if (allowLinks)
{ {
content = Regex.Replace(content, @"\[(.*?)\]\((.*?)\)", if (node is TextNode textNode)
m => $"\x1AL{m.Groups[1].Value.Base64Encode()}|{m.Groups[2].Value.Base64Encode()}\x1AL"); {
} buffer.Append(textNode.Text.HtmlEncode());
}
// Encode URLs
content = Regex.Replace(content,
@"(\b(?:(?:https?|ftp|file)://|www\.|ftp\.)(?:\([-a-zA-Z0-9+&@#/%?=~_|!:,\.\[\];]*\)|[-a-zA-Z0-9+&@#/%?=~_|!:,\.\[\];])*(?:\([-a-zA-Z0-9+&@#/%?=~_|!:,\.\[\];]*\)|[-a-zA-Z0-9+&@#/%=~_|$]))",
m => $"\x1AU{m.Groups[1].Value.Base64Encode()}\x1AU");
// Process bold (**text**)
content = Regex.Replace(content, @"(\*\*)(?=\S)(.+?[*_]*)(?<=\S)\1", "<b>$2</b>");
// Process underline (__text__)
content = Regex.Replace(content, @"(__)(?=\S)(.+?)(?<=\S)\1", "<u>$2</u>");
// Process italic (*text* or _text_)
content = Regex.Replace(content, @"(\*|_)(?=\S)(.+?)(?<=\S)\1", "<i>$2</i>");
// Process strike through (~~text~~) else if (node is FormattedNode formattedNode)
content = Regex.Replace(content, @"(~~)(?=\S)(.+?)(?<=\S)\1", "<s>$2</s>"); {
var innerHtml = FormatMarkdownHtml(formattedNode.Children);
// Decode and process multiline codeblocks if (formattedNode.Formatting == TextFormatting.Bold)
content = Regex.Replace(content, "\x1AM(.*?)\x1AM", buffer.Append($"<strong>{innerHtml}</strong>");
m => $"<div class=\"pre pre--multiline\">{m.Groups[1].Value.Base64Decode()}</div>");
// Decode and process inline codeblocks else if (formattedNode.Formatting == TextFormatting.Italic)
content = Regex.Replace(content, "\x1AI(.*?)\x1AI", buffer.Append($"<em>{innerHtml}</em>");
m => $"<span class=\"pre pre--inline\">{m.Groups[1].Value.Base64Decode()}</span>");
// Decode and process links else if (formattedNode.Formatting == TextFormatting.Underline)
if (allowLinks) buffer.Append($"<u>{innerHtml}</u>");
{
content = Regex.Replace(content, "\x1AL(.*?)\\|(.*?)\x1AL",
m => $"<a href=\"{m.Groups[2].Value.Base64Decode()}\">{m.Groups[1].Value.Base64Decode()}</a>");
}
// Decode and process URLs else if (formattedNode.Formatting == TextFormatting.Strikethrough)
content = Regex.Replace(content, "\x1AU(.*?)\x1AU", buffer.Append($"<s>{innerHtml}</s>");
m => $"<a href=\"{m.Groups[1].Value.Base64Decode()}\">{m.Groups[1].Value.Base64Decode()}</a>");
// Process new lines else if (formattedNode.Formatting == TextFormatting.Spoiler)
content = content.Replace("\n", "<br />"); buffer.Append($"<span class=\"spoiler\">{innerHtml}</span>");
}
// Meta mentions (@everyone)
content = content.Replace("@everyone", "<span class=\"mention\">@everyone</span>");
// Meta mentions (@here)
content = content.Replace("@here", "<span class=\"mention\">@here</span>");
// User mentions (<@id> and <@!id>) else if (node is InlineCodeBlockNode inlineCodeBlockNode)
var mentionedUserIds = Regex.Matches(content, "&lt;@!?(\\d+)&gt;") {
.Cast<Match>() buffer.Append($"<span class=\"pre pre--inline\">{inlineCodeBlockNode.Code.HtmlEncode()}</span>");
.Select(m => m.Groups[1].Value) }
.ExceptBlank()
.ToArray();
foreach (var mentionedUserId in mentionedUserIds) else if (node is MultilineCodeBlockNode multilineCodeBlockNode)
{ {
var mentionedUser = _log.Mentionables.GetUser(mentionedUserId); var languageCssClass = multilineCodeBlockNode.Language.IsNotBlank()
content = Regex.Replace(content, $"&lt;@!?{mentionedUserId}&gt;", ? "language-" + multilineCodeBlockNode.Language
$"<span class=\"mention\" title=\"{HtmlEncode(mentionedUser.FullName)}\">" + : null;
$"@{HtmlEncode(mentionedUser.Name)}" +
"</span>");
}
// Channel mentions (<#id>) buffer.Append(
var mentionedChannelIds = Regex.Matches(content, "&lt;#(\\d+)&gt;") $"<div class=\"pre pre--multiline {languageCssClass}\">{multilineCodeBlockNode.Code.HtmlEncode()}</div>");
.Cast<Match>() }
.Select(m => m.Groups[1].Value)
.ExceptBlank()
.ToArray();
foreach (var mentionedChannelId in mentionedChannelIds) else if (node is MentionNode mentionNode)
{ {
var mentionedChannel = _log.Mentionables.GetChannel(mentionedChannelId); if (mentionNode.Type == MentionType.Meta)
content = content.Replace($"&lt;#{mentionedChannelId}&gt;", {
"<span class=\"mention\">" + buffer.Append($"<span class=\"mention\">@{mentionNode.Id.HtmlEncode()}</span>");
$"#{HtmlEncode(mentionedChannel.Name)}" + }
"</span>");
} else if (mentionNode.Type == MentionType.User)
{
var user = _log.Mentionables.GetUser(mentionNode.Id);
buffer.Append($"<span class=\"mention\" title=\"{user.FullName}\">@{user.Name.HtmlEncode()}</span>");
}
else if (mentionNode.Type == MentionType.Channel)
{
var channel = _log.Mentionables.GetChannel(mentionNode.Id);
buffer.Append($"<span class=\"mention\">#{channel.Name.HtmlEncode()}</span>");
}
else if (mentionNode.Type == MentionType.Role)
{
var role = _log.Mentionables.GetRole(mentionNode.Id);
buffer.Append($"<span class=\"mention\">@{role.Name.HtmlEncode()}</span>");
}
}
// Role mentions (<@&id>) else if (node is EmojiNode emojiNode)
var mentionedRoleIds = Regex.Matches(content, "&lt;@&amp;(\\d+)&gt;") {
.Cast<Match>() buffer.Append($"<img class=\"emoji\" title=\"{emojiNode.Name}\" src=\"https://cdn.discordapp.com/emojis/{emojiNode.Id}.png\" />");
.Select(m => m.Groups[1].Value) }
.ExceptBlank()
.ToArray();
foreach (var mentionedRoleId in mentionedRoleIds) else if (node is LinkNode linkNode)
{ {
var mentionedRole = _log.Mentionables.GetRole(mentionedRoleId); buffer.Append($"<a href=\"{Uri.EscapeUriString(linkNode.Url)}\">{linkNode.Title.HtmlEncode()}</a>");
content = content.Replace($"&lt;@&amp;{mentionedRoleId}&gt;", }
"<span class=\"mention\">" +
$"@{HtmlEncode(mentionedRole.Name)}" +
"</span>");
} }
// Custom emojis (<:name:id>) return buffer.ToString();
var isJumboable = Regex.Replace(content, "&lt;(:.*?:)(\\d*)&gt;", "").IsBlank();
var emojiClass = isJumboable ? "emoji emoji--large" : "emoji";
content = Regex.Replace(content, "&lt;(:.*?:)(\\d*)&gt;",
$"<img class=\"{emojiClass}\" title=\"$1\" src=\"https://cdn.discordapp.com/emojis/$2.png\" />");
return content;
} }
private string FormatContentCsv(string content) private string FormatMarkdownHtml(string input)
{ => FormatMarkdownHtml(MarkdownParser.Parse(input));
// Escape quotes
content = content.Replace("\"", "\"\"");
// Escape commas and semicolons
if (content.Contains(",") || content.Contains(";"))
content = $"\"{content}\"";
// User mentions (<@id> and <@!id>)
var mentionedUserIds = Regex.Matches(content, "<@!?(\\d+)>")
.Cast<Match>()
.Select(m => m.Groups[1].Value)
.ExceptBlank()
.ToArray();
foreach (var mentionedUserId in mentionedUserIds) private string FormatMarkdown(string input)
{
var mentionedUser = _log.Mentionables.GetUser(mentionedUserId);
content = Regex.Replace(content, $"<@!?{mentionedUserId}>", $"@{mentionedUser.FullName}");
}
// Channel mentions (<#id>)
var mentionedChannelIds = Regex.Matches(content, "<#(\\d+)>")
.Cast<Match>()
.Select(m => m.Groups[1].Value)
.ExceptBlank()
.ToArray();
foreach (var mentionedChannelId in mentionedChannelIds)
{
var mentionedChannel = _log.Mentionables.GetChannel(mentionedChannelId);
content = content.Replace($"<#{mentionedChannelId}>", $"#{mentionedChannel.Name}");
}
// Role mentions (<@&id>)
var mentionedRoleIds = Regex.Matches(content, "<@&(\\d+)>")
.Cast<Match>()
.Select(m => m.Groups[1].Value)
.ExceptBlank()
.ToArray();
foreach (var mentionedRoleId in mentionedRoleIds)
{
var mentionedRole = _log.Mentionables.GetRole(mentionedRoleId);
content = content.Replace($"<@&{mentionedRoleId}>", $"@{mentionedRole.Name}");
}
// Custom emojis (<:name:id>)
content = Regex.Replace(content, "<(:.*?:)\\d*>", "$1");
return content;
}
private string FormatContent(string content, bool allowLinks = false)
{ {
if (_format == ExportFormat.PlainText) return _format == ExportFormat.HtmlDark || _format == ExportFormat.HtmlLight
return FormatContentPlainText(content); ? FormatMarkdownHtml(input)
: FormatMarkdownPlainText(input);
if (_format == ExportFormat.HtmlDark)
return FormatContentHtml(content, allowLinks);
if (_format == ExportFormat.HtmlLight)
return FormatContentHtml(content, allowLinks);
if (_format == ExportFormat.Csv)
return FormatContentCsv(content);
throw new ArgumentOutOfRangeException(nameof(_format));
} }
public ScriptObject GetScriptObject() public ScriptObject GetScriptObject()
@ -350,7 +243,7 @@ namespace DiscordChatExporter.Core.Services
// Create instance // Create instance
var scriptObject = new ScriptObject(); var scriptObject = new ScriptObject();
// Import chat log // Import model
scriptObject.SetValue("Model", _log, true); scriptObject.SetValue("Model", _log, true);
// Import functions // Import functions
@ -358,8 +251,7 @@ namespace DiscordChatExporter.Core.Services
scriptObject.Import(nameof(Format), new Func<IFormattable, string, string>(Format)); scriptObject.Import(nameof(Format), new Func<IFormattable, string, string>(Format));
scriptObject.Import(nameof(FormatDate), new Func<DateTime, string>(FormatDate)); scriptObject.Import(nameof(FormatDate), new Func<DateTime, string>(FormatDate));
scriptObject.Import(nameof(FormatFileSize), new Func<long, string>(FormatFileSize)); scriptObject.Import(nameof(FormatFileSize), new Func<long, string>(FormatFileSize));
scriptObject.Import(nameof(FormatColor), new Func<Color, string>(FormatColor)); scriptObject.Import(nameof(FormatMarkdown), new Func<string, string>(FormatMarkdown));
scriptObject.Import(nameof(FormatContent), new Func<string, bool, string>(FormatContent));
return scriptObject; return scriptObject;
} }

@ -16,6 +16,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DiscordChatExporter.Core",
EndProject EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DiscordChatExporter.Cli", "DiscordChatExporter.Cli\DiscordChatExporter.Cli.csproj", "{D08624B6-3081-4BCB-91F8-E9832FACC6CE}" Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DiscordChatExporter.Cli", "DiscordChatExporter.Cli\DiscordChatExporter.Cli.csproj", "{D08624B6-3081-4BCB-91F8-E9832FACC6CE}"
EndProject EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DiscordChatExporter.Core.Markdown", "DiscordChatExporter.Core.Markdown\DiscordChatExporter.Core.Markdown.csproj", "{14D02A08-E820-4012-B805-663B9A3D73E9}"
EndProject
Global Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU Debug|Any CPU = Debug|Any CPU
@ -34,6 +36,10 @@ Global
{D08624B6-3081-4BCB-91F8-E9832FACC6CE}.Debug|Any CPU.Build.0 = Debug|Any CPU {D08624B6-3081-4BCB-91F8-E9832FACC6CE}.Debug|Any CPU.Build.0 = Debug|Any CPU
{D08624B6-3081-4BCB-91F8-E9832FACC6CE}.Release|Any CPU.ActiveCfg = Release|Any CPU {D08624B6-3081-4BCB-91F8-E9832FACC6CE}.Release|Any CPU.ActiveCfg = Release|Any CPU
{D08624B6-3081-4BCB-91F8-E9832FACC6CE}.Release|Any CPU.Build.0 = Release|Any CPU {D08624B6-3081-4BCB-91F8-E9832FACC6CE}.Release|Any CPU.Build.0 = Release|Any CPU
{14D02A08-E820-4012-B805-663B9A3D73E9}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{14D02A08-E820-4012-B805-663B9A3D73E9}.Debug|Any CPU.Build.0 = Debug|Any CPU
{14D02A08-E820-4012-B805-663B9A3D73E9}.Release|Any CPU.ActiveCfg = Release|Any CPU
{14D02A08-E820-4012-B805-663B9A3D73E9}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection EndGlobalSection
GlobalSection(SolutionProperties) = preSolution GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE HideSolutionNode = FALSE

Loading…
Cancel
Save