[HTML] Recognize standard emoji by code and show emoji code in tooltips

Closes #549 Closes #599
3 years ago · 24a80f915f
parent de57cd714d
commit 24a80f915f
14 changed files with 8960 additions and 77 deletions
--- a/DiscordChatExporter.Core/Discord/Data/Emoji.cs
+++ b/DiscordChatExporter.Core/Discord/Data/Emoji.cs
@ -1,7 +1,7 @@
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
+using System.Linq;
 using System.Text.Json;
+using DiscordChatExporter.Core.Utils;
+using DiscordChatExporter.Core.Utils.Extensions;
 using JsonExtensions.Reading;
 using Tyrrrz.Extensions;

@ -10,10 +10,17 @@ namespace DiscordChatExporter.Core.Discord.Data
    // https://discord.com/developers/docs/resources/emoji#emoji-object
    public partial class Emoji
    {
+        // Only present on custom emoji
        public string? Id { get; }

+        // Name of custom emoji (e.g. LUL) or actual representation of standard emoji (e.g. 🙂)
        public string Name { get; }

+        // Custom emoji (those with an Id) use their name (e.g. LUL); standard emoji use the code EmojiIndex returns (e.g. slight_smile), or the name if none
+        public string Code => string.IsNullOrWhiteSpace(Id)
+            ? EmojiIndex.TryGetCode(Name) ?? Name
+            : Name;
+
        public bool IsAnimated { get; }

        public string ImageUrl { get; }
@ -31,21 +38,12 @@ namespace DiscordChatExporter.Core.Discord.Data

    public partial class Emoji
    {
-        private static IEnumerable<Rune> GetRunes(string emoji)
-        {
-            var lastIndex = 0;
-            while (lastIndex < emoji.Length && Rune.TryGetRuneAt(emoji, lastIndex, out var rune))
-            {
-                // Skip variant selector rune
-                if (rune.Value != 0xfe0f)
-                    yield return rune;
-
-                lastIndex += rune.Utf16SequenceLength;
-            }
-        }
-
-        private static string GetTwemojiName(IEnumerable<Rune> runes) =>
-            runes.Select(r => r.Value.ToString("x")).JoinToString("-");
+        // Builds the Twemoji asset name: the emoji's code points in lowercase hex, joined by dashes (used by GetImageUrl)
+        private static string GetTwemojiName(string name) => name.GetRunes()
+            // Twemoji omits the variation selector (U+FE0F) from names, except when the emoji contains a zero-width joiner (U+200D)
+            .Where(r => r.Value != 0xfe0f || name.Contains('\u200d'))
+            .Select(r => r.Value.ToString("x"))
+            .JoinToString("-");

        public static string GetImageUrl(string? id, string name, bool isAnimated)
        {
@ -58,8 +56,7 @@ namespace DiscordChatExporter.Core.Discord.Data
            }

            // Standard emoji
-            var emojiRunes = GetRunes(name).ToArray();
-            var twemojiName = GetTwemojiName(emojiRunes);
+            var twemojiName = GetTwemojiName(name);
            return $"https://twemoji.maxcdn.com/2/72x72/{twemojiName}.png";
        }

--- a/DiscordChatExporter.Core/Exporting/Writers/Html/MessageGroupTemplate.cshtml
+++ b/DiscordChatExporter.Core/Exporting/Writers/Html/MessageGroupTemplate.cshtml
@ -311,7 +311,7 @@
                    <div class="chatlog__reactions">
                        @foreach (var reaction in message.Reactions)
                        {
-                            <div class="chatlog__reaction" title="@reaction.Emoji.Name">
+                            <div class="chatlog__reaction" title="@reaction.Emoji.Code">
                                <img class="emoji emoji--small" alt="@reaction.Emoji.Name" src="@await ResolveUrlAsync(reaction.Emoji.ImageUrl)">
                                <span class="chatlog__reaction-count">@reaction.Count</span>
                            </div>
--- a/DiscordChatExporter.Core/Exporting/Writers/MarkdownVisitors/HtmlMarkdownVisitor.cs
+++ b/DiscordChatExporter.Core/Exporting/Writers/MarkdownVisitors/HtmlMarkdownVisitor.cs
@ -131,7 +131,7 @@ namespace DiscordChatExporter.Core.Exporting.Writers.MarkdownVisitors
            var jumboClass = _isJumbo ? "emoji--large" : "";

            _buffer
-                .Append($"<img class=\"emoji {jumboClass}\" alt=\"{emoji.Name}\" title=\"{emoji.Name}\" src=\"{emojiImageUrl}\">");
+                .Append($"<img class=\"emoji {jumboClass}\" alt=\"{emoji.Name}\" title=\"{emoji.Code}\" src=\"{emojiImageUrl}\">");

            return base.VisitEmoji(emoji);
        }
--- a/DiscordChatExporter.Core/Markdown/Ast/EmojiNode.cs
+++ b/DiscordChatExporter.Core/Markdown/Ast/EmojiNode.cs
@ -1,11 +1,20 @@
-namespace DiscordChatExporter.Core.Markdown.Ast
+using DiscordChatExporter.Core.Utils;
+
+namespace DiscordChatExporter.Core.Markdown.Ast
 {
    internal class EmojiNode : MarkdownNode
    {
+        // Only present on custom emoji
        public string? Id { get; }

+        // Name of custom emoji (e.g. LUL) or actual representation of standard emoji (e.g. 🙂)
        public string Name { get; }

+        // Name of custom emoji (e.g. LUL) or code of standard emoji (e.g. slight_smile);
+        // falls back to the raw name when EmojiIndex returns no code for it
+        public string Code =>
+            IsCustomEmoji ? Name : EmojiIndex.TryGetCode(Name) ?? Name;
+
        public bool IsAnimated { get; }

        public bool IsCustomEmoji => !string.IsNullOrWhiteSpace(Id);
--- a/DiscordChatExporter.Core/Markdown/Ast/FormattedNode.cs
+++ b/DiscordChatExporter.Core/Markdown/Ast/FormattedNode.cs
@ -2,16 +2,6 @@

 namespace DiscordChatExporter.Core.Markdown.Ast
 {
-    internal enum TextFormatting
-    {
-        Bold,
-        Italic,
-        Underline,
-        Strikethrough,
-        Spoiler,
-        Quote
-    }
-
    internal class FormattedNode : MarkdownNode
    {
        public TextFormatting Formatting { get; }
--- a/DiscordChatExporter.Core/Markdown/Ast/MentionNode.cs
+++ b/DiscordChatExporter.Core/Markdown/Ast/MentionNode.cs
@ -1,13 +1,5 @@
 namespace DiscordChatExporter.Core.Markdown.Ast
 {
-    internal enum MentionType
-    {
-        Meta,
-        User,
-        Channel,
-        Role
-    }
-
    internal class MentionNode : MarkdownNode
    {
        public string Id { get; }
--- a/DiscordChatExporter.Core/Markdown/Ast/MentionType.cs
+++ b/DiscordChatExporter.Core/Markdown/Ast/MentionType.cs
@ -0,0 +1,10 @@
+namespace DiscordChatExporter.Core.Markdown.Ast
+{
+    internal enum MentionType // Kind of mention passed to MentionNode by the markdown parser
+    {
+        Meta, // NOTE(review): no matcher visible in this change creates it; presumably @everyone/@here — confirm
+        User, // Created for <@123456> and <@!123456>
+        Channel, // Created for <#123456>
+        Role // Created for <@&123456>
+    }
+}
--- a/DiscordChatExporter.Core/Markdown/Ast/TextFormatting.cs
+++ b/DiscordChatExporter.Core/Markdown/Ast/TextFormatting.cs
@ -0,0 +1,12 @@
+namespace DiscordChatExporter.Core.Markdown.Ast
+{
+    internal enum TextFormatting // Formatting kind exposed by FormattedNode.Formatting
+    {
+        Bold,
+        Italic,
+        Underline,
+        Strikethrough,
+        Spoiler,
+        Quote
+    }
+}
--- a/DiscordChatExporter.Core/Markdown/MarkdownParser.cs
+++ b/DiscordChatExporter.Core/Markdown/MarkdownParser.cs
@ -3,6 +3,7 @@ using System.Linq;
 using System.Text.RegularExpressions;
 using DiscordChatExporter.Core.Markdown.Ast;
 using DiscordChatExporter.Core.Markdown.Matching;
+using DiscordChatExporter.Core.Utils;

 namespace DiscordChatExporter.Core.Markdown
 {
@ -103,7 +104,7 @@ namespace DiscordChatExporter.Core.Markdown
        // There can be either one or two backticks, but equal number on both sides
        private static readonly IMatcher<MarkdownNode> InlineCodeBlockNodeMatcher = new RegexMatcher<MarkdownNode>(
            new Regex("(`{1,2})([^`]+)\\1", DefaultRegexOptions | RegexOptions.Singleline),
-            m => new InlineCodeBlockNode(m.Groups[2].Value.Trim('\r', '\n'))
+            (_, m) => new InlineCodeBlockNode(m.Groups[2].Value.Trim('\r', '\n'))
        );

        // Capture language identifier and then any character until the earliest triple backtick
@ -111,7 +112,7 @@ namespace DiscordChatExporter.Core.Markdown
        // Blank lines at the beginning and end of content are trimmed
        private static readonly IMatcher<MarkdownNode> MultiLineCodeBlockNodeMatcher = new RegexMatcher<MarkdownNode>(
            new Regex("```(?:(\\w*)\\n)?(.+?)```", DefaultRegexOptions | RegexOptions.Singleline),
-            m => new MultiLineCodeBlockNode(m.Groups[1].Value, m.Groups[2].Value.Trim('\r', '\n'))
+            (_, m) => new MultiLineCodeBlockNode(m.Groups[1].Value, m.Groups[2].Value.Trim('\r', '\n'))
        );

        /* Mentions */
@ -131,19 +132,19 @@ namespace DiscordChatExporter.Core.Markdown
        // Capture <@123456> or <@!123456>
        private static readonly IMatcher<MarkdownNode> UserMentionNodeMatcher = new RegexMatcher<MarkdownNode>(
            new Regex("<@!?(\\d+)>", DefaultRegexOptions),
-            m => new MentionNode(m.Groups[1].Value, MentionType.User)
+            (_, m) => new MentionNode(m.Groups[1].Value, MentionType.User)
        );

        // Capture <#123456>
        private static readonly IMatcher<MarkdownNode> ChannelMentionNodeMatcher = new RegexMatcher<MarkdownNode>(
            new Regex("<#(\\d+)>", DefaultRegexOptions),
-            m => new MentionNode(m.Groups[1].Value, MentionType.Channel)
+            (_, m) => new MentionNode(m.Groups[1].Value, MentionType.Channel)
        );

        // Capture <@&123456>
        private static readonly IMatcher<MarkdownNode> RoleMentionNodeMatcher = new RegexMatcher<MarkdownNode>(
            new Regex("<@&(\\d+)>", DefaultRegexOptions),
-            m => new MentionNode(m.Groups[1].Value, MentionType.Role)
+            (_, m) => new MentionNode(m.Groups[1].Value, MentionType.Role)
        );

        /* Emojis */
@ -154,15 +155,26 @@ namespace DiscordChatExporter.Core.Markdown
        // ... or digit followed by enclosing mark
        // (this does not match all emojis in Discord but it's reasonably accurate enough)
        private static readonly IMatcher<MarkdownNode> StandardEmojiNodeMatcher = new RegexMatcher<MarkdownNode>(
-            new Regex("((?:[\\uD83C][\\uDDE6-\\uDDFF]){2}|[\\u2600-\\u26FF]|\\p{Cs}{2}|\\d\\p{Me})",
-                DefaultRegexOptions),
-            m => new EmojiNode(m.Groups[1].Value)
+            new Regex("((?:[\\uD83C][\\uDDE6-\\uDDFF]){2}|[\\u2600-\\u26FF]|\\p{Cs}{2}|\\d\\p{Me})", DefaultRegexOptions),
+            (_, m) => new EmojiNode(m.Groups[1].Value)
+        );
+
+        // Capture a colon-delimited code such as :thinking:, keeping it only if EmojiIndex knows the code
+        // NOTE(review): an unknown first match (e.g. ":30:" in "10:30:45") may hide a later valid code, as no retry is visible in RegexMatcher — confirm
+        private static readonly IMatcher<MarkdownNode> CodedStandardEmojiNodeMatcher = new RegexMatcher<MarkdownNode>(
+            new Regex(":([\\w_]+):", DefaultRegexOptions),
+            (_, m) =>
+            {
+                if (EmojiIndex.TryGetName(m.Groups[1].Value) is not { } resolved)
+                    return null;
+                return new EmojiNode(resolved);
+            }
         );

        // Capture <:lul:123456> or <a:lul:123456>
        private static readonly IMatcher<MarkdownNode> CustomEmojiNodeMatcher = new RegexMatcher<MarkdownNode>(
            new Regex("<(a)?:(.+?):(\\d+?)>", DefaultRegexOptions),
-            m => new EmojiNode(m.Groups[3].Value, m.Groups[2].Value, !string.IsNullOrWhiteSpace(m.Groups[1].Value))
+            (_, m) => new EmojiNode(m.Groups[3].Value, m.Groups[2].Value, !string.IsNullOrWhiteSpace(m.Groups[1].Value))
        );

        /* Links */
@ -170,19 +182,19 @@ namespace DiscordChatExporter.Core.Markdown
        // Capture [title](link)
        private static readonly IMatcher<MarkdownNode> TitledLinkNodeMatcher = new RegexMatcher<MarkdownNode>(
            new Regex("\\[(.+?)\\]\\((.+?)\\)", DefaultRegexOptions),
-            m => new LinkNode(m.Groups[2].Value, m.Groups[1].Value)
+            (_, m) => new LinkNode(m.Groups[2].Value, m.Groups[1].Value)
        );

        // Capture any non-whitespace character after http:// or https:// until the last punctuation character or whitespace
        private static readonly IMatcher<MarkdownNode> AutoLinkNodeMatcher = new RegexMatcher<MarkdownNode>(
            new Regex("(https?://\\S*[^\\.,:;\"\'\\s])", DefaultRegexOptions),
-            m => new LinkNode(m.Groups[1].Value)
+            (_, m) => new LinkNode(m.Groups[1].Value)
        );

        // Same as auto link but also surrounded by angular brackets
        private static readonly IMatcher<MarkdownNode> HiddenLinkNodeMatcher = new RegexMatcher<MarkdownNode>(
            new Regex("<(https?://\\S*[^\\.,:;\"\'\\s])>", DefaultRegexOptions),
-            m => new LinkNode(m.Groups[1].Value)
+            (_, m) => new LinkNode(m.Groups[1].Value)
        );

        /* Text */
@ -198,21 +210,21 @@ namespace DiscordChatExporter.Core.Markdown
        // This escapes it from matching for emoji
        private static readonly IMatcher<MarkdownNode> IgnoredEmojiTextNodeMatcher = new RegexMatcher<MarkdownNode>(
            new Regex("(\\u26A7|\\u2640|\\u2642|\\u2695|\\u267E|\\u00A9|\\u00AE|\\u2122)", DefaultRegexOptions),
-            m => new TextNode(m.Groups[1].Value)
+            (_, m) => new TextNode(m.Groups[1].Value)
        );

        // Capture any "symbol/other" character or surrogate pair preceded by a backslash
        // This escapes it from matching for emoji
        private static readonly IMatcher<MarkdownNode> EscapedSymbolTextNodeMatcher = new RegexMatcher<MarkdownNode>(
            new Regex("\\\\(\\p{So}|\\p{Cs}{2})", DefaultRegexOptions),
-            m => new TextNode(m.Groups[1].Value)
+            (_, m) => new TextNode(m.Groups[1].Value)
        );

        // Capture any non-whitespace, non latin alphanumeric character preceded by a backslash
        // This escapes it from matching for formatting or other tokens
        private static readonly IMatcher<MarkdownNode> EscapedCharacterTextNodeMatcher = new RegexMatcher<MarkdownNode>(
            new Regex("\\\\([^a-zA-Z0-9\\s])", DefaultRegexOptions),
-            m => new TextNode(m.Groups[1].Value)
+            (_, m) => new TextNode(m.Groups[1].Value)
        );

        // Combine all matchers into one
@ -255,7 +267,8 @@ namespace DiscordChatExporter.Core.Markdown

            // Emoji
            StandardEmojiNodeMatcher,
-            CustomEmojiNodeMatcher
+            CustomEmojiNodeMatcher,
+            CodedStandardEmojiNodeMatcher
        );

        // Minimal set of matchers for non-multimedia formats (e.g. plain text)
--- a/DiscordChatExporter.Core/Markdown/Matching/RegexMatcher.cs
+++ b/DiscordChatExporter.Core/Markdown/Matching/RegexMatcher.cs
@ -6,19 +6,14 @@ namespace DiscordChatExporter.Core.Markdown.Matching
    internal class RegexMatcher<T> : IMatcher<T>
    {
        private readonly Regex _regex;
-        private readonly Func<StringPart, Match, T> _transform;
+        private readonly Func<StringPart, Match, T?> _transform;

-        public RegexMatcher(Regex regex, Func<StringPart, Match, T> transform)
+        public RegexMatcher(Regex regex, Func<StringPart, Match, T?> transform)
        {
            _regex = regex;
            _transform = transform;
        }

-        public RegexMatcher(Regex regex, Func<Match, T> transform)
-            : this(regex, (p, m) => transform(m))
-        {
-        }
-
        public ParsedMatch<T>? TryMatch(StringPart stringPart)
        {
            var match = _regex.Match(stringPart.Target, stringPart.StartIndex, stringPart.Length);
@ -30,11 +25,15 @@ namespace DiscordChatExporter.Core.Markdown.Matching
            // Which is super weird because regex.Match(string, int) takes the whole input in context.
            // So in order to properly account for ^/$ regex tokens, we need to make sure that
            // the expression also matches on the bigger part of the input.
-            if (!_regex.IsMatch(stringPart.Target.Substring(0, stringPart.EndIndex), stringPart.StartIndex))
+            if (!_regex.IsMatch(stringPart.Target[..stringPart.EndIndex], stringPart.StartIndex))
                return null;

            var stringPartMatch = stringPart.Slice(match.Index, match.Length);
-            return new ParsedMatch<T>(stringPartMatch, _transform(stringPartMatch, match));
+            var value = _transform(stringPartMatch, match);
+
+            return value is not null
+                ? new ParsedMatch<T>(stringPartMatch, value)
+                : null;
        }
    }
 }
--- a/DiscordChatExporter.Core/Markdown/Matching/StringMatcher.cs
+++ b/DiscordChatExporter.Core/Markdown/Matching/StringMatcher.cs
@ -6,9 +6,9 @@ namespace DiscordChatExporter.Core.Markdown.Matching
    {
        private readonly string _needle;
        private readonly StringComparison _comparison;
-        private readonly Func<StringPart, T> _transform;
+        private readonly Func<StringPart, T?> _transform;

-        public StringMatcher(string needle, StringComparison comparison, Func<StringPart, T> transform)
+        public StringMatcher(string needle, StringComparison comparison, Func<StringPart, T?> transform)
        {
            _needle = needle;
            _comparison = comparison;
@ -23,14 +23,15 @@ namespace DiscordChatExporter.Core.Markdown.Matching
        public ParsedMatch<T>? TryMatch(StringPart stringPart)
        {
            var index = stringPart.Target.IndexOf(_needle, stringPart.StartIndex, stringPart.Length, _comparison);
+            if (index < 0)
+                return null;

-            if (index >= 0)
-            {
-                var stringPartMatch = stringPart.Slice(index, _needle.Length);
-                return new ParsedMatch<T>(stringPartMatch, _transform(stringPartMatch));
-            }
+            var stringPartMatch = stringPart.Slice(index, _needle.Length);
+            var value = _transform(stringPartMatch);

-            return null;
+            return value is not null
+                ? new ParsedMatch<T>(stringPartMatch, value)
+                : null;
        }
    }
 }
--- a/DiscordChatExporter.Core/Markdown/Matching/StringPart.cs
+++ b/DiscordChatExporter.Core/Markdown/Matching/StringPart.cs
@ -10,14 +10,13 @@ namespace DiscordChatExporter.Core.Markdown.Matching

        public int Length { get; }

-        public int EndIndex { get; }
+        public int EndIndex => StartIndex + Length;

        public StringPart(string target, int startIndex, int length)
        {
            Target = target;
            StartIndex = startIndex;
            Length = length;
-            EndIndex = startIndex + length;
        }

        public StringPart(string target)
--- a/DiscordChatExporter.Core/Utils/EmojiIndex.cs
+++ b/DiscordChatExporter.Core/Utils/EmojiIndex.cs
--- a/DiscordChatExporter.Core/Utils/Extensions/StringExtensions.cs
+++ b/DiscordChatExporter.Core/Utils/Extensions/StringExtensions.cs
@ -1,4 +1,5 @@
-using System.Text;
+using System.Collections.Generic;
+using System.Text;

 namespace DiscordChatExporter.Core.Utils.Extensions
 {
@ -14,6 +15,16 @@ namespace DiscordChatExporter.Core.Utils.Extensions
                ? str[..charCount]
                : str;

+        public static IEnumerable<Rune> GetRunes(this string str)
+        {
+            // Yields one rune at a time; ends at the end of the string or at the first ill-formed UTF-16 sequence
+            for (var offset = 0; offset < str.Length && Rune.TryGetRuneAt(str, offset, out var current);)
+            {
+                yield return current;
+                offset += current.Utf16SequenceLength;
+            }
+        }
+
        public static StringBuilder AppendIfNotEmpty(this StringBuilder builder, char value) =>
            builder.Length > 0
                ? builder.Append(value)