Filter which messages are exported (#622)

pull/639/head
Lucas LaBuff 3 years ago committed by GitHub
parent 95cd6cb50c
commit d39fc212b6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -12,6 +12,7 @@ using DiscordChatExporter.Core.Discord;
using DiscordChatExporter.Core.Discord.Data;
using DiscordChatExporter.Core.Exceptions;
using DiscordChatExporter.Core.Exporting;
using DiscordChatExporter.Core.Exporting.Filtering;
using DiscordChatExporter.Core.Exporting.Partitioning;
using DiscordChatExporter.Core.Utils.Extensions;
using Tyrrrz.Extensions;
@ -35,6 +36,9 @@ namespace DiscordChatExporter.Cli.Commands.Base
[CommandOption("partition", 'p', Description = "Split output into partitions, each limited to this number of messages (e.g. 100) or file size (e.g. 10mb).")]
public PartitionLimit PartitionLimit { get; init; } = NullPartitionLimit.Instance;
// Optional "--filter" command line option. When the option is not supplied the property keeps its
// default, NullMessageFilter.Instance.
// NOTE(review): the option text is presumably converted by the CLI framework through
// MessageFilter.Parse - confirm against the framework's conversion rules.
[CommandOption("filter", Description = "Only include messages that satisfy this filter (e.g. from:foo#1234).")]
public MessageFilter MessageFilter { get; init; } = NullMessageFilter.Instance;
[CommandOption("parallel", Description = "Limits how many channels can be exported in parallel.")]
public int ParallelLimit { get; init; } = 1;
@ -76,6 +80,7 @@ namespace DiscordChatExporter.Cli.Commands.Base
After,
Before,
PartitionLimit,
MessageFilter,
ShouldDownloadMedia,
ShouldReuseMedia,
DateFormat

@ -8,6 +8,7 @@
<PackageReference Include="JsonExtensions" Version="1.1.0" />
<PackageReference Include="MiniRazor.CodeGen" Version="2.1.2" />
<PackageReference Include="Polly" Version="7.2.2" />
<PackageReference Include="Superpower" Version="2.3.0" />
<PackageReference Include="Tyrrrz.Extensions" Version="1.6.5" />
</ItemGroup>

@ -39,6 +39,10 @@ namespace DiscordChatExporter.Core.Exporting
var encounteredUsers = new HashSet<User>(IdBasedEqualityComparer.Instance);
await foreach (var message in _discord.GetMessagesAsync(request.Channel.Id, request.After, request.Before, progress))
{
// Skips any messages that fail to pass the supplied filter
if (!request.MessageFilter.Filter(message))
continue;
// Resolve members for referenced users
foreach (var referencedUser in message.MentionedUsers.Prepend(message.Author))
{

@ -4,6 +4,7 @@ using System.Text;
using System.Text.RegularExpressions;
using DiscordChatExporter.Core.Discord;
using DiscordChatExporter.Core.Discord.Data;
using DiscordChatExporter.Core.Exporting.Filtering;
using DiscordChatExporter.Core.Exporting.Partitioning;
using DiscordChatExporter.Core.Utils;
@ -31,6 +32,8 @@ namespace DiscordChatExporter.Core.Exporting
public PartitionLimit PartitionLimit { get; }
// Filter that decides which messages are included in the export; assigned once from the
// constructor's messageFilter argument.
public MessageFilter MessageFilter { get; }
public bool ShouldDownloadMedia { get; }
public bool ShouldReuseMedia { get; }
@ -45,6 +48,7 @@ namespace DiscordChatExporter.Core.Exporting
Snowflake? after,
Snowflake? before,
PartitionLimit partitionLimit,
MessageFilter messageFilter,
bool shouldDownloadMedia,
bool shouldReuseMedia,
string dateFormat)
@ -56,6 +60,7 @@ namespace DiscordChatExporter.Core.Exporting
After = after;
Before = before;
PartitionLimit = partitionLimit;
MessageFilter = messageFilter;
ShouldDownloadMedia = shouldDownloadMedia;
ShouldReuseMedia = shouldReuseMedia;
DateFormat = dateFormat;

@ -0,0 +1,8 @@
namespace DiscordChatExporter.Core.Exporting.Filtering
{
    /// <summary>
    /// Logical operator used to join the two operands of a binary filter expression.
    /// </summary>
    public enum BinaryExpressionKind
    {
        /// <summary>The expression is satisfied when at least one operand is satisfied.</summary>
        Or = 0,

        /// <summary>The expression is satisfied only when both operands are satisfied.</summary>
        And = 1
    }
}

@ -0,0 +1,26 @@
using DiscordChatExporter.Core.Discord.Data;
using System;
namespace DiscordChatExporter.Core.Exporting.Filtering
{
    /// <summary>
    /// Filter that joins two other filters with a logical OR or AND.
    /// </summary>
    public class BinaryExpressionMessageFilter : MessageFilter
    {
        private readonly MessageFilter _left;
        private readonly MessageFilter _right;
        private readonly BinaryExpressionKind _kind;

        /// <summary>
        /// Creates a filter that evaluates <paramref name="first"/> and <paramref name="second"/>
        /// and combines the results according to <paramref name="kind"/>.
        /// </summary>
        public BinaryExpressionMessageFilter(MessageFilter first, MessageFilter second, BinaryExpressionKind kind)
        {
            (_left, _right, _kind) = (first, second, kind);
        }

        /// <summary>
        /// Evaluates both operands against <paramref name="message"/>. The second operand is only
        /// evaluated when the first one does not already decide the outcome.
        /// </summary>
        /// <exception cref="InvalidOperationException">The expression kind is not a known value.</exception>
        public override bool Filter(Message message)
        {
            switch (_kind)
            {
                case BinaryExpressionKind.Or:
                    return _left.Filter(message) || _right.Filter(message);

                case BinaryExpressionKind.And:
                    return _left.Filter(message) && _right.Filter(message);

                default:
                    throw new InvalidOperationException($"Unknown binary expression kind '{_kind}'.");
            }
        }
    }
}

@ -0,0 +1,15 @@
using DiscordChatExporter.Core.Discord.Data;
using System.Text.RegularExpressions;
namespace DiscordChatExporter.Core.Exporting.Filtering
{
    /// <summary>
    /// Filter that accepts messages whose content contains the given text as a standalone term
    /// (case-insensitive).
    /// </summary>
    public class ContainsMessageFilter : MessageFilter
    {
        // Built once per filter instead of once per message.
        private readonly Regex _pattern;

        /// <summary>
        /// Creates a filter that looks for <paramref name="value"/> in the message content.
        /// </summary>
        /// <param name="value">Literal text to search for; regex metacharacters are escaped.</param>
        public ContainsMessageFilter(string value) =>
            // A bare \b on each side only works when the search text itself starts and ends with a
            // word character: for text such as "(max)" or ":)" there is no word boundary next to a
            // space or at the edge of a line, so the filter would never match standalone
            // occurrences. Accepting whitespace or a line edge as an alternative keeps the
            // original behavior for plain words ("max" still does not match "maximum") while
            // making punctuation-delimited text searchable.
            _pattern = new Regex(
                @"(?:\b|\s|^)" + Regex.Escape(value) + @"(?:\b|\s|$)",
                RegexOptions.IgnoreCase | DefaultRegexOptions
            );

        /// <summary>
        /// Returns true when the content of <paramref name="message"/> contains the search text.
        /// </summary>
        public override bool Filter(Message message) => _pattern.IsMatch(message.Content);
    }
}

@ -0,0 +1,17 @@
using DiscordChatExporter.Core.Discord.Data;
using System;
namespace DiscordChatExporter.Core.Exporting.Filtering
{
    /// <summary>
    /// Filter that accepts messages written by a particular author, identified by name,
    /// full name, or ID.
    /// </summary>
    public class FromMessageFilter : MessageFilter
    {
        private readonly string _value;

        /// <summary>
        /// Creates a filter for the author identified by <paramref name="value"/>.
        /// </summary>
        public FromMessageFilter(string value)
        {
            _value = value;
        }

        /// <summary>
        /// Returns true when the author's name, full name, or ID (as text) equals the configured
        /// value, ignoring case.
        /// </summary>
        public override bool Filter(Message message)
        {
            var author = message.Author;

            if (string.Equals(_value, author.Name, StringComparison.OrdinalIgnoreCase))
            {
                return true;
            }

            if (string.Equals(_value, author.FullName, StringComparison.OrdinalIgnoreCase))
            {
                return true;
            }

            return string.Equals(_value, author.Id.ToString(), StringComparison.OrdinalIgnoreCase);
        }
    }
}

@ -0,0 +1,26 @@
using DiscordChatExporter.Core.Discord.Data;
using System;
using System.Linq;
using System.Text.RegularExpressions;
namespace DiscordChatExporter.Core.Exporting.Filtering
{
    /// <summary>
    /// Filter that accepts messages containing a particular kind of content:
    /// "link", "embed", "file", "video", "image" or "sound".
    /// </summary>
    public class HasMessageFilter : MessageFilter
    {
        // Value exactly as supplied by the user; reported verbatim in the error message.
        private readonly string _value;

        // Lower-cased copy used for matching, so that "has:Image" behaves like "has:image".
        // Filter keys are lower-cased before matching and user names are compared ignoring case,
        // so matching the value case-sensitively was inconsistent with the rest of the syntax.
        private readonly string _normalizedValue;

        /// <summary>
        /// Creates a filter for the content kind named by <paramref name="value"/> (case-insensitive).
        /// </summary>
        public HasMessageFilter(string value)
        {
            _value = value;
            _normalizedValue = value.ToLowerInvariant();
        }

        /// <summary>
        /// Returns true when <paramref name="message"/> contains the configured kind of content.
        /// </summary>
        /// <exception cref="InvalidOperationException">
        /// The configured value is not one of the supported content kinds.
        /// </exception>
        public override bool Filter(Message message) =>
            _normalizedValue switch
            {
                // An http(s) URL whose last character is not trailing punctuation or whitespace.
                "link" => Regex.IsMatch(message.Content, "https?://\\S*[^\\.,:;\"\'\\s]", DefaultRegexOptions),
                "embed" => message.Embeds.Any(),
                "file" => message.Attachments.Any(),
                "video" => message.Attachments.Any(file => file.IsVideo),
                "image" => message.Attachments.Any(file => file.IsImage),
                "sound" => message.Attachments.Any(file => file.IsAudio),
                _ => throw new InvalidOperationException($"Invalid value provided for the 'has' message filter: '{_value}'")
            };
    }
}

@ -0,0 +1,19 @@
using DiscordChatExporter.Core.Discord.Data;
using System;
using System.Linq;
namespace DiscordChatExporter.Core.Exporting.Filtering
{
    /// <summary>
    /// Filter that accepts messages mentioning a particular user, identified by name,
    /// full name, or ID.
    /// </summary>
    public class MentionsMessageFilter : MessageFilter
    {
        private readonly string _value;

        /// <summary>
        /// Creates a filter for mentions of the user identified by <paramref name="value"/>.
        /// </summary>
        public MentionsMessageFilter(string value)
        {
            _value = value;
        }

        /// <summary>
        /// Returns true when at least one mentioned user's name, full name, or ID (as text)
        /// equals the configured value, ignoring case.
        /// </summary>
        public override bool Filter(Message message)
        {
            foreach (var mentioned in message.MentionedUsers)
            {
                var isMatch =
                    string.Equals(_value, mentioned.Name, StringComparison.OrdinalIgnoreCase) ||
                    string.Equals(_value, mentioned.FullName, StringComparison.OrdinalIgnoreCase) ||
                    string.Equals(_value, mentioned.Id.ToString(), StringComparison.OrdinalIgnoreCase);

                if (isMatch)
                {
                    return true;
                }
            }

            return false;
        }
    }
}

@ -0,0 +1,38 @@
using System;
using System.Text.RegularExpressions;
using DiscordChatExporter.Core.Discord.Data;
using DiscordChatExporter.Core.Exporting.Filtering.Parsing;
using Superpower;
namespace DiscordChatExporter.Core.Exporting.Filtering
{
    /// <summary>
    /// Base type for predicates that decide whether a message is included in an export.
    /// </summary>
    public abstract partial class MessageFilter
    {
        /// <summary>
        /// Returns true when <paramref name="message"/> satisfies this filter.
        /// </summary>
        public abstract bool Filter(Message message);
    }

    public partial class MessageFilter
    {
        /// <summary>
        /// Regex options shared by the filters that match against message content.
        /// </summary>
        protected const RegexOptions DefaultRegexOptions =
            RegexOptions.Compiled | RegexOptions.CultureInvariant | RegexOptions.Multiline;

        /// <summary>
        /// Creates the filter for a bare term (no key): a content search for <paramref name="text"/>.
        /// </summary>
        internal static MessageFilter CreateFilter(string text)
        {
            return new ContainsMessageFilter(text);
        }

        /// <summary>
        /// Creates the filter for a "key:value" term. The key is matched case-insensitively.
        /// </summary>
        /// <exception cref="ArgumentException"><paramref name="key"/> is not a known filter type.</exception>
        internal static MessageFilter CreateFilter(string key, string value)
        {
            switch (key.ToLowerInvariant())
            {
                case "from":
                    return new FromMessageFilter(value);

                case "has":
                    return new HasMessageFilter(value);

                case "mentions":
                    return new MentionsMessageFilter(value);

                default:
                    throw new ArgumentException($"Invalid filter type '{key}'.", nameof(key));
            }
        }

        /// <summary>
        /// Tokenizes and parses a filter expression into a filter tree.
        /// </summary>
        /// <param name="value">Filter expression text.</param>
        /// <param name="formatProvider">Not used by this implementation.</param>
        public static MessageFilter Parse(string value, IFormatProvider? formatProvider = null) =>
            FilterParser.Instance.Parse(FilterTokenizer.Instance.Tokenize(value));
    }
}

@ -0,0 +1,13 @@
using DiscordChatExporter.Core.Discord.Data;
namespace DiscordChatExporter.Core.Exporting.Filtering
{
    /// <summary>
    /// Filter that inverts the outcome of another filter.
    /// </summary>
    public class NegatedMessageFilter : MessageFilter
    {
        private readonly MessageFilter _inner;

        /// <summary>
        /// Creates a filter that accepts exactly the messages <paramref name="filter"/> rejects.
        /// </summary>
        public NegatedMessageFilter(MessageFilter filter)
        {
            _inner = filter;
        }

        /// <summary>
        /// Returns true when the wrapped filter returns false for <paramref name="message"/>.
        /// </summary>
        public override bool Filter(Message message)
        {
            return !_inner.Filter(message);
        }
    }
}

@ -0,0 +1,11 @@
using DiscordChatExporter.Core.Discord.Data;
namespace DiscordChatExporter.Core.Exporting.Filtering
{
    /// <summary>
    /// Filter that accepts every message; used when no filter was specified.
    /// </summary>
    public class NullMessageFilter : MessageFilter
    {
        /// <summary>
        /// Shared instance of the filter.
        /// </summary>
        public static NullMessageFilter Instance { get; } = new NullMessageFilter();

        /// <summary>
        /// Always returns true, regardless of <paramref name="message"/>.
        /// </summary>
        public override bool Filter(Message message)
        {
            return true;
        }
    }
}

@ -0,0 +1,68 @@
using Superpower;
using Superpower.Model;
using Superpower.Parsers;
using System;
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using System.Linq;
using System.Text;
namespace DiscordChatExporter.Core.Exporting.Filtering.Parsing
{
// Parser combinators that turn a filter expression (as tokenized by FilterTokenizer) into a tree of
// MessageFilter objects.
//
// NOTE: the properties below are static and their initializers run in declaration order. Several
// parsers refer to parsers declared further down (for example AnyFilter uses KeyValueFilter). This
// is only safe because those references appear in the second or a later `from` clause of a query
// expression, which the compiler turns into a lambda that is evaluated at parse time rather than
// during static initialization. A forward reference in the FIRST `from` clause, or outside a query
// expression, would read a null property - keep that in mind when reordering or adding parsers.
public static class FilterParser
{
// Text of a double-quoted string: an opening '"', then any number of characters where a backslash
// followed by any character yields that character and every other character except '"' is taken
// literally, then a closing '"'. The surrounding quotes are not part of the result.
public static TextParser<string> QuotedString { get; } =
from open in Character.EqualTo('"')
from content in Character.EqualTo('\\').IgnoreThen(Character.AnyChar).Try()
.Or(Character.Except('"'))
.Many()
from close in Character.EqualTo('"')
select new string(content);
// Text of an unquoted string: one or more characters where a backslash followed by '"' or '/'
// yields that character, and any other character is accepted unless it is whitespace or one of
// ( ) : - | ".
// NOTE(review): a backslash followed by any other character appears to be kept as a literal
// backslash, and '-' cannot occur inside an unquoted string (e.g. a hyphenated name has to be
// quoted) - confirm this is intended.
public static TextParser<string> UnquotedString { get; } =
from content in Character.EqualTo('\\').IgnoreThen(Character.In('"', '/')).Try()
.Or(Character.Except(c => char.IsWhiteSpace(c) || "():-|\"".Contains(c), "non-whitespace character except for (, ), :, -, |, and \""))
.AtLeastOnce()
select new string(content);
// A quoted or unquoted string token, re-parsed with the matching text parser to obtain its
// unescaped content.
public static TokenListParser<FilterToken, string> AnyString { get; } =
Token.EqualTo(FilterToken.QuotedString).Apply(QuotedString)
.Or(Token.EqualTo(FilterToken.UnquotedString).Apply(UnquotedString));
// A single term: an optional leading minus followed by a key:value filter, a bare text filter or
// a parenthesized group. When the minus is present the term is wrapped in NegatedMessageFilter.
public static TokenListParser<FilterToken, MessageFilter> AnyFilter { get; } =
from minus in Token.EqualTo(FilterToken.Minus).Optional()
from content in KeyValueFilter.Or(TextFilter).Or(GroupedFilter)
select minus.HasValue ? new NegatedMessageFilter(content) : content;
// A bare string, turned into a filter via MessageFilter.CreateFilter(text).
public static TokenListParser<FilterToken, MessageFilter> TextFilter { get; } =
from value in AnyString
select MessageFilter.CreateFilter(value);
// string ':' string, turned into a filter via MessageFilter.CreateFilter(key, value).
public static TokenListParser<FilterToken, MessageFilter> KeyValueFilter { get; } =
from key in AnyString.Try()
from colon in Token.EqualTo(FilterToken.Colon).Try()
from value in AnyString
select MessageFilter.CreateFilter(key, value);
// '(' expression ')'; yields the filter of the inner expression.
public static TokenListParser<FilterToken, MessageFilter> GroupedFilter { get; } =
from open in Token.EqualTo(FilterToken.LParen)
from content in BinaryExpression
from close in Token.EqualTo(FilterToken.RParen)
select content;
// term '|' expression; yields an Or of the term and the rest of the expression.
public static TokenListParser<FilterToken, MessageFilter> OrBinaryExpression { get; } =
from first in AnyFilter
from vbar in Token.EqualTo(FilterToken.VBar)
from rest in BinaryExpression
select (MessageFilter)new BinaryExpressionMessageFilter(first, rest, BinaryExpressionKind.Or);
// term expression (two terms placed next to each other); yields an And of the term and the rest
// of the expression.
public static TokenListParser<FilterToken, MessageFilter> AndBinaryExpression { get; } =
from first in AnyFilter
from rest in BinaryExpression
select (MessageFilter)new BinaryExpressionMessageFilter(first, rest, BinaryExpressionKind.And);
// An expression: tried as an Or expression first, then as an And expression, then as a single term.
// NOTE(review): both binary forms recurse on their right-hand side only, so grouping appears to
// depend on operator position rather than on a fixed precedence: "a b | c" groups as
// a AND (b OR c), while "a | b c" groups as a OR (b AND c). Use parentheses to be explicit.
public static TokenListParser<FilterToken, MessageFilter> BinaryExpression { get; } = OrBinaryExpression.Try().Or(AndBinaryExpression.Try()).Or(AnyFilter);
// Entry point: an expression followed by the end of the token list.
public static TokenListParser<FilterToken, MessageFilter> Instance { get; } = BinaryExpression.AtEnd();
}
}

@ -0,0 +1,18 @@
using System;
using System.Collections.Generic;
using System.Text;
namespace DiscordChatExporter.Core.Exporting.Filtering.Parsing
{
    /// <summary>
    /// Kinds of tokens produced when tokenizing a filter expression.
    /// </summary>
    public enum FilterToken
    {
        /// <summary>Default value; not assigned to any token rule.</summary>
        None = 0,

        /// <summary>Opening parenthesis, <c>(</c>.</summary>
        LParen = 1,

        /// <summary>Closing parenthesis, <c>)</c>.</summary>
        RParen = 2,

        /// <summary>Colon, <c>:</c>.</summary>
        Colon = 3,

        /// <summary>Minus sign, <c>-</c>.</summary>
        Minus = 4,

        /// <summary>Vertical bar, <c>|</c>.</summary>
        VBar = 5,

        /// <summary>String written without surrounding quotes.</summary>
        UnquotedString = 6,

        /// <summary>String enclosed in double quotes.</summary>
        QuotedString = 7
    }
}

@ -0,0 +1,23 @@
using Superpower;
using Superpower.Parsers;
using Superpower.Tokenizers;
using System;
using System.Collections.Generic;
using System.Text;
namespace DiscordChatExporter.Core.Exporting.Filtering.Parsing
{
    /// <summary>
    /// Provides the tokenizer that splits a filter expression into <see cref="FilterToken"/>s.
    /// </summary>
    public static class FilterTokenizer
    {
        /// <summary>
        /// Shared tokenizer instance.
        /// </summary>
        public static Tokenizer<FilterToken> Instance { get; } = CreateTokenizer();

        // Assembles the tokenizer: whitespace is ignored, each punctuation character maps to its
        // own token kind, and the remaining text is matched as a quoted or unquoted string using
        // the text parsers exposed by FilterParser. The rules are registered in the order shown.
        private static Tokenizer<FilterToken> CreateTokenizer()
        {
            var builder = new TokenizerBuilder<FilterToken>()
                .Ignore(Span.WhiteSpace)
                .Match(Character.EqualTo('('), FilterToken.LParen)
                .Match(Character.EqualTo(')'), FilterToken.RParen)
                .Match(Character.EqualTo(':'), FilterToken.Colon)
                .Match(Character.EqualTo('-'), FilterToken.Minus)
                .Match(Character.EqualTo('|'), FilterToken.VBar)
                .Match(FilterParser.QuotedString, FilterToken.QuotedString)
                .Match(FilterParser.UnquotedString, FilterToken.UnquotedString);

            return builder.Build();
        }
    }
}

@ -0,0 +1,9 @@
// ReSharper disable CheckNamespace
// TODO: remove after moving to .NET 5
namespace System.Runtime.CompilerServices
{
// Empty marker type declared in the System.Runtime.CompilerServices namespace. The C# compiler
// looks up a type with this exact name when compiling `init` accessors; declaring it here makes
// such accessors usable when the target framework does not ship the type itself.
// NOTE(review): per the TODO above this block, it is meant to be deleted once the project targets
// .NET 5, which presumably provides the type out of the box.
internal static class IsExternalInit
{
}
}

@ -25,6 +25,8 @@ namespace DiscordChatExporter.Gui.Services
public string? LastPartitionLimitValue { get; set; }
// Message filter text remembered from the last export dialog; null when nothing has been stored.
public string? LastMessageFilterValue { get; set; }
public bool LastShouldDownloadMedia { get; set; }
public SettingsService()

@ -4,6 +4,7 @@ using System.Linq;
using DiscordChatExporter.Core.Discord;
using DiscordChatExporter.Core.Discord.Data;
using DiscordChatExporter.Core.Exporting;
using DiscordChatExporter.Core.Exporting.Filtering;
using DiscordChatExporter.Core.Exporting.Partitioning;
using DiscordChatExporter.Core.Utils.Extensions;
using DiscordChatExporter.Gui.Services;
@ -53,6 +54,12 @@ namespace DiscordChatExporter.Gui.ViewModels.Dialogs
? PartitionLimit.Parse(PartitionLimitValue)
: NullPartitionLimit.Instance;
// Raw filter expression text for the export; null, empty or whitespace means "no filter".
public string? MessageFilterValue { get; set; }
// Filter parsed from MessageFilterValue, or NullMessageFilter.Instance when no text is set.
// The text is parsed again on every access, and any exception thrown by MessageFilter.Parse
// propagates to the caller of this getter.
public MessageFilter MessageFilter => !string.IsNullOrWhiteSpace(MessageFilterValue)
? MessageFilter.Parse(MessageFilterValue)
: NullMessageFilter.Instance;
public bool ShouldDownloadMedia { get; set; }
// Whether to show the "advanced options" by default when the dialog opens.
@ -61,6 +68,7 @@ namespace DiscordChatExporter.Gui.ViewModels.Dialogs
After != default ||
Before != default ||
!string.IsNullOrWhiteSpace(PartitionLimitValue) ||
!string.IsNullOrWhiteSpace(MessageFilterValue) ||
ShouldDownloadMedia != default;
public ExportSetupViewModel(DialogManager dialogManager, SettingsService settingsService)
@ -71,6 +79,7 @@ namespace DiscordChatExporter.Gui.ViewModels.Dialogs
// Persist preferences
SelectedFormat = _settingsService.LastExportFormat;
PartitionLimitValue = _settingsService.LastPartitionLimitValue;
MessageFilterValue = _settingsService.LastMessageFilterValue;
ShouldDownloadMedia = _settingsService.LastShouldDownloadMedia;
}
@ -79,6 +88,7 @@ namespace DiscordChatExporter.Gui.ViewModels.Dialogs
// Persist preferences
_settingsService.LastExportFormat = SelectedFormat;
_settingsService.LastPartitionLimitValue = PartitionLimitValue;
_settingsService.LastMessageFilterValue = MessageFilterValue;
_settingsService.LastShouldDownloadMedia = ShouldDownloadMedia;
// If single channel - prompt file path

@ -212,6 +212,7 @@ namespace DiscordChatExporter.Gui.ViewModels
dialog.After?.Pipe(Snowflake.FromDate),
dialog.Before?.Pipe(Snowflake.FromDate),
dialog.PartitionLimit,
dialog.MessageFilter,
dialog.ShouldDownloadMedia,
_settingsService.ShouldReuseMedia,
_settingsService.DateFormat

@ -133,6 +133,14 @@
Text="{Binding PartitionLimitValue}"
ToolTip="Split output into partitions, each limited to this number of messages (e.g. 100) or file size (e.g. 10mb)" />
<!-- Filtering -->
<TextBox
Margin="16,8"
materialDesign:HintAssist.Hint="Message filter"
materialDesign:HintAssist.IsFloating="True"
Text="{Binding MessageFilterValue}"
ToolTip="Only include messages that satisfy this filter (e.g. from:foo#1234)." />
<!-- Download media -->
<Grid Margin="16,16" ToolTip="Download referenced media content (user avatars, attached files, embedded images, etc)">
<Grid.ColumnDefinitions>

Loading…
Cancel
Save