From 9583e2684d936f988af73fad875367c011c9f20e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Slatinsk=C3=BD?= <43640724+slatinsky@users.noreply.github.com> Date: Sun, 3 Sep 2023 20:18:49 +0200 Subject: [PATCH] Optimize fetching threads (#1125) --- .../Commands/ExportAllCommand.cs | 2 + .../Commands/ExportGuildCommand.cs | 2 + .../Commands/GetChannelsCommand.cs | 2 + .../Discord/DiscordClient.cs | 58 +++++++++++++++++-- .../Exporting/ChannelExporter.cs | 15 +++++ 5 files changed, 73 insertions(+), 6 deletions(-) diff --git a/DiscordChatExporter.Cli/Commands/ExportAllCommand.cs b/DiscordChatExporter.Cli/Commands/ExportAllCommand.cs index 0d4df03..5dd4d30 100644 --- a/DiscordChatExporter.Cli/Commands/ExportAllCommand.cs +++ b/DiscordChatExporter.Cli/Commands/ExportAllCommand.cs @@ -76,6 +76,8 @@ public class ExportAllCommand : ExportCommandBase var thread in Discord.GetGuildThreadsAsync( guild.Id, ThreadInclusionMode == ThreadInclusionMode.All, + Before, + After, cancellationToken ) ) diff --git a/DiscordChatExporter.Cli/Commands/ExportGuildCommand.cs b/DiscordChatExporter.Cli/Commands/ExportGuildCommand.cs index 38f55c2..afc955f 100644 --- a/DiscordChatExporter.Cli/Commands/ExportGuildCommand.cs +++ b/DiscordChatExporter.Cli/Commands/ExportGuildCommand.cs @@ -54,6 +54,8 @@ public class ExportGuildCommand : ExportCommandBase var thread in Discord.GetGuildThreadsAsync( GuildId, ThreadInclusionMode == ThreadInclusionMode.All, + Before, + After, cancellationToken ) ) diff --git a/DiscordChatExporter.Cli/Commands/GetChannelsCommand.cs b/DiscordChatExporter.Cli/Commands/GetChannelsCommand.cs index 178b4d6..20b8bba 100644 --- a/DiscordChatExporter.Cli/Commands/GetChannelsCommand.cs +++ b/DiscordChatExporter.Cli/Commands/GetChannelsCommand.cs @@ -52,6 +52,8 @@ public class GetChannelsCommand : DiscordCommandBase await Discord.GetGuildThreadsAsync( GuildId, ThreadInclusionMode == ThreadInclusionMode.All, + null, + null, cancellationToken ) ) diff --git 
a/DiscordChatExporter.Core/Discord/DiscordClient.cs b/DiscordChatExporter.Core/Discord/DiscordClient.cs index da29308..593d0f3 100644 --- a/DiscordChatExporter.Core/Discord/DiscordClient.cs +++ b/DiscordChatExporter.Core/Discord/DiscordClient.cs @@ -277,6 +277,8 @@ public class DiscordClient public async IAsyncEnumerable GetGuildThreadsAsync( Snowflake guildId, bool includeArchived = false, + Snowflake? before = null, + Snowflake? after = null, [EnumeratorCancellation] CancellationToken cancellationToken = default ) { @@ -286,17 +288,29 @@ public class DiscordClient var tokenKind = _resolvedTokenKind ??= await GetTokenKindAsync(cancellationToken); var channels = await GetGuildChannelsAsync(guildId, cancellationToken); + var filteredChannels = channels + // Categories cannot have threads + .Where(c => c.Kind != ChannelKind.GuildCategory) + // Voice channels cannot have threads + .Where(c => !c.Kind.IsVoice()) + // Ordinary channel or forum channel without LastMessageId cannot have threads + .Where(c => c.LastMessageId != null) + // If --before is specified, skip channels created after the specified date + .Where(c => before == null || before > c.Id); + // User accounts can only fetch threads using the search endpoint if (tokenKind == TokenKind.User) { // Active threads - foreach (var channel in channels) + foreach (var channel in filteredChannels) { var currentOffset = 0; while (true) { var url = new UrlBuilder() .SetPath($"channels/{channel.Id}/threads/search") + .SetQueryParameter("sort_by", "last_message_time") + .SetQueryParameter("sort_order", "desc") .SetQueryParameter("archived", "false") .SetQueryParameter("offset", currentOffset.ToString()) .Build(); @@ -306,14 +320,29 @@ public class DiscordClient if (response is null) break; + var containsOlder = false; + foreach ( var threadJson in response.Value.GetProperty("threads").EnumerateArray() ) { - yield return Channel.Parse(threadJson, channel); + var thread = Channel.Parse(threadJson, channel); + + // if
--after is specified, we can break early, because the threads are sorted by last message time + if (after is not null && after > thread.LastMessageId) + { + containsOlder = true; + break; + } + + yield return thread; + currentOffset++; } + if (containsOlder) + break; + if (!response.Value.GetProperty("has_more").GetBoolean()) break; } @@ -322,13 +351,15 @@ public class DiscordClient // Archived threads if (includeArchived) { - foreach (var channel in channels) + foreach (var channel in filteredChannels) { var currentOffset = 0; while (true) { var url = new UrlBuilder() .SetPath($"channels/{channel.Id}/threads/search") + .SetQueryParameter("sort_by", "last_message_time") + .SetQueryParameter("sort_order", "desc") .SetQueryParameter("archived", "true") .SetQueryParameter("offset", currentOffset.ToString()) .Build(); @@ -338,14 +369,29 @@ public class DiscordClient if (response is null) break; + var containsOlder = false; + foreach ( var threadJson in response.Value.GetProperty("threads").EnumerateArray() ) { - yield return Channel.Parse(threadJson, channel); + var thread = Channel.Parse(threadJson, channel); + + // if --after is specified, we can break early, because the threads are sorted by last message time + if (after is not null && after > thread.LastMessageId) + { + containsOlder = true; + break; + } + + yield return thread; + currentOffset++; } + if (containsOlder) + break; + if (!response.Value.GetProperty("has_more").GetBoolean()) break; } @@ -357,7 +403,7 @@ public class DiscordClient { // Active threads { - var parentsById = channels.ToDictionary(c => c.Id); + var parentsById = filteredChannels.ToDictionary(c => c.Id); var response = await GetJsonResponseAsync( $"guilds/{guildId}/threads/active", @@ -379,7 +425,7 @@ public class DiscordClient // Archived threads if (includeArchived) { - foreach (var channel in channels) + foreach (var channel in filteredChannels) { // Public archived threads { diff --git 
a/DiscordChatExporter.Core/Exporting/ChannelExporter.cs b/DiscordChatExporter.Core/Exporting/ChannelExporter.cs index 43bd049..7edbdbf 100644 --- a/DiscordChatExporter.Core/Exporting/ChannelExporter.cs +++ b/DiscordChatExporter.Core/Exporting/ChannelExporter.cs @@ -2,6 +2,7 @@ using System.Threading; using System.Threading.Tasks; using DiscordChatExporter.Core.Discord; +using DiscordChatExporter.Core.Discord.Data; using DiscordChatExporter.Core.Exceptions; using Gress; @@ -33,6 +34,20 @@ public class ChannelExporter ); } + // Check if the 'before' boundary is valid + if (request.Before is not null && request.Channel.Id > request.Before) + { + throw new DiscordChatExporterException( + "Channel does not contain any messages within the specified period." + ); + } + + // Skip forum channels, they are exported as threads + if (request.Channel.Kind == ChannelKind.GuildForum) + { + throw new DiscordChatExporterException("Channel is a forum."); + } + // Build context var context = new ExportContext(_discord, request); await context.PopulateChannelsAndRolesAsync(cancellationToken);