// DiscordChatExporter/DiscordChatExporter.Core/Exporting/ExportAssetDownloader.cs

using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Security.Cryptography;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading;
using System.Threading.Tasks;
using AsyncKeyedLock;
using DiscordChatExporter.Core.Utils;
using DiscordChatExporter.Core.Utils.Extensions;

namespace DiscordChatExporter.Core.Exporting;
/// <summary>
/// Downloads assets by URL into a working directory and remembers, per URL,
/// the local file path each asset was saved to.
/// </summary>
internal partial class ExportAssetDownloader
{
    // Keyed lock shared by every downloader instance (it is static). It is keyed
    // by the destination file path, so only one caller at a time works on a given file.
    private static readonly AsyncKeyedLocker<string> _locker = new(o =>
    {
        o.PoolSize = 20;
        o.PoolInitialFill = 1;
    });

    private readonly string _workingDirPath;
    private readonly bool _reuse;

    // File paths of the previously downloaded assets
    private readonly Dictionary<string, string> _pathCache = new(StringComparer.Ordinal);

    /// <summary>
    /// Initializes a downloader that stores assets in <paramref name="workingDirPath"/>.
    /// </summary>
    /// <param name="workingDirPath">Directory that downloaded files are written to.</param>
    /// <param name="reuse">
    /// When <c>true</c>, a file that already exists at the destination path is used as-is
    /// instead of being downloaded again.
    /// </param>
    public ExportAssetDownloader(string workingDirPath, bool reuse)
    {
        _workingDirPath = workingDirPath;
        _reuse = reuse;
    }

    /// <summary>
    /// Ensures the asset at <paramref name="url"/> is present in the working directory
    /// and returns the path of the local file.
    /// </summary>
    /// <param name="url">URL of the asset to download.</param>
    /// <param name="cancellationToken">Token used to cancel the lock wait and the download.</param>
    /// <returns>The path of the local file that corresponds to <paramref name="url"/>.</returns>
    public async ValueTask<string> DownloadAsync(string url, CancellationToken cancellationToken = default)
    {
        var fileName = GetFileNameFromUrl(url);
        var filePath = Path.Combine(_workingDirPath, fileName);

        using (await _locker.LockAsync(filePath, cancellationToken).ConfigureAwait(false))
        {
            // This URL was already resolved by this instance
            if (_pathCache.TryGetValue(url, out var cachedFilePath))
                return cachedFilePath;

            // Reuse existing files if we're allowed to
            if (!_reuse || !File.Exists(filePath))
            {
                Directory.CreateDirectory(_workingDirPath);

                await Http.ResiliencePolicy.ExecuteAsync(async () =>
                {
                    // Download the file
                    // NOTE(review): the response status code is not inspected here, so the body of
                    // an error response would also be written to the file — confirm this is intended.
                    using var response = await Http.Client.GetAsync(url, cancellationToken);
                    await using (var output = File.Create(filePath))
                        await response.Content.CopyToAsync(output, cancellationToken);

                    // Try to set the file date according to the last-modified header
                    try
                    {
                        var lastModified = response.Content.Headers.TryGetValue("Last-Modified")?.Pipe(s =>
                            DateTimeOffset.TryParse(s, CultureInfo.InvariantCulture, DateTimeStyles.None, out var instant)
                                ? instant
                                : (DateTimeOffset?)null
                        );

                        if (lastModified is not null)
                        {
                            File.SetCreationTimeUtc(filePath, lastModified.Value.UtcDateTime);
                            File.SetLastWriteTimeUtc(filePath, lastModified.Value.UtcDateTime);
                            File.SetLastAccessTimeUtc(filePath, lastModified.Value.UtcDateTime);
                        }
                    }
                    catch
                    {
                        // This can apparently fail for some reason.
                        // https://github.com/Tyrrrz/DiscordChatExporter/issues/585
                        // Updating file dates is not a critical task, so we'll just
                        // ignore exceptions thrown here.
                    }
                });
            }

            // Remember the result so later requests for the same URL return immediately
            return _pathCache[url] = filePath;
        }
    }
}
// Static helpers that derive the local file name for an asset URL.
internal partial class ExportAssetDownloader
{
    /// <summary>
    /// Computes a short identifier for <paramref name="url"/>: the SHA-256 hash of its
    /// UTF-8 bytes, rendered through <c>ToHex</c> and cut down to 5 characters.
    /// </summary>
    private static string GetUrlHash(string url) => SHA256
        .HashData(Encoding.UTF8.GetBytes(url))
        .ToHex()
        // 5 chars ought to be enough for anybody
        .Truncate(5);

    /// <summary>
    /// Builds the local file name for <paramref name="url"/>.
    /// </summary>
    /// <param name="url">URL of the asset.</param>
    /// <returns>
    /// The URL hash alone when no file name can be extracted from the URL; otherwise
    /// <c>{name}-{hash}{extension}</c> passed through <c>PathEx.EscapeFileName</c>.
    /// </returns>
    private static string GetFileNameFromUrl(string url)
    {
        var urlHash = GetUrlHash(url);

        // Try to extract file name from URL
        // (everything after the last '/' matched by the greedy prefix, up to the next '?')
        var fileName = Regex.Match(url, @".+/([^?]*)").Groups[1].Value;

        // If it's not there, just use the URL hash as the file name
        if (string.IsNullOrWhiteSpace(fileName))
            return urlHash;

        // Otherwise, use the original file name but inject the hash in the middle
        var fileNameWithoutExtension = Path.GetFileNameWithoutExtension(fileName);
        var fileExtension = Path.GetExtension(fileName);

        // Probably not a file extension, just a dot in a long file name
        // https://github.com/Tyrrrz/DiscordChatExporter/issues/708
        if (fileExtension.Length > 41)
        {
            fileNameWithoutExtension = fileName;
            fileExtension = "";
        }

        // The base name is truncated so the resulting file name stays reasonably short
        return PathEx.EscapeFileName(fileNameWithoutExtension.Truncate(42) + '-' + urlHash + fileExtension);
    }
}