You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
DiscordChatExporter/DiscordChatExporter.Core/Exporting/ExportAssetDownloader.cs

120 lines
4.2 KiB

using System;
using System.Collections.Generic;
4 years ago
using System.Globalization;
using System.IO;
using System.Security.Cryptography;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading;
using System.Threading.Tasks;
using AsyncKeyedLock;
3 years ago
using DiscordChatExporter.Core.Utils;
using DiscordChatExporter.Core.Utils.Extensions;
3 years ago
namespace DiscordChatExporter.Core.Exporting;
// Downloads assets referenced by URL into a working directory and remembers
// which local file path each URL was stored at.
internal partial class ExportAssetDownloader
{
    // Keyed async lock; it is static, so it is shared by every downloader instance.
    // DownloadAsync acquires it with the target file path as the key and holds it until it returns.
    private static readonly AsyncKeyedLocker<string> Locker = new(o =>
    {
        o.PoolSize = 20;
        o.PoolInitialFill = 1;
    });

    // Directory that downloaded files are written into
    private readonly string _workingDirPath;

    // Whether a file that already exists at the target path may be returned instead of downloading again
    private readonly bool _reuse;

    // File paths of the previously downloaded assets, keyed by URL (ordinal comparison).
    // NOTE(review): access is only guarded by the per-file-path lock, so concurrent calls
    // for different files on the same instance would touch this dictionary unsynchronized —
    // confirm that callers do not do that.
    private readonly Dictionary<string, string> _pathCache = new(StringComparer.Ordinal);

    // workingDirPath: directory to store downloaded files in (created on demand by DownloadAsync).
    // reuse: when true, an already existing file at the target path is returned without downloading.
    public ExportAssetDownloader(string workingDirPath, bool reuse)
    {
        _workingDirPath = workingDirPath;
        _reuse = reuse;
    }

    // Resolves the given URL to a local file path, downloading the file if necessary.
    //
    // url: address of the asset; it also determines the local file name (see GetFileNameFromUrl).
    // cancellationToken: passed to the lock acquisition, the HTTP request and the copy to disk.
    //
    // Returns the path of the local file. The result is cached per URL, so a repeated call
    // with the same URL on this instance returns the cached path without any I/O.
    //
    // Failures of the download itself are not caught here; if the transfer fails after the
    // target file has been created, the incomplete file is removed before the exception propagates.
    public async ValueTask<string> DownloadAsync(string url, CancellationToken cancellationToken = default)
    {
        var fileName = GetFileNameFromUrl(url);
        var filePath = Path.Combine(_workingDirPath, fileName);

        // Everything below runs while holding the lock for this particular file path
        using var _ = await Locker.LockAsync(filePath, cancellationToken);

        if (_pathCache.TryGetValue(url, out var cachedFilePath))
            return cachedFilePath;

        // Reuse existing files if we're allowed to
        if (_reuse && File.Exists(filePath))
            return _pathCache[url] = filePath;

        Directory.CreateDirectory(_workingDirPath);

        await Http.ResiliencePolicy.ExecuteAsync(async () =>
        {
            // Download the file
            using var response = await Http.Client.GetAsync(url, cancellationToken);

            // Opened outside of the try block on purpose: if the file cannot even be created,
            // there is nothing of ours to clean up and an existing file must be left alone.
            var output = File.Create(filePath);
            try
            {
                // The stream is disposed before the catch block below runs
                await using (output)
                    await response.Content.CopyToAsync(output, cancellationToken);
            }
            catch
            {
                // File.Create has already created (or truncated) the file, so a failed or cancelled
                // transfer leaves incomplete content behind. Remove it, otherwise a later call with
                // reuse enabled would find the file and treat it as a valid asset.
                TryDeleteFile(filePath);
                throw;
            }

            // Try to set the file date according to the last-modified header
            try
            {
                var lastModified = response.Content.Headers.TryGetValue("Last-Modified")?.Pipe(s =>
                    DateTimeOffset.TryParse(s, CultureInfo.InvariantCulture, DateTimeStyles.None, out var instant)
                        ? instant
                        : (DateTimeOffset?)null
                );

                if (lastModified is not null)
                {
                    var lastModifiedUtc = lastModified.Value.UtcDateTime;

                    File.SetCreationTimeUtc(filePath, lastModifiedUtc);
                    File.SetLastWriteTimeUtc(filePath, lastModifiedUtc);
                    File.SetLastAccessTimeUtc(filePath, lastModifiedUtc);
                }
            }
            catch
            {
                // This can apparently fail for some reason.
                // Updating the file date is not a critical task, so we'll just ignore exceptions thrown here.
                // https://github.com/Tyrrrz/DiscordChatExporter/issues/585
            }
        });

        return _pathCache[url] = filePath;
    }

    // Best-effort removal of an incompletely written file.
    // Errors are ignored so that they do not replace the exception that caused the cleanup.
    private static void TryDeleteFile(string filePath)
    {
        try
        {
            File.Delete(filePath);
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            // Nothing more we can do; the original download error is the one worth reporting
        }
    }
}
// Static helpers that derive a local file name from an asset URL.
internal partial class ExportAssetDownloader
{
    // Hex-encoded SHA-256 of the UTF-8 bytes of the URL, truncated to 5 characters.
    // The short form is considered sufficient to tell apart files that share a name.
    private static string GetUrlHash(string url)
    {
        var urlBytes = Encoding.UTF8.GetBytes(url);
        var digest = SHA256.HashData(urlBytes);

        return digest.ToHex().Truncate(5);
    }

    // Builds the file name under which the asset behind the given URL is stored:
    // the name found in the URL with the URL hash inserted before the extension,
    // or just the URL hash when the URL yields no usable name.
    private static string GetFileNameFromUrl(string url)
    {
        var hash = GetUrlHash(url);

        // Whatever follows the last slash, up to (not including) the query string
        var nameInUrl = Regex.Match(url, @".+/([^?]*)").Groups[1].Value;

        // Nothing usable in the URL: the hash alone becomes the file name
        if (string.IsNullOrWhiteSpace(nameInUrl))
            return hash;

        // An overly long "extension" is most likely just a dot inside a long file name,
        // in which case the entire name is treated as the stem and no extension is kept.
        // https://github.com/Tyrrrz/DiscordChatExporter/pull/812
        var extension = Path.GetExtension(nameInUrl);
        var isRealExtension = extension.Length <= 41;

        var stem = isRealExtension
            ? Path.GetFileNameWithoutExtension(nameInUrl)
            : nameInUrl;

        var suffix = isRealExtension
            ? extension
            : "";

        // Original name (shortened) + hash + extension
        return PathEx.EscapeFileName($"{stem.Truncate(42)}-{hash}{suffix}");
    }
}