using System; using System.Collections.Generic; using System.Diagnostics; using System.IO; using System.Linq; using System.Net; using System.Text; using System.Threading; using System.Threading.Tasks; using System.Xml; using MediaBrowser.Common; using MediaBrowser.Common.Net; using MediaBrowser.Controller.Entities.Audio; using MediaBrowser.Controller.Providers; using MediaBrowser.Model.Entities; using MediaBrowser.Model.Providers; using MediaBrowser.Model.Serialization; using Microsoft.Extensions.Configuration; using Microsoft.Extensions.Logging; namespace MediaBrowser.Providers.Music { public class MusicBrainzAlbumProvider : IRemoteMetadataProvider<MusicAlbum, AlbumInfo>, IHasOrder { internal static MusicBrainzAlbumProvider Current; private readonly IHttpClient _httpClient; private readonly IApplicationHost _appHost; private readonly ILogger _logger; private readonly IJsonSerializer _json; private Stopwatch _stopWatchMusicBrainz = new Stopwatch(); public readonly string MusicBrainzBaseUrl; /// <summary> /// The Jellyfin user-agent is unrestricted but source IP must not exceed /// one request per second, therefore we rate limit to avoid throttling. /// Be prudent, use a value slightly above the minimun required. /// https://musicbrainz.org/doc/XML_Web_Service/Rate_Limiting /// </summary> private const long MusicBrainzQueryIntervalMs = 1050u; /// <summary> /// For each single MB lookup/search, this is the maximum number of /// attempts that shall be made whilst receiving a 503 Server /// Unavailable (indicating throttled) response. /// </summary> private const uint MusicBrainzQueryAttempts = 5u; public MusicBrainzAlbumProvider( IHttpClient httpClient, IApplicationHost appHost, ILogger logger, IJsonSerializer json, IConfiguration configuration) { _httpClient = httpClient; _appHost = appHost; _logger = logger; _json = json; MusicBrainzBaseUrl = configuration["MusicBrainz:BaseUrl"]; // Use a stopwatch to ensure we don't exceed the MusicBrainz rate limit _stopWatchMusicBrainz.Start(); Current = this; } public async Task<IEnumerable<RemoteSearchResult>> GetSearchResults(AlbumInfo searchInfo, CancellationToken cancellationToken) { var releaseId = searchInfo.GetReleaseId(); var releaseGroupId = searchInfo.GetReleaseGroupId(); string url; if (!string.IsNullOrEmpty(releaseId)) { url = string.Format("/ws/2/release/?query=reid:{0}", releaseId); } else if (!string.IsNullOrEmpty(releaseGroupId)) { url = string.Format("/ws/2/release?release-group={0}", releaseGroupId); } else { var artistMusicBrainzId = searchInfo.GetMusicBrainzArtistId(); if (!string.IsNullOrWhiteSpace(artistMusicBrainzId)) { url = string.Format("/ws/2/release/?query=\"{0}\" AND arid:{1}", WebUtility.UrlEncode(searchInfo.Name), artistMusicBrainzId); } else { // I'm sure there is a better way but for now it resolves search for 12" Mixes var queryName = searchInfo.Name.Replace("\"", string.Empty); url = string.Format("/ws/2/release/?query=\"{0}\" AND artist:\"{1}\"", WebUtility.UrlEncode(queryName), WebUtility.UrlEncode(searchInfo.GetAlbumArtist())); } } if (!string.IsNullOrWhiteSpace(url)) { using (var response = await GetMusicBrainzResponse(url, cancellationToken).ConfigureAwait(false)) { using (var stream = response.Content) { return GetResultsFromResponse(stream); } } } return Enumerable.Empty<RemoteSearchResult>(); } private IEnumerable<RemoteSearchResult> GetResultsFromResponse(Stream stream) { using (var oReader = new StreamReader(stream, Encoding.UTF8)) { var settings = new XmlReaderSettings() { ValidationType = ValidationType.None, CheckCharacters = false, IgnoreProcessingInstructions = true, IgnoreComments = true }; using (var reader = XmlReader.Create(oReader, settings)) { var results = ReleaseResult.Parse(reader); return results.Select(i => { var result = new RemoteSearchResult { Name = i.Title, ProductionYear = i.Year }; if (i.Artists.Count > 0) { result.AlbumArtist = new RemoteSearchResult { SearchProviderName = Name, Name = i.Artists[0].Item1 }; result.AlbumArtist.SetProviderId(MetadataProviders.MusicBrainzArtist, i.Artists[0].Item2); } if (!string.IsNullOrWhiteSpace(i.ReleaseId)) { result.SetProviderId(MetadataProviders.MusicBrainzAlbum, i.ReleaseId); } if (!string.IsNullOrWhiteSpace(i.ReleaseGroupId)) { result.SetProviderId(MetadataProviders.MusicBrainzReleaseGroup, i.ReleaseGroupId); } return result; }); } } } public async Task<MetadataResult<MusicAlbum>> GetMetadata(AlbumInfo id, CancellationToken cancellationToken) { var releaseId = id.GetReleaseId(); var releaseGroupId = id.GetReleaseGroupId(); var result = new MetadataResult<MusicAlbum> { Item = new MusicAlbum() }; // If we have a release group Id but not a release Id... if (string.IsNullOrWhiteSpace(releaseId) && !string.IsNullOrWhiteSpace(releaseGroupId)) { releaseId = await GetReleaseIdFromReleaseGroupId(releaseGroupId, cancellationToken).ConfigureAwait(false); result.HasMetadata = true; } if (string.IsNullOrWhiteSpace(releaseId)) { var artistMusicBrainzId = id.GetMusicBrainzArtistId(); var releaseResult = await GetReleaseResult(artistMusicBrainzId, id.GetAlbumArtist(), id.Name, cancellationToken).ConfigureAwait(false); if (releaseResult != null) { if (!string.IsNullOrWhiteSpace(releaseResult.ReleaseId)) { releaseId = releaseResult.ReleaseId; result.HasMetadata = true; } if (!string.IsNullOrWhiteSpace(releaseResult.ReleaseGroupId)) { releaseGroupId = releaseResult.ReleaseGroupId; result.HasMetadata = true; } result.Item.ProductionYear = releaseResult.Year; result.Item.Overview = releaseResult.Overview; } } // If we have a release Id but not a release group Id... if (!string.IsNullOrWhiteSpace(releaseId) && string.IsNullOrWhiteSpace(releaseGroupId)) { releaseGroupId = await GetReleaseGroupFromReleaseId(releaseId, cancellationToken).ConfigureAwait(false); result.HasMetadata = true; } if (!string.IsNullOrWhiteSpace(releaseId) || !string.IsNullOrWhiteSpace(releaseGroupId)) { result.HasMetadata = true; } if (result.HasMetadata) { if (!string.IsNullOrEmpty(releaseId)) { result.Item.SetProviderId(MetadataProviders.MusicBrainzAlbum, releaseId); } if (!string.IsNullOrEmpty(releaseGroupId)) { result.Item.SetProviderId(MetadataProviders.MusicBrainzReleaseGroup, releaseGroupId); } } return result; } public string Name => "MusicBrainz"; private Task<ReleaseResult> GetReleaseResult(string artistMusicBrainId, string artistName, string albumName, CancellationToken cancellationToken) { if (!string.IsNullOrEmpty(artistMusicBrainId)) { return GetReleaseResult(albumName, artistMusicBrainId, cancellationToken); } if (string.IsNullOrWhiteSpace(artistName)) { return Task.FromResult(new ReleaseResult()); } return GetReleaseResultByArtistName(albumName, artistName, cancellationToken); } private async Task<ReleaseResult> GetReleaseResult(string albumName, string artistId, CancellationToken cancellationToken) { var url = string.Format("/ws/2/release/?query=\"{0}\" AND arid:{1}", WebUtility.UrlEncode(albumName), artistId); using (var response = await GetMusicBrainzResponse(url, cancellationToken).ConfigureAwait(false)) using (var stream = response.Content) using (var oReader = new StreamReader(stream, Encoding.UTF8)) { var settings = new XmlReaderSettings() { ValidationType = ValidationType.None, CheckCharacters = false, IgnoreProcessingInstructions = true, IgnoreComments = true }; using (var reader = XmlReader.Create(oReader, settings)) { return ReleaseResult.Parse(reader).FirstOrDefault(); } } } private async Task<ReleaseResult> GetReleaseResultByArtistName(string albumName, string artistName, CancellationToken cancellationToken) { var url = string.Format("/ws/2/release/?query=\"{0}\" AND artist:\"{1}\"", WebUtility.UrlEncode(albumName), WebUtility.UrlEncode(artistName)); using (var response = await GetMusicBrainzResponse(url, cancellationToken).ConfigureAwait(false)) using (var stream = response.Content) using (var oReader = new StreamReader(stream, Encoding.UTF8)) { var settings = new XmlReaderSettings() { ValidationType = ValidationType.None, CheckCharacters = false, IgnoreProcessingInstructions = true, IgnoreComments = true }; using (var reader = XmlReader.Create(oReader, settings)) { return ReleaseResult.Parse(reader).FirstOrDefault(); } } } private class ReleaseResult { public string ReleaseId; public string ReleaseGroupId; public string Title; public string Overview; public int? Year; public List<ValueTuple<string, string>> Artists = new List<ValueTuple<string, string>>(); public static IEnumerable<ReleaseResult> Parse(XmlReader reader) { reader.MoveToContent(); reader.Read(); // Loop through each element while (!reader.EOF && reader.ReadState == ReadState.Interactive) { if (reader.NodeType == XmlNodeType.Element) { switch (reader.Name) { case "release-list": { if (reader.IsEmptyElement) { reader.Read(); continue; } using (var subReader = reader.ReadSubtree()) { return ParseReleaseList(subReader).ToList(); } } default: { reader.Skip(); break; } } } else { reader.Read(); } } return Enumerable.Empty<ReleaseResult>(); } private static IEnumerable<ReleaseResult> ParseReleaseList(XmlReader reader) { reader.MoveToContent(); reader.Read(); // Loop through each element while (!reader.EOF && reader.ReadState == ReadState.Interactive) { if (reader.NodeType == XmlNodeType.Element) { switch (reader.Name) { case "release": { if (reader.IsEmptyElement) { reader.Read(); continue; } var releaseId = reader.GetAttribute("id"); using (var subReader = reader.ReadSubtree()) { var release = ParseRelease(subReader, releaseId); if (release != null) { yield return release; } } break; } default: { reader.Skip(); break; } } } else { reader.Read(); } } } private static ReleaseResult ParseRelease(XmlReader reader, string releaseId) { var result = new ReleaseResult { ReleaseId = releaseId }; reader.MoveToContent(); reader.Read(); // http://stackoverflow.com/questions/2299632/why-does-xmlreader-skip-every-other-element-if-there-is-no-whitespace-separator // Loop through each element while (!reader.EOF && reader.ReadState == ReadState.Interactive) { if (reader.NodeType == XmlNodeType.Element) { switch (reader.Name) { case "title": { result.Title = reader.ReadElementContentAsString(); break; } case "date": { var val = reader.ReadElementContentAsString(); if (DateTime.TryParse(val, out var date)) { result.Year = date.Year; } break; } case "annotation": { result.Overview = reader.ReadElementContentAsString(); break; } case "release-group": { result.ReleaseGroupId = reader.GetAttribute("id"); reader.Skip(); break; } case "artist-credit": { // TODO /* * <artist-credit> <name-credit> <artist id="e225cda5-882d-4b80-b8a3-b36d7175b1ea"> <name>SARCASTIC+ZOOKEEPER</name> <sort-name>SARCASTIC+ZOOKEEPER</sort-name> </artist> </name-credit> </artist-credit> */ using (var subReader = reader.ReadSubtree()) { var artist = ParseArtistCredit(subReader); if (!string.IsNullOrEmpty(artist.Item1)) { result.Artists.Add(artist); } } break; } default: { reader.Skip(); break; } } } else { reader.Read(); } } return result; } } private static ValueTuple<string, string> ParseArtistCredit(XmlReader reader) { reader.MoveToContent(); reader.Read(); // http://stackoverflow.com/questions/2299632/why-does-xmlreader-skip-every-other-element-if-there-is-no-whitespace-separator // Loop through each element while (!reader.EOF && reader.ReadState == ReadState.Interactive) { if (reader.NodeType == XmlNodeType.Element) { switch (reader.Name) { case "name-credit": { using (var subReader = reader.ReadSubtree()) { return ParseArtistNameCredit(subReader); } } default: { reader.Skip(); break; } } } else { reader.Read(); } } return new ValueTuple<string, string>(); } private static (string, string) ParseArtistNameCredit(XmlReader reader) { reader.MoveToContent(); reader.Read(); // http://stackoverflow.com/questions/2299632/why-does-xmlreader-skip-every-other-element-if-there-is-no-whitespace-separator // Loop through each element while (!reader.EOF && reader.ReadState == ReadState.Interactive) { if (reader.NodeType == XmlNodeType.Element) { switch (reader.Name) { case "artist": { var id = reader.GetAttribute("id"); using (var subReader = reader.ReadSubtree()) { return ParseArtistArtistCredit(subReader, id); } } default: { reader.Skip(); break; } } } else { reader.Read(); } } return (null, null); } private static (string name, string id) ParseArtistArtistCredit(XmlReader reader, string artistId) { reader.MoveToContent(); reader.Read(); string name = null; // http://stackoverflow.com/questions/2299632/why-does-xmlreader-skip-every-other-element-if-there-is-no-whitespace-separator // Loop through each element while (!reader.EOF && reader.ReadState == ReadState.Interactive) { if (reader.NodeType == XmlNodeType.Element) { switch (reader.Name) { case "name": { name = reader.ReadElementContentAsString(); break; } default: { reader.Skip(); break; } } } else { reader.Read(); } } return (name, artistId); } private async Task<string> GetReleaseIdFromReleaseGroupId(string releaseGroupId, CancellationToken cancellationToken) { var url = string.Format("/ws/2/release?release-group={0}", releaseGroupId); using (var response = await GetMusicBrainzResponse(url, cancellationToken).ConfigureAwait(false)) using (var stream = response.Content) using (var oReader = new StreamReader(stream, Encoding.UTF8)) { var settings = new XmlReaderSettings() { ValidationType = ValidationType.None, CheckCharacters = false, IgnoreProcessingInstructions = true, IgnoreComments = true }; using (var reader = XmlReader.Create(oReader, settings)) { var result = ReleaseResult.Parse(reader).FirstOrDefault(); if (result != null) { return result.ReleaseId; } } } return null; } /// <summary> /// Gets the release group id internal. /// </summary> /// <param name="releaseEntryId">The release entry id.</param> /// <param name="cancellationToken">The cancellation token.</param> /// <returns>Task{System.String}.</returns> private async Task<string> GetReleaseGroupFromReleaseId(string releaseEntryId, CancellationToken cancellationToken) { var url = string.Format("/ws/2/release-group/?query=reid:{0}", releaseEntryId); using (var response = await GetMusicBrainzResponse(url, cancellationToken).ConfigureAwait(false)) using (var stream = response.Content) using (var oReader = new StreamReader(stream, Encoding.UTF8)) { var settings = new XmlReaderSettings() { ValidationType = ValidationType.None, CheckCharacters = false, IgnoreProcessingInstructions = true, IgnoreComments = true }; using (var reader = XmlReader.Create(oReader, settings)) { reader.MoveToContent(); reader.Read(); // Loop through each element while (!reader.EOF && reader.ReadState == ReadState.Interactive) { if (reader.NodeType == XmlNodeType.Element) { switch (reader.Name) { case "release-group-list": { if (reader.IsEmptyElement) { reader.Read(); continue; } using (var subReader = reader.ReadSubtree()) { return GetFirstReleaseGroupId(subReader); } } default: { reader.Skip(); break; } } } else { reader.Read(); } } return null; } } } private string GetFirstReleaseGroupId(XmlReader reader) { reader.MoveToContent(); reader.Read(); // Loop through each element while (!reader.EOF && reader.ReadState == ReadState.Interactive) { if (reader.NodeType == XmlNodeType.Element) { switch (reader.Name) { case "release-group": { return reader.GetAttribute("id"); } default: { reader.Skip(); break; } } } else { reader.Read(); } } return null; } /// <summary> /// Makes request to MusicBrainz server and awaits a response. /// A 503 Service Unavailable response indicates throttling to maintain a rate limit. /// A number of retries shall be made in order to try and satisfy the request before /// giving up and returning null. /// </summary> internal async Task<HttpResponseInfo> GetMusicBrainzResponse(string url, CancellationToken cancellationToken) { var options = new HttpRequestOptions { Url = MusicBrainzBaseUrl.TrimEnd('/') + url, CancellationToken = cancellationToken, // MusicBrainz request a contact email address is supplied, as comment, in user agent field: // https://musicbrainz.org/doc/XML_Web_Service/Rate_Limiting#User-Agent UserAgent = string.Format("{0} ( {1} )", _appHost.ApplicationUserAgent, _appHost.ApplicationUserAgentAddress), BufferContent = false }; HttpResponseInfo response; var attempts = 0u; do { attempts++; if (_stopWatchMusicBrainz.ElapsedMilliseconds < MusicBrainzQueryIntervalMs) { // MusicBrainz is extremely adamant about limiting to one request per second var delayMs = MusicBrainzQueryIntervalMs - _stopWatchMusicBrainz.ElapsedMilliseconds; await Task.Delay((int)delayMs, cancellationToken).ConfigureAwait(false); } // Write time since last request to debug log as evidence we're meeting rate limit // requirement, before resetting stopwatch back to zero. _logger.LogDebug("GetMusicBrainzResponse: Time since previous request: {0} ms", _stopWatchMusicBrainz.ElapsedMilliseconds); _stopWatchMusicBrainz.Restart(); response = await _httpClient.SendAsync(options, "GET").ConfigureAwait(false); // We retry a finite number of times, and only whilst MB is indcating 503 (throttling) } while (attempts < MusicBrainzQueryAttempts && response.StatusCode == HttpStatusCode.ServiceUnavailable); // Log error if unable to query MB database due to throttling if (attempts == MusicBrainzQueryAttempts && response.StatusCode == HttpStatusCode.ServiceUnavailable ) { _logger.LogError("GetMusicBrainzResponse: 503 Service Unavailable (throttled) response received {0} times whilst requesting {1}", attempts, options.Url); } return response; } public int Order => 0; public Task<HttpResponseInfo> GetImageResponse(string url, CancellationToken cancellationToken) { throw new NotImplementedException(); } } }