using System; using System.Collections.Generic; using System.Globalization; using System.IO; using System.Linq; using System.Text; using System.Threading; using System.Xml; using MediaBrowser.Controller.Entities; using MediaBrowser.Controller.Providers; using MediaBrowser.Model.Entities; using MediaBrowser.Model.IO; using MediaBrowser.Model.Xml; using Microsoft.Extensions.Logging; namespace MediaBrowser.LocalMetadata.Parsers { /// /// Provides a base class for parsing metadata xml /// /// public class BaseItemXmlParser where T : BaseItem { /// /// The logger /// protected ILogger Logger { get; private set; } protected IProviderManager ProviderManager { get; private set; } private Dictionary _validProviderIds; protected IXmlReaderSettingsFactory XmlReaderSettingsFactory { get; private set; } protected IFileSystem FileSystem { get; private set; } /// /// Initializes a new instance of the class. /// /// The logger. public BaseItemXmlParser(ILogger logger, IProviderManager providerManager, IXmlReaderSettingsFactory xmlReaderSettingsFactory, IFileSystem fileSystem) { Logger = logger; ProviderManager = providerManager; XmlReaderSettingsFactory = xmlReaderSettingsFactory; FileSystem = fileSystem; } /// /// Fetches metadata for an item from one xml file /// /// The item. /// The metadata file. /// The cancellation token. /// public void Fetch(MetadataResult item, string metadataFile, CancellationToken cancellationToken) { if (item == null) { throw new ArgumentNullException(nameof(item)); } if (string.IsNullOrEmpty(metadataFile)) { throw new ArgumentException("The metadata file was empty or null.", nameof(metadataFile)); } var settings = XmlReaderSettingsFactory.Create(false); settings.CheckCharacters = false; settings.IgnoreProcessingInstructions = true; settings.IgnoreComments = true; _validProviderIds = _validProviderIds = new Dictionary(StringComparer.OrdinalIgnoreCase); var idInfos = ProviderManager.GetExternalIdInfos(item.Item); foreach (var info in idInfos) { var id = info.Key + "Id"; if (!_validProviderIds.ContainsKey(id)) { _validProviderIds.Add(id, info.Key); } } //Additional Mappings _validProviderIds.Add("IMDB", "Imdb"); //Fetch(item, metadataFile, settings, Encoding.GetEncoding("ISO-8859-1"), cancellationToken); Fetch(item, metadataFile, settings, Encoding.UTF8, cancellationToken); } /// /// Fetches the specified item. /// /// The item. /// The metadata file. /// The settings. /// The encoding. /// The cancellation token. private void Fetch(MetadataResult item, string metadataFile, XmlReaderSettings settings, Encoding encoding, CancellationToken cancellationToken) { item.ResetPeople(); using (var fileStream = FileSystem.OpenRead(metadataFile)) { using (var streamReader = new StreamReader(fileStream, encoding)) { // Use XmlReader for best performance using (var reader = XmlReader.Create(streamReader, settings)) { reader.MoveToContent(); reader.Read(); // Loop through each element while (!reader.EOF && reader.ReadState == ReadState.Interactive) { cancellationToken.ThrowIfCancellationRequested(); if (reader.NodeType == XmlNodeType.Element) { FetchDataFromXmlNode(reader, item); } else { reader.Read(); } } } } } } private readonly CultureInfo _usCulture = new CultureInfo("en-US"); /// /// Fetches metadata from one Xml Element /// /// The reader. /// The item result. protected virtual void FetchDataFromXmlNode(XmlReader reader, MetadataResult itemResult) { var item = itemResult.Item; switch (reader.Name) { // DateCreated case "Added": { var val = reader.ReadElementContentAsString(); if (!string.IsNullOrWhiteSpace(val)) { if (DateTime.TryParse(val, out var added)) { item.DateCreated = added.ToUniversalTime(); } else { Logger.LogWarning("Invalid Added value found: " + val); } } break; } case "OriginalTitle": { var val = reader.ReadElementContentAsString(); if (!string.IsNullOrEmpty(val)) { item.OriginalTitle = val; } break; } case "LocalTitle": item.Name = reader.ReadElementContentAsString(); break; case "CriticRating": { var text = reader.ReadElementContentAsString(); if (!string.IsNullOrEmpty(text)) { if (float.TryParse(text, NumberStyles.Any, _usCulture, out var value)) { item.CriticRating = value; } } break; } case "SortTitle": { var val = reader.ReadElementContentAsString(); if (!string.IsNullOrWhiteSpace(val)) { item.ForcedSortName = val; } break; } case "Overview": case "Description": { var val = reader.ReadElementContentAsString(); if (!string.IsNullOrWhiteSpace(val)) { item.Overview = val; } break; } case "Language": { var val = reader.ReadElementContentAsString(); item.PreferredMetadataLanguage = val; break; } case "CountryCode": { var val = reader.ReadElementContentAsString(); item.PreferredMetadataCountryCode = val; break; } case "PlaceOfBirth": { var val = reader.ReadElementContentAsString(); if (!string.IsNullOrWhiteSpace(val)) { var person = item as Person; if (person != null) { person.ProductionLocations = new string[] { val }; } } break; } case "LockedFields": { var val = reader.ReadElementContentAsString(); if (!string.IsNullOrWhiteSpace(val)) { item.LockedFields = val.Split('|').Select(i => { if (Enum.TryParse(i, true, out MetadataFields field)) { return (MetadataFields?)field; } return null; }).Where(i => i.HasValue).Select(i => i.Value).ToArray(); } break; } case "TagLines": { if (!reader.IsEmptyElement) { using (var subtree = reader.ReadSubtree()) { FetchFromTaglinesNode(subtree, item); } } else { reader.Read(); } break; } case "Countries": { if (!reader.IsEmptyElement) { using (var subtree = reader.ReadSubtree()) { FetchFromCountriesNode(subtree, item); } } else { reader.Read(); } break; } case "ContentRating": case "MPAARating": { var rating = reader.ReadElementContentAsString(); if (!string.IsNullOrWhiteSpace(rating)) { item.OfficialRating = rating; } break; } case "CustomRating": { var val = reader.ReadElementContentAsString(); if (!string.IsNullOrWhiteSpace(val)) { item.CustomRating = val; } break; } case "RunningTime": { var text = reader.ReadElementContentAsString(); if (!string.IsNullOrWhiteSpace(text)) { if (int.TryParse(text.Split(' ')[0], NumberStyles.Integer, _usCulture, out var runtime)) { item.RunTimeTicks = TimeSpan.FromMinutes(runtime).Ticks; } } break; } case "AspectRatio": { var val = reader.ReadElementContentAsString(); var hasAspectRatio = item as IHasAspectRatio; if (!string.IsNullOrWhiteSpace(val) && hasAspectRatio != null) { hasAspectRatio.AspectRatio = val; } break; } case "LockData": { var val = reader.ReadElementContentAsString(); if (!string.IsNullOrWhiteSpace(val)) { item.IsLocked = string.Equals("true", val, StringComparison.OrdinalIgnoreCase); } break; } case "Network": { foreach (var name in SplitNames(reader.ReadElementContentAsString())) { if (string.IsNullOrWhiteSpace(name)) { continue; } item.AddStudio(name); } break; } case "Director": { foreach (var p in SplitNames(reader.ReadElementContentAsString()).Select(v => new PersonInfo { Name = v.Trim(), Type = PersonType.Director })) { if (string.IsNullOrWhiteSpace(p.Name)) { continue; } itemResult.AddPerson(p); } break; } case "Writer": { foreach (var p in SplitNames(reader.ReadElementContentAsString()).Select(v => new PersonInfo { Name = v.Trim(), Type = PersonType.Writer })) { if (string.IsNullOrWhiteSpace(p.Name)) { continue; } itemResult.AddPerson(p); } break; } case "Actors": { var actors = reader.ReadInnerXml(); if (actors.Contains("<")) { // This is one of the mis-named "Actors" full nodes created by MB2 // Create a reader and pass it to the persons node processor FetchDataFromPersonsNode(XmlReader.Create(new StringReader("" + actors + "")), itemResult); } else { // Old-style piped string foreach (var p in SplitNames(actors).Select(v => new PersonInfo { Name = v.Trim(), Type = PersonType.Actor })) { if (string.IsNullOrWhiteSpace(p.Name)) { continue; } itemResult.AddPerson(p); } } break; } case "GuestStars": { foreach (var p in SplitNames(reader.ReadElementContentAsString()).Select(v => new PersonInfo { Name = v.Trim(), Type = PersonType.GuestStar })) { if (string.IsNullOrWhiteSpace(p.Name)) { continue; } itemResult.AddPerson(p); } break; } case "Trailer": { var val = reader.ReadElementContentAsString(); if (!string.IsNullOrWhiteSpace(val)) { item.AddTrailerUrl(val); } break; } case "DisplayOrder": { var val = reader.ReadElementContentAsString(); var hasDisplayOrder = item as IHasDisplayOrder; if (hasDisplayOrder != null) { if (!string.IsNullOrWhiteSpace(val)) { hasDisplayOrder.DisplayOrder = val; } } break; } case "Trailers": { if (!reader.IsEmptyElement) { using (var subtree = reader.ReadSubtree()) { FetchDataFromTrailersNode(subtree, item); } } else { reader.Read(); } break; } case "ProductionYear": { var val = reader.ReadElementContentAsString(); if (!string.IsNullOrWhiteSpace(val)) { if (int.TryParse(val, out var productionYear) && productionYear > 1850) { item.ProductionYear = productionYear; } } break; } case "Rating": case "IMDBrating": { var rating = reader.ReadElementContentAsString(); if (!string.IsNullOrWhiteSpace(rating)) { // All external meta is saving this as '.' for decimal I believe...but just to be sure if (float.TryParse(rating.Replace(',', '.'), NumberStyles.AllowDecimalPoint, CultureInfo.InvariantCulture, out var val)) { item.CommunityRating = val; } } break; } case "BirthDate": case "PremiereDate": case "FirstAired": { var firstAired = reader.ReadElementContentAsString(); if (!string.IsNullOrWhiteSpace(firstAired)) { if (DateTime.TryParseExact(firstAired, "yyyy-MM-dd", CultureInfo.InvariantCulture, DateTimeStyles.AssumeLocal, out var airDate) && airDate.Year > 1850) { item.PremiereDate = airDate.ToUniversalTime(); item.ProductionYear = airDate.Year; } } break; } case "DeathDate": case "EndDate": { var firstAired = reader.ReadElementContentAsString(); if (!string.IsNullOrWhiteSpace(firstAired)) { if (DateTime.TryParseExact(firstAired, "yyyy-MM-dd", CultureInfo.InvariantCulture, DateTimeStyles.AssumeLocal, out var airDate) && airDate.Year > 1850) { item.EndDate = airDate.ToUniversalTime(); } } break; } case "CollectionNumber": var tmdbCollection = reader.ReadElementContentAsString(); if (!string.IsNullOrWhiteSpace(tmdbCollection)) { item.SetProviderId(MetadataProviders.TmdbCollection, tmdbCollection); } break; case "Genres": { if (!reader.IsEmptyElement) { using (var subtree = reader.ReadSubtree()) { FetchFromGenresNode(subtree, item); } } else { reader.Read(); } break; } case "Tags": { if (!reader.IsEmptyElement) { using (var subtree = reader.ReadSubtree()) { FetchFromTagsNode(subtree, item); } } else { reader.Read(); } break; } case "Persons": { if (!reader.IsEmptyElement) { using (var subtree = reader.ReadSubtree()) { FetchDataFromPersonsNode(subtree, itemResult); } } else { reader.Read(); } break; } case "Studios": { if (!reader.IsEmptyElement) { using (var subtree = reader.ReadSubtree()) { FetchFromStudiosNode(subtree, item); } } else { reader.Read(); } break; } case "Shares": { if (!reader.IsEmptyElement) { using (var subtree = reader.ReadSubtree()) { var hasShares = item as IHasShares; if (hasShares != null) { FetchFromSharesNode(subtree, hasShares); } } } else { reader.Read(); } break; } case "Format3D": { var val = reader.ReadElementContentAsString(); var video = item as Video; if (video != null) { if (string.Equals("HSBS", val, StringComparison.OrdinalIgnoreCase)) { video.Video3DFormat = Video3DFormat.HalfSideBySide; } else if (string.Equals("HTAB", val, StringComparison.OrdinalIgnoreCase)) { video.Video3DFormat = Video3DFormat.HalfTopAndBottom; } else if (string.Equals("FTAB", val, StringComparison.OrdinalIgnoreCase)) { video.Video3DFormat = Video3DFormat.FullTopAndBottom; } else if (string.Equals("FSBS", val, StringComparison.OrdinalIgnoreCase)) { video.Video3DFormat = Video3DFormat.FullSideBySide; } else if (string.Equals("MVC", val, StringComparison.OrdinalIgnoreCase)) { video.Video3DFormat = Video3DFormat.MVC; } } break; } default: { string readerName = reader.Name; if (_validProviderIds.TryGetValue(readerName, out string providerIdValue)) { var id = reader.ReadElementContentAsString(); if (!string.IsNullOrWhiteSpace(id)) { item.SetProviderId(providerIdValue, id); } } else { reader.Skip(); } break; } } } private void FetchFromSharesNode(XmlReader reader, IHasShares item) { var list = new List(); reader.MoveToContent(); reader.Read(); // Loop through each element while (!reader.EOF && reader.ReadState == ReadState.Interactive) { if (reader.NodeType == XmlNodeType.Element) { switch (reader.Name) { case "Share": { if (reader.IsEmptyElement) { reader.Read(); continue; } using (var subReader = reader.ReadSubtree()) { var child = GetShare(subReader); if (child != null) { list.Add(child); } } break; } default: { reader.Skip(); break; } } } else { reader.Read(); } } item.Shares = list.ToArray(); } private Share GetShareFromNode(XmlReader reader) { var share = new Share(); reader.MoveToContent(); reader.Read(); // Loop through each element while (!reader.EOF && reader.ReadState == ReadState.Interactive) { if (reader.NodeType == XmlNodeType.Element) { switch (reader.Name) { case "UserId": { share.UserId = reader.ReadElementContentAsString(); break; } case "CanEdit": { share.CanEdit = string.Equals(reader.ReadElementContentAsString(), true.ToString(), StringComparison.OrdinalIgnoreCase); break; } default: reader.Skip(); break; } } else { reader.Read(); } } return share; } private void FetchFromCountriesNode(XmlReader reader, T item) { reader.MoveToContent(); reader.Read(); // Loop through each element while (!reader.EOF && reader.ReadState == ReadState.Interactive) { if (reader.NodeType == XmlNodeType.Element) { switch (reader.Name) { case "Country": { var val = reader.ReadElementContentAsString(); if (!string.IsNullOrWhiteSpace(val)) { } break; } default: reader.Skip(); break; } } else { reader.Read(); } } } /// /// Fetches from taglines node. /// /// The reader. /// The item. private void FetchFromTaglinesNode(XmlReader reader, T item) { reader.MoveToContent(); reader.Read(); // Loop through each element while (!reader.EOF && reader.ReadState == ReadState.Interactive) { if (reader.NodeType == XmlNodeType.Element) { switch (reader.Name) { case "Tagline": { var val = reader.ReadElementContentAsString(); if (!string.IsNullOrWhiteSpace(val)) { item.Tagline = val; } break; } default: reader.Skip(); break; } } else { reader.Read(); } } } /// /// Fetches from genres node. /// /// The reader. /// The item. private void FetchFromGenresNode(XmlReader reader, T item) { reader.MoveToContent(); reader.Read(); // Loop through each element while (!reader.EOF && reader.ReadState == ReadState.Interactive) { if (reader.NodeType == XmlNodeType.Element) { switch (reader.Name) { case "Genre": { var genre = reader.ReadElementContentAsString(); if (!string.IsNullOrWhiteSpace(genre)) { item.AddGenre(genre); } break; } default: reader.Skip(); break; } } else { reader.Read(); } } } private void FetchFromTagsNode(XmlReader reader, BaseItem item) { reader.MoveToContent(); reader.Read(); var tags = new List(); // Loop through each element while (!reader.EOF && reader.ReadState == ReadState.Interactive) { if (reader.NodeType == XmlNodeType.Element) { switch (reader.Name) { case "Tag": { var tag = reader.ReadElementContentAsString(); if (!string.IsNullOrWhiteSpace(tag)) { tags.Add(tag); } break; } default: reader.Skip(); break; } } else { reader.Read(); } } item.Tags = tags.Distinct(StringComparer.Ordinal).ToArray(); } /// /// Fetches the data from persons node. /// /// The reader. /// The item. private void FetchDataFromPersonsNode(XmlReader reader, MetadataResult item) { reader.MoveToContent(); reader.Read(); // Loop through each element while (!reader.EOF && reader.ReadState == ReadState.Interactive) { if (reader.NodeType == XmlNodeType.Element) { switch (reader.Name) { case "Person": case "Actor": { if (reader.IsEmptyElement) { reader.Read(); continue; } using (var subtree = reader.ReadSubtree()) { foreach (var person in GetPersonsFromXmlNode(subtree)) { if (string.IsNullOrWhiteSpace(person.Name)) { continue; } item.AddPerson(person); } } break; } default: reader.Skip(); break; } } else { reader.Read(); } } } private void FetchDataFromTrailersNode(XmlReader reader, T item) { reader.MoveToContent(); reader.Read(); // Loop through each element while (!reader.EOF && reader.ReadState == ReadState.Interactive) { if (reader.NodeType == XmlNodeType.Element) { switch (reader.Name) { case "Trailer": { var val = reader.ReadElementContentAsString(); if (!string.IsNullOrWhiteSpace(val)) { item.AddTrailerUrl(val); } break; } default: reader.Skip(); break; } } else { reader.Read(); } } } /// /// Fetches from studios node. /// /// The reader. /// The item. private void FetchFromStudiosNode(XmlReader reader, T item) { reader.MoveToContent(); reader.Read(); // Loop through each element while (!reader.EOF && reader.ReadState == ReadState.Interactive) { if (reader.NodeType == XmlNodeType.Element) { switch (reader.Name) { case "Studio": { var studio = reader.ReadElementContentAsString(); if (!string.IsNullOrWhiteSpace(studio)) { item.AddStudio(studio); } break; } default: reader.Skip(); break; } } else { reader.Read(); } } } /// /// Gets the persons from XML node. /// /// The reader. /// IEnumerable{PersonInfo}. private IEnumerable GetPersonsFromXmlNode(XmlReader reader) { var name = string.Empty; var type = PersonType.Actor; // If type is not specified assume actor var role = string.Empty; int? sortOrder = null; reader.MoveToContent(); reader.Read(); // Loop through each element while (!reader.EOF && reader.ReadState == ReadState.Interactive) { if (reader.NodeType == XmlNodeType.Element) { switch (reader.Name) { case "Name": name = reader.ReadElementContentAsString() ?? string.Empty; break; case "Type": { var val = reader.ReadElementContentAsString(); if (!string.IsNullOrWhiteSpace(val)) { type = val; } break; } case "Role": { var val = reader.ReadElementContentAsString(); if (!string.IsNullOrWhiteSpace(val)) { role = val; } break; } case "SortOrder": { var val = reader.ReadElementContentAsString(); if (!string.IsNullOrWhiteSpace(val)) { if (int.TryParse(val, NumberStyles.Integer, _usCulture, out var intVal)) { sortOrder = intVal; } } break; } default: reader.Skip(); break; } } else { reader.Read(); } } var personInfo = new PersonInfo { Name = name.Trim(), Role = role, Type = type, SortOrder = sortOrder }; return new[] { personInfo }; } protected LinkedChild GetLinkedChild(XmlReader reader) { var linkedItem = new LinkedChild { Type = LinkedChildType.Manual }; reader.MoveToContent(); reader.Read(); // Loop through each element while (!reader.EOF && reader.ReadState == ReadState.Interactive) { if (reader.NodeType == XmlNodeType.Element) { switch (reader.Name) { case "Path": { linkedItem.Path = reader.ReadElementContentAsString(); break; } case "ItemId": { linkedItem.LibraryItemId = reader.ReadElementContentAsString(); break; } default: reader.Skip(); break; } } else { reader.Read(); } } // This is valid if (!string.IsNullOrWhiteSpace(linkedItem.Path) || !string.IsNullOrWhiteSpace(linkedItem.LibraryItemId)) { return linkedItem; } return null; } protected Share GetShare(XmlReader reader) { var item = new Share(); reader.MoveToContent(); reader.Read(); // Loop through each element while (!reader.EOF && reader.ReadState == ReadState.Interactive) { if (reader.NodeType == XmlNodeType.Element) { switch (reader.Name) { case "UserId": { item.UserId = reader.ReadElementContentAsString(); break; } case "CanEdit": { item.CanEdit = string.Equals(reader.ReadElementContentAsString(), "true", StringComparison.OrdinalIgnoreCase); break; } default: { reader.Skip(); break; } } } else { reader.Read(); } } // This is valid if (!string.IsNullOrWhiteSpace(item.UserId)) { return item; } return null; } /// /// Used to split names of comma or pipe delimeted genres and people /// /// The value. /// IEnumerable{System.String}. private IEnumerable SplitNames(string value) { value = value ?? string.Empty; // Only split by comma if there is no pipe in the string // We have to be careful to not split names like Matthew, Jr. var separator = value.IndexOf('|') == -1 && value.IndexOf(';') == -1 ? new[] { ',' } : new[] { '|', ';' }; value = value.Trim().Trim(separator); return string.IsNullOrWhiteSpace(value) ? Array.Empty() : Split(value, separator, StringSplitOptions.RemoveEmptyEntries); } /// /// Provides an additional overload for string.split /// /// The val. /// The separators. /// The options. /// System.String[][]. private string[] Split(string val, char[] separators, StringSplitOptions options) { return val.Split(separators, options); } } }