Fixed: Duplicate notifications for failed health checks

(cherry picked from commit c0e54773e213f526a5046fa46aa7b57532471128)
pull/9178/head
Mark McDowall 1 year ago committed by Bogdan
parent 80f6033595
commit 7d85922f8d

@ -6,6 +6,7 @@ using NzbDrone.Common.Cache;
using NzbDrone.Common.EnvironmentInfo;
using NzbDrone.Common.Messaging;
using NzbDrone.Common.Reflection;
using NzbDrone.Common.TPL;
using NzbDrone.Core.Lifecycle;
using NzbDrone.Core.Messaging.Commands;
using NzbDrone.Core.Messaging.Events;
@ -27,30 +28,31 @@ namespace NzbDrone.Core.HealthCheck
private readonly IProvideHealthCheck[] _startupHealthChecks;
private readonly IProvideHealthCheck[] _scheduledHealthChecks;
private readonly Dictionary<Type, IEventDrivenHealthCheck[]> _eventDrivenHealthChecks;
private readonly IServerSideNotificationService _serverSideNotificationService;
private readonly IEventAggregator _eventAggregator;
private readonly ICacheManager _cacheManager;
private readonly Logger _logger;
private readonly ICached<HealthCheck> _healthCheckResults;
private readonly HashSet<IProvideHealthCheck> _pendingHealthChecks;
private readonly Debouncer _debounce;
private bool _hasRunHealthChecksAfterGracePeriod;
private bool _isRunningHealthChecksAfterGracePeriod;
public HealthCheckService(IEnumerable<IProvideHealthCheck> healthChecks,
IServerSideNotificationService serverSideNotificationService,
IEventAggregator eventAggregator,
ICacheManager cacheManager,
IRuntimeInfo runtimeInfo,
Logger logger)
{
_healthChecks = healthChecks.ToArray();
_serverSideNotificationService = serverSideNotificationService;
_eventAggregator = eventAggregator;
_cacheManager = cacheManager;
_logger = logger;
_healthCheckResults = _cacheManager.GetCache<HealthCheck>(GetType());
_pendingHealthChecks = new HashSet<IProvideHealthCheck>();
_debounce = new Debouncer(ProcessHealthChecks, TimeSpan.FromSeconds(5));
_startupHealthChecks = _healthChecks.Where(v => v.CheckOnStartup).ToArray();
_scheduledHealthChecks = _healthChecks.Where(v => v.CheckOnSchedule).ToArray();
@ -77,63 +79,86 @@ namespace NzbDrone.Core.HealthCheck
.ToDictionary(g => g.Key, g => g.ToArray());
}
private void PerformHealthCheck(IProvideHealthCheck[] healthChecks, bool performServerChecks = false)
private void ProcessHealthChecks()
{
var results = healthChecks.Select(c => c.Check())
.ToList();
List<IProvideHealthCheck> healthChecks;
if (performServerChecks)
lock (_pendingHealthChecks)
{
results.AddRange(_serverSideNotificationService.GetServerChecks());
healthChecks = _pendingHealthChecks.ToList();
_pendingHealthChecks.Clear();
}
foreach (var result in results)
{
if (result.Type == HealthCheckResult.Ok)
{
var previous = _healthCheckResults.Find(result.Source.Name);
_debounce.Pause();
if (previous != null)
{
_eventAggregator.PublishEvent(new HealthCheckRestoredEvent(previous, !_hasRunHealthChecksAfterGracePeriod));
}
try
{
var results = healthChecks.Select(c => c.Check())
.ToList();
_healthCheckResults.Remove(result.Source.Name);
}
else
foreach (var result in results)
{
if (_healthCheckResults.Find(result.Source.Name) == null)
if (result.Type == HealthCheckResult.Ok)
{
_eventAggregator.PublishEvent(new HealthCheckFailedEvent(result, !_hasRunHealthChecksAfterGracePeriod));
var previous = _healthCheckResults.Find(result.Source.Name);
if (previous != null)
{
_eventAggregator.PublishEvent(new HealthCheckRestoredEvent(previous, !_hasRunHealthChecksAfterGracePeriod));
}
_healthCheckResults.Remove(result.Source.Name);
}
else
{
if (_healthCheckResults.Find(result.Source.Name) == null)
{
_eventAggregator.PublishEvent(new HealthCheckFailedEvent(result, !_hasRunHealthChecksAfterGracePeriod));
}
_healthCheckResults.Set(result.Source.Name, result);
_healthCheckResults.Set(result.Source.Name, result);
}
}
}
finally
{
_debounce.Resume();
}
_eventAggregator.PublishEvent(new HealthCheckCompleteEvent());
}
public void Execute(CheckHealthCommand message)
{
if (message.Trigger == CommandTrigger.Manual)
{
PerformHealthCheck(_healthChecks, true);
}
else
var healthChecks = message.Trigger == CommandTrigger.Manual ? _healthChecks : _scheduledHealthChecks;
lock (_pendingHealthChecks)
{
PerformHealthCheck(_scheduledHealthChecks, true);
foreach (var healthCheck in healthChecks)
{
_pendingHealthChecks.Add(healthCheck);
}
}
ProcessHealthChecks();
}
public void HandleAsync(ApplicationStartedEvent message)
{
PerformHealthCheck(_startupHealthChecks, true);
lock (_pendingHealthChecks)
{
foreach (var healthCheck in _startupHealthChecks)
{
_pendingHealthChecks.Add(healthCheck);
}
}
ProcessHealthChecks();
}
public void HandleAsync(IEvent message)
{
if (message is HealthCheckCompleteEvent)
if (message is HealthCheckCompleteEvent || message is ApplicationStartedEvent)
{
return;
}
@ -144,7 +169,16 @@ namespace NzbDrone.Core.HealthCheck
{
_isRunningHealthChecksAfterGracePeriod = true;
PerformHealthCheck(_startupHealthChecks);
lock (_pendingHealthChecks)
{
foreach (var healthCheck in _startupHealthChecks)
{
_pendingHealthChecks.Add(healthCheck);
}
}
// Call it directly so it's not debounced and any alerts can be sent.
ProcessHealthChecks();
// Update after running health checks so new failure notifications aren't sent 2x.
_hasRunHealthChecksAfterGracePeriod = true;
@ -179,8 +213,12 @@ namespace NzbDrone.Core.HealthCheck
}
}
// TODO: Add debounce
PerformHealthCheck(filteredChecks.ToArray());
lock (_pendingHealthChecks)
{
filteredChecks.ForEach(h => _pendingHealthChecks.Add(h));
}
_debounce.Execute();
}
}
}

@ -9,63 +9,67 @@ using NzbDrone.Common.EnvironmentInfo;
using NzbDrone.Common.Http;
using NzbDrone.Common.Serializer;
using NzbDrone.Core.Configuration;
using NzbDrone.Core.Localization;
namespace NzbDrone.Core.HealthCheck
{
public interface IServerSideNotificationService
{
public List<HealthCheck> GetServerChecks();
}
public class ServerSideNotificationService : IServerSideNotificationService
public class ServerSideNotificationService : HealthCheckBase
{
private readonly IHttpClient _client;
private readonly IRadarrCloudRequestBuilder _cloudRequestBuilder;
private readonly IConfigFileProvider _configFileProvider;
private readonly IHttpRequestBuilderFactory _cloudRequestBuilder;
private readonly Logger _logger;
private readonly ICached<List<HealthCheck>> _cache;
private readonly ICached<HealthCheck> _cache;
public ServerSideNotificationService(IHttpClient client,
IConfigFileProvider configFileProvider,
IRadarrCloudRequestBuilder cloudRequestBuilder,
IConfigFileProvider configFileProvider,
ILocalizationService localizationService,
ICacheManager cacheManager,
Logger logger)
: base(localizationService)
{
_client = client;
_cloudRequestBuilder = cloudRequestBuilder;
_configFileProvider = configFileProvider;
_cloudRequestBuilder = cloudRequestBuilder.Services;
_logger = logger;
_cache = cacheManager.GetCache<List<HealthCheck>>(GetType());
_cache = cacheManager.GetCache<HealthCheck>(GetType());
}
public List<HealthCheck> GetServerChecks()
public override HealthCheck Check()
{
return _cache.Get("ServerChecks", () => RetrieveServerChecks(), TimeSpan.FromHours(2));
return _cache.Get("ServerChecks", RetrieveServerChecks, TimeSpan.FromHours(2));
}
private List<HealthCheck> RetrieveServerChecks()
private HealthCheck RetrieveServerChecks()
{
var request = _cloudRequestBuilder.Create()
.Resource("/notification")
.AddQueryParam("version", BuildInfo.Version)
.AddQueryParam("os", OsInfo.Os.ToString().ToLowerInvariant())
.AddQueryParam("arch", RuntimeInformation.OSArchitecture)
.AddQueryParam("runtime", "netcore")
.AddQueryParam("branch", _configFileProvider.Branch)
.Build();
var request = _cloudRequestBuilder.Services.Create()
.Resource("/notification")
.AddQueryParam("version", BuildInfo.Version)
.AddQueryParam("os", OsInfo.Os.ToString().ToLowerInvariant())
.AddQueryParam("arch", RuntimeInformation.OSArchitecture)
.AddQueryParam("branch", _configFileProvider.Branch)
.Build();
try
{
_logger.Trace("Getting server side health notifications");
_logger.Trace("Getting notifications");
var response = _client.Execute(request);
var result = Json.Deserialize<List<ServerNotificationResponse>>(response.Content);
return result.Select(x => new HealthCheck(GetType(), x.Type, x.Message, x.WikiUrl)).ToList();
var checks = result.Select(x => new HealthCheck(GetType(), x.Type, x.Message, x.WikiUrl)).ToList();
// Only one health check is supported, services returns an ordered list, so use the first one
return checks.FirstOrDefault() ?? new HealthCheck(GetType());
}
catch (Exception ex)
{
_logger.Error(ex, "Failed to retrieve server notifications");
return new List<HealthCheck>();
_logger.Error(ex, "Failed to retrieve notifications");
return new HealthCheck(GetType());
}
}
}

Loading…
Cancel
Save