Fixed: Delay health check notifications on startup

Closes #1598

(cherry picked from commit 07f0db477a91b39c1f4b884775c08a55ada487cf)
pull/2126/head
Mark McDowall 3 years ago committed by Qstick
parent 6f17057f31
commit a18bbeee5a

@ -5,10 +5,12 @@ namespace NzbDrone.Core.HealthCheck
public class HealthCheckFailedEvent : IEvent public class HealthCheckFailedEvent : IEvent
{ {
public HealthCheck HealthCheck { get; private set; } public HealthCheck HealthCheck { get; private set; }
public bool IsInStartupGraceperiod { get; private set; }
public HealthCheckFailedEvent(HealthCheck healthCheck) public HealthCheckFailedEvent(HealthCheck healthCheck, bool isInStartupGraceperiod)
{ {
HealthCheck = healthCheck; HealthCheck = healthCheck;
IsInStartupGraceperiod = isInStartupGraceperiod;
} }
} }
} }

@ -3,6 +3,7 @@ using System.Collections.Generic;
using System.Linq; using System.Linq;
using NLog; using NLog;
using NzbDrone.Common.Cache; using NzbDrone.Common.Cache;
using NzbDrone.Common.EnvironmentInfo;
using NzbDrone.Common.Messaging; using NzbDrone.Common.Messaging;
using NzbDrone.Common.Reflection; using NzbDrone.Common.Reflection;
using NzbDrone.Core.Lifecycle; using NzbDrone.Core.Lifecycle;
@ -21,6 +22,7 @@ namespace NzbDrone.Core.HealthCheck
IHandleAsync<ApplicationStartedEvent>, IHandleAsync<ApplicationStartedEvent>,
IHandleAsync<IEvent> IHandleAsync<IEvent>
{ {
private readonly DateTime _startupGracePeriodEndTime;
private readonly IProvideHealthCheck[] _healthChecks; private readonly IProvideHealthCheck[] _healthChecks;
private readonly IProvideHealthCheck[] _startupHealthChecks; private readonly IProvideHealthCheck[] _startupHealthChecks;
private readonly IProvideHealthCheck[] _scheduledHealthChecks; private readonly IProvideHealthCheck[] _scheduledHealthChecks;
@ -32,10 +34,14 @@ namespace NzbDrone.Core.HealthCheck
private readonly ICached<HealthCheck> _healthCheckResults; private readonly ICached<HealthCheck> _healthCheckResults;
private bool _hasRunHealthChecksAfterGracePeriod = false;
private bool _isRunningHealthChecksAfterGracePeriod = false;
public HealthCheckService(IEnumerable<IProvideHealthCheck> healthChecks, public HealthCheckService(IEnumerable<IProvideHealthCheck> healthChecks,
IServerSideNotificationService serverSideNotificationService, IServerSideNotificationService serverSideNotificationService,
IEventAggregator eventAggregator, IEventAggregator eventAggregator,
ICacheManager cacheManager, ICacheManager cacheManager,
IRuntimeInfo runtimeInfo,
Logger logger) Logger logger)
{ {
_healthChecks = healthChecks.ToArray(); _healthChecks = healthChecks.ToArray();
@ -49,6 +55,7 @@ namespace NzbDrone.Core.HealthCheck
_startupHealthChecks = _healthChecks.Where(v => v.CheckOnStartup).ToArray(); _startupHealthChecks = _healthChecks.Where(v => v.CheckOnStartup).ToArray();
_scheduledHealthChecks = _healthChecks.Where(v => v.CheckOnSchedule).ToArray(); _scheduledHealthChecks = _healthChecks.Where(v => v.CheckOnSchedule).ToArray();
_eventDrivenHealthChecks = GetEventDrivenHealthChecks(); _eventDrivenHealthChecks = GetEventDrivenHealthChecks();
_startupGracePeriodEndTime = runtimeInfo.StartTime + TimeSpan.FromMinutes(15);
} }
public List<HealthCheck> Results() public List<HealthCheck> Results()
@ -101,7 +108,7 @@ namespace NzbDrone.Core.HealthCheck
{ {
if (_healthCheckResults.Find(result.Source.Name) == null) if (_healthCheckResults.Find(result.Source.Name) == null)
{ {
_eventAggregator.PublishEvent(new HealthCheckFailedEvent(result)); _eventAggregator.PublishEvent(new HealthCheckFailedEvent(result, !_hasRunHealthChecksAfterGracePeriod));
} }
_healthCheckResults.Set(result.Source.Name, result); _healthCheckResults.Set(result.Source.Name, result);
@ -135,6 +142,30 @@ namespace NzbDrone.Core.HealthCheck
return; return;
} }
// If we haven't previously re-run health checks after the startup grace period, run the startup checks again and track that so they aren't run again.
// Return early after re-running checks to avoid triggering checks multiple times.
if (!_hasRunHealthChecksAfterGracePeriod && !_isRunningHealthChecksAfterGracePeriod && DateTime.UtcNow > _startupGracePeriodEndTime)
{
_isRunningHealthChecksAfterGracePeriod = true;
PerformHealthCheck(_startupHealthChecks);
// Update after running health checks so new failure notifications aren't sent twice.
_hasRunHealthChecksAfterGracePeriod = true;
// Explicitly notify for any failed checks since existing failed results would not have sent events.
var results = _healthCheckResults.Values.ToList();
foreach (var result in results)
{
_eventAggregator.PublishEvent(new HealthCheckFailedEvent(result, false));
}
_isRunningHealthChecksAfterGracePeriod = false;
return;
}
IEventDrivenHealthCheck[] checks; IEventDrivenHealthCheck[] checks;
if (!_eventDrivenHealthChecks.TryGetValue(message.GetType(), out checks)) if (!_eventDrivenHealthChecks.TryGetValue(message.GetType(), out checks))
{ {

@ -275,6 +275,13 @@ namespace NzbDrone.Core.Notifications
public void Handle(HealthCheckFailedEvent message) public void Handle(HealthCheckFailedEvent message)
{ {
// Don't send health check notifications during the startup grace period;
// once that period expires they'll be retested and fired off if necessary.
if (message.IsInStartupGraceperiod)
{
return;
}
foreach (var notification in _notificationFactory.OnHealthIssueEnabled()) foreach (var notification in _notificationFactory.OnHealthIssueEnabled())
{ {
try try

Loading…
Cancel
Save