validate thresholds whenever SMART data is received.

4 years ago · 80f4660130
parent 1fc910f41b
commit 80f4660130
12 changed files with 237 additions and 163 deletions
--- a/webapp/backend/pkg/database/interface.go
+++ b/webapp/backend/pkg/database/interface.go
@ -2,6 +2,7 @@ package database

 import (
 	"context"
+	"github.com/analogj/scrutiny/webapp/backend/pkg"
 	"github.com/analogj/scrutiny/webapp/backend/pkg/models"
 	"github.com/analogj/scrutiny/webapp/backend/pkg/models/collector"
 	"github.com/analogj/scrutiny/webapp/backend/pkg/models/measurements"
@ -16,6 +17,7 @@ type DeviceRepo interface {
 	RegisterDevice(ctx context.Context, dev models.Device) error
 	GetDevices(ctx context.Context) ([]models.Device, error)
 	UpdateDevice(ctx context.Context, wwn string, collectorSmartData collector.SmartInfo) (models.Device, error)
+	UpdateDeviceStatus(ctx context.Context, wwn string, status pkg.DeviceStatus) (models.Device, error)
 	GetDeviceDetails(ctx context.Context, wwn string) (models.Device, error)

 	SaveSmartAttributes(ctx context.Context, wwn string, collectorSmartData collector.SmartInfo) (measurements.Smart, error)
--- a/webapp/backend/pkg/database/scrutiny_repository.go
+++ b/webapp/backend/pkg/database/scrutiny_repository.go
@ -3,6 +3,7 @@ package database
 import (
 	"context"
 	"fmt"
+	"github.com/analogj/scrutiny/webapp/backend/pkg"
 	"github.com/analogj/scrutiny/webapp/backend/pkg/config"
 	"github.com/analogj/scrutiny/webapp/backend/pkg/models"
 	"github.com/analogj/scrutiny/webapp/backend/pkg/models/collector"
@ -163,6 +164,17 @@ func (sr *scrutinyRepository) UpdateDevice(ctx context.Context, wwn string, coll
 	return device, sr.gormClient.Model(&device).Updates(device).Error
 }

+// UpdateDeviceStatus loads the device identified by wwn, combines status with
+// the device's stored DeviceStatus using pkg.Set, and writes the device back.
+// It returns the device (carrying the combined status) together with any
+// lookup or update error.
+func (sr *scrutinyRepository) UpdateDeviceStatus(ctx context.Context, wwn string, status pkg.DeviceStatus) (models.Device, error) {
+	var device models.Device
+	if err := sr.gormClient.WithContext(ctx).Where("wwn = ?", wwn).First(&device).Error; err != nil {
+		// Wrap the lookup error with %w: without a verb the cause was emitted as a
+		// "%!(EXTRA ...)" suffix and could not be inspected with errors.Is/As.
+		return device, fmt.Errorf("Could not get device from DB: %w", err)
+	}
+
+	device.DeviceStatus = pkg.Set(device.DeviceStatus, status)
+	return device, sr.gormClient.Model(&device).Updates(device).Error
+}
+
 func (sr *scrutinyRepository) GetDeviceDetails(ctx context.Context, wwn string) (models.Device, error) {
 	var device models.Device

@ -434,3 +446,11 @@ func (sr *scrutinyRepository) GetSummary(ctx context.Context) (map[string]*model

 	return summaries, nil
 }
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Process Thresholds
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+func (sr *scrutinyRepository) ProcessSmartAttributeThresholds() {
+
+}
--- a/webapp/backend/pkg/models/measurements/smart.go
+++ b/webapp/backend/pkg/models/measurements/smart.go
@ -3,8 +3,8 @@ package measurements
 import (
 	"fmt"
 	"github.com/analogj/scrutiny/webapp/backend/pkg"
-	"github.com/analogj/scrutiny/webapp/backend/pkg/metadata"
 	"github.com/analogj/scrutiny/webapp/backend/pkg/models/collector"
+	"github.com/analogj/scrutiny/webapp/backend/pkg/thresholds"
 	"log"
 	"strings"
 	"time"
@ -22,6 +22,9 @@ type Smart struct {

 	//Attributes (fields)
 	Attributes map[string]SmartAttribute `json:"attrs"`
+
+	//status
+	Status pkg.DeviceStatus
 }

 func (sm *Smart) Flatten() (tags map[string]string, fields map[string]interface{}) {
@ -133,6 +136,7 @@ func (sm *Smart) FromCollectorSmartInfo(wwn string, info collector.SmartInfo) er

 //generate SmartAtaAttribute entries from Scrutiny Collector Smart data.
 func (sm *Smart) ProcessAtaSmartInfo(info collector.SmartInfo) {
+	sm.Status = pkg.DeviceStatusPassed
 	for _, collectorAttr := range info.AtaSmartAttributes.Table {
 		attrModel := SmartAtaAttribute{
 			AttributeId: collectorAttr.ID,
@ -146,53 +150,72 @@ func (sm *Smart) ProcessAtaSmartInfo(info collector.SmartInfo) {
 		}

 		//now that we've parsed the data from the smartctl response, lets match it against our metadata rules and add additional Scrutiny specific data.
-		if smartMetadata, ok := metadata.AtaMetadata[collectorAttr.ID]; ok {
+		if smartMetadata, ok := thresholds.AtaMetadata[collectorAttr.ID]; ok {
 			attrModel.Name = smartMetadata.DisplayName
 			if smartMetadata.Transform != nil {
 				attrModel.TransformedValue = smartMetadata.Transform(attrModel.Value, attrModel.RawValue, attrModel.RawString)
 			}
 		}
+		attrModel.PopulateAttributeStatus()
 		sm.Attributes[string(collectorAttr.ID)] = &attrModel
+		if attrModel.Status == pkg.SmartAttributeStatusFailed {
+			sm.Status = pkg.DeviceStatusFailedScrutiny
+		}
 	}
 }

 //generate SmartNvmeAttribute entries from Scrutiny Collector Smart data.
 func (sm *Smart) ProcessNvmeSmartInfo(info collector.SmartInfo) {
+
 	sm.Attributes = map[string]SmartAttribute{
-		"critical_warning":     &SmartNvmeAttribute{AttributeId: "critical_warning", Name: "Critical Warning", Value: info.NvmeSmartHealthInformationLog.CriticalWarning, Threshold: 0},
-		"temperature":          &SmartNvmeAttribute{AttributeId: "temperature", Name: "Temperature", Value: info.NvmeSmartHealthInformationLog.Temperature, Threshold: -1},
-		"available_spare":      &SmartNvmeAttribute{AttributeId: "available_spare", Name: "Available Spare", Value: info.NvmeSmartHealthInformationLog.AvailableSpare, Threshold: info.NvmeSmartHealthInformationLog.AvailableSpareThreshold},
-		"percentage_used":      &SmartNvmeAttribute{AttributeId: "percentage_used", Name: "Percentage Used", Value: info.NvmeSmartHealthInformationLog.PercentageUsed, Threshold: 100},
-		"data_units_read":      &SmartNvmeAttribute{AttributeId: "data_units_read", Name: "Data Units Read", Value: info.NvmeSmartHealthInformationLog.DataUnitsRead, Threshold: -1},
-		"data_units_written":   &SmartNvmeAttribute{AttributeId: "data_units_written", Name: "Data Units Written", Value: info.NvmeSmartHealthInformationLog.DataUnitsWritten, Threshold: -1},
-		"host_reads":           &SmartNvmeAttribute{AttributeId: "host_reads", Name: "Host Reads", Value: info.NvmeSmartHealthInformationLog.HostReads, Threshold: -1},
-		"host_writes":          &SmartNvmeAttribute{AttributeId: "host_writes", Name: "Host Writes", Value: info.NvmeSmartHealthInformationLog.HostWrites, Threshold: -1},
-		"controller_busy_time": &SmartNvmeAttribute{AttributeId: "controller_busy_time", Name: "Controller Busy Time", Value: info.NvmeSmartHealthInformationLog.ControllerBusyTime, Threshold: -1},
-		"power_cycles":         &SmartNvmeAttribute{AttributeId: "power_cycles", Name: "Power Cycles", Value: info.NvmeSmartHealthInformationLog.PowerCycles, Threshold: -1},
-		"power_on_hours":       &SmartNvmeAttribute{AttributeId: "power_on_hours", Name: "Power on Hours", Value: info.NvmeSmartHealthInformationLog.PowerOnHours, Threshold: -1},
-		"unsafe_shutdowns":     &SmartNvmeAttribute{AttributeId: "unsafe_shutdowns", Name: "Unsafe Shutdowns", Value: info.NvmeSmartHealthInformationLog.UnsafeShutdowns, Threshold: -1},
-		"media_errors":         &SmartNvmeAttribute{AttributeId: "media_errors", Name: "Media Errors", Value: info.NvmeSmartHealthInformationLog.MediaErrors, Threshold: 0},
-		"num_err_log_entries":  &SmartNvmeAttribute{AttributeId: "num_err_log_entries", Name: "Numb Err Log Entries", Value: info.NvmeSmartHealthInformationLog.NumErrLogEntries, Threshold: 0},
-		"warning_temp_time":    &SmartNvmeAttribute{AttributeId: "warning_temp_time", Name: "Warning Temp Time", Value: info.NvmeSmartHealthInformationLog.WarningTempTime, Threshold: -1},
-		"critical_comp_time":   &SmartNvmeAttribute{AttributeId: "critical_comp_time", Name: "Critical CompTime", Value: info.NvmeSmartHealthInformationLog.CriticalCompTime, Threshold: -1},
+		"critical_warning":     (&SmartNvmeAttribute{AttributeId: "critical_warning", Name: "Critical Warning", Value: info.NvmeSmartHealthInformationLog.CriticalWarning, Threshold: 0}).PopulateAttributeStatus(),
+		"temperature":          (&SmartNvmeAttribute{AttributeId: "temperature", Name: "Temperature", Value: info.NvmeSmartHealthInformationLog.Temperature, Threshold: -1}).PopulateAttributeStatus(),
+		"available_spare":      (&SmartNvmeAttribute{AttributeId: "available_spare", Name: "Available Spare", Value: info.NvmeSmartHealthInformationLog.AvailableSpare, Threshold: info.NvmeSmartHealthInformationLog.AvailableSpareThreshold}).PopulateAttributeStatus(),
+		"percentage_used":      (&SmartNvmeAttribute{AttributeId: "percentage_used", Name: "Percentage Used", Value: info.NvmeSmartHealthInformationLog.PercentageUsed, Threshold: 100}).PopulateAttributeStatus(),
+		"data_units_read":      (&SmartNvmeAttribute{AttributeId: "data_units_read", Name: "Data Units Read", Value: info.NvmeSmartHealthInformationLog.DataUnitsRead, Threshold: -1}).PopulateAttributeStatus(),
+		"data_units_written":   (&SmartNvmeAttribute{AttributeId: "data_units_written", Name: "Data Units Written", Value: info.NvmeSmartHealthInformationLog.DataUnitsWritten, Threshold: -1}).PopulateAttributeStatus(),
+		"host_reads":           (&SmartNvmeAttribute{AttributeId: "host_reads", Name: "Host Reads", Value: info.NvmeSmartHealthInformationLog.HostReads, Threshold: -1}).PopulateAttributeStatus(),
+		"host_writes":          (&SmartNvmeAttribute{AttributeId: "host_writes", Name: "Host Writes", Value: info.NvmeSmartHealthInformationLog.HostWrites, Threshold: -1}).PopulateAttributeStatus(),
+		"controller_busy_time": (&SmartNvmeAttribute{AttributeId: "controller_busy_time", Name: "Controller Busy Time", Value: info.NvmeSmartHealthInformationLog.ControllerBusyTime, Threshold: -1}).PopulateAttributeStatus(),
+		"power_cycles":         (&SmartNvmeAttribute{AttributeId: "power_cycles", Name: "Power Cycles", Value: info.NvmeSmartHealthInformationLog.PowerCycles, Threshold: -1}).PopulateAttributeStatus(),
+		"power_on_hours":       (&SmartNvmeAttribute{AttributeId: "power_on_hours", Name: "Power on Hours", Value: info.NvmeSmartHealthInformationLog.PowerOnHours, Threshold: -1}).PopulateAttributeStatus(),
+		"unsafe_shutdowns":     (&SmartNvmeAttribute{AttributeId: "unsafe_shutdowns", Name: "Unsafe Shutdowns", Value: info.NvmeSmartHealthInformationLog.UnsafeShutdowns, Threshold: -1}).PopulateAttributeStatus(),
+		"media_errors":         (&SmartNvmeAttribute{AttributeId: "media_errors", Name: "Media Errors", Value: info.NvmeSmartHealthInformationLog.MediaErrors, Threshold: 0}).PopulateAttributeStatus(),
+		"num_err_log_entries":  (&SmartNvmeAttribute{AttributeId: "num_err_log_entries", Name: "Numb Err Log Entries", Value: info.NvmeSmartHealthInformationLog.NumErrLogEntries, Threshold: 0}).PopulateAttributeStatus(),
+		"warning_temp_time":    (&SmartNvmeAttribute{AttributeId: "warning_temp_time", Name: "Warning Temp Time", Value: info.NvmeSmartHealthInformationLog.WarningTempTime, Threshold: -1}).PopulateAttributeStatus(),
+		"critical_comp_time":   (&SmartNvmeAttribute{AttributeId: "critical_comp_time", Name: "Critical CompTime", Value: info.NvmeSmartHealthInformationLog.CriticalCompTime, Threshold: -1}).PopulateAttributeStatus(),
+	}
+
+	//find analyzed attribute status
+	for _, val := range sm.Attributes {
+		if val.GetStatus() == pkg.SmartAttributeStatusFailed {
+			sm.Status = pkg.DeviceStatusFailedScrutiny
+		}
 	}
 }

 //generate SmartScsiAttribute entries from Scrutiny Collector Smart data.
 func (sm *Smart) ProcessScsiSmartInfo(info collector.SmartInfo) {
 	sm.Attributes = map[string]SmartAttribute{
-		"scsi_grown_defect_list":                     &SmartScsiAttribute{AttributeId: "scsi_grown_defect_list", Name: "Grown Defect List", Value: info.ScsiGrownDefectList, Threshold: 0},
-		"read_errors_corrected_by_eccfast":           &SmartScsiAttribute{AttributeId: "read_errors_corrected_by_eccfast", Name: "Read Errors Corrected by ECC Fast", Value: info.ScsiErrorCounterLog.Read.ErrorsCorrectedByEccfast, Threshold: -1},
-		"read_errors_corrected_by_eccdelayed":        &SmartScsiAttribute{AttributeId: "read_errors_corrected_by_eccdelayed", Name: "Read Errors Corrected by ECC Delayed", Value: info.ScsiErrorCounterLog.Read.ErrorsCorrectedByEccdelayed, Threshold: -1},
-		"read_errors_corrected_by_rereads_rewrites":  &SmartScsiAttribute{AttributeId: "read_errors_corrected_by_rereads_rewrites", Name: "Read Errors Corrected by ReReads/ReWrites", Value: info.ScsiErrorCounterLog.Read.ErrorsCorrectedByRereadsRewrites, Threshold: 0},
-		"read_total_errors_corrected":                &SmartScsiAttribute{AttributeId: "read_total_errors_corrected", Name: "Read Total Errors Corrected", Value: info.ScsiErrorCounterLog.Read.TotalErrorsCorrected, Threshold: -1},
-		"read_correction_algorithm_invocations":      &SmartScsiAttribute{AttributeId: "read_correction_algorithm_invocations", Name: "Read Correction Algorithm Invocations", Value: info.ScsiErrorCounterLog.Read.CorrectionAlgorithmInvocations, Threshold: -1},
-		"read_total_uncorrected_errors":              &SmartScsiAttribute{AttributeId: "read_total_uncorrected_errors", Name: "Read Total Uncorrected Errors", Value: info.ScsiErrorCounterLog.Read.TotalUncorrectedErrors, Threshold: 0},
-		"write_errors_corrected_by_eccfast":          &SmartScsiAttribute{AttributeId: "write_errors_corrected_by_eccfast", Name: "Write Errors Corrected by ECC Fast", Value: info.ScsiErrorCounterLog.Write.ErrorsCorrectedByEccfast, Threshold: -1},
-		"write_errors_corrected_by_eccdelayed":       &SmartScsiAttribute{AttributeId: "write_errors_corrected_by_eccdelayed", Name: "Write Errors Corrected by ECC Delayed", Value: info.ScsiErrorCounterLog.Write.ErrorsCorrectedByEccdelayed, Threshold: -1},
-		"write_errors_corrected_by_rereads_rewrites": &SmartScsiAttribute{AttributeId: "write_errors_corrected_by_rereads_rewrites", Name: "Write Errors Corrected by ReReads/ReWrites", Value: info.ScsiErrorCounterLog.Write.ErrorsCorrectedByRereadsRewrites, Threshold: 0},
-		"write_total_errors_corrected":               &SmartScsiAttribute{AttributeId: "write_total_errors_corrected", Name: "Write Total Errors Corrected", Value: info.ScsiErrorCounterLog.Write.TotalErrorsCorrected, Threshold: -1},
-		"write_correction_algorithm_invocations":     &SmartScsiAttribute{AttributeId: "write_correction_algorithm_invocations", Name: "Write Correction Algorithm Invocations", Value: info.ScsiErrorCounterLog.Write.CorrectionAlgorithmInvocations, Threshold: -1},
-		"write_total_uncorrected_errors":             &SmartScsiAttribute{AttributeId: "write_total_uncorrected_errors", Name: "Write Total Uncorrected Errors", Value: info.ScsiErrorCounterLog.Write.TotalUncorrectedErrors, Threshold: 0},
+		"scsi_grown_defect_list":                     (&SmartScsiAttribute{AttributeId: "scsi_grown_defect_list", Name: "Grown Defect List", Value: info.ScsiGrownDefectList, Threshold: 0}).PopulateAttributeStatus(),
+		"read_errors_corrected_by_eccfast":           (&SmartScsiAttribute{AttributeId: "read_errors_corrected_by_eccfast", Name: "Read Errors Corrected by ECC Fast", Value: info.ScsiErrorCounterLog.Read.ErrorsCorrectedByEccfast, Threshold: -1}).PopulateAttributeStatus(),
+		"read_errors_corrected_by_eccdelayed":        (&SmartScsiAttribute{AttributeId: "read_errors_corrected_by_eccdelayed", Name: "Read Errors Corrected by ECC Delayed", Value: info.ScsiErrorCounterLog.Read.ErrorsCorrectedByEccdelayed, Threshold: -1}).PopulateAttributeStatus(),
+		"read_errors_corrected_by_rereads_rewrites":  (&SmartScsiAttribute{AttributeId: "read_errors_corrected_by_rereads_rewrites", Name: "Read Errors Corrected by ReReads/ReWrites", Value: info.ScsiErrorCounterLog.Read.ErrorsCorrectedByRereadsRewrites, Threshold: 0}).PopulateAttributeStatus(),
+		"read_total_errors_corrected":                (&SmartScsiAttribute{AttributeId: "read_total_errors_corrected", Name: "Read Total Errors Corrected", Value: info.ScsiErrorCounterLog.Read.TotalErrorsCorrected, Threshold: -1}).PopulateAttributeStatus(),
+		"read_correction_algorithm_invocations":      (&SmartScsiAttribute{AttributeId: "read_correction_algorithm_invocations", Name: "Read Correction Algorithm Invocations", Value: info.ScsiErrorCounterLog.Read.CorrectionAlgorithmInvocations, Threshold: -1}).PopulateAttributeStatus(),
+		"read_total_uncorrected_errors":              (&SmartScsiAttribute{AttributeId: "read_total_uncorrected_errors", Name: "Read Total Uncorrected Errors", Value: info.ScsiErrorCounterLog.Read.TotalUncorrectedErrors, Threshold: 0}).PopulateAttributeStatus(),
+		"write_errors_corrected_by_eccfast":          (&SmartScsiAttribute{AttributeId: "write_errors_corrected_by_eccfast", Name: "Write Errors Corrected by ECC Fast", Value: info.ScsiErrorCounterLog.Write.ErrorsCorrectedByEccfast, Threshold: -1}).PopulateAttributeStatus(),
+		"write_errors_corrected_by_eccdelayed":       (&SmartScsiAttribute{AttributeId: "write_errors_corrected_by_eccdelayed", Name: "Write Errors Corrected by ECC Delayed", Value: info.ScsiErrorCounterLog.Write.ErrorsCorrectedByEccdelayed, Threshold: -1}).PopulateAttributeStatus(),
+		"write_errors_corrected_by_rereads_rewrites": (&SmartScsiAttribute{AttributeId: "write_errors_corrected_by_rereads_rewrites", Name: "Write Errors Corrected by ReReads/ReWrites", Value: info.ScsiErrorCounterLog.Write.ErrorsCorrectedByRereadsRewrites, Threshold: 0}).PopulateAttributeStatus(),
+		"write_total_errors_corrected":               (&SmartScsiAttribute{AttributeId: "write_total_errors_corrected", Name: "Write Total Errors Corrected", Value: info.ScsiErrorCounterLog.Write.TotalErrorsCorrected, Threshold: -1}).PopulateAttributeStatus(),
+		"write_correction_algorithm_invocations":     (&SmartScsiAttribute{AttributeId: "write_correction_algorithm_invocations", Name: "Write Correction Algorithm Invocations", Value: info.ScsiErrorCounterLog.Write.CorrectionAlgorithmInvocations, Threshold: -1}).PopulateAttributeStatus(),
+		"write_total_uncorrected_errors":             (&SmartScsiAttribute{AttributeId: "write_total_uncorrected_errors", Name: "Write Total Uncorrected Errors", Value: info.ScsiErrorCounterLog.Write.TotalUncorrectedErrors, Threshold: 0}).PopulateAttributeStatus(),
+	}
+
+	//find analyzed attribute status
+	for _, val := range sm.Attributes {
+		if val.GetStatus() == pkg.SmartAttributeStatusFailed {
+			sm.Status = pkg.DeviceStatusFailedScrutiny
+		}
 	}
 }
--- a/webapp/backend/pkg/models/measurements/smart_ata_attribute.go
+++ b/webapp/backend/pkg/models/measurements/smart_ata_attribute.go
@ -2,14 +2,12 @@ package measurements

 import (
 	"fmt"
+	"github.com/analogj/scrutiny/webapp/backend/pkg"
+	"github.com/analogj/scrutiny/webapp/backend/pkg/thresholds"
 	"strconv"
 	"strings"
 )

-const SmartAttributeStatusPassed = "passed"
-const SmartAttributeStatusFailed = "failed"
-const SmartAttributeStatusWarning = "warn"
-
 type SmartAtaAttribute struct {
 	AttributeId int    `json:"attribute_id"`
 	Name        string `json:"name"`
@ -27,6 +25,10 @@ type SmartAtaAttribute struct {
 	FailureRate      float64 `json:"failure_rate,omitempty"`
 }

+func (sa *SmartAtaAttribute) GetStatus() string {
+	return sa.Status
+}
+
 func (sa *SmartAtaAttribute) Flatten() map[string]interface{} {

 	idString := strconv.Itoa(sa.AttributeId)
@ -71,81 +73,82 @@ func (sa *SmartAtaAttribute) Inflate(key string, val interface{}) {
 	}
 }

-//
-////populate attribute status, using SMART Thresholds & Observed Metadata
-//func (sa *SmartAtaAttribute) PopulateAttributeStatus() {
-//	if strings.ToUpper(sa.WhenFailed) == SmartWhenFailedFailingNow {
-//		//this attribute has previously failed
-//		sa.Status = SmartAttributeStatusFailed
-//		sa.StatusReason = "Attribute is failing manufacturer SMART threshold"
-//
-//	} else if strings.ToUpper(sa.WhenFailed) == SmartWhenFailedInThePast {
-//		sa.Status = SmartAttributeStatusWarning
-//		sa.StatusReason = "Attribute has previously failed manufacturer SMART threshold"
-//	}
-//
-//	if smartMetadata, ok := metadata.AtaMetadata[sa.AttributeId]; ok {
-//		sa.MetadataObservedThresholdStatus(smartMetadata)
-//	}
-//
-//	//check if status is blank, set to "passed"
-//	if len(sa.Status) == 0 {
-//		sa.Status = SmartAttributeStatusPassed
-//	}
-//}
-//
-//// compare the attribute (raw, normalized, transformed) value to observed thresholds, and update status if necessary
-//func (sa *SmartAtaAttribute) MetadataObservedThresholdStatus(smartMetadata metadata.AtaAttributeMetadata) {
-//	//TODO: multiple rules
-//	// try to predict the failure rates for observed thresholds that have 0 failure rate and error bars.
-//	// - if the attribute is critical
-//	//		- the failure rate is over 10 - set to failed
-//	//		- the attribute does not match any threshold, set to warn
-//	// - if the attribute is not critical
-//	//		- if failure rate is above 20 - set to failed
-//	// 		- if failure rate is above 10 but below 20 - set to warn
-//
-//	//update the smart attribute status based on Observed thresholds.
-//	var value int64
-//	if smartMetadata.DisplayType == metadata.AtaSmartAttributeDisplayTypeNormalized {
-//		value = int64(sa.Value)
-//	} else if smartMetadata.DisplayType == metadata.AtaSmartAttributeDisplayTypeTransformed {
-//		value = sa.TransformedValue
-//	} else {
-//		value = sa.RawValue
-//	}
-//
-//	for _, obsThresh := range smartMetadata.ObservedThresholds {
-//
-//		//check if "value" is in this bucket
-//		if ((obsThresh.Low == obsThresh.High) && value == obsThresh.Low) ||
-//			(obsThresh.Low < value && value <= obsThresh.High) {
-//			sa.FailureRate = obsThresh.AnnualFailureRate
-//
-//			if smartMetadata.Critical {
-//				if obsThresh.AnnualFailureRate >= 0.10 {
-//					sa.Status = SmartAttributeStatusFailed
-//					sa.StatusReason = "Observed Failure Rate for Critical Attribute is greater than 10%"
-//				}
-//			} else {
-//				if obsThresh.AnnualFailureRate >= 0.20 {
-//					sa.Status = SmartAttributeStatusFailed
-//					sa.StatusReason = "Observed Failure Rate for Attribute is greater than 20%"
-//				} else if obsThresh.AnnualFailureRate >= 0.10 {
-//					sa.Status = SmartAttributeStatusWarning
-//					sa.StatusReason = "Observed Failure Rate for Attribute is greater than 10%"
-//				}
-//			}
-//
-//			//we've found the correct bucket, we can drop out of this loop
-//			return
-//		}
-//	}
-//	// no bucket found
-//	if smartMetadata.Critical {
-//		sa.Status = SmartAttributeStatusWarning
-//		sa.StatusReason = "Could not determine Observed Failure Rate for Critical Attribute"
-//	}
-//
-//	return
-//}
+//populate attribute status, using SMART Thresholds & Observed Metadata
+// Chainable
+func (sa *SmartAtaAttribute) PopulateAttributeStatus() *SmartAtaAttribute {
+	if strings.ToUpper(sa.WhenFailed) == pkg.SmartWhenFailedFailingNow {
+		//this attribute is currently failing its manufacturer SMART threshold
+		sa.Status = pkg.SmartAttributeStatusFailed
+		sa.StatusReason = "Attribute is failing manufacturer SMART threshold"
+
+	} else if strings.ToUpper(sa.WhenFailed) == pkg.SmartWhenFailedInThePast {
+		sa.Status = pkg.SmartAttributeStatusWarning
+		sa.StatusReason = "Attribute has previously failed manufacturer SMART threshold"
+	}
+
+	if smartMetadata, ok := thresholds.AtaMetadata[sa.AttributeId]; ok {
+		sa.ValidateThreshold(smartMetadata)
+	}
+
+	//check if status is blank, set to "passed"
+	if len(sa.Status) == 0 {
+		sa.Status = pkg.SmartAttributeStatusPassed
+	}
+	return sa
+}
+
+// compare the attribute (raw, normalized, transformed) value to observed thresholds, and update status if necessary
+func (sa *SmartAtaAttribute) ValidateThreshold(smartMetadata thresholds.AtaAttributeMetadata) {
+	//TODO: multiple rules
+	// try to predict the failure rates for observed thresholds that have 0 failure rate and error bars.
+	// - if the attribute is critical
+	//		- the failure rate is over 10 - set to failed
+	//		- the attribute does not match any threshold, set to warn
+	// - if the attribute is not critical
+	//		- if failure rate is above 20 - set to failed
+	// 		- if failure rate is above 10 but below 20 - set to warn
+
+	//update the smart attribute status based on Observed thresholds.
+	var value int64
+	if smartMetadata.DisplayType == thresholds.AtaSmartAttributeDisplayTypeNormalized {
+		value = int64(sa.Value)
+	} else if smartMetadata.DisplayType == thresholds.AtaSmartAttributeDisplayTypeTransformed {
+		value = sa.TransformedValue
+	} else {
+		value = sa.RawValue
+	}
+
+	for _, obsThresh := range smartMetadata.ObservedThresholds {
+
+		//check if "value" is in this bucket
+		if ((obsThresh.Low == obsThresh.High) && value == obsThresh.Low) ||
+			(obsThresh.Low < value && value <= obsThresh.High) {
+			sa.FailureRate = obsThresh.AnnualFailureRate
+
+			if smartMetadata.Critical {
+				if obsThresh.AnnualFailureRate >= 0.10 {
+					sa.Status = pkg.SmartAttributeStatusFailed
+					sa.StatusReason = "Observed Failure Rate for Critical Attribute is greater than 10%"
+				}
+			} else {
+				if obsThresh.AnnualFailureRate >= 0.20 {
+					sa.Status = pkg.SmartAttributeStatusFailed
+					sa.StatusReason = "Observed Failure Rate for Attribute is greater than 20%"
+				} else if obsThresh.AnnualFailureRate >= 0.10 {
+					sa.Status = pkg.SmartAttributeStatusWarning
+					sa.StatusReason = "Observed Failure Rate for Attribute is greater than 10%"
+				}
+			}
+
+			//we've found the correct bucket, we can drop out of this loop
+			return
+		}
+	}
+	// no bucket found
+	if smartMetadata.Critical {
+		sa.Status = pkg.SmartAttributeStatusWarning
+		sa.StatusReason = "Could not determine Observed Failure Rate for Critical Attribute"
+	}
+
+	return
+}
--- a/webapp/backend/pkg/models/measurements/smart_attribute.go
+++ b/webapp/backend/pkg/models/measurements/smart_attribute.go
@ -3,4 +3,5 @@ package measurements
 type SmartAttribute interface {
 	Flatten() (fields map[string]interface{})
 	Inflate(key string, val interface{})
+	GetStatus() string
 }
--- a/webapp/backend/pkg/models/measurements/smart_nvme_attribute.go
+++ b/webapp/backend/pkg/models/measurements/smart_nvme_attribute.go
@ -2,6 +2,8 @@ package measurements

 import (
 	"fmt"
+	"github.com/analogj/scrutiny/webapp/backend/pkg"
+	"github.com/analogj/scrutiny/webapp/backend/pkg/thresholds"
 	"strings"
 )

@ -17,6 +19,10 @@ type SmartNvmeAttribute struct {
 	FailureRate      float64 `json:"failure_rate,omitempty"`
 }

+func (sa *SmartNvmeAttribute) GetStatus() string {
+	return sa.Status
+}
+
 func (sa *SmartNvmeAttribute) Flatten() map[string]interface{} {
 	return map[string]interface{}{
 		fmt.Sprintf("attr.%s.attribute_id", sa.AttributeId): sa.AttributeId,
@ -44,25 +50,26 @@ func (sa *SmartNvmeAttribute) Inflate(key string, val interface{}) {
 	}
 }

-//
-////populate attribute status, using SMART Thresholds & Observed Metadata
-//func (sa *SmartNvmeAttribute) PopulateAttributeStatus() {
-//
-//	//-1 is a special number meaning no threshold.
-//	if sa.Threshold != -1 {
-//		if smartMetadata, ok := metadata.NmveMetadata[sa.AttributeId]; ok {
-//			//check what the ideal is. Ideal tells us if we our recorded value needs to be above, or below the threshold
-//			if (smartMetadata.Ideal == "low" && sa.Value > sa.Threshold) ||
-//				(smartMetadata.Ideal == "high" && sa.Value < sa.Threshold) {
-//				sa.Status = SmartAttributeStatusFailed
-//				sa.StatusReason = "Attribute is failing recommended SMART threshold"
-//			}
-//		}
-//	}
-//	//TODO: eventually figure out the critical_warning bits and determine correct error messages here.
-//
-//	//check if status is blank, set to "passed"
-//	if len(sa.Status) == 0 {
-//		sa.Status = SmartAttributeStatusPassed
-//	}
-//}
+//populate attribute status, using SMART Thresholds & Observed Metadata
+// Chainable
+func (sa *SmartNvmeAttribute) PopulateAttributeStatus() *SmartNvmeAttribute {
+
+	//-1 is a special number meaning no threshold.
+	if sa.Threshold != -1 {
+		if smartMetadata, ok := thresholds.NmveMetadata[sa.AttributeId]; ok {
+			//check what the ideal is. Ideal tells us if our recorded value needs to be above, or below the threshold
+			if (smartMetadata.Ideal == "low" && sa.Value > sa.Threshold) ||
+				(smartMetadata.Ideal == "high" && sa.Value < sa.Threshold) {
+				sa.Status = pkg.SmartAttributeStatusFailed
+				sa.StatusReason = "Attribute is failing recommended SMART threshold"
+			}
+		}
+	}
+	//TODO: eventually figure out the critical_warning bits and determine correct error messages here.
+
+	//check if status is blank, set to "passed"
+	if len(sa.Status) == 0 {
+		sa.Status = pkg.SmartAttributeStatusPassed
+	}
+	return sa
+}
--- a/webapp/backend/pkg/models/measurements/smart_scsci_attribute.go
+++ b/webapp/backend/pkg/models/measurements/smart_scsci_attribute.go
@ -2,6 +2,8 @@ package measurements

 import (
 	"fmt"
+	"github.com/analogj/scrutiny/webapp/backend/pkg"
+	"github.com/analogj/scrutiny/webapp/backend/pkg/thresholds"
 	"strings"
 )

@ -17,6 +19,10 @@ type SmartScsiAttribute struct {
 	FailureRate      float64 `json:"failure_rate,omitempty"`
 }

+func (sa *SmartScsiAttribute) GetStatus() string {
+	return sa.Status
+}
+
 func (sa *SmartScsiAttribute) Flatten() map[string]interface{} {
 	return map[string]interface{}{
 		fmt.Sprintf("attr.%s.attribute_id", sa.AttributeId): sa.AttributeId,
@ -45,23 +51,25 @@ func (sa *SmartScsiAttribute) Inflate(key string, val interface{}) {
 }

 //
-////populate attribute status, using SMART Thresholds & Observed Metadata
-//func (sa *SmartScsiAttribute) PopulateAttributeStatus() {
-//
-//	//-1 is a special number meaning no threshold.
-//	if sa.Threshold != -1 {
-//		if smartMetadata, ok := metadata.NmveMetadata[sa.AttributeId]; ok {
-//			//check what the ideal is. Ideal tells us if we our recorded value needs to be above, or below the threshold
-//			if (smartMetadata.Ideal == "low" && sa.Value > sa.Threshold) ||
-//				(smartMetadata.Ideal == "high" && sa.Value < sa.Threshold) {
-//				sa.Status = SmartAttributeStatusFailed
-//				sa.StatusReason = "Attribute is failing recommended SMART threshold"
-//			}
-//		}
-//	}
-//
-//	//check if status is blank, set to "passed"
-//	if len(sa.Status) == 0 {
-//		sa.Status = SmartAttributeStatusPassed
-//	}
-//}
+// PopulateAttributeStatus sets Status (and StatusReason on failure) by comparing
+// Value against Threshold, using the attribute's metadata to decide whether a
+// low or a high value is the healthy direction. Status defaults to passed.
+// Chainable: it returns the receiver.
+func (sa *SmartScsiAttribute) PopulateAttributeStatus() *SmartScsiAttribute {
+
+	//-1 is a special number meaning no threshold.
+	if sa.Threshold != -1 {
+		// Use the SCSI metadata table for SCSI attributes; the previous lookup went
+		// through the NVMe table.
+		// NOTE(review): assumes ScsiMetadata is keyed by attribute id and its entries
+		// expose Ideal like the NVMe entries do - confirm against the thresholds package.
+		if smartMetadata, ok := thresholds.ScsiMetadata[sa.AttributeId]; ok {
+			//check what the ideal is. Ideal tells us if our recorded value needs to be above, or below the threshold
+			if (smartMetadata.Ideal == "low" && sa.Value > sa.Threshold) ||
+				(smartMetadata.Ideal == "high" && sa.Value < sa.Threshold) {
+				sa.Status = pkg.SmartAttributeStatusFailed
+				sa.StatusReason = "Attribute is failing recommended SMART threshold"
+			}
+		}
+	}
+
+	//check if status is blank, set to "passed"
+	if len(sa.Status) == 0 {
+		sa.Status = pkg.SmartAttributeStatusPassed
+	}
+	return sa
+}
--- a/webapp/backend/pkg/thresholds/ata_attribute_metadata.go
+++ b/webapp/backend/pkg/thresholds/ata_attribute_metadata.go
@ -1,4 +1,4 @@
-package metadata
+package thresholds

 const AtaSmartAttributeDisplayTypeRaw = "raw"
 const AtaSmartAttributeDisplayTypeNormalized = "normalized"
--- a/webapp/backend/pkg/thresholds/nvme_attribute_metadata.go
+++ b/webapp/backend/pkg/thresholds/nvme_attribute_metadata.go
@ -1,4 +1,4 @@
-package metadata
+package thresholds

 // https://media.kingston.com/support/downloads/MKP_521.6_SMART-DCP1000_attribute.pdf
 // https://www.percona.com/blog/2017/02/09/using-nvme-command-line-tools-to-check-nvme-flash-health/
--- a/webapp/backend/pkg/thresholds/scsi_attribute_metadata.go
+++ b/webapp/backend/pkg/thresholds/scsi_attribute_metadata.go
@ -1,4 +1,4 @@
-package metadata
+package thresholds

 type ScsiAttributeMetadata struct {
 	ID          string `json:"-"`
--- a/webapp/backend/pkg/web/handler/get_device_details.go
+++ b/webapp/backend/pkg/web/handler/get_device_details.go
@ -2,7 +2,7 @@ package handler

 import (
 	"github.com/analogj/scrutiny/webapp/backend/pkg/database"
-	"github.com/analogj/scrutiny/webapp/backend/pkg/metadata"
+	"github.com/analogj/scrutiny/webapp/backend/pkg/thresholds"
 	"github.com/gin-gonic/gin"
 	"github.com/sirupsen/logrus"
 	"net/http"
@ -23,11 +23,11 @@ func GetDeviceDetails(c *gin.Context) {

 	var deviceMetadata interface{}
 	if device.IsAta() {
-		deviceMetadata = metadata.AtaMetadata
+		deviceMetadata = thresholds.AtaMetadata
 	} else if device.IsNvme() {
-		deviceMetadata = metadata.NmveMetadata
+		deviceMetadata = thresholds.NmveMetadata
 	} else if device.IsScsi() {
-		deviceMetadata = metadata.ScsiMetadata
+		deviceMetadata = thresholds.ScsiMetadata
 	}

 	c.JSON(http.StatusOK, gin.H{"success": true, "data": map[string]interface{}{"device": device, "smart_results": smartResults}, "metadata": deviceMetadata})
--- a/webapp/backend/pkg/web/handler/upload_device_metrics.go
+++ b/webapp/backend/pkg/web/handler/upload_device_metrics.go
@ -37,13 +37,23 @@ func UploadDeviceMetrics(c *gin.Context) {
 	}

 	// insert smart info
-	_, err = deviceRepo.SaveSmartAttributes(c, c.Param("wwn"), collectorSmartData)
+	smartData, err := deviceRepo.SaveSmartAttributes(c, c.Param("wwn"), collectorSmartData)
 	if err != nil {
 		logger.Errorln("An error occurred while saving smartctl metrics", err)
 		c.JSON(http.StatusInternalServerError, gin.H{"success": false})
 		return
 	}

+	if smartData.Status != pkg.DeviceStatusPassed {
+		//there is a failure detected by Scrutiny, update the device status on the homepage.
+		updatedDevice, err = deviceRepo.UpdateDeviceStatus(c, c.Param("wwn"), smartData.Status)
+		if err != nil {
+			logger.Errorln("An error occurred while updating device status", err)
+			c.JSON(http.StatusInternalServerError, gin.H{"success": false})
+			return
+		}
+	}
+
 	// save smart temperature data (ignore failures)
 	err = deviceRepo.SaveSmartTemperature(c, c.Param("wwn"), updatedDevice.DeviceProtocol, collectorSmartData)
 	if err != nil {