From 5a80ae3e743e914968a4e10deec5db0804dd5d16 Mon Sep 17 00:00:00 2001 From: Jason Kulatunga Date: Mon, 24 Aug 2020 23:01:03 -0700 Subject: [PATCH] WIP - start supporting NVME and SCSI drive smart data. Added additional log message data for Smartctl exit code. --- collector/pkg/collector/base.go | 20 + collector/pkg/collector/metrics.go | 1 + webapp/backend/pkg/database/sqlite3.go | 4 +- webapp/backend/pkg/models/collector/smart.go | 116 ++- webapp/backend/pkg/models/db/device.go | 113 ++- webapp/backend/pkg/models/db/smart.go | 146 ++-- .../pkg/models/db/smart_ata_attribute.go | 88 +++ .../pkg/models/db/smart_nvme_attribute.go | 21 + .../pkg/models/db/smart_scsci_attribute.go | 21 + webapp/backend/pkg/models/db/smart_test.go | 119 ++- .../pkg/models/testdata/smart-megaraid0.json | 697 +++++++++++++++++ .../pkg/models/testdata/smart-megaraid1.json | 709 ++++++++++++++++++ .../pkg/models/testdata/smart-scsi2.json | 88 +++ .../pkg/web/handler/get_device_details.go | 4 +- 14 files changed, 2022 insertions(+), 125 deletions(-) create mode 100644 webapp/backend/pkg/models/db/smart_ata_attribute.go create mode 100644 webapp/backend/pkg/models/db/smart_nvme_attribute.go create mode 100644 webapp/backend/pkg/models/db/smart_scsci_attribute.go create mode 100644 webapp/backend/pkg/models/testdata/smart-megaraid0.json create mode 100644 webapp/backend/pkg/models/testdata/smart-megaraid1.json create mode 100644 webapp/backend/pkg/models/testdata/smart-scsi2.json diff --git a/collector/pkg/collector/base.go b/collector/pkg/collector/base.go index cd379e0..d0768a9 100644 --- a/collector/pkg/collector/base.go +++ b/collector/pkg/collector/base.go @@ -135,3 +135,23 @@ func (c *BaseCollector) ExecCmd(cmdName string, cmdArgs []string, workingDir str return stdBuffer.String(), err } + +func (c *BaseCollector) LogSmartctlExitCode(exitCode int) { + if exitCode&0x01 != 0 { + c.logger.Errorln("smartctl could not parse commandline") + } else if exitCode&0x02 != 0 { + 
c.logger.Errorln("smartctl could not open device") + } else if exitCode&0x04 != 0 { + c.logger.Errorln("smartctl detected a checksum error") + } else if exitCode&0x08 != 0 { + c.logger.Errorln("smartctl detected a failing disk ") + } else if exitCode&0x10 != 0 { + c.logger.Errorln("smartctl detected a disk in pre-fail") + } else if exitCode&0x20 != 0 { + c.logger.Errorln("smartctl detected a disk close to failure") + } else if exitCode&0x40 != 0 { + c.logger.Errorln("smartctl detected a error log with errors") + } else if exitCode&0x80 != 0 { + c.logger.Errorln("smartctl detected a self test log with errors") + } +} diff --git a/collector/pkg/collector/metrics.go b/collector/pkg/collector/metrics.go index 7bd812b..b4927be 100644 --- a/collector/pkg/collector/metrics.go +++ b/collector/pkg/collector/metrics.go @@ -98,6 +98,7 @@ func (mc *MetricsCollector) Collect(wg *sync.WaitGroup, deviceWWN string, device if exitError, ok := err.(*exec.ExitError); ok { // smartctl command exited with an error, we should still push the data to the API server mc.logger.Errorf("smartctl returned an error code (%d) while processing %s\n", exitError.ExitCode(), deviceName) + mc.LogSmartctlExitCode(exitError.ExitCode()) mc.Publish(deviceWWN, resultBytes) } else { mc.logger.Errorf("error while attempting to execute smartctl: %s\n", deviceName) diff --git a/webapp/backend/pkg/database/sqlite3.go b/webapp/backend/pkg/database/sqlite3.go index c22dc38..3f4fc45 100644 --- a/webapp/backend/pkg/database/sqlite3.go +++ b/webapp/backend/pkg/database/sqlite3.go @@ -20,7 +20,9 @@ func DatabaseHandler(dbPath string) gin.HandlerFunc { database.AutoMigrate(&db.Device{}) database.AutoMigrate(&db.SelfTest{}) database.AutoMigrate(&db.Smart{}) - database.AutoMigrate(&db.SmartAttribute{}) + database.AutoMigrate(&db.SmartAtaAttribute{}) + database.AutoMigrate(&db.SmartNvmeAttribute{}) + database.AutoMigrate(&db.SmartScsiAttribute{}) //TODO: detrmine where we can call defer database.Close() return func(c 
*gin.Context) { diff --git a/webapp/backend/pkg/models/collector/smart.go b/webapp/backend/pkg/models/collector/smart.go index 5dd1883..c17b0a4 100644 --- a/webapp/backend/pkg/models/collector/smart.go +++ b/webapp/backend/pkg/models/collector/smart.go @@ -70,6 +70,16 @@ type SmartInfo struct { SmartStatus struct { Passed bool `json:"passed"` } `json:"smart_status"` + + PowerOnTime struct { + Hours int64 `json:"hours"` + } `json:"power_on_time"` + PowerCycleCount int64 `json:"power_cycle_count"` + Temperature struct { + Current int64 `json:"current"` + } `json:"temperature"` + + // ATA Protocol Specific Fields AtaSmartData struct { OfflineDataCollection struct { Status struct { @@ -134,17 +144,35 @@ type SmartInfo struct { } `json:"raw"` } `json:"table"` } `json:"ata_smart_attributes"` - PowerOnTime struct { - Hours int64 `json:"hours"` - } `json:"power_on_time"` - PowerCycleCount int64 `json:"power_cycle_count"` - Temperature struct { - Current int64 `json:"current"` - } `json:"temperature"` AtaSmartErrorLog struct { Summary struct { - Revision int `json:"revision"` - Count int `json:"count"` + Revision int `json:"revision"` + Count int `json:"count"` + LoggedCount int `json:"logged_count"` + Table []struct { + ErrorNumber int `json:"error_number"` + LifetimeHours int `json:"lifetime_hours"` + CompletionRegisters struct { + Error int `json:"error"` + Status int `json:"status"` + Count int `json:"count"` + Lba int `json:"lba"` + Device int `json:"device"` + } `json:"completion_registers"` + ErrorDescription string `json:"error_description"` + PreviousCommands []struct { + Registers struct { + Command int `json:"command"` + Features int `json:"features"` + Count int `json:"count"` + Lba int `json:"lba"` + Device int `json:"device"` + DeviceControl int `json:"device_control"` + } `json:"registers"` + PowerupMilliseconds int `json:"powerup_milliseconds"` + CommandName string `json:"command_name"` + } `json:"previous_commands"` + } `json:"table"` } `json:"summary"` } 
`json:"ata_smart_error_log"` AtaSmartSelfTestLog struct { @@ -183,4 +211,74 @@ type SmartInfo struct { } `json:"flags"` PowerUpScanResumeMinutes int `json:"power_up_scan_resume_minutes"` } `json:"ata_smart_selective_self_test_log"` + + // NVME Protocol Specific Fields + NvmePciVendor struct { + ID int `json:"id"` + SubsystemID int `json:"subsystem_id"` + } `json:"nvme_pci_vendor"` + NvmeIeeeOuiIdentifier int `json:"nvme_ieee_oui_identifier"` + NvmeControllerID int `json:"nvme_controller_id"` + NvmeNumberOfNamespaces int `json:"nvme_number_of_namespaces"` + NvmeNamespaces []struct { + ID int `json:"id"` + Size struct { + Blocks int `json:"blocks"` + Bytes int64 `json:"bytes"` + } `json:"size"` + Capacity struct { + Blocks int `json:"blocks"` + Bytes int64 `json:"bytes"` + } `json:"capacity"` + Utilization struct { + Blocks int `json:"blocks"` + Bytes int64 `json:"bytes"` + } `json:"utilization"` + FormattedLbaSize int `json:"formatted_lba_size"` + } `json:"nvme_namespaces"` + NvmeSmartHealthInformationLog struct { + CriticalWarning int `json:"critical_warning"` + Temperature int `json:"temperature"` + AvailableSpare int `json:"available_spare"` + AvailableSpareThreshold int `json:"available_spare_threshold"` + PercentageUsed int `json:"percentage_used"` + DataUnitsRead int `json:"data_units_read"` + DataUnitsWritten int `json:"data_units_written"` + HostReads int `json:"host_reads"` + HostWrites int `json:"host_writes"` + ControllerBusyTime int `json:"controller_busy_time"` + PowerCycles int `json:"power_cycles"` + PowerOnHours int `json:"power_on_hours"` + UnsafeShutdowns int `json:"unsafe_shutdowns"` + MediaErrors int `json:"media_errors"` + NumErrLogEntries int `json:"num_err_log_entries"` + WarningTempTime int `json:"warning_temp_time"` + CriticalCompTime int `json:"critical_comp_time"` + } `json:"nvme_smart_health_information_log"` + + // SCSI Protocol Specific Fields + Vendor string `json:"vendor"` + Product string `json:"product"` + ScsiVersion string 
`json:"scsi_version"` + ScsiGrownDefectList int `json:"scsi_grown_defect_list"` + ScsiErrorCounterLog struct { + Read struct { + ErrorsCorrectedByEccfast int `json:"errors_corrected_by_eccfast"` + ErrorsCorrectedByEccdelayed int `json:"errors_corrected_by_eccdelayed"` + ErrorsCorrectedByRereadsRewrites int `json:"errors_corrected_by_rereads_rewrites"` + TotalErrorsCorrected int `json:"total_errors_corrected"` + CorrectionAlgorithmInvocations int `json:"correction_algorithm_invocations"` + GigabytesProcessed string `json:"gigabytes_processed"` + TotalUncorrectedErrors int `json:"total_uncorrected_errors"` + } `json:"read"` + Write struct { + ErrorsCorrectedByEccfast int `json:"errors_corrected_by_eccfast"` + ErrorsCorrectedByEccdelayed int `json:"errors_corrected_by_eccdelayed"` + ErrorsCorrectedByRereadsRewrites int `json:"errors_corrected_by_rereads_rewrites"` + TotalErrorsCorrected int `json:"total_errors_corrected"` + CorrectionAlgorithmInvocations int `json:"correction_algorithm_invocations"` + GigabytesProcessed string `json:"gigabytes_processed"` + TotalUncorrectedErrors int `json:"total_uncorrected_errors"` + } `json:"write"` + } `json:"scsi_error_counter_log"` } diff --git a/webapp/backend/pkg/models/db/device.go b/webapp/backend/pkg/models/db/device.go index 42f3a69..de03401 100644 --- a/webapp/backend/pkg/models/db/device.go +++ b/webapp/backend/pkg/models/db/device.go @@ -13,6 +13,10 @@ type DeviceWrapper struct { Data []Device `json:"data"` } +const DeviceProtocolAta = "ATA" +const DeviceProtocolScsi = "SCSI" +const DeviceProtocolNvme = "NVMe" + type Device struct { //GORM attributes, see: http://gorm.io/docs/conventions.html CreatedAt time.Time @@ -32,17 +36,30 @@ type Device struct { Capacity int64 `json:"capacity"` FormFactor string `json:"form_factor"` SmartSupport bool `json:"smart_support"` + DeviceProtocol string `json:"device_protocol"` SmartResults []Smart `gorm:"foreignkey:DeviceWWN" json:"smart_results"` } +func (dv *Device) IsAta() bool { + 
return dv.DeviceProtocol == DeviceProtocolAta +} + +func (dv *Device) IsScsi() bool { + return dv.DeviceProtocol == DeviceProtocolScsi +} + +func (dv *Device) IsNvme() bool { + return dv.DeviceProtocol == DeviceProtocolNvme +} + //This method requires a device with an array of SmartResults. //It will remove all SmartResults other than the first (the latest one) -//All removed SmartResults, will be processed, grouping SmartAttribute by attribute_id +//All removed SmartResults, will be processed, grouping SmartAtaAttribute by attribute_id // and adding theme to an array called History. func (dv *Device) SquashHistory() error { if len(dv.SmartResults) <= 1 { - return nil //no history found. ignore + return nil //no ataHistory found. ignore } latestSmartResultSlice := dv.SmartResults[0:1] @@ -51,48 +68,94 @@ func (dv *Device) SquashHistory() error { //re-assign the latest slice to the SmartResults field dv.SmartResults = latestSmartResultSlice - //process the historical slice - history := map[int][]SmartAttribute{} - for _, smartResult := range historicalSmartResultSlice { - for _, smartAttribute := range smartResult.SmartAttributes { - if _, ok := history[smartAttribute.AttributeId]; !ok { - history[smartAttribute.AttributeId] = []SmartAttribute{} + //process the historical slice for ATA data + if len(dv.SmartResults[0].AtaAttributes) > 0 { + ataHistory := map[int][]SmartAtaAttribute{} + for _, smartResult := range historicalSmartResultSlice { + for _, smartAttribute := range smartResult.AtaAttributes { + if _, ok := ataHistory[smartAttribute.AttributeId]; !ok { + ataHistory[smartAttribute.AttributeId] = []SmartAtaAttribute{} + } + ataHistory[smartAttribute.AttributeId] = append(ataHistory[smartAttribute.AttributeId], smartAttribute) + } + } + + //now assign the historical slices to the AtaAttributes in the latest SmartResults + for sandx, smartAttribute := range dv.SmartResults[0].AtaAttributes { + if attributeHistory, ok := ataHistory[smartAttribute.AttributeId]; ok 
{ + dv.SmartResults[0].AtaAttributes[sandx].History = attributeHistory } - history[smartAttribute.AttributeId] = append(history[smartAttribute.AttributeId], smartAttribute) } } - //now assign the historical slices to the SmartAttributes in the latest SmartResults - for sandx, smartAttribute := range dv.SmartResults[0].SmartAttributes { - if attributeHistory, ok := history[smartAttribute.AttributeId]; ok { - dv.SmartResults[0].SmartAttributes[sandx].History = attributeHistory + //process the historical slice for Nvme data + if len(dv.SmartResults[0].NvmeAttributes) > 0 { + nvmeHistory := map[string][]SmartNvmeAttribute{} + for _, smartResult := range historicalSmartResultSlice { + for _, smartAttribute := range smartResult.NvmeAttributes { + if _, ok := nvmeHistory[smartAttribute.AttributeId]; !ok { + nvmeHistory[smartAttribute.AttributeId] = []SmartNvmeAttribute{} + } + nvmeHistory[smartAttribute.AttributeId] = append(nvmeHistory[smartAttribute.AttributeId], smartAttribute) + } + } + + //now assign the historical slices to the AtaAttributes in the latest SmartResults + for sandx, smartAttribute := range dv.SmartResults[0].NvmeAttributes { + if attributeHistory, ok := nvmeHistory[smartAttribute.AttributeId]; ok { + dv.SmartResults[0].NvmeAttributes[sandx].History = attributeHistory + } } } + //process the historical slice for Scsi data + if len(dv.SmartResults[0].ScsiAttributes) > 0 { + scsiHistory := map[string][]SmartScsiAttribute{} + for _, smartResult := range historicalSmartResultSlice { + for _, smartAttribute := range smartResult.ScsiAttributes { + if _, ok := scsiHistory[smartAttribute.AttributeId]; !ok { + scsiHistory[smartAttribute.AttributeId] = []SmartScsiAttribute{} + } + scsiHistory[smartAttribute.AttributeId] = append(scsiHistory[smartAttribute.AttributeId], smartAttribute) + } + } + //now assign the historical slices to the AtaAttributes in the latest SmartResults + for sandx, smartAttribute := range dv.SmartResults[0].ScsiAttributes { + if 
attributeHistory, ok := scsiHistory[smartAttribute.AttributeId]; ok { + dv.SmartResults[0].ScsiAttributes[sandx].History = attributeHistory + } + } + } return nil } func (dv *Device) ApplyMetadataRules() error { + if !dv.IsAta() { + // Scrutiny Observed thresholds not yet available for NVME or SCSI drives + // since most SMART attributes are not present and BackBlaze data not available + return nil + } + //embed metadata in the latest smart attributes object - if len(dv.SmartResults) > 0 { - for ndx, attr := range dv.SmartResults[0].SmartAttributes { + if len(dv.SmartResults) > 0 && len(dv.SmartResults[0].AtaAttributes) > 0 { + for ndx, attr := range dv.SmartResults[0].AtaAttributes { if strings.ToUpper(attr.WhenFailed) == SmartWhenFailedFailingNow { //this attribute has previously failed - dv.SmartResults[0].SmartAttributes[ndx].Status = SmartAttributeStatusFailed - dv.SmartResults[0].SmartAttributes[ndx].StatusReason = "Attribute is failing manufacturer SMART threshold" + dv.SmartResults[0].AtaAttributes[ndx].Status = SmartAttributeStatusFailed + dv.SmartResults[0].AtaAttributes[ndx].StatusReason = "Attribute is failing manufacturer SMART threshold" } else if strings.ToUpper(attr.WhenFailed) == SmartWhenFailedInThePast { - dv.SmartResults[0].SmartAttributes[ndx].Status = SmartAttributeStatusWarning - dv.SmartResults[0].SmartAttributes[ndx].StatusReason = "Attribute has previously failed manufacturer SMART threshold" + dv.SmartResults[0].AtaAttributes[ndx].Status = SmartAttributeStatusWarning + dv.SmartResults[0].AtaAttributes[ndx].StatusReason = "Attribute has previously failed manufacturer SMART threshold" } if smartMetadata, ok := metadata.AtaSmartAttributes[attr.AttributeId]; ok { - dv.SmartResults[0].SmartAttributes[ndx].MetadataObservedThresholdStatus(smartMetadata) + dv.SmartResults[0].AtaAttributes[ndx].MetadataObservedThresholdStatus(smartMetadata) } //check if status is blank, set to "passed" - if len(dv.SmartResults[0].SmartAttributes[ndx].Status) == 0 
{ - dv.SmartResults[0].SmartAttributes[ndx].Status = SmartAttributeStatusPassed + if len(dv.SmartResults[0].AtaAttributes[ndx].Status) == 0 { + dv.SmartResults[0].AtaAttributes[ndx].Status = SmartAttributeStatusPassed } } } @@ -105,6 +168,10 @@ func (dv *Device) UpdateFromCollectorSmartInfo(info collector.SmartInfo) error { dv.RotationSpeed = info.RotationRate dv.Capacity = info.UserCapacity.Bytes dv.FormFactor = info.FormFactor.Name - //dv.SmartSupport = + dv.DeviceProtocol = info.Device.Protocol + if len(info.Vendor) > 0 { + dv.Manufacturer = info.Vendor + } + return nil } diff --git a/webapp/backend/pkg/models/db/smart.go b/webapp/backend/pkg/models/db/smart.go index e3a4b9c..bff183b 100644 --- a/webapp/backend/pkg/models/db/smart.go +++ b/webapp/backend/pkg/models/db/smart.go @@ -24,7 +24,9 @@ type Smart struct { PowerOnHours int64 `json:"power_on_hours"` PowerCycleCount int64 `json:"power_cycle_count"` - SmartAttributes []SmartAttribute `json:"smart_attributes" gorm:"foreignkey:SmartId"` + AtaAttributes []SmartAtaAttribute `json:"ata_attributes" gorm:"foreignkey:SmartId"` + NvmeAttributes []SmartNvmeAttribute `json:"nvme_attributes" gorm:"foreignkey:SmartId"` + ScsiAttributes []SmartScsiAttribute `json:"scsi_attributes" gorm:"foreignkey:SmartId"` } func (sm *Smart) FromCollectorSmartInfo(wwn string, info collector.SmartInfo) error { @@ -36,9 +38,27 @@ func (sm *Smart) FromCollectorSmartInfo(wwn string, info collector.SmartInfo) er sm.PowerCycleCount = info.PowerCycleCount sm.PowerOnHours = info.PowerOnTime.Hours - sm.SmartAttributes = []SmartAttribute{} + // process ATA/NVME/SCSI protocol data + if info.Device.Protocol == DeviceProtocolAta { + sm.ProcessAtaSmartInfo(info) + } else if info.Device.Protocol == DeviceProtocolNvme { + sm.ProcessNvmeSmartInfo(info) + } else if info.Device.Protocol == DeviceProtocolScsi { + sm.ProcessScsiSmartInfo(info) + } + + if info.SmartStatus.Passed { + sm.SmartStatus = "passed" + } else { + sm.SmartStatus = "failed" + } + 
return nil +} + +func (sm *Smart) ProcessAtaSmartInfo(info collector.SmartInfo) { + sm.AtaAttributes = []SmartAtaAttribute{} for _, collectorAttr := range info.AtaSmartAttributes.Table { - attrModel := SmartAttribute{ + attrModel := SmartAtaAttribute{ AttributeId: collectorAttr.ID, Name: collectorAttr.Name, Value: collectorAttr.Value, @@ -56,95 +76,45 @@ func (sm *Smart) FromCollectorSmartInfo(wwn string, info collector.SmartInfo) er attrModel.TransformedValue = smartMetadata.Transform(attrModel.Value, attrModel.RawValue, attrModel.RawString) } } - sm.SmartAttributes = append(sm.SmartAttributes, attrModel) - } - - if info.SmartStatus.Passed { - sm.SmartStatus = "passed" - } else { - sm.SmartStatus = "failed" + sm.AtaAttributes = append(sm.AtaAttributes, attrModel) } - return nil } -const SmartAttributeStatusPassed = "passed" -const SmartAttributeStatusFailed = "failed" -const SmartAttributeStatusWarning = "warn" - -type SmartAttribute struct { - gorm.Model - - SmartId int `json:"smart_id"` - Smart Device `json:"-" gorm:"foreignkey:SmartId"` // use SmartId as foreign key - - AttributeId int `json:"attribute_id"` - Name string `json:"name"` - Value int `json:"value"` - Worst int `json:"worst"` - Threshold int `json:"thresh"` - RawValue int64 `json:"raw_value"` - RawString string `json:"raw_string"` - WhenFailed string `json:"when_failed"` - - TransformedValue int64 `json:"transformed_value"` - Status string `gorm:"-" json:"status,omitempty"` - StatusReason string `gorm:"-" json:"status_reason,omitempty"` - FailureRate float64 `gorm:"-" json:"failure_rate,omitempty"` - History []SmartAttribute `gorm:"-" json:"history,omitempty"` -} - -// compare the attribute (raw, normalized, transformed) value to observed thresholds, and update status if necessary -func (sa *SmartAttribute) MetadataObservedThresholdStatus(smartMetadata metadata.AtaSmartAttribute) { - //TODO: multiple rules - // try to predict the failure rates for observed thresholds that have 0 failure rate and 
error bars. - // - if the attribute is critical - // - the failure rate is over 10 - set to failed - // - the attribute does not match any threshold, set to warn - // - if the attribute is not critical - // - if failure rate is above 20 - set to failed - // - if failure rate is above 10 but below 20 - set to warn - - //update the smart attribute status based on Observed thresholds. - var value int64 - if smartMetadata.DisplayType == metadata.AtaSmartAttributeDisplayTypeNormalized { - value = int64(sa.Value) - } else if smartMetadata.DisplayType == metadata.AtaSmartAttributeDisplayTypeTransformed { - value = sa.TransformedValue - } else { - value = sa.RawValue +func (sm *Smart) ProcessNvmeSmartInfo(info collector.SmartInfo) { + sm.NvmeAttributes = []SmartNvmeAttribute{ + {AttributeId: "critical_warning", Name: "Critical Warning", Value: info.NvmeSmartHealthInformationLog.CriticalWarning}, + {AttributeId: "temperature", Name: "Temperature", Value: info.NvmeSmartHealthInformationLog.Temperature}, + {AttributeId: "available_spare", Name: "Available Spare", Value: info.NvmeSmartHealthInformationLog.AvailableSpare, Threshold: info.NvmeSmartHealthInformationLog.AvailableSpareThreshold}, + {AttributeId: "percentage_used", Name: "Percentage Used", Value: info.NvmeSmartHealthInformationLog.PercentageUsed}, + {AttributeId: "data_units_read", Name: "Data Units Read", Value: info.NvmeSmartHealthInformationLog.DataUnitsRead}, + {AttributeId: "data_units_written", Name: "Data Units Written", Value: info.NvmeSmartHealthInformationLog.DataUnitsWritten}, + {AttributeId: "host_reads", Name: "Host Reads", Value: info.NvmeSmartHealthInformationLog.HostReads}, + {AttributeId: "host_writes", Name: "Host Writes", Value: info.NvmeSmartHealthInformationLog.HostWrites}, + {AttributeId: "controller_busy_time", Name: "Controller Busy Time", Value: info.NvmeSmartHealthInformationLog.ControllerBusyTime}, + {AttributeId: "power_cycles", Name: "Power Cycles", Value: 
info.NvmeSmartHealthInformationLog.PowerCycles}, + {AttributeId: "power_on_hours", Name: "Power on Hours", Value: info.NvmeSmartHealthInformationLog.PowerOnHours}, + {AttributeId: "unsafe_shutdowns", Name: "Unsafe Shutdowns", Value: info.NvmeSmartHealthInformationLog.UnsafeShutdowns}, + {AttributeId: "media_errors", Name: "Media Errors", Value: info.NvmeSmartHealthInformationLog.MediaErrors}, + {AttributeId: "num_err_log_entries", Name: "Numb Err Log Entries", Value: info.NvmeSmartHealthInformationLog.NumErrLogEntries}, + {AttributeId: "warning_temp_time", Name: "Warning Temp Time", Value: info.NvmeSmartHealthInformationLog.WarningTempTime}, + {AttributeId: "critical_comp_time", Name: "Critical CompTime", Value: info.NvmeSmartHealthInformationLog.CriticalCompTime}, } +} - for _, obsThresh := range smartMetadata.ObservedThresholds { - - //check if "value" is in this bucket - if ((obsThresh.Low == obsThresh.High) && value == obsThresh.Low) || - (obsThresh.Low < value && value <= obsThresh.High) { - sa.FailureRate = obsThresh.AnnualFailureRate - - if smartMetadata.Critical { - if obsThresh.AnnualFailureRate >= 0.10 { - sa.Status = SmartAttributeStatusFailed - sa.StatusReason = "Observed Failure Rate for Critical Attribute is greater than 10%" - } - } else { - if obsThresh.AnnualFailureRate >= 0.20 { - sa.Status = SmartAttributeStatusFailed - sa.StatusReason = "Observed Failure Rate for Attribute is greater than 20%" - } else if obsThresh.AnnualFailureRate >= 0.10 { - sa.Status = SmartAttributeStatusWarning - sa.StatusReason = "Observed Failure Rate for Attribute is greater than 10%" - } - } - - //we've found the correct bucket, we can drop out of this loop - return - } +func (sm *Smart) ProcessScsiSmartInfo(info collector.SmartInfo) { + sm.ScsiAttributes = []SmartScsiAttribute{ + {AttributeId: "scsi_grown_defect_list", Name: "Grown Defect List", Value: info.ScsiGrownDefectList}, + {AttributeId: "read.errors_corrected_by_eccfast", Name: "Read Errors Corrected by ECC 
Fast", Value: info.ScsiErrorCounterLog.Read.ErrorsCorrectedByEccfast}, + {AttributeId: "read.errors_corrected_by_eccdelayed", Name: "Read Errors Corrected by ECC Delayed", Value: info.ScsiErrorCounterLog.Read.ErrorsCorrectedByEccdelayed}, + {AttributeId: "read.errors_corrected_by_rereads_rewrites", Name: "Read Errors Corrected by ReReads/ReWrites", Value: info.ScsiErrorCounterLog.Read.ErrorsCorrectedByRereadsRewrites}, + {AttributeId: "read.total_errors_corrected", Name: "Read Total Errors Corrected", Value: info.ScsiErrorCounterLog.Read.TotalErrorsCorrected}, + {AttributeId: "read.correction_algorithm_invocations", Name: "Read Correction Algorithm Invocations", Value: info.ScsiErrorCounterLog.Read.CorrectionAlgorithmInvocations}, + {AttributeId: "read.total_uncorrected_errors", Name: "Read Total Uncorrected Errors", Value: info.ScsiErrorCounterLog.Read.TotalUncorrectedErrors}, + {AttributeId: "write.errors_corrected_by_eccfast", Name: "Write Errors Corrected by ECC Fast", Value: info.ScsiErrorCounterLog.Write.ErrorsCorrectedByEccfast}, + {AttributeId: "write.errors_corrected_by_eccdelayed", Name: "Write Errors Corrected by ECC Delayed", Value: info.ScsiErrorCounterLog.Write.ErrorsCorrectedByEccdelayed}, + {AttributeId: "write.errors_corrected_by_rereads_rewrites", Name: "Write Errors Corrected by ReReads/ReWrites", Value: info.ScsiErrorCounterLog.Write.ErrorsCorrectedByRereadsRewrites}, + {AttributeId: "write.total_errors_corrected", Name: "Write Total Errors Corrected", Value: info.ScsiErrorCounterLog.Write.TotalErrorsCorrected}, + {AttributeId: "write.correction_algorithm_invocations", Name: "Write Correction Algorithm Invocations", Value: info.ScsiErrorCounterLog.Write.CorrectionAlgorithmInvocations}, + {AttributeId: "write.total_uncorrected_errors", Name: "Write Total Uncorrected Errors", Value: info.ScsiErrorCounterLog.Write.TotalUncorrectedErrors}, } - // no bucket found - if smartMetadata.Critical { - sa.Status = SmartAttributeStatusWarning - 
sa.StatusReason = "Could not determine Observed Failure Rate for Critical Attribute" - } - - return } diff --git a/webapp/backend/pkg/models/db/smart_ata_attribute.go b/webapp/backend/pkg/models/db/smart_ata_attribute.go new file mode 100644 index 0000000..03809f8 --- /dev/null +++ b/webapp/backend/pkg/models/db/smart_ata_attribute.go @@ -0,0 +1,88 @@ +package db + +import ( + "github.com/analogj/scrutiny/webapp/backend/pkg/metadata" + "github.com/jinzhu/gorm" +) + +const SmartAttributeStatusPassed = "passed" +const SmartAttributeStatusFailed = "failed" +const SmartAttributeStatusWarning = "warn" + +type SmartAtaAttribute struct { + gorm.Model + + SmartId int `json:"smart_id"` + Smart Device `json:"-" gorm:"foreignkey:SmartId"` // use SmartId as foreign key + + AttributeId int `json:"attribute_id"` + Name string `json:"name"` + Value int `json:"value"` + Worst int `json:"worst"` + Threshold int `json:"thresh"` + RawValue int64 `json:"raw_value"` + RawString string `json:"raw_string"` + WhenFailed string `json:"when_failed"` + + TransformedValue int64 `json:"transformed_value"` + Status string `gorm:"-" json:"status,omitempty"` + StatusReason string `gorm:"-" json:"status_reason,omitempty"` + FailureRate float64 `gorm:"-" json:"failure_rate,omitempty"` + History []SmartAtaAttribute `gorm:"-" json:"history,omitempty"` +} + +// compare the attribute (raw, normalized, transformed) value to observed thresholds, and update status if necessary +func (sa *SmartAtaAttribute) MetadataObservedThresholdStatus(smartMetadata metadata.AtaSmartAttribute) { + //TODO: multiple rules + // try to predict the failure rates for observed thresholds that have 0 failure rate and error bars. 
+ // - if the attribute is critical + // - the failure rate is over 10 - set to failed + // - the attribute does not match any threshold, set to warn + // - if the attribute is not critical + // - if failure rate is above 20 - set to failed + // - if failure rate is above 10 but below 20 - set to warn + + //update the smart attribute status based on Observed thresholds. + var value int64 + if smartMetadata.DisplayType == metadata.AtaSmartAttributeDisplayTypeNormalized { + value = int64(sa.Value) + } else if smartMetadata.DisplayType == metadata.AtaSmartAttributeDisplayTypeTransformed { + value = sa.TransformedValue + } else { + value = sa.RawValue + } + + for _, obsThresh := range smartMetadata.ObservedThresholds { + + //check if "value" is in this bucket + if ((obsThresh.Low == obsThresh.High) && value == obsThresh.Low) || + (obsThresh.Low < value && value <= obsThresh.High) { + sa.FailureRate = obsThresh.AnnualFailureRate + + if smartMetadata.Critical { + if obsThresh.AnnualFailureRate >= 0.10 { + sa.Status = SmartAttributeStatusFailed + sa.StatusReason = "Observed Failure Rate for Critical Attribute is greater than 10%" + } + } else { + if obsThresh.AnnualFailureRate >= 0.20 { + sa.Status = SmartAttributeStatusFailed + sa.StatusReason = "Observed Failure Rate for Attribute is greater than 20%" + } else if obsThresh.AnnualFailureRate >= 0.10 { + sa.Status = SmartAttributeStatusWarning + sa.StatusReason = "Observed Failure Rate for Attribute is greater than 10%" + } + } + + //we've found the correct bucket, we can drop out of this loop + return + } + } + // no bucket found + if smartMetadata.Critical { + sa.Status = SmartAttributeStatusWarning + sa.StatusReason = "Could not determine Observed Failure Rate for Critical Attribute" + } + + return +} diff --git a/webapp/backend/pkg/models/db/smart_nvme_attribute.go b/webapp/backend/pkg/models/db/smart_nvme_attribute.go new file mode 100644 index 0000000..84623b5 --- /dev/null +++ 
b/webapp/backend/pkg/models/db/smart_nvme_attribute.go @@ -0,0 +1,21 @@ +package db + +import "github.com/jinzhu/gorm" + +type SmartNvmeAttribute struct { + gorm.Model + + SmartId int `json:"smart_id"` + Smart Device `json:"-" gorm:"foreignkey:SmartId"` // use SmartId as foreign key + + AttributeId string `json:"attribute_id"` //json string from smartctl + Name string `json:"name"` + Value int `json:"value"` + Threshold int `json:"thresh"` + + TransformedValue int64 `json:"transformed_value"` + Status string `gorm:"-" json:"status,omitempty"` + StatusReason string `gorm:"-" json:"status_reason,omitempty"` + FailureRate float64 `gorm:"-" json:"failure_rate,omitempty"` + History []SmartNvmeAttribute `gorm:"-" json:"history,omitempty"` +} diff --git a/webapp/backend/pkg/models/db/smart_scsci_attribute.go b/webapp/backend/pkg/models/db/smart_scsci_attribute.go new file mode 100644 index 0000000..0ee86ad --- /dev/null +++ b/webapp/backend/pkg/models/db/smart_scsci_attribute.go @@ -0,0 +1,21 @@ +package db + +import "github.com/jinzhu/gorm" + +type SmartScsiAttribute struct { + gorm.Model + + SmartId int `json:"smart_id"` + Smart Device `json:"-" gorm:"foreignkey:SmartId"` // use SmartId as foreign key + + AttributeId string `json:"attribute_id"` //json string from smartctl + Name string `json:"name"` + Value int `json:"value"` + Threshold int `json:"thresh"` + + TransformedValue int64 `json:"transformed_value"` + Status string `gorm:"-" json:"status,omitempty"` + StatusReason string `gorm:"-" json:"status_reason,omitempty"` + FailureRate float64 `gorm:"-" json:"failure_rate,omitempty"` + History []SmartScsiAttribute `gorm:"-" json:"history,omitempty"` +} diff --git a/webapp/backend/pkg/models/db/smart_test.go b/webapp/backend/pkg/models/db/smart_test.go index 1cefe2e..0f03715 100644 --- a/webapp/backend/pkg/models/db/smart_test.go +++ b/webapp/backend/pkg/models/db/smart_test.go @@ -29,14 +29,127 @@ func TestFromCollectorSmartInfo(t *testing.T) { //assert 
require.NoError(t, err) - require.Equal(t, smartMdl.DeviceWWN, "WWN-test") - require.Equal(t, smartMdl.SmartStatus, "passed") + require.Equal(t, "WWN-test", smartMdl.DeviceWWN) + require.Equal(t, "passed", smartMdl.SmartStatus) + require.Equal(t, 18, len(smartMdl.AtaAttributes)) + require.Equal(t, 0, len(smartMdl.NvmeAttributes)) + require.Equal(t, 0, len(smartMdl.ScsiAttributes)) //check that temperature was correctly parsed - for _, attr := range smartMdl.SmartAttributes { + for _, attr := range smartMdl.AtaAttributes { if attr.AttributeId == 194 { require.Equal(t, int64(163210330144), attr.RawValue) require.Equal(t, int64(32), attr.TransformedValue) } } } + +func TestFromCollectorSmartInfo_Fail(t *testing.T) { + //setup + smartDataFile, err := os.Open("../testdata/smart-fail.json") + require.NoError(t, err) + defer smartDataFile.Close() + + var smartJson collector.SmartInfo + + smartDataBytes, err := ioutil.ReadAll(smartDataFile) + require.NoError(t, err) + err = json.Unmarshal(smartDataBytes, &smartJson) + require.NoError(t, err) + + //test + smartMdl := db.Smart{} + err = smartMdl.FromCollectorSmartInfo("WWN-test", smartJson) + + //assert + require.NoError(t, err) + require.Equal(t, "WWN-test", smartMdl.DeviceWWN) + require.Equal(t, "failed", smartMdl.SmartStatus) + require.Equal(t, 0, len(smartMdl.AtaAttributes)) + require.Equal(t, 0, len(smartMdl.NvmeAttributes)) + require.Equal(t, 0, len(smartMdl.ScsiAttributes)) +} + +func TestFromCollectorSmartInfo_Fail2(t *testing.T) { + //setup + smartDataFile, err := os.Open("../testdata/smart-fail2.json") + require.NoError(t, err) + defer smartDataFile.Close() + + var smartJson collector.SmartInfo + + smartDataBytes, err := ioutil.ReadAll(smartDataFile) + require.NoError(t, err) + err = json.Unmarshal(smartDataBytes, &smartJson) + require.NoError(t, err) + + //test + smartMdl := db.Smart{} + err = smartMdl.FromCollectorSmartInfo("WWN-test", smartJson) + + //assert + require.NoError(t, err) + require.Equal(t, 
"WWN-test", smartMdl.DeviceWWN) + require.Equal(t, "failed", smartMdl.SmartStatus) + require.Equal(t, 17, len(smartMdl.AtaAttributes)) + require.Equal(t, 0, len(smartMdl.NvmeAttributes)) + require.Equal(t, 0, len(smartMdl.ScsiAttributes)) +} + +func TestFromCollectorSmartInfo_Nvme(t *testing.T) { + //setup + smartDataFile, err := os.Open("../testdata/smart-nvme.json") + require.NoError(t, err) + defer smartDataFile.Close() + + var smartJson collector.SmartInfo + + smartDataBytes, err := ioutil.ReadAll(smartDataFile) + require.NoError(t, err) + err = json.Unmarshal(smartDataBytes, &smartJson) + require.NoError(t, err) + + //test + smartMdl := db.Smart{} + err = smartMdl.FromCollectorSmartInfo("WWN-test", smartJson) + + //assert + require.NoError(t, err) + require.Equal(t, "WWN-test", smartMdl.DeviceWWN) + require.Equal(t, "passed", smartMdl.SmartStatus) + require.Equal(t, 0, len(smartMdl.AtaAttributes)) + require.Equal(t, 16, len(smartMdl.NvmeAttributes)) + require.Equal(t, 0, len(smartMdl.ScsiAttributes)) + + require.Equal(t, 111303174, smartMdl.NvmeAttributes[6].Value) + require.Equal(t, 83170961, smartMdl.NvmeAttributes[7].Value) +} + +func TestFromCollectorSmartInfo_Scsi(t *testing.T) { + //setup + smartDataFile, err := os.Open("../testdata/smart-scsi.json") + require.NoError(t, err) + defer smartDataFile.Close() + + var smartJson collector.SmartInfo + + smartDataBytes, err := ioutil.ReadAll(smartDataFile) + require.NoError(t, err) + err = json.Unmarshal(smartDataBytes, &smartJson) + require.NoError(t, err) + + //test + smartMdl := db.Smart{} + err = smartMdl.FromCollectorSmartInfo("WWN-test", smartJson) + + //assert + require.NoError(t, err) + require.Equal(t, "WWN-test", smartMdl.DeviceWWN) + require.Equal(t, "passed", smartMdl.SmartStatus) + require.Equal(t, 0, len(smartMdl.AtaAttributes)) + require.Equal(t, 0, len(smartMdl.NvmeAttributes)) + require.Equal(t, 13, len(smartMdl.ScsiAttributes)) + + require.Equal(t, 56, smartMdl.ScsiAttributes[0].Value) + 
require.Equal(t, 300357663, smartMdl.ScsiAttributes[4].Value) //total_errors_corrected +} diff --git a/webapp/backend/pkg/models/testdata/smart-megaraid0.json b/webapp/backend/pkg/models/testdata/smart-megaraid0.json new file mode 100644 index 0000000..5767206 --- /dev/null +++ b/webapp/backend/pkg/models/testdata/smart-megaraid0.json @@ -0,0 +1,697 @@ +{ + "json_format_version": [ + 1, + 0 + ], + "smartctl": { + "version": [ + 7, + 1 + ], + "svn_revision": "5022", + "platform_info": "x86_64-linux-5.4.0-42-generic", + "build_info": "(local build)", + "argv": [ + "smartctl", + "-a", + "-j", + "-d", + "megaraid,0", + "-i", + "/dev/sda" + ], + "messages": [ + { + "string": "Warning: This result is based on an Attribute check.", + "severity": "warning" + } + ], + "exit_status": 4 + }, + "device": { + "name": "/dev/sda", + "info_name": "/dev/sda [megaraid_disk_00] [SAT]", + "type": "sat+megaraid,0", + "protocol": "ATA" + }, + "model_name": "WD4000FYYX", + "serial_number": "XXXXXXXXXXXX", + "wwn": { + "naa": 5, + "oui": 5358, + "id": 10217451239 + }, + "ata_additional_product_id": "DELL(tm)", + "firmware_version": "00.0D1K4", + "user_capacity": { + "blocks": 7814037168, + "bytes": 4000787030016 + }, + "logical_block_size": 512, + "physical_block_size": 512, + "rotation_rate": 7200, + "form_factor": { + "ata_value": 2, + "name": "3.5 inches" + }, + "in_smartctl_database": false, + "ata_version": { + "string": "ATA8-ACS T13/1699-D revision 6", + "major_value": 510, + "minor_value": 40 + }, + "sata_version": { + "string": "SATA 3.0", + "value": 62 + }, + "interface_speed": { + "max": { + "sata_value": 6, + "string": "3.0 Gb/s", + "units_per_second": 30, + "bits_per_unit": 100000000 + }, + "current": { + "sata_value": 2, + "string": "3.0 Gb/s", + "units_per_second": 30, + "bits_per_unit": 100000000 + } + }, + "local_time": { + "time_t": 1598297918, + "asctime": "Mon Aug 24 21:38:38 2020 CEST" + }, + "smart_status": { + "passed": true + }, + "ata_smart_data": { + 
"offline_data_collection": { + "status": { + "value": 130, + "string": "was completed without error", + "passed": true + }, + "completion_seconds": 90 + }, + "self_test": { + "status": { + "value": 0, + "string": "completed without error", + "passed": true + }, + "polling_minutes": { + "short": 2, + "extended": 523, + "conveyance": 5 + } + }, + "capabilities": { + "values": [ + 123, + 3 + ], + "exec_offline_immediate_supported": true, + "offline_is_aborted_upon_new_cmd": false, + "offline_surface_scan_supported": true, + "self_tests_supported": true, + "conveyance_self_test_supported": true, + "selective_self_test_supported": true, + "attribute_autosave_enabled": true, + "error_logging_supported": true, + "gp_logging_supported": true + } + }, + "ata_sct_capabilities": { + "value": 28861, + "error_recovery_control_supported": true, + "feature_control_supported": true, + "data_table_supported": true + }, + "ata_smart_attributes": { + "revision": 16, + "table": [ + { + "id": 1, + "name": "Raw_Read_Error_Rate", + "value": 200, + "worst": 197, + "thresh": 51, + "when_failed": "", + "flags": { + "value": 47, + "string": "POSR-K ", + "prefailure": true, + "updated_online": true, + "performance": true, + "error_rate": true, + "event_count": false, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 3, + "name": "Spin_Up_Time", + "value": 228, + "worst": 227, + "thresh": 21, + "when_failed": "", + "flags": { + "value": 39, + "string": "POS--K ", + "prefailure": true, + "updated_online": true, + "performance": true, + "error_rate": false, + "event_count": false, + "auto_keep": true + }, + "raw": { + "value": 7558, + "string": "7558" + } + }, + { + "id": 4, + "name": "Start_Stop_Count", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + 
}, + "raw": { + "value": 70, + "string": "70" + } + }, + { + "id": 5, + "name": "Reallocated_Sector_Ct", + "value": 200, + "worst": 200, + "thresh": 140, + "when_failed": "", + "flags": { + "value": 51, + "string": "PO--CK ", + "prefailure": true, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 7, + "name": "Seek_Error_Rate", + "value": 200, + "worst": 200, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 46, + "string": "-OSR-K ", + "prefailure": false, + "updated_online": true, + "performance": true, + "error_rate": true, + "event_count": false, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 9, + "name": "Power_On_Hours", + "value": 49, + "worst": 49, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 37787, + "string": "37787" + } + }, + { + "id": 10, + "name": "Spin_Retry_Count", + "value": 100, + "worst": 253, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 11, + "name": "Calibration_Retry_Count", + "value": 100, + "worst": 253, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 12, + "name": "Power_Cycle_Count", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + 
"value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 70, + "string": "70" + } + }, + { + "id": 183, + "name": "Runtime_Bad_Block", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 192, + "name": "Power-Off_Retract_Count", + "value": 200, + "worst": 200, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 55, + "string": "55" + } + }, + { + "id": 193, + "name": "Load_Cycle_Count", + "value": 197, + "worst": 197, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 9267, + "string": "9267" + } + }, + { + "id": 194, + "name": "Temperature_Celsius", + "value": 116, + "worst": 104, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 34, + "string": "-O---K ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": false, + "auto_keep": true + }, + "raw": { + "value": 3145764, + "string": "36 (Min/Max 0/48)" + } + }, + { + "id": 196, + "name": "Reallocated_Event_Count", + "value": 200, + "worst": 200, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": 
true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 197, + "name": "Current_Pending_Sector", + "value": 200, + "worst": 200, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 198, + "name": "Offline_Uncorrectable", + "value": 200, + "worst": 200, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 48, + "string": "----CK ", + "prefailure": false, + "updated_online": false, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 199, + "name": "UDMA_CRC_Error_Count", + "value": 200, + "worst": 200, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 200, + "name": "Multi_Zone_Error_Rate", + "value": 200, + "worst": 200, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 8, + "string": "---R-- ", + "prefailure": false, + "updated_online": false, + "performance": false, + "error_rate": true, + "event_count": false, + "auto_keep": false + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 241, + "name": "Total_LBAs_Written", + "value": 198, + "worst": 198, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 2754608750246, + "string": "2754608750246" + } + }, + { + "id": 242, + "name": "Total_LBAs_Read", + "value": 200, + 
"worst": 200, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 70057180117, + "string": "70057180117" + } + } + ] + }, + "power_on_time": { + "hours": 37787 + }, + "power_cycle_count": 70, + "temperature": { + "current": 36 + }, + "ata_smart_error_log": { + "summary": { + "revision": 1, + "count": 0 + } + }, + "ata_smart_self_test_log": { + "standard": { + "revision": 1, + "table": [ + { + "type": { + "value": 1, + "string": "Short offline" + }, + "status": { + "value": 0, + "string": "Completed without error", + "passed": true + }, + "lifetime_hours": 35990 + }, + { + "type": { + "value": 1, + "string": "Short offline" + }, + "status": { + "value": 0, + "string": "Completed without error", + "passed": true + }, + "lifetime_hours": 3 + }, + { + "type": { + "value": 223, + "string": "Vendor (0xdf)" + }, + "status": { + "value": 0, + "string": "Completed without error", + "passed": true + }, + "lifetime_hours": 3 + }, + { + "type": { + "value": 1, + "string": "Short offline" + }, + "status": { + "value": 0, + "string": "Completed without error", + "passed": true + }, + "lifetime_hours": 1 + } + ], + "count": 4, + "error_count_total": 0, + "error_count_outdated": 0 + } + }, + "ata_smart_selective_self_test_log": { + "revision": 1, + "table": [ + { + "lba_min": 0, + "lba_max": 0, + "status": { + "value": 0, + "string": "Not_testing" + } + }, + { + "lba_min": 0, + "lba_max": 0, + "status": { + "value": 0, + "string": "Not_testing" + } + }, + { + "lba_min": 0, + "lba_max": 0, + "status": { + "value": 0, + "string": "Not_testing" + } + }, + { + "lba_min": 0, + "lba_max": 0, + "status": { + "value": 0, + "string": "Not_testing" + } + }, + { + "lba_min": 0, + "lba_max": 0, + "status": { + "value": 0, + "string": "Not_testing" + } + } + ], + "flags": { + "value": 0, + 
"remainder_scan_enabled": false + }, + "power_up_scan_resume_minutes": 0 + } +} diff --git a/webapp/backend/pkg/models/testdata/smart-megaraid1.json b/webapp/backend/pkg/models/testdata/smart-megaraid1.json new file mode 100644 index 0000000..04a2af2 --- /dev/null +++ b/webapp/backend/pkg/models/testdata/smart-megaraid1.json @@ -0,0 +1,709 @@ +{ + "json_format_version": [ + 1, + 0 + ], + "smartctl": { + "version": [ + 7, + 1 + ], + "svn_revision": "5022", + "platform_info": "x86_64-linux-5.4.0-42-generic", + "build_info": "(local build)", + "argv": [ + "smartctl", + "-a", + "-j", + "-d", + "megaraid,1", + "-i", + "/dev/sda" + ], + "messages": [ + { + "string": "Warning: This result is based on an Attribute check.", + "severity": "warning" + } + ], + "exit_status": 4 + }, + "device": { + "name": "/dev/sda", + "info_name": "/dev/sda [megaraid_disk_01] [SAT]", + "type": "sat+megaraid,1", + "protocol": "ATA" + }, + "model_name": "WD4000FYYX", + "serial_number": "XXXXXXXXXXXX", + "wwn": { + "naa": 5, + "oui": 5358, + "id": 11649125727 + }, + "ata_additional_product_id": "DELL(tm)", + "firmware_version": "00.0D1K4", + "user_capacity": { + "blocks": 7814037168, + "bytes": 4000787030016 + }, + "logical_block_size": 512, + "physical_block_size": 512, + "rotation_rate": 7200, + "form_factor": { + "ata_value": 2, + "name": "3.5 inches" + }, + "in_smartctl_database": false, + "ata_version": { + "string": "ATA8-ACS T13/1699-D revision 6", + "major_value": 510, + "minor_value": 40 + }, + "sata_version": { + "string": "SATA 3.0", + "value": 62 + }, + "interface_speed": { + "max": { + "sata_value": 6, + "string": "3.0 Gb/s", + "units_per_second": 30, + "bits_per_unit": 100000000 + }, + "current": { + "sata_value": 2, + "string": "3.0 Gb/s", + "units_per_second": 30, + "bits_per_unit": 100000000 + } + }, + "local_time": { + "time_t": 1598297922, + "asctime": "Mon Aug 24 21:38:42 2020 CEST" + }, + "smart_status": { + "passed": true + }, + "ata_smart_data": { + 
"offline_data_collection": { + "status": { + "value": 130, + "string": "was completed without error", + "passed": true + }, + "completion_seconds": 90 + }, + "self_test": { + "status": { + "value": 0, + "string": "completed without error", + "passed": true + }, + "polling_minutes": { + "short": 2, + "extended": 503, + "conveyance": 5 + } + }, + "capabilities": { + "values": [ + 123, + 3 + ], + "exec_offline_immediate_supported": true, + "offline_is_aborted_upon_new_cmd": false, + "offline_surface_scan_supported": true, + "self_tests_supported": true, + "conveyance_self_test_supported": true, + "selective_self_test_supported": true, + "attribute_autosave_enabled": true, + "error_logging_supported": true, + "gp_logging_supported": true + } + }, + "ata_sct_capabilities": { + "value": 28861, + "error_recovery_control_supported": true, + "feature_control_supported": true, + "data_table_supported": true + }, + "ata_smart_attributes": { + "revision": 16, + "table": [ + { + "id": 1, + "name": "Raw_Read_Error_Rate", + "value": 200, + "worst": 111, + "thresh": 51, + "when_failed": "", + "flags": { + "value": 47, + "string": "POSR-K ", + "prefailure": true, + "updated_online": true, + "performance": true, + "error_rate": true, + "event_count": false, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 3, + "name": "Spin_Up_Time", + "value": 230, + "worst": 227, + "thresh": 21, + "when_failed": "", + "flags": { + "value": 39, + "string": "POS--K ", + "prefailure": true, + "updated_online": true, + "performance": true, + "error_rate": false, + "event_count": false, + "auto_keep": true + }, + "raw": { + "value": 7458, + "string": "7458" + } + }, + { + "id": 4, + "name": "Start_Stop_Count", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + 
}, + "raw": { + "value": 68, + "string": "68" + } + }, + { + "id": 5, + "name": "Reallocated_Sector_Ct", + "value": 188, + "worst": 188, + "thresh": 140, + "when_failed": "", + "flags": { + "value": 51, + "string": "PO--CK ", + "prefailure": true, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 387, + "string": "387" + } + }, + { + "id": 7, + "name": "Seek_Error_Rate", + "value": 200, + "worst": 200, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 46, + "string": "-OSR-K ", + "prefailure": false, + "updated_online": true, + "performance": true, + "error_rate": true, + "event_count": false, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 9, + "name": "Power_On_Hours", + "value": 49, + "worst": 49, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 37788, + "string": "37788" + } + }, + { + "id": 10, + "name": "Spin_Retry_Count", + "value": 100, + "worst": 253, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 11, + "name": "Calibration_Retry_Count", + "value": 100, + "worst": 253, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 12, + "name": "Power_Cycle_Count", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + 
"value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 68, + "string": "68" + } + }, + { + "id": 183, + "name": "Runtime_Bad_Block", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 192, + "name": "Power-Off_Retract_Count", + "value": 200, + "worst": 200, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 56, + "string": "56" + } + }, + { + "id": 193, + "name": "Load_Cycle_Count", + "value": 197, + "worst": 197, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 9462, + "string": "9462" + } + }, + { + "id": 194, + "name": "Temperature_Celsius", + "value": 116, + "worst": 101, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 34, + "string": "-O---K ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": false, + "auto_keep": true + }, + "raw": { + "value": 3342372, + "string": "36 (Min/Max 0/51)" + } + }, + { + "id": 196, + "name": "Reallocated_Event_Count", + "value": 191, + "worst": 191, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": 
true, + "auto_keep": true + }, + "raw": { + "value": 9, + "string": "9" + } + }, + { + "id": 197, + "name": "Current_Pending_Sector", + "value": 200, + "worst": 200, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 198, + "name": "Offline_Uncorrectable", + "value": 200, + "worst": 200, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 48, + "string": "----CK ", + "prefailure": false, + "updated_online": false, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 199, + "name": "UDMA_CRC_Error_Count", + "value": 200, + "worst": 200, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 200, + "name": "Multi_Zone_Error_Rate", + "value": 200, + "worst": 199, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 8, + "string": "---R-- ", + "prefailure": false, + "updated_online": false, + "performance": false, + "error_rate": true, + "event_count": false, + "auto_keep": false + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 241, + "name": "Total_LBAs_Written", + "value": 197, + "worst": 197, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 3920560799278, + "string": "3920560799278" + } + }, + { + "id": 242, + "name": "Total_LBAs_Read", + "value": 200, + 
"worst": 200, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 72684827907, + "string": "72684827907" + } + } + ] + }, + "power_on_time": { + "hours": 37788 + }, + "power_cycle_count": 68, + "temperature": { + "current": 36 + }, + "ata_smart_error_log": { + "summary": { + "revision": 1, + "count": 0 + } + }, + "ata_smart_self_test_log": { + "standard": { + "revision": 1, + "table": [ + { + "type": { + "value": 1, + "string": "Short offline" + }, + "status": { + "value": 25, + "string": "Aborted by host", + "remaining_percent": 90 + }, + "lifetime_hours": 35990 + }, + { + "type": { + "value": 1, + "string": "Short offline" + }, + "status": { + "value": 0, + "string": "Completed without error", + "passed": true + }, + "lifetime_hours": 35990 + }, + { + "type": { + "value": 1, + "string": "Short offline" + }, + "status": { + "value": 0, + "string": "Completed without error", + "passed": true + }, + "lifetime_hours": 3 + }, + { + "type": { + "value": 223, + "string": "Vendor (0xdf)" + }, + "status": { + "value": 0, + "string": "Completed without error", + "passed": true + }, + "lifetime_hours": 3 + }, + { + "type": { + "value": 1, + "string": "Short offline" + }, + "status": { + "value": 0, + "string": "Completed without error", + "passed": true + }, + "lifetime_hours": 1 + } + ], + "count": 5, + "error_count_total": 0, + "error_count_outdated": 0 + } + }, + "ata_smart_selective_self_test_log": { + "revision": 1, + "table": [ + { + "lba_min": 0, + "lba_max": 0, + "status": { + "value": 0, + "string": "Not_testing" + } + }, + { + "lba_min": 0, + "lba_max": 0, + "status": { + "value": 0, + "string": "Not_testing" + } + }, + { + "lba_min": 0, + "lba_max": 0, + "status": { + "value": 0, + "string": "Not_testing" + } + }, + { + "lba_min": 0, + "lba_max": 0, + "status": { + 
"value": 0, + "string": "Not_testing" + } + }, + { + "lba_min": 0, + "lba_max": 0, + "status": { + "value": 0, + "string": "Not_testing" + } + } + ], + "flags": { + "value": 0, + "remainder_scan_enabled": false + }, + "power_up_scan_resume_minutes": 0 + } +} diff --git a/webapp/backend/pkg/models/testdata/smart-scsi2.json b/webapp/backend/pkg/models/testdata/smart-scsi2.json new file mode 100644 index 0000000..26f9470 --- /dev/null +++ b/webapp/backend/pkg/models/testdata/smart-scsi2.json @@ -0,0 +1,88 @@ +{ + "json_format_version": [ + 0, + 1 + ], + "smartctl": { + "version": [ + 6, + 7 + ], + "platform_info": "x86_64-linux-4.4.0-138-generic", + "build_info": "(local build)", + "argv": [ + "smartctl", + "/dev/sdb", + "-ja" + ], + "exit_status": 0 + }, + "device": { + "name": "/dev/sdb", + "info_name": "/dev/sdb", + "type": "scsi", + "protocol": "SCSI" + }, + "vendor": "SEAGATE", + "product": "ST1200MM0088", + "model_name": "SEAGATE ST1200MM0088", + "revision": "N004", + "scsi_version": "SPC-4", + "user_capacity": { + "blocks": 2344225968, + "bytes": 1200243695616 + }, + "logical_block_size": 512, + "rotation_rate": 10500, + "form_factor": { + "scsi_value": 3, + "name": "2.5 inches" + }, + "serial_number": "Z4028VRY0000C810BZXB", + "device_type": { + "scsi_value": 0, + "name": "disk" + }, + "local_time": { + "time_t": 1545001755, + "asctime": "Sun Dec 16 17:09:15 2018 CST" + }, + "smart_status": { + "passed": true + }, + "format_status": { + "grown_defects_during_cert": "not_available", + "blocks_reassigned_during_format": "not_available", + "total_new_block_since_format": "not_available", + "power_on_minutes_since_format": "not_available" + }, + "temperature": { + "current": 31, + "drive_trip": 60 + }, + "scsi_grown_defect_list": 0, + "power_on_time": { + "hours": 5675, + "minutes": 39 + }, + "scsi_error_counter_log": { + "read": { + "errors_corrected_by_eccfast": 1410362924, + "errors_corrected_by_eccdelayed": 0, + "errors_corrected_by_rereads_rewrites": 0, + 
"total_errors_corrected": 1410362924, + "correction_algorithm_invocations": 0, + "gigabytes_processed": "386.568", + "total_uncorrected_errors": 0 + }, + "write": { + "errors_corrected_by_eccfast": 0, + "errors_corrected_by_eccdelayed": 0, + "errors_corrected_by_rereads_rewrites": 0, + "total_errors_corrected": 0, + "correction_algorithm_invocations": 0, + "gigabytes_processed": "806.827", + "total_uncorrected_errors": 0 + } + } +} diff --git a/webapp/backend/pkg/web/handler/get_device_details.go b/webapp/backend/pkg/web/handler/get_device_details.go index 8c7980c..d085519 100644 --- a/webapp/backend/pkg/web/handler/get_device_details.go +++ b/webapp/backend/pkg/web/handler/get_device_details.go @@ -16,7 +16,9 @@ func GetDeviceDetails(c *gin.Context) { Preload("SmartResults", func(db *gorm.DB) *gorm.DB { return db.Order("smarts.created_at DESC").Limit(40) }). - Preload("SmartResults.SmartAttributes"). + Preload("SmartResults.AtaAttributes"). + Preload("SmartResults.NvmeAttributes"). + Preload("SmartResults.ScsiAttributes"). Where("wwn = ?", c.Param("wwn")). First(&device)