You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
scrutiny/webapp/backend/pkg/models/measurements/smart.go

213 lines
11 KiB

package measurements
import (
"fmt"
"github.com/analogj/scrutiny/webapp/backend/pkg"
"github.com/analogj/scrutiny/webapp/backend/pkg/models/collector"
"github.com/analogj/scrutiny/webapp/backend/pkg/thresholds"
"log"
"strconv"
"strings"
"time"
)
// Smart is one SMART measurement for a single device: a handful of headline
// metrics plus the per-attribute details, keyed by attribute id.
type Smart struct {
// Date is the time the measurement was taken.
Date time.Time `json:"date"`
// DeviceWWN identifies the device; Flatten emits it as a tag.
DeviceWWN string `json:"device_wwn"` //(tag)
// DeviceProtocol decides which attribute type (ATA, NVMe or SCSI) is used for
// Attributes; Flatten also emits it as a tag.
DeviceProtocol string `json:"device_protocol"`
// Metrics: Flatten emits these as fields.
Temp int64 `json:"temp"`
PowerOnHours int64 `json:"power_on_hours"`
PowerCycleCount int64 `json:"power_cycle_count"`
// Attributes holds the protocol-specific attributes keyed by attribute id;
// Flatten merges each attribute's own flattened values into the fields.
Attributes map[string]SmartAttribute `json:"attrs"`
// Status accumulates device-level failure flags via pkg.DeviceStatusSet.
// It has no json tag.
Status pkg.DeviceStatus
}
// Flatten splits the measurement into a tag map and a field map.
//
// The tags are the device WWN and protocol. The fields are the three headline
// metrics plus every key/value pair produced by flattening each attribute in
// sm.Attributes. If an attribute yields a key that already exists in the
// fields, the later value overwrites the earlier one.
func (sm *Smart) Flatten() (tags map[string]string, fields map[string]interface{}) {
	tags = make(map[string]string, 2)
	tags["device_wwn"] = sm.DeviceWWN
	tags["device_protocol"] = sm.DeviceProtocol

	fields = make(map[string]interface{}, 3)
	fields["temp"] = sm.Temp
	fields["power_on_hours"] = sm.PowerOnHours
	fields["power_cycle_count"] = sm.PowerCycleCount

	// Merge each attribute's flattened representation into the shared field set.
	for _, attribute := range sm.Attributes {
		flattened := attribute.Flatten()
		for name, value := range flattened {
			fields[name] = value
		}
	}

	return tags, fields
}
// NewSmartFromInfluxDB rebuilds a Smart measurement from the flat key/value
// map of a single InfluxDB record.
//
// "_time", "device_wwn" and "device_protocol" are required. "temp",
// "power_on_hours" and "power_cycle_count" are copied onto the struct when
// present. Keys starting with "attr." are grouped by attribute id (the second
// dot-separated segment of the key) and passed to that attribute's Inflate
// method. All other keys are ignored.
//
// An error (and a nil *Smart) is returned when a required key is missing or
// has an unexpected type, when a metric is not an int64, or when an "attr."
// key is found for a device protocol other than ATA, NVMe or SCSI.
func NewSmartFromInfluxDB(attrs map[string]interface{}) (*Smart, error) {
	// Use checked type assertions: a missing key or a nil/mistyped value must
	// surface as an error to the caller rather than a panic.
	date, ok := attrs["_time"].(time.Time)
	if !ok {
		return nil, fmt.Errorf("smart record field %q: expected time.Time, got %T", "_time", attrs["_time"])
	}
	wwn, ok := attrs["device_wwn"].(string)
	if !ok {
		return nil, fmt.Errorf("smart record field %q: expected string, got %T", "device_wwn", attrs["device_wwn"])
	}
	protocol, ok := attrs["device_protocol"].(string)
	if !ok {
		return nil, fmt.Errorf("smart record field %q: expected string, got %T", "device_protocol", attrs["device_protocol"])
	}

	sm := Smart{
		Date:           date,
		DeviceWWN:      wwn,
		DeviceProtocol: protocol,
		Attributes:     map[string]SmartAttribute{},
	}

	for key, val := range attrs {
		switch key {
		case "temp":
			if sm.Temp, ok = val.(int64); !ok {
				return nil, fmt.Errorf("smart record field %q: expected int64, got %T", key, val)
			}
		case "power_on_hours":
			if sm.PowerOnHours, ok = val.(int64); !ok {
				return nil, fmt.Errorf("smart record field %q: expected int64, got %T", key, val)
			}
		case "power_cycle_count":
			if sm.PowerCycleCount, ok = val.(int64); !ok {
				return nil, fmt.Errorf("smart record field %q: expected int64, got %T", key, val)
			}
		default:
			// Anything that is not an attribute column is unknown; skip it.
			if !strings.HasPrefix(key, "attr.") {
				continue
			}

			// The "attr." prefix guarantees at least two segments, so index 1
			// always exists. Sibling keys of one attribute share this id.
			attributeId := strings.Split(key, ".")[1]

			attribute, found := sm.Attributes[attributeId]
			if !found {
				// First key seen for this attribute: create the
				// protocol-specific container.
				switch sm.DeviceProtocol {
				case pkg.DeviceProtocolAta:
					attribute = &SmartAtaAttribute{}
				case pkg.DeviceProtocolNvme:
					attribute = &SmartNvmeAttribute{}
				case pkg.DeviceProtocolScsi:
					attribute = &SmartScsiAttribute{}
				default:
					return nil, fmt.Errorf("Unknown Device Protocol: %s", sm.DeviceProtocol)
				}
				sm.Attributes[attributeId] = attribute
			}

			attribute.Inflate(key, val)
		}
	}

	log.Printf("Found Smart Device (%s) Attributes (%v)", sm.DeviceWWN, len(sm.Attributes))

	return &sm, nil
}
// FromCollectorSmartInfo fills sm from the SMART data the collector reported
// for the device identified by wwn.
//
// It copies the timestamp, protocol and headline metrics, flags the device as
// DeviceStatusFailedSmart when the SMART self-assessment did not pass, resets
// sm.Attributes, and then delegates to the ATA, NVMe or SCSI processor that
// matches the device protocol. For any other protocol the attribute map is
// left empty. The returned error is always nil.
func (sm *Smart) FromCollectorSmartInfo(wwn string, info collector.SmartInfo) error {
	// Identity and timestamp.
	sm.DeviceWWN = wwn
	sm.DeviceProtocol = info.Device.Protocol
	sm.Date = time.Unix(info.LocalTime.TimeT, 0)

	// Headline metrics.
	sm.Temp = info.Temperature.Current
	sm.PowerOnHours = info.PowerOnTime.Hours
	sm.PowerCycleCount = info.PowerCycleCount

	if passed := info.SmartStatus.Passed; !passed {
		sm.Status = pkg.DeviceStatusSet(sm.Status, pkg.DeviceStatusFailedSmart)
	}

	// Protocol-specific attribute data.
	sm.Attributes = map[string]SmartAttribute{}
	switch sm.DeviceProtocol {
	case pkg.DeviceProtocolAta:
		sm.ProcessAtaSmartInfo(info.AtaSmartAttributes.Table)
	case pkg.DeviceProtocolNvme:
		sm.ProcessNvmeSmartInfo(info.NvmeSmartHealthInformationLog)
	case pkg.DeviceProtocolScsi:
		sm.ProcessScsiSmartInfo(info.ScsiGrownDefectList, info.ScsiErrorCounterLog)
	}

	return nil
}
// ProcessAtaSmartInfo converts each row of the collector's ATA attribute table
// into a SmartAtaAttribute and stores it in sm.Attributes under the decimal
// attribute id.
//
// When thresholds.AtaMetadata has an entry with a Transform function for the
// attribute, the transformed value is computed as well. After the attribute's
// status has been populated, a status containing AttributeStatusFailedScrutiny
// marks the whole device as DeviceStatusFailedScrutiny.
//
// Existing entries in sm.Attributes are kept (rows with the same id overwrite
// them). The map is created on demand, so the method is safe to call on a
// Smart whose Attributes have not been initialised.
func (sm *Smart) ProcessAtaSmartInfo(tableItems []collector.AtaSmartAttributesTableItem) {
	// Assigning into a nil map panics; the NVMe and SCSI processors always
	// create their own map, so make this entry point equally self-sufficient.
	if sm.Attributes == nil {
		sm.Attributes = make(map[string]SmartAttribute, len(tableItems))
	}

	for _, collectorAttr := range tableItems {
		attrModel := &SmartAtaAttribute{
			AttributeId: collectorAttr.ID,
			Value:       collectorAttr.Value,
			Worst:       collectorAttr.Worst,
			Threshold:   collectorAttr.Thresh,
			RawValue:    collectorAttr.Raw.Value,
			RawString:   collectorAttr.Raw.String,
			WhenFailed:  collectorAttr.WhenFailed,
		}

		// Enrich the raw smartctl data with Scrutiny's own metadata rules.
		if smartMetadata, ok := thresholds.AtaMetadata[collectorAttr.ID]; ok && smartMetadata.Transform != nil {
			attrModel.TransformedValue = smartMetadata.Transform(attrModel.Value, attrModel.RawValue, attrModel.RawString)
		}

		attrModel.PopulateAttributeStatus()
		sm.Attributes[strconv.Itoa(collectorAttr.ID)] = attrModel

		if pkg.AttributeStatusHas(attrModel.Status, pkg.AttributeStatusFailedScrutiny) {
			sm.Status = pkg.DeviceStatusSet(sm.Status, pkg.DeviceStatusFailedScrutiny)
		}
	}
}
// ProcessNvmeSmartInfo replaces sm.Attributes with one SmartNvmeAttribute per
// counter of the NVMe SMART/health information log, keyed by attribute id.
//
// Each attribute has its status populated as it is created. If any resulting
// status contains AttributeStatusFailedScrutiny, the device is flagged as
// DeviceStatusFailedScrutiny.
//
// NOTE(review): a Threshold of -1 presumably means "no threshold" — confirm
// against SmartNvmeAttribute.PopulateAttributeStatus.
func (sm *Smart) ProcessNvmeSmartInfo(nvmeSmartHealthInformationLog collector.NvmeSmartHealthInformationLog) {
	health := nvmeSmartHealthInformationLog

	// Build the complete set first, then publish it on the receiver in one step.
	attributes := map[string]SmartAttribute{}
	attributes["critical_warning"] = (&SmartNvmeAttribute{AttributeId: "critical_warning", Value: health.CriticalWarning, Threshold: 0}).PopulateAttributeStatus()
	attributes["temperature"] = (&SmartNvmeAttribute{AttributeId: "temperature", Value: health.Temperature, Threshold: -1}).PopulateAttributeStatus()
	attributes["available_spare"] = (&SmartNvmeAttribute{AttributeId: "available_spare", Value: health.AvailableSpare, Threshold: health.AvailableSpareThreshold}).PopulateAttributeStatus()
	attributes["percentage_used"] = (&SmartNvmeAttribute{AttributeId: "percentage_used", Value: health.PercentageUsed, Threshold: 100}).PopulateAttributeStatus()
	attributes["data_units_read"] = (&SmartNvmeAttribute{AttributeId: "data_units_read", Value: health.DataUnitsRead, Threshold: -1}).PopulateAttributeStatus()
	attributes["data_units_written"] = (&SmartNvmeAttribute{AttributeId: "data_units_written", Value: health.DataUnitsWritten, Threshold: -1}).PopulateAttributeStatus()
	attributes["host_reads"] = (&SmartNvmeAttribute{AttributeId: "host_reads", Value: health.HostReads, Threshold: -1}).PopulateAttributeStatus()
	attributes["host_writes"] = (&SmartNvmeAttribute{AttributeId: "host_writes", Value: health.HostWrites, Threshold: -1}).PopulateAttributeStatus()
	attributes["controller_busy_time"] = (&SmartNvmeAttribute{AttributeId: "controller_busy_time", Value: health.ControllerBusyTime, Threshold: -1}).PopulateAttributeStatus()
	attributes["power_cycles"] = (&SmartNvmeAttribute{AttributeId: "power_cycles", Value: health.PowerCycles, Threshold: -1}).PopulateAttributeStatus()
	attributes["power_on_hours"] = (&SmartNvmeAttribute{AttributeId: "power_on_hours", Value: health.PowerOnHours, Threshold: -1}).PopulateAttributeStatus()
	attributes["unsafe_shutdowns"] = (&SmartNvmeAttribute{AttributeId: "unsafe_shutdowns", Value: health.UnsafeShutdowns, Threshold: -1}).PopulateAttributeStatus()
	attributes["media_errors"] = (&SmartNvmeAttribute{AttributeId: "media_errors", Value: health.MediaErrors, Threshold: 0}).PopulateAttributeStatus()
	attributes["num_err_log_entries"] = (&SmartNvmeAttribute{AttributeId: "num_err_log_entries", Value: health.NumErrLogEntries, Threshold: -1}).PopulateAttributeStatus()
	attributes["warning_temp_time"] = (&SmartNvmeAttribute{AttributeId: "warning_temp_time", Value: health.WarningTempTime, Threshold: -1}).PopulateAttributeStatus()
	attributes["critical_comp_time"] = (&SmartNvmeAttribute{AttributeId: "critical_comp_time", Value: health.CriticalCompTime, Threshold: -1}).PopulateAttributeStatus()
	sm.Attributes = attributes

	// Promote any attribute-level Scrutiny failure to the device status.
	for _, attribute := range sm.Attributes {
		if !pkg.AttributeStatusHas(attribute.GetStatus(), pkg.AttributeStatusFailedScrutiny) {
			continue
		}
		sm.Status = pkg.DeviceStatusSet(sm.Status, pkg.DeviceStatusFailedScrutiny)
	}
}
// ProcessScsiSmartInfo replaces sm.Attributes with SmartScsiAttribute entries
// built from the grown defect list count and the read/write error counter
// log, keyed by attribute id.
//
// Each attribute has its status populated as it is created. If any resulting
// status contains AttributeStatusFailedScrutiny, the device is flagged as
// DeviceStatusFailedScrutiny.
//
// NOTE(review): a Threshold of -1 presumably means "no threshold" — confirm
// against SmartScsiAttribute.PopulateAttributeStatus.
func (sm *Smart) ProcessScsiSmartInfo(defectGrownList int64, scsiErrorCounterLog collector.ScsiErrorCounterLog) {
	read := scsiErrorCounterLog.Read
	write := scsiErrorCounterLog.Write

	// Build the complete set first, then publish it on the receiver in one step.
	attributes := map[string]SmartAttribute{}
	attributes["scsi_grown_defect_list"] = (&SmartScsiAttribute{AttributeId: "scsi_grown_defect_list", Value: defectGrownList, Threshold: 0}).PopulateAttributeStatus()

	attributes["read_errors_corrected_by_eccfast"] = (&SmartScsiAttribute{AttributeId: "read_errors_corrected_by_eccfast", Value: read.ErrorsCorrectedByEccfast, Threshold: -1}).PopulateAttributeStatus()
	attributes["read_errors_corrected_by_eccdelayed"] = (&SmartScsiAttribute{AttributeId: "read_errors_corrected_by_eccdelayed", Value: read.ErrorsCorrectedByEccdelayed, Threshold: -1}).PopulateAttributeStatus()
	attributes["read_errors_corrected_by_rereads_rewrites"] = (&SmartScsiAttribute{AttributeId: "read_errors_corrected_by_rereads_rewrites", Value: read.ErrorsCorrectedByRereadsRewrites, Threshold: 0}).PopulateAttributeStatus()
	attributes["read_total_errors_corrected"] = (&SmartScsiAttribute{AttributeId: "read_total_errors_corrected", Value: read.TotalErrorsCorrected, Threshold: -1}).PopulateAttributeStatus()
	attributes["read_correction_algorithm_invocations"] = (&SmartScsiAttribute{AttributeId: "read_correction_algorithm_invocations", Value: read.CorrectionAlgorithmInvocations, Threshold: -1}).PopulateAttributeStatus()
	attributes["read_total_uncorrected_errors"] = (&SmartScsiAttribute{AttributeId: "read_total_uncorrected_errors", Value: read.TotalUncorrectedErrors, Threshold: 0}).PopulateAttributeStatus()

	attributes["write_errors_corrected_by_eccfast"] = (&SmartScsiAttribute{AttributeId: "write_errors_corrected_by_eccfast", Value: write.ErrorsCorrectedByEccfast, Threshold: -1}).PopulateAttributeStatus()
	attributes["write_errors_corrected_by_eccdelayed"] = (&SmartScsiAttribute{AttributeId: "write_errors_corrected_by_eccdelayed", Value: write.ErrorsCorrectedByEccdelayed, Threshold: -1}).PopulateAttributeStatus()
	attributes["write_errors_corrected_by_rereads_rewrites"] = (&SmartScsiAttribute{AttributeId: "write_errors_corrected_by_rereads_rewrites", Value: write.ErrorsCorrectedByRereadsRewrites, Threshold: 0}).PopulateAttributeStatus()
	attributes["write_total_errors_corrected"] = (&SmartScsiAttribute{AttributeId: "write_total_errors_corrected", Value: write.TotalErrorsCorrected, Threshold: -1}).PopulateAttributeStatus()
	attributes["write_correction_algorithm_invocations"] = (&SmartScsiAttribute{AttributeId: "write_correction_algorithm_invocations", Value: write.CorrectionAlgorithmInvocations, Threshold: -1}).PopulateAttributeStatus()
	attributes["write_total_uncorrected_errors"] = (&SmartScsiAttribute{AttributeId: "write_total_uncorrected_errors", Value: write.TotalUncorrectedErrors, Threshold: 0}).PopulateAttributeStatus()
	sm.Attributes = attributes

	// Promote any attribute-level Scrutiny failure to the device status.
	for _, attribute := range sm.Attributes {
		if !pkg.AttributeStatusHas(attribute.GetStatus(), pkg.AttributeStatusFailedScrutiny) {
			continue
		}
		sm.Status = pkg.DeviceStatusSet(sm.Status, pkg.DeviceStatusFailedScrutiny)
	}
}