WIP - start supporting NVME and SCSI drive smart data.
Added additional log message data for the smartctl exit code.
This commit is contained in:
@@ -13,6 +13,10 @@ type DeviceWrapper struct {
|
||||
Data []Device `json:"data"`
|
||||
}
|
||||
|
||||
// Protocol names that Device.DeviceProtocol is compared against by the
// IsAta, IsScsi and IsNvme helpers.
const (
	DeviceProtocolAta  = "ATA"
	DeviceProtocolScsi = "SCSI"
	DeviceProtocolNvme = "NVMe"
)
|
||||
|
||||
type Device struct {
|
||||
//GORM attributes, see: http://gorm.io/docs/conventions.html
|
||||
CreatedAt time.Time
|
||||
@@ -32,17 +36,30 @@ type Device struct {
|
||||
Capacity int64 `json:"capacity"`
|
||||
FormFactor string `json:"form_factor"`
|
||||
SmartSupport bool `json:"smart_support"`
|
||||
DeviceProtocol string `json:"device_protocol"`
|
||||
|
||||
SmartResults []Smart `gorm:"foreignkey:DeviceWWN" json:"smart_results"`
|
||||
}
|
||||
|
||||
func (dv *Device) IsAta() bool {
|
||||
return dv.DeviceProtocol == DeviceProtocolAta
|
||||
}
|
||||
|
||||
func (dv *Device) IsScsi() bool {
|
||||
return dv.DeviceProtocol == DeviceProtocolScsi
|
||||
}
|
||||
|
||||
func (dv *Device) IsNvme() bool {
|
||||
return dv.DeviceProtocol == DeviceProtocolNvme
|
||||
}
|
||||
|
||||
//This method requires a device with an array of SmartResults.
|
||||
//It will remove all SmartResults other than the first (the latest one)
|
||||
//All removed SmartResults, will be processed, grouping SmartAttribute by attribute_id
|
||||
//All removed SmartResults, will be processed, grouping SmartAtaAttribute by attribute_id
|
||||
// and adding them to an array called History.
|
||||
func (dv *Device) SquashHistory() error {
|
||||
if len(dv.SmartResults) <= 1 {
|
||||
return nil //no history found. ignore
|
||||
return nil //no ataHistory found. ignore
|
||||
}
|
||||
|
||||
latestSmartResultSlice := dv.SmartResults[0:1]
|
||||
@@ -51,48 +68,94 @@ func (dv *Device) SquashHistory() error {
|
||||
//re-assign the latest slice to the SmartResults field
|
||||
dv.SmartResults = latestSmartResultSlice
|
||||
|
||||
//process the historical slice
|
||||
history := map[int][]SmartAttribute{}
|
||||
for _, smartResult := range historicalSmartResultSlice {
|
||||
for _, smartAttribute := range smartResult.SmartAttributes {
|
||||
if _, ok := history[smartAttribute.AttributeId]; !ok {
|
||||
history[smartAttribute.AttributeId] = []SmartAttribute{}
|
||||
//process the historical slice for ATA data
|
||||
if len(dv.SmartResults[0].AtaAttributes) > 0 {
|
||||
ataHistory := map[int][]SmartAtaAttribute{}
|
||||
for _, smartResult := range historicalSmartResultSlice {
|
||||
for _, smartAttribute := range smartResult.AtaAttributes {
|
||||
if _, ok := ataHistory[smartAttribute.AttributeId]; !ok {
|
||||
ataHistory[smartAttribute.AttributeId] = []SmartAtaAttribute{}
|
||||
}
|
||||
ataHistory[smartAttribute.AttributeId] = append(ataHistory[smartAttribute.AttributeId], smartAttribute)
|
||||
}
|
||||
}
|
||||
|
||||
//now assign the historical slices to the AtaAttributes in the latest SmartResults
|
||||
for sandx, smartAttribute := range dv.SmartResults[0].AtaAttributes {
|
||||
if attributeHistory, ok := ataHistory[smartAttribute.AttributeId]; ok {
|
||||
dv.SmartResults[0].AtaAttributes[sandx].History = attributeHistory
|
||||
}
|
||||
history[smartAttribute.AttributeId] = append(history[smartAttribute.AttributeId], smartAttribute)
|
||||
}
|
||||
}
|
||||
|
||||
//now assign the historical slices to the SmartAttributes in the latest SmartResults
|
||||
for sandx, smartAttribute := range dv.SmartResults[0].SmartAttributes {
|
||||
if attributeHistory, ok := history[smartAttribute.AttributeId]; ok {
|
||||
dv.SmartResults[0].SmartAttributes[sandx].History = attributeHistory
|
||||
//process the historical slice for Nvme data
|
||||
if len(dv.SmartResults[0].NvmeAttributes) > 0 {
|
||||
nvmeHistory := map[string][]SmartNvmeAttribute{}
|
||||
for _, smartResult := range historicalSmartResultSlice {
|
||||
for _, smartAttribute := range smartResult.NvmeAttributes {
|
||||
if _, ok := nvmeHistory[smartAttribute.AttributeId]; !ok {
|
||||
nvmeHistory[smartAttribute.AttributeId] = []SmartNvmeAttribute{}
|
||||
}
|
||||
nvmeHistory[smartAttribute.AttributeId] = append(nvmeHistory[smartAttribute.AttributeId], smartAttribute)
|
||||
}
|
||||
}
|
||||
|
||||
//now assign the historical slices to the NvmeAttributes in the latest SmartResults
|
||||
for sandx, smartAttribute := range dv.SmartResults[0].NvmeAttributes {
|
||||
if attributeHistory, ok := nvmeHistory[smartAttribute.AttributeId]; ok {
|
||||
dv.SmartResults[0].NvmeAttributes[sandx].History = attributeHistory
|
||||
}
|
||||
}
|
||||
}
|
||||
//process the historical slice for Scsi data
|
||||
if len(dv.SmartResults[0].ScsiAttributes) > 0 {
|
||||
scsiHistory := map[string][]SmartScsiAttribute{}
|
||||
for _, smartResult := range historicalSmartResultSlice {
|
||||
for _, smartAttribute := range smartResult.ScsiAttributes {
|
||||
if _, ok := scsiHistory[smartAttribute.AttributeId]; !ok {
|
||||
scsiHistory[smartAttribute.AttributeId] = []SmartScsiAttribute{}
|
||||
}
|
||||
scsiHistory[smartAttribute.AttributeId] = append(scsiHistory[smartAttribute.AttributeId], smartAttribute)
|
||||
}
|
||||
}
|
||||
|
||||
//now assign the historical slices to the ScsiAttributes in the latest SmartResults
|
||||
for sandx, smartAttribute := range dv.SmartResults[0].ScsiAttributes {
|
||||
if attributeHistory, ok := scsiHistory[smartAttribute.AttributeId]; ok {
|
||||
dv.SmartResults[0].ScsiAttributes[sandx].History = attributeHistory
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (dv *Device) ApplyMetadataRules() error {
|
||||
if !dv.IsAta() {
|
||||
// Scrutiny Observed thresholds not yet available for NVME or SCSI drives
|
||||
// since most SMART attributes are not present and BackBlaze data not available
|
||||
return nil
|
||||
}
|
||||
|
||||
//embed metadata in the latest smart attributes object
|
||||
if len(dv.SmartResults) > 0 {
|
||||
for ndx, attr := range dv.SmartResults[0].SmartAttributes {
|
||||
if len(dv.SmartResults) > 0 && len(dv.SmartResults[0].AtaAttributes) > 0 {
|
||||
for ndx, attr := range dv.SmartResults[0].AtaAttributes {
|
||||
if strings.ToUpper(attr.WhenFailed) == SmartWhenFailedFailingNow {
|
||||
//this attribute has previously failed
|
||||
dv.SmartResults[0].SmartAttributes[ndx].Status = SmartAttributeStatusFailed
|
||||
dv.SmartResults[0].SmartAttributes[ndx].StatusReason = "Attribute is failing manufacturer SMART threshold"
|
||||
dv.SmartResults[0].AtaAttributes[ndx].Status = SmartAttributeStatusFailed
|
||||
dv.SmartResults[0].AtaAttributes[ndx].StatusReason = "Attribute is failing manufacturer SMART threshold"
|
||||
|
||||
} else if strings.ToUpper(attr.WhenFailed) == SmartWhenFailedInThePast {
|
||||
dv.SmartResults[0].SmartAttributes[ndx].Status = SmartAttributeStatusWarning
|
||||
dv.SmartResults[0].SmartAttributes[ndx].StatusReason = "Attribute has previously failed manufacturer SMART threshold"
|
||||
dv.SmartResults[0].AtaAttributes[ndx].Status = SmartAttributeStatusWarning
|
||||
dv.SmartResults[0].AtaAttributes[ndx].StatusReason = "Attribute has previously failed manufacturer SMART threshold"
|
||||
}
|
||||
|
||||
if smartMetadata, ok := metadata.AtaSmartAttributes[attr.AttributeId]; ok {
|
||||
dv.SmartResults[0].SmartAttributes[ndx].MetadataObservedThresholdStatus(smartMetadata)
|
||||
dv.SmartResults[0].AtaAttributes[ndx].MetadataObservedThresholdStatus(smartMetadata)
|
||||
}
|
||||
|
||||
//check if status is blank, set to "passed"
|
||||
if len(dv.SmartResults[0].SmartAttributes[ndx].Status) == 0 {
|
||||
dv.SmartResults[0].SmartAttributes[ndx].Status = SmartAttributeStatusPassed
|
||||
if len(dv.SmartResults[0].AtaAttributes[ndx].Status) == 0 {
|
||||
dv.SmartResults[0].AtaAttributes[ndx].Status = SmartAttributeStatusPassed
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -105,6 +168,10 @@ func (dv *Device) UpdateFromCollectorSmartInfo(info collector.SmartInfo) error {
|
||||
dv.RotationSpeed = info.RotationRate
|
||||
dv.Capacity = info.UserCapacity.Bytes
|
||||
dv.FormFactor = info.FormFactor.Name
|
||||
//dv.SmartSupport =
|
||||
dv.DeviceProtocol = info.Device.Protocol
|
||||
if len(info.Vendor) > 0 {
|
||||
dv.Manufacturer = info.Vendor
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -24,7 +24,9 @@ type Smart struct {
|
||||
PowerOnHours int64 `json:"power_on_hours"`
|
||||
PowerCycleCount int64 `json:"power_cycle_count"`
|
||||
|
||||
SmartAttributes []SmartAttribute `json:"smart_attributes" gorm:"foreignkey:SmartId"`
|
||||
AtaAttributes []SmartAtaAttribute `json:"ata_attributes" gorm:"foreignkey:SmartId"`
|
||||
NvmeAttributes []SmartNvmeAttribute `json:"nvme_attributes" gorm:"foreignkey:SmartId"`
|
||||
ScsiAttributes []SmartScsiAttribute `json:"scsi_attributes" gorm:"foreignkey:SmartId"`
|
||||
}
|
||||
|
||||
func (sm *Smart) FromCollectorSmartInfo(wwn string, info collector.SmartInfo) error {
|
||||
@@ -36,9 +38,27 @@ func (sm *Smart) FromCollectorSmartInfo(wwn string, info collector.SmartInfo) er
|
||||
sm.PowerCycleCount = info.PowerCycleCount
|
||||
sm.PowerOnHours = info.PowerOnTime.Hours
|
||||
|
||||
sm.SmartAttributes = []SmartAttribute{}
|
||||
// process ATA/NVME/SCSI protocol data
|
||||
if info.Device.Protocol == DeviceProtocolAta {
|
||||
sm.ProcessAtaSmartInfo(info)
|
||||
} else if info.Device.Protocol == DeviceProtocolNvme {
|
||||
sm.ProcessNvmeSmartInfo(info)
|
||||
} else if info.Device.Protocol == DeviceProtocolScsi {
|
||||
sm.ProcessScsiSmartInfo(info)
|
||||
}
|
||||
|
||||
if info.SmartStatus.Passed {
|
||||
sm.SmartStatus = "passed"
|
||||
} else {
|
||||
sm.SmartStatus = "failed"
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (sm *Smart) ProcessAtaSmartInfo(info collector.SmartInfo) {
|
||||
sm.AtaAttributes = []SmartAtaAttribute{}
|
||||
for _, collectorAttr := range info.AtaSmartAttributes.Table {
|
||||
attrModel := SmartAttribute{
|
||||
attrModel := SmartAtaAttribute{
|
||||
AttributeId: collectorAttr.ID,
|
||||
Name: collectorAttr.Name,
|
||||
Value: collectorAttr.Value,
|
||||
@@ -56,95 +76,45 @@ func (sm *Smart) FromCollectorSmartInfo(wwn string, info collector.SmartInfo) er
|
||||
attrModel.TransformedValue = smartMetadata.Transform(attrModel.Value, attrModel.RawValue, attrModel.RawString)
|
||||
}
|
||||
}
|
||||
sm.SmartAttributes = append(sm.SmartAttributes, attrModel)
|
||||
sm.AtaAttributes = append(sm.AtaAttributes, attrModel)
|
||||
}
|
||||
|
||||
if info.SmartStatus.Passed {
|
||||
sm.SmartStatus = "passed"
|
||||
} else {
|
||||
sm.SmartStatus = "failed"
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
const SmartAttributeStatusPassed = "passed"
|
||||
const SmartAttributeStatusFailed = "failed"
|
||||
const SmartAttributeStatusWarning = "warn"
|
||||
|
||||
type SmartAttribute struct {
|
||||
gorm.Model
|
||||
|
||||
SmartId int `json:"smart_id"`
|
||||
Smart Device `json:"-" gorm:"foreignkey:SmartId"` // use SmartId as foreign key
|
||||
|
||||
AttributeId int `json:"attribute_id"`
|
||||
Name string `json:"name"`
|
||||
Value int `json:"value"`
|
||||
Worst int `json:"worst"`
|
||||
Threshold int `json:"thresh"`
|
||||
RawValue int64 `json:"raw_value"`
|
||||
RawString string `json:"raw_string"`
|
||||
WhenFailed string `json:"when_failed"`
|
||||
|
||||
TransformedValue int64 `json:"transformed_value"`
|
||||
Status string `gorm:"-" json:"status,omitempty"`
|
||||
StatusReason string `gorm:"-" json:"status_reason,omitempty"`
|
||||
FailureRate float64 `gorm:"-" json:"failure_rate,omitempty"`
|
||||
History []SmartAttribute `gorm:"-" json:"history,omitempty"`
|
||||
func (sm *Smart) ProcessNvmeSmartInfo(info collector.SmartInfo) {
|
||||
sm.NvmeAttributes = []SmartNvmeAttribute{
|
||||
{AttributeId: "critical_warning", Name: "Critical Warning", Value: info.NvmeSmartHealthInformationLog.CriticalWarning},
|
||||
{AttributeId: "temperature", Name: "Temperature", Value: info.NvmeSmartHealthInformationLog.Temperature},
|
||||
{AttributeId: "available_spare", Name: "Available Spare", Value: info.NvmeSmartHealthInformationLog.AvailableSpare, Threshold: info.NvmeSmartHealthInformationLog.AvailableSpareThreshold},
|
||||
{AttributeId: "percentage_used", Name: "Percentage Used", Value: info.NvmeSmartHealthInformationLog.PercentageUsed},
|
||||
{AttributeId: "data_units_read", Name: "Data Units Read", Value: info.NvmeSmartHealthInformationLog.DataUnitsRead},
|
||||
{AttributeId: "data_units_written", Name: "Data Units Written", Value: info.NvmeSmartHealthInformationLog.DataUnitsWritten},
|
||||
{AttributeId: "host_reads", Name: "Host Reads", Value: info.NvmeSmartHealthInformationLog.HostReads},
|
||||
{AttributeId: "host_writes", Name: "Host Writes", Value: info.NvmeSmartHealthInformationLog.HostWrites},
|
||||
{AttributeId: "controller_busy_time", Name: "Controller Busy Time", Value: info.NvmeSmartHealthInformationLog.ControllerBusyTime},
|
||||
{AttributeId: "power_cycles", Name: "Power Cycles", Value: info.NvmeSmartHealthInformationLog.PowerCycles},
|
||||
{AttributeId: "power_on_hours", Name: "Power on Hours", Value: info.NvmeSmartHealthInformationLog.PowerOnHours},
|
||||
{AttributeId: "unsafe_shutdowns", Name: "Unsafe Shutdowns", Value: info.NvmeSmartHealthInformationLog.UnsafeShutdowns},
|
||||
{AttributeId: "media_errors", Name: "Media Errors", Value: info.NvmeSmartHealthInformationLog.MediaErrors},
|
||||
{AttributeId: "num_err_log_entries", Name: "Numb Err Log Entries", Value: info.NvmeSmartHealthInformationLog.NumErrLogEntries},
|
||||
{AttributeId: "warning_temp_time", Name: "Warning Temp Time", Value: info.NvmeSmartHealthInformationLog.WarningTempTime},
|
||||
{AttributeId: "critical_comp_time", Name: "Critical CompTime", Value: info.NvmeSmartHealthInformationLog.CriticalCompTime},
|
||||
}
|
||||
}
|
||||
|
||||
// compare the attribute (raw, normalized, transformed) value to observed thresholds, and update status if necessary
|
||||
func (sa *SmartAttribute) MetadataObservedThresholdStatus(smartMetadata metadata.AtaSmartAttribute) {
|
||||
//TODO: multiple rules
|
||||
// try to predict the failure rates for observed thresholds that have 0 failure rate and error bars.
|
||||
// - if the attribute is critical
|
||||
// - the failure rate is over 10 - set to failed
|
||||
// - the attribute does not match any threshold, set to warn
|
||||
// - if the attribute is not critical
|
||||
// - if failure rate is above 20 - set to failed
|
||||
// - if failure rate is above 10 but below 20 - set to warn
|
||||
|
||||
//update the smart attribute status based on Observed thresholds.
|
||||
var value int64
|
||||
if smartMetadata.DisplayType == metadata.AtaSmartAttributeDisplayTypeNormalized {
|
||||
value = int64(sa.Value)
|
||||
} else if smartMetadata.DisplayType == metadata.AtaSmartAttributeDisplayTypeTransformed {
|
||||
value = sa.TransformedValue
|
||||
} else {
|
||||
value = sa.RawValue
|
||||
func (sm *Smart) ProcessScsiSmartInfo(info collector.SmartInfo) {
|
||||
sm.ScsiAttributes = []SmartScsiAttribute{
|
||||
{AttributeId: "scsi_grown_defect_list", Name: "Grown Defect List", Value: info.ScsiGrownDefectList},
|
||||
{AttributeId: "read.errors_corrected_by_eccfast", Name: "Read Errors Corrected by ECC Fast", Value: info.ScsiErrorCounterLog.Read.ErrorsCorrectedByEccfast},
|
||||
{AttributeId: "read.errors_corrected_by_eccdelayed", Name: "Read Errors Corrected by ECC Delayed", Value: info.ScsiErrorCounterLog.Read.ErrorsCorrectedByEccdelayed},
|
||||
{AttributeId: "read.errors_corrected_by_rereads_rewrites", Name: "Read Errors Corrected by ReReads/ReWrites", Value: info.ScsiErrorCounterLog.Read.ErrorsCorrectedByRereadsRewrites},
|
||||
{AttributeId: "read.total_errors_corrected", Name: "Read Total Errors Corrected", Value: info.ScsiErrorCounterLog.Read.TotalErrorsCorrected},
|
||||
{AttributeId: "read.correction_algorithm_invocations", Name: "Read Correction Algorithm Invocations", Value: info.ScsiErrorCounterLog.Read.CorrectionAlgorithmInvocations},
|
||||
{AttributeId: "read.total_uncorrected_errors", Name: "Read Total Uncorrected Errors", Value: info.ScsiErrorCounterLog.Read.TotalUncorrectedErrors},
|
||||
{AttributeId: "write.errors_corrected_by_eccfast", Name: "Write Errors Corrected by ECC Fast", Value: info.ScsiErrorCounterLog.Write.ErrorsCorrectedByEccfast},
|
||||
{AttributeId: "write.errors_corrected_by_eccdelayed", Name: "Write Errors Corrected by ECC Delayed", Value: info.ScsiErrorCounterLog.Write.ErrorsCorrectedByEccdelayed},
|
||||
{AttributeId: "write.errors_corrected_by_rereads_rewrites", Name: "Write Errors Corrected by ReReads/ReWrites", Value: info.ScsiErrorCounterLog.Write.ErrorsCorrectedByRereadsRewrites},
|
||||
{AttributeId: "write.total_errors_corrected", Name: "Write Total Errors Corrected", Value: info.ScsiErrorCounterLog.Write.TotalErrorsCorrected},
|
||||
{AttributeId: "write.correction_algorithm_invocations", Name: "Write Correction Algorithm Invocations", Value: info.ScsiErrorCounterLog.Write.CorrectionAlgorithmInvocations},
|
||||
{AttributeId: "write.total_uncorrected_errors", Name: "Write Total Uncorrected Errors", Value: info.ScsiErrorCounterLog.Write.TotalUncorrectedErrors},
|
||||
}
|
||||
|
||||
for _, obsThresh := range smartMetadata.ObservedThresholds {
|
||||
|
||||
//check if "value" is in this bucket
|
||||
if ((obsThresh.Low == obsThresh.High) && value == obsThresh.Low) ||
|
||||
(obsThresh.Low < value && value <= obsThresh.High) {
|
||||
sa.FailureRate = obsThresh.AnnualFailureRate
|
||||
|
||||
if smartMetadata.Critical {
|
||||
if obsThresh.AnnualFailureRate >= 0.10 {
|
||||
sa.Status = SmartAttributeStatusFailed
|
||||
sa.StatusReason = "Observed Failure Rate for Critical Attribute is greater than 10%"
|
||||
}
|
||||
} else {
|
||||
if obsThresh.AnnualFailureRate >= 0.20 {
|
||||
sa.Status = SmartAttributeStatusFailed
|
||||
sa.StatusReason = "Observed Failure Rate for Attribute is greater than 20%"
|
||||
} else if obsThresh.AnnualFailureRate >= 0.10 {
|
||||
sa.Status = SmartAttributeStatusWarning
|
||||
sa.StatusReason = "Observed Failure Rate for Attribute is greater than 10%"
|
||||
}
|
||||
}
|
||||
|
||||
//we've found the correct bucket, we can drop out of this loop
|
||||
return
|
||||
}
|
||||
}
|
||||
// no bucket found
|
||||
if smartMetadata.Critical {
|
||||
sa.Status = SmartAttributeStatusWarning
|
||||
sa.StatusReason = "Could not determine Observed Failure Rate for Critical Attribute"
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
@@ -0,0 +1,88 @@
|
||||
package db
|
||||
|
||||
import (
|
||||
"github.com/analogj/scrutiny/webapp/backend/pkg/metadata"
|
||||
"github.com/jinzhu/gorm"
|
||||
)
|
||||
|
||||
// Status strings assigned to an attribute's Status field.
const (
	SmartAttributeStatusPassed  = "passed"
	SmartAttributeStatusFailed  = "failed"
	SmartAttributeStatusWarning = "warn"
)
|
||||
|
||||
type SmartAtaAttribute struct {
|
||||
gorm.Model
|
||||
|
||||
SmartId int `json:"smart_id"`
|
||||
Smart Device `json:"-" gorm:"foreignkey:SmartId"` // use SmartId as foreign key
|
||||
|
||||
AttributeId int `json:"attribute_id"`
|
||||
Name string `json:"name"`
|
||||
Value int `json:"value"`
|
||||
Worst int `json:"worst"`
|
||||
Threshold int `json:"thresh"`
|
||||
RawValue int64 `json:"raw_value"`
|
||||
RawString string `json:"raw_string"`
|
||||
WhenFailed string `json:"when_failed"`
|
||||
|
||||
TransformedValue int64 `json:"transformed_value"`
|
||||
Status string `gorm:"-" json:"status,omitempty"`
|
||||
StatusReason string `gorm:"-" json:"status_reason,omitempty"`
|
||||
FailureRate float64 `gorm:"-" json:"failure_rate,omitempty"`
|
||||
History []SmartAtaAttribute `gorm:"-" json:"history,omitempty"`
|
||||
}
|
||||
|
||||
// compare the attribute (raw, normalized, transformed) value to observed thresholds, and update status if necessary
|
||||
func (sa *SmartAtaAttribute) MetadataObservedThresholdStatus(smartMetadata metadata.AtaSmartAttribute) {
|
||||
//TODO: multiple rules
|
||||
// try to predict the failure rates for observed thresholds that have 0 failure rate and error bars.
|
||||
// - if the attribute is critical
|
||||
// - the failure rate is over 10 - set to failed
|
||||
// - the attribute does not match any threshold, set to warn
|
||||
// - if the attribute is not critical
|
||||
// - if failure rate is above 20 - set to failed
|
||||
// - if failure rate is above 10 but below 20 - set to warn
|
||||
|
||||
//update the smart attribute status based on Observed thresholds.
|
||||
var value int64
|
||||
if smartMetadata.DisplayType == metadata.AtaSmartAttributeDisplayTypeNormalized {
|
||||
value = int64(sa.Value)
|
||||
} else if smartMetadata.DisplayType == metadata.AtaSmartAttributeDisplayTypeTransformed {
|
||||
value = sa.TransformedValue
|
||||
} else {
|
||||
value = sa.RawValue
|
||||
}
|
||||
|
||||
for _, obsThresh := range smartMetadata.ObservedThresholds {
|
||||
|
||||
//check if "value" is in this bucket
|
||||
if ((obsThresh.Low == obsThresh.High) && value == obsThresh.Low) ||
|
||||
(obsThresh.Low < value && value <= obsThresh.High) {
|
||||
sa.FailureRate = obsThresh.AnnualFailureRate
|
||||
|
||||
if smartMetadata.Critical {
|
||||
if obsThresh.AnnualFailureRate >= 0.10 {
|
||||
sa.Status = SmartAttributeStatusFailed
|
||||
sa.StatusReason = "Observed Failure Rate for Critical Attribute is greater than 10%"
|
||||
}
|
||||
} else {
|
||||
if obsThresh.AnnualFailureRate >= 0.20 {
|
||||
sa.Status = SmartAttributeStatusFailed
|
||||
sa.StatusReason = "Observed Failure Rate for Attribute is greater than 20%"
|
||||
} else if obsThresh.AnnualFailureRate >= 0.10 {
|
||||
sa.Status = SmartAttributeStatusWarning
|
||||
sa.StatusReason = "Observed Failure Rate for Attribute is greater than 10%"
|
||||
}
|
||||
}
|
||||
|
||||
//we've found the correct bucket, we can drop out of this loop
|
||||
return
|
||||
}
|
||||
}
|
||||
// no bucket found
|
||||
if smartMetadata.Critical {
|
||||
sa.Status = SmartAttributeStatusWarning
|
||||
sa.StatusReason = "Could not determine Observed Failure Rate for Critical Attribute"
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
@@ -0,0 +1,21 @@
|
||||
package db
|
||||
|
||||
import "github.com/jinzhu/gorm"
|
||||
|
||||
type SmartNvmeAttribute struct {
|
||||
gorm.Model
|
||||
|
||||
SmartId int `json:"smart_id"`
|
||||
Smart Device `json:"-" gorm:"foreignkey:SmartId"` // use SmartId as foreign key
|
||||
|
||||
AttributeId string `json:"attribute_id"` //json string from smartctl
|
||||
Name string `json:"name"`
|
||||
Value int `json:"value"`
|
||||
Threshold int `json:"thresh"`
|
||||
|
||||
TransformedValue int64 `json:"transformed_value"`
|
||||
Status string `gorm:"-" json:"status,omitempty"`
|
||||
StatusReason string `gorm:"-" json:"status_reason,omitempty"`
|
||||
FailureRate float64 `gorm:"-" json:"failure_rate,omitempty"`
|
||||
History []SmartNvmeAttribute `gorm:"-" json:"history,omitempty"`
|
||||
}
|
||||
@@ -0,0 +1,21 @@
|
||||
package db
|
||||
|
||||
import "github.com/jinzhu/gorm"
|
||||
|
||||
type SmartScsiAttribute struct {
|
||||
gorm.Model
|
||||
|
||||
SmartId int `json:"smart_id"`
|
||||
Smart Device `json:"-" gorm:"foreignkey:SmartId"` // use SmartId as foreign key
|
||||
|
||||
AttributeId string `json:"attribute_id"` //json string from smartctl
|
||||
Name string `json:"name"`
|
||||
Value int `json:"value"`
|
||||
Threshold int `json:"thresh"`
|
||||
|
||||
TransformedValue int64 `json:"transformed_value"`
|
||||
Status string `gorm:"-" json:"status,omitempty"`
|
||||
StatusReason string `gorm:"-" json:"status_reason,omitempty"`
|
||||
FailureRate float64 `gorm:"-" json:"failure_rate,omitempty"`
|
||||
History []SmartScsiAttribute `gorm:"-" json:"history,omitempty"`
|
||||
}
|
||||
@@ -29,14 +29,127 @@ func TestFromCollectorSmartInfo(t *testing.T) {
|
||||
|
||||
//assert
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, smartMdl.DeviceWWN, "WWN-test")
|
||||
require.Equal(t, smartMdl.SmartStatus, "passed")
|
||||
require.Equal(t, "WWN-test", smartMdl.DeviceWWN)
|
||||
require.Equal(t, "passed", smartMdl.SmartStatus)
|
||||
require.Equal(t, 18, len(smartMdl.AtaAttributes))
|
||||
require.Equal(t, 0, len(smartMdl.NvmeAttributes))
|
||||
require.Equal(t, 0, len(smartMdl.ScsiAttributes))
|
||||
|
||||
//check that temperature was correctly parsed
|
||||
for _, attr := range smartMdl.SmartAttributes {
|
||||
for _, attr := range smartMdl.AtaAttributes {
|
||||
if attr.AttributeId == 194 {
|
||||
require.Equal(t, int64(163210330144), attr.RawValue)
|
||||
require.Equal(t, int64(32), attr.TransformedValue)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestFromCollectorSmartInfo_Fail(t *testing.T) {
|
||||
//setup
|
||||
smartDataFile, err := os.Open("../testdata/smart-fail.json")
|
||||
require.NoError(t, err)
|
||||
defer smartDataFile.Close()
|
||||
|
||||
var smartJson collector.SmartInfo
|
||||
|
||||
smartDataBytes, err := ioutil.ReadAll(smartDataFile)
|
||||
require.NoError(t, err)
|
||||
err = json.Unmarshal(smartDataBytes, &smartJson)
|
||||
require.NoError(t, err)
|
||||
|
||||
//test
|
||||
smartMdl := db.Smart{}
|
||||
err = smartMdl.FromCollectorSmartInfo("WWN-test", smartJson)
|
||||
|
||||
//assert
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, "WWN-test", smartMdl.DeviceWWN)
|
||||
require.Equal(t, "failed", smartMdl.SmartStatus)
|
||||
require.Equal(t, 0, len(smartMdl.AtaAttributes))
|
||||
require.Equal(t, 0, len(smartMdl.NvmeAttributes))
|
||||
require.Equal(t, 0, len(smartMdl.ScsiAttributes))
|
||||
}
|
||||
|
||||
func TestFromCollectorSmartInfo_Fail2(t *testing.T) {
|
||||
//setup
|
||||
smartDataFile, err := os.Open("../testdata/smart-fail2.json")
|
||||
require.NoError(t, err)
|
||||
defer smartDataFile.Close()
|
||||
|
||||
var smartJson collector.SmartInfo
|
||||
|
||||
smartDataBytes, err := ioutil.ReadAll(smartDataFile)
|
||||
require.NoError(t, err)
|
||||
err = json.Unmarshal(smartDataBytes, &smartJson)
|
||||
require.NoError(t, err)
|
||||
|
||||
//test
|
||||
smartMdl := db.Smart{}
|
||||
err = smartMdl.FromCollectorSmartInfo("WWN-test", smartJson)
|
||||
|
||||
//assert
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, "WWN-test", smartMdl.DeviceWWN)
|
||||
require.Equal(t, "failed", smartMdl.SmartStatus)
|
||||
require.Equal(t, 17, len(smartMdl.AtaAttributes))
|
||||
require.Equal(t, 0, len(smartMdl.NvmeAttributes))
|
||||
require.Equal(t, 0, len(smartMdl.ScsiAttributes))
|
||||
}
|
||||
|
||||
func TestFromCollectorSmartInfo_Nvme(t *testing.T) {
|
||||
//setup
|
||||
smartDataFile, err := os.Open("../testdata/smart-nvme.json")
|
||||
require.NoError(t, err)
|
||||
defer smartDataFile.Close()
|
||||
|
||||
var smartJson collector.SmartInfo
|
||||
|
||||
smartDataBytes, err := ioutil.ReadAll(smartDataFile)
|
||||
require.NoError(t, err)
|
||||
err = json.Unmarshal(smartDataBytes, &smartJson)
|
||||
require.NoError(t, err)
|
||||
|
||||
//test
|
||||
smartMdl := db.Smart{}
|
||||
err = smartMdl.FromCollectorSmartInfo("WWN-test", smartJson)
|
||||
|
||||
//assert
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, "WWN-test", smartMdl.DeviceWWN)
|
||||
require.Equal(t, "passed", smartMdl.SmartStatus)
|
||||
require.Equal(t, 0, len(smartMdl.AtaAttributes))
|
||||
require.Equal(t, 16, len(smartMdl.NvmeAttributes))
|
||||
require.Equal(t, 0, len(smartMdl.ScsiAttributes))
|
||||
|
||||
require.Equal(t, 111303174, smartMdl.NvmeAttributes[6].Value)
|
||||
require.Equal(t, 83170961, smartMdl.NvmeAttributes[7].Value)
|
||||
}
|
||||
|
||||
func TestFromCollectorSmartInfo_Scsi(t *testing.T) {
|
||||
//setup
|
||||
smartDataFile, err := os.Open("../testdata/smart-scsi.json")
|
||||
require.NoError(t, err)
|
||||
defer smartDataFile.Close()
|
||||
|
||||
var smartJson collector.SmartInfo
|
||||
|
||||
smartDataBytes, err := ioutil.ReadAll(smartDataFile)
|
||||
require.NoError(t, err)
|
||||
err = json.Unmarshal(smartDataBytes, &smartJson)
|
||||
require.NoError(t, err)
|
||||
|
||||
//test
|
||||
smartMdl := db.Smart{}
|
||||
err = smartMdl.FromCollectorSmartInfo("WWN-test", smartJson)
|
||||
|
||||
//assert
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, "WWN-test", smartMdl.DeviceWWN)
|
||||
require.Equal(t, "passed", smartMdl.SmartStatus)
|
||||
require.Equal(t, 0, len(smartMdl.AtaAttributes))
|
||||
require.Equal(t, 0, len(smartMdl.NvmeAttributes))
|
||||
require.Equal(t, 13, len(smartMdl.ScsiAttributes))
|
||||
|
||||
require.Equal(t, 56, smartMdl.ScsiAttributes[0].Value)
|
||||
require.Equal(t, 300357663, smartMdl.ScsiAttributes[4].Value) //total_errors_corrected
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user