Support for SAS disks in the SMART plugin

Dear all,

we have several machines with SAS disks (SEAGATE ST4000NM0023). Sadly, they are not supported by smartmontools (see the links below). Therefore I modified your smart plugin to parse the non-standardized output from our devices. Maybe you are interested in making it more general, so that it works for other SAS devices, too. The code is at the very end. If required, I can also attach the smartctl output.

https://www.smartmontools.org/wiki/FAQ#MySCSISASdriveisnotinthesmartctlsmartddatabase
https://www.smartmontools.org/attachment/ticket/867/smartctl-Seagate-ST4000NM0023

Thanks and Greetings
Jörg

#!/bin/bash
# +------------------------------------------------------------------+
# |             ____ _               _        __  __ _  __           |
# |            / ___| |__   ___  ___| | __   |  \/  | |/ /           |
# |           | |   | '_ \ / _ \/ __| |/ /   | |\/| | ' /            |
# |           | |___| | | |  __/ (__|   <    | |  | | . \            |
# |            \____|_| |_|\___|\___|_|\_\___|_|  |_|_|\_\           |
# |                                                                  |
# | Copyright Mathias Kettner 2014             mk@mathias-kettner.de |
# +------------------------------------------------------------------+
#
# This file is part of Check_MK.
# The official homepage is at http://mathias-kettner.de/check_mk.
#
# check_mk is free software;  you can redistribute it and/or modify it
# under the  terms of the  GNU General Public License  as published by
# the Free Software Foundation in version 2.  check_mk is  distributed
# in the hope that it will be useful, but WITHOUT ANY WARRANTY;  with-
# out even the implied warranty of  MERCHANTABILITY  or  FITNESS FOR A
# PARTICULAR PURPOSE. See the  GNU General Public License for more de-
# tails. You should have  received  a copy of the  GNU  General Public
# License along with GNU Make; see the file  COPYING.  If  not,  write
# to the Free Software Foundation, Inc., 51 Franklin St,  Fifth Floor,
# Boston, MA 02110-1301 USA.




# Translate the SCSI-style attribute report of smartctl into lines that look
# like rows of the ATA SMART attribute table, which is the format the rest of
# this plugin emits.
#
# Arguments:
#   $1 - access mode. "RAID" queries a disk behind a MegaRAID controller via
#        /dev/sg0, "SAS" queries a device directly with "-d scsi". Any other
#        value makes the function return without producing output.
#   $2 - "RAID": the MegaRAID device id; "SAS": the path of the device.
# Outputs:
#   One line per recognised value on stdout. Only the attribute id, the
#   attribute name and the raw value (last column) are taken from the disk;
#   the flag/value/worst/threshold columns are fixed placeholder texts.
sas_info()
{
    local SAS_INFO line VALUE

    if [ "$1" == "RAID" ]; then
        SAS_INFO=$(smartctl -d megaraid,"$2" -A /dev/sg0)
    elif [ "$1" == "SAS" ]; then
        # Quoted so that a device path containing whitespace stays one argument.
        SAS_INFO=$(smartctl -d scsi -A "$2")
    else
        return
    fi

    # "read" without IFS= on purpose: it strips the leading blanks some of the
    # report lines carry, so the case patterns can anchor on the first word.
    printf '%s\n' "$SAS_INFO" | while read -r line ; do
        case $line in
            # number of hours powered up = 41174.10
            "number of hours powered up"*)
                # Drop the fractional part, the raw value is expected as integer.
                VALUE=$(echo "$line" | awk '{sub(/\.[0-9]*$/, "", $7); print $7}')
                echo "  9 Power_On_Hours          0x0032   035   035   000    Old_age   Always       -       $VALUE"
                ;;
            # Accumulated start-stop cycles:  45
            "Accumulated start-stop cycles"*)
                VALUE=$(echo "$line" | awk '{print $4}')
                echo "  4 Start_Stop_Count        0x0032   100   100   020    Old_age   Always       -       $VALUE"
                ;;
            # Accumulated load-unload cycles:  1805
            "Accumulated load-unload cycles"*)
                VALUE=$(echo "$line" | awk '{print $4}')
                echo "193 Load_Cycle_Count        0x0032   099   099   000    Old_age   Always       -       $VALUE"
                ;;
            # Current Drive Temperature:     30 C
            "Current Drive Temperature"*)
                VALUE=$(echo "$line" | awk '{print $4}')
                echo "194 Temperature_Celsius     0x0022   027   040   000    Old_age   Always       -       $VALUE (0 0 0 0)"
                ;;
            # Blocks sent to initiator = 2707153807
            # The initiator is the host, so blocks sent to it are blocks READ
            # from the disk.
            "Blocks sent to initiator"*)
                VALUE=$(echo "$line" | awk '{print $6}')
                echo "242 Total_LBAs_Read         0x0000   100   253   000    Old_age   Offline      -       $VALUE"
                ;;
            # Blocks received from initiator = 1375121110
            # Blocks received from the host are blocks WRITTEN to the disk.
            "Blocks received from initiator"*)
                VALUE=$(echo "$line" | awk '{print $6}')
                echo "241 Total_LBAs_Written      0x0000   100   253   000    Old_age   Offline      -       $VALUE"
                ;;
            # Elements in grown defect list: 0
            "Elements in grown defect list"*)
                VALUE=$(echo "$line" | awk '{print $6}')
                echo "  5 Reallocated_Sector_Ct   0x0033   100   100   036    Pre-fail  Always       -       $VALUE"
                ;;
            ## Only with "smartctl -a" instead of "smartctl -A"
            # Error counter log:
            #            Errors Corrected by           Total   Correction     Gigabytes    Total
            #                ECC          rereads/    errors   algorithm      processed    uncorrected
            #            fast | delayed   rewrites  corrected  invocations   [10^9 bytes]  errors
            # read:   1756124203        0         0  1756124203          0       1025.345           0
            # write:         0        0         0         0          0       3132.578           0
            # verify: 1357542225        0         0  1357542225          0     137902.750           0
            "read:"*)
                # Column 2: errors corrected by ECC (fast)
                VALUE=$(echo "$line" | awk '{print $2}')
                echo "195 Hardware_ECC_Recovered  0x001a   102   099   000    Old_age   Always       -       $VALUE"
                # Column 5: total errors corrected
                VALUE=$(echo "$line" | awk '{print $5}')
                echo "  1 Raw_Read_Error_Rate     0x000f   066   063   044    Pre-fail  Always       -       $VALUE"
                ;;
        esac
    done
}


# Print the SMART attributes of all physical disks listed by a MegaCli binary.
#
# Arguments:
#   $1 - MegaCli command to run (optional, defaults to "megacli").
# Outputs:
#   For every physical disk, the attribute lines that contain "Always"
#   (without the attribute 190 temperature line), each prefixed with
#   "Enc<enclosure>/Slot<slot> <PD type> <model> ".
#   Disks whose PD type is SAS are read through sas_info, all others through
#   "smartctl -d megaraid,<device id>" on /dev/sg0.
megaraid_info()
{
    local PDINFO line ENC SLOT LOG_DEV_ID VEND MODEL

    # Left unquoted on purpose so the caller may pass a command with arguments.
    # shellcheck disable=SC2086
    PDINFO=$(${1:-megacli} -PDlist -a0 -NoLog)

    printf '%s\n' "$PDINFO" | \
    while read -r line ; do
        case "$line" in
            # FIRST LINE of a disk record, forget the values of the previous disk
            "Enclosure Device ID"*)  #Enclosure Device ID: 252
                ENC=$(echo "$line" | awk '{print $4}')
                unset SLOT LOG_DEV_ID VEND MODEL
                ;;
            "Slot Number"*)  #Slot Number: 7
                SLOT=$(echo "$line" | awk '{print $3}')
                ;;
            # Identify the logical device ID. smartctl needs it to access the disk.
            "Device Id"*)    #Device Id: 19
                LOG_DEV_ID=$(echo "$line" | awk '{print $3}')
                ;;
            "PD Type"*)      #PD Type: SATA
                VEND=$(echo "$line" | awk '{print $3}')
                ;;
            # This is the last value, generate output here
            "Inquiry Data"*)
                # $VEND is quoted: it is empty when no "PD Type" line was seen,
                # and an unquoted empty operand makes "[" fail with an error.
                if [ "$VEND" == "SAS" ]; then
                    MODEL=$(echo "$line" | awk '{printf "%s_%s", $3, $4}')
                    sas_info RAID "$LOG_DEV_ID" | \
                      grep Always | grep -E -v '^190(.*)Temperature(.*)' | \
                      sed "s|^|Enc${ENC}/Slot${SLOT} $VEND $MODEL |"
                else
                    #Inquiry Data: WD-WCC1T1035197WDC WD20EZRX-00DC0B0 80.00A80
                    # $4 seems to be better for some vendors... wont be possible to get this perfect.
                    MODEL=$(echo "$line" | awk '{print $3}')

                    # /dev/sdc ATA SAMSUNG_SSD_830   5 Reallocated_Sector_Ct   0x0033   100   100   010    Pre-fail  Always       -
                    smartctl -d megaraid,"${LOG_DEV_ID}" -v 9,raw48 -A /dev/sg0 | \
                      grep Always | grep -E -v '^190(.*)Temperature(.*)' | \
                      sed "s|^|Enc${ENC}/Slot${SLOT} $VEND $MODEL |"
                fi
                ;;
        esac
    done
}


# Main section. Needs smartctl; without it only an error is written to stderr.
# Emits the <<<smart>>> section: for every disk found below /dev/disk/by-id the
# SMART attribute lines, each prefixed with "<device name> <vendor> <model> "
# (NVMe devices get one header line followed by the smartctl output instead).
# Only handle always updated values, add device path and vendor/model.
# Afterwards megaraid_info is called if a MegaCli binary is installed.
if command -v smartctl > /dev/null 2>&1 ; then
    #
    # if the 3ware-utility is found
    # get the serials for all disks on the controller
    #
    if command -v tw_cli > /dev/null 2>&1 ; then
        # support for only one controller at the moment
        TWAC=$(tw_cli show | awk 'NR < 4 { next } { print $1 }' | head -n 1)

        # - add a trailing zero to handle case of unused slot
        #   trailing zeros are part of the device links in /dev/disk/by-id/... anyway
        # - only the last 9 chars seem to be relevant
        # (hopefully all this doesn't change with new kernels...)
        # The eval defines one variable per disk, named after the serial and
        # holding the port number; it is looked up below via ${!DNAME}.
        eval "$(tw_cli /"$TWAC" show drivestatus | grep -E '^p[0-9]' | awk '{print $1 " " $7 "0"}' | while read -r twaminor serial ; do
            twaminor=${twaminor#p}
            serial=${serial:(-9)}
            serial=AMCC_${serial}00000000000
            echo "$serial=$twaminor"
        done)"
    else
        # Plain "else": the former "else:" was executed as a command inside
        # the then-branch, so this message appeared exactly when tw_cli existed.
        echo "tw_cli not found" >&2
    fi

    echo '<<<smart>>>'
    SEEN=
    for D in /dev/disk/by-id/{scsi,ata,nvme}-*; do
        # Skip patterns that matched nothing (still ending in a literal "*")
        [ "$D" != "${D%scsi-\*}" ] && continue
        [ "$D" != "${D%ata-\*}" ] && continue
        [ "$D" != "${D%nvme-\*}" ] && continue
        # Skip partitions
        [ "$D" != "${D%-part*}" ] && continue
        N=$(readlink "$D")
        N=${N##*/}
        if [ -r /sys/block/"$N"/device/vendor ]; then
            VEND=$(tr -d ' ' < /sys/block/"$N"/device/vendor)
        elif [ -r /sys/block/"$N"/device/device/vendor ]; then
            VEND=NVME
        else
            # 2012-01-25 Stefan Kaerst CDJ - in case $N does not exist
            VEND=ATA
        fi

        if [ -r /sys/block/"$N"/device/model ]; then
            MODEL=$(sed -e 's/ /_/g' -e 's/_*$//g' < /sys/block/"$N"/device/model)
        else
            MODEL=$(smartctl -a "$D" | grep -i "device model" | sed -e "s/.*:[ ]*//g" -e "s/\ /_/g")
        fi

        # js_mz_20191127: SAS disks have another output format
        if [ -r /sys/block/"$N"/device/sas_address ]; then
            VEND=SAS
        fi

        # Excluded disk models for SAN arrays or certain RAID luns that are also not usable..
        if [ "$MODEL" = "iSCSI_Disk" ] || [ "$MODEL" = "LOGICAL_VOLUME" ]; then
            continue
        fi

        # js_mz_20191127: Exclude disks behind LSI MegaRAID
        if [ "$VEND" = "LSI" ]; then
            continue
        fi

        # Avoid duplicate entries for same device
        if [ "${SEEN//.$N./}" != "$SEEN" ] ; then
            continue
        fi
        SEEN="$SEEN.$N."

        # strip device name for final output
        DNAME=${D#/dev/disk/by-id/scsi-}
        DNAME=${DNAME#/dev/disk/by-id/ata-}
        # 2012-01-25 Stefan Kaerst CDJ - special option in case vendor is AMCC
        CMD=
        if [ "$VEND" == "AMCC" ] && [ -n "$TWAC" ]; then
            DNAME=${DNAME#1}
            [ -z "${!DNAME}" ] && continue
            CMD="smartctl -d 3ware,${!DNAME} -v 9,raw48 -A /dev/twa0"
            # create nice device name including model
            MODEL=$(tw_cli /"$TWAC"/p"${!DNAME}" show model | head -n 1 | awk -F= '{ print $2 }')
            MODEL=${MODEL## }
            MODEL=${MODEL// /-}
            DNAME=${DNAME#AMCC_}
            DNAME="AMCC_${MODEL}_${DNAME%000000000000}"
        elif [ "$VEND" == "SAS" ]; then
            # Output is produced right here, CMD stays empty so that the
            # generic output step below does nothing for this disk.
            CMD=
            sas_info SAS "$D" | sed "s|^|$DNAME $VEND $MODEL |"
        elif [ "$VEND" != "ATA" ] ; then
            if [ "$VEND" == "NVME" ] ; then
                DNAME="/dev/$N"
                CMD="smartctl -d nvme -A $DNAME"
            else
                # Reset both values so nothing leaks over from the previous disk
                TEMP=
                SN=
                # create temperature output as expected by checks/smart
                # this is a hack, TODO: change checks/smart to support SCSI-disks
                # NOTE(review): this eval runs text taken from the smartctl
                # output (serial number, temperature). A device reporting shell
                # metacharacters there would have them interpreted by the shell.
                eval "$(smartctl -d scsi -i -A "$D" | while read -r a b c d _rest ; do
                    [ "$a" == Serial ] && echo SN="$c"
                    [ "$a" == Current ] && [ "$b" == Drive ] && [ "$c" == Temperature: ] && echo TEMP="$d"
                done)"
                [ -n "$TEMP" ] && CMD="echo 194 Temperature_Celsius 0x0000 000 000 000 Old_age Always - $TEMP (0 0 0 0)"
                DNAME="${VEND}_${MODEL}_${SN}"
            fi
        else
            CMD="smartctl -d ata -v 9,raw48 -A $D"
        fi

        # CMD is expanded unquoted on purpose: it holds a command with arguments.
        if [ "$VEND" == "NVME" ]; then
            echo "$DNAME $VEND $MODEL"
            [ -n "$CMD" ] && $CMD | sed -e '1,5d; /^$/d'
        else
            [ -n "$CMD" ] && $CMD | grep Always | grep -E -v '^190(.*)Temperature(.*)' | sed "s|^|$DNAME $VEND $MODEL |"
        fi
    done 2>/dev/null

    # Call MegaRaid submodule if conditions are met
    if type MegaCli >/dev/null 2>&1; then
        MegaCli_bin="MegaCli"
    elif type MegaCli64 >/dev/null 2>&1; then
        MegaCli_bin="MegaCli64"
    elif type megacli >/dev/null 2>&1; then
        MegaCli_bin="megacli"
    else
        MegaCli_bin="unknown"
    fi

    if [ "$MegaCli_bin" != "unknown" ]; then
        megaraid_info "$MegaCli_bin"
    fi
else
    echo "ERROR: smartctl not found" >&2
fi