MRPE HSRP Check on Cisco Routers

Hi list
Some time ago we had to solve the following problem: we have to monitor the HSRP status of a thousand WAN routers. Our monitoring is centralized and would reach those routers from the WAN side; although that is MPLS, SNMP is not allowed there (a restriction from the vendor). However, we do have SSH access from the LAN side of these routers :slight_smile:
So we came up with the MRPE principle and a classical Nagios check. The check is executed against the (Cisco!) switch that is connected, on the LAN side, as the primary switch behind the router; it determines the router's HSRP status (and, if OK, the backup router's IP) and reports it back to Checkmk.
The check is heavily customized to our needs (for example, the VLAN settings and user/password are hard-coded in it), but it does what it was written for (a thousand times every five minutes :slight_smile: )
Maybe someone else is looking for something like this and can use and/or improve it.
Netmiko is needed for this: netmiko · PyPI

#!/usr/bin/python3
from __future__ import print_function, unicode_literals
import netmiko
import sys
import getopt
import argparse

# Command line: the only option is the address of the switch to log in to.
parser = argparse.ArgumentParser(description='HSRP all listening check .')
# The device is mandatory: without it there is nothing to connect to, so let
# argparse reject the call with a usage message instead of passing None on.
parser.add_argument('-d','--device',
        required=True,
        help='IP of switch')
args = parser.parse_args()

#defined variables#
# address of the switch, taken from -d/--device
ip_device=args.device
# 0 = no standby router reported, 1 = standby router exists
standby_rtr=0
# SSH credentials handed to netmiko; empty here, fill in before use
dev_username = ""
dev_password = ""
# text line that is printed when the check ends
outputtext=""
# exit code reported to the monitoring system
# (Nagios convention: 0 OK, 1 WARNING, 2 CRITICAL, 3 UNKNOWN); starts as 3
nagioscode=3
# index into the parsed "show standby" output
grouparraynumber=0
# not referenced anywhere in the visible script
grouparraynumber2=0
# index into the parsed "show ip int brief" output
counter_ip_int_br=0
# index into the parsed "show int description" output
counter_int_des=0
# set to True once interface Vlan69 is seen with status and protocol up
status_vlan_69=False

#functions - all self written functions are placed here#

#func ende, end Script and transfer parameters to monitoring system#
def ende(outputtext_ende,nagioscode_ende):
    """Print the check result and terminate the script.

    Args:
        outputtext_ende: text line printed for the monitoring system.
        nagioscode_ende: process exit code
            (Nagios convention: 0 OK, 1 WARNING, 2 CRITICAL, 3 UNKNOWN).

    Raises:
        SystemExit: always, carrying ``nagioscode_ende`` as exit code.
    """
    # Fall back to a generic hint only when code 3 is reported without any
    # text; a message supplied by the caller is always printed unchanged.
    if nagioscode_ende == 3 and not outputtext_ende:
        outputtext_ende = "issue in Script, please review Check"
    print(outputtext_ende)
    # sys.exit instead of the interactive helper exit(), which is only
    # injected by the site module.
    sys.exit(nagioscode_ende)

#programm call#

#check logic: helpers first, then the program flow in main()#

# text the device prints when it rejects a command
INVALID_INPUT_MARKER = "Invalid input detected at '^' marker"
# HSRP group that is skipped while looking for the provider group
NON_PROVIDER_HSRP_GROUP = "hsrp-Vl69-169"
# interface description that marks the backup WAN link
BACKUP_DESCRIPTION = "transit-wan bkp"


def find_first_row(rows, field, needle):
    """Return the first row whose ``field`` value contains ``needle``.

    Args:
        rows: list of dicts, one per parsed output line.
        field: key to look at in every row.
        needle: substring searched for in the value of ``field``.

    Returns:
        The matching dict, or None when nothing matches (also for an
        empty list), so the caller never indexes past the end.
    """
    return next((row for row in rows if needle in row[field]), None)


def select_provider_group(groups):
    """Return the first HSRP entry that is not the non-provider group.

    Args:
        groups: parsed "show standby" rows (dicts with "group_name").

    Returns:
        The first row whose group name differs from
        NON_PROVIDER_HSRP_GROUP, or None when every row belongs to it.
    """
    return next(
        (group for group in groups
         if group["group_name"] != NON_PROVIDER_HSRP_GROUP),
        None,
    )


def evaluate_hsrp(group, interface_rows):
    """Build the result for the selected HSRP group.

    Args:
        group: parsed "show standby" row with "group_name" and
            "standby_router".
        interface_rows: parsed "show int description" output; only
            looked at when the group reports no standby router.

    Returns:
        Tuple ``(outputtext, nagioscode)``:
        * standby router present -> code 0
        * no standby router, backup interface up/up -> code 2
        * no standby router, backup interface not up/up, not found or
          output not parsed -> code 3
    """
    group_name = group["group_name"]
    standby_router = group["standby_router"]
    if standby_router:
        return ("Standby Router in HSRP Group " + group_name + " is "
                + standby_router, 0)

    unknown_text = "Standby Router in HSRP Group " + group_name + " is unknown. "
    # netmiko hands back the raw string when TextFSM could not parse it
    if not isinstance(interface_rows, list):
        return (unknown_text
                + "Output of show int description could not be parsed", 3)

    backup_row = find_first_row(interface_rows, "descrip", BACKUP_DESCRIPTION)
    if backup_row is None:
        return (unknown_text + "No bkp Interface found", 3)

    found_text = unknown_text + "First bkp Interface found " + backup_row["port"]
    if "up" not in backup_row["status"]:
        return (found_text + " line status is " + backup_row["status"], 3)

    protocol_code = 2 if "up" in backup_row["protocol"] else 3
    return (found_text + " protocol status is " + backup_row["protocol"],
            protocol_code)


def main():
    """Query the switch and report the HSRP state via ende()."""
    #connect to Switch#
    try:
        net_connect = netmiko.ConnectHandler(
            ip_device,
            username=dev_username,
            password=dev_password,
            device_type="cisco_ios",
        )
    except netmiko.ssh_exception.NetMikoTimeoutException:
        ende("Connection to device timed-out", 3)

    #send commands, always disconnect afterwards#
    try:
        output_all = net_connect.send_command(
            "show standby vl69 all", use_textfsm=True)
        output_ip_int_brief = net_connect.send_command(
            "show ip int brief", use_textfsm=True)
        output_show_int_des = net_connect.send_command(
            "show int description", use_textfsm=True)
    finally:
        net_connect.disconnect()

    #Check for invalid input, command not found, or no output#
    command_outputs = (
        ("show standby vl69 all", output_all),
        ("show ip int brief", output_ip_int_brief),
        ("show int description", output_show_int_des),
    )
    for command, output in command_outputs:
        if INVALID_INPUT_MARKER in output:
            ende("Invalid input detected in command " + command, 1)

    if not output_all:
        ende("output for 'command show standby vl69 all' empty", 0)

    # Both outputs are indexed as lists of dicts below; a plain string
    # here means the TextFSM parsing did not produce rows.
    for command, output in command_outputs[:2]:
        if not isinstance(output, list):
            ende("Output of command " + command + " could not be parsed", 3)

    #search for interface vlan69, end Script if it is not found#
    vlan_row = find_first_row(output_ip_int_brief, "intf", "Vlan69")
    if vlan_row is None:
        ende("Interface VLAN69 not found, check no possible", 0)

    #Check if VLAN69 is up, status and protocol#
    if "up" not in vlan_row["status"]:
        ende("Interface VLAN69 status is " + vlan_row["status"], 0)
    if "up" not in vlan_row["proto"]:
        ende("Interface VLAN69 protocol status is " + vlan_row["proto"], 0)

    #filter for hsrp_group, skipping the non-provider group#
    provider_group = select_provider_group(output_all)
    if provider_group is None:
        ende("Only group " + NON_PROVIDER_HSRP_GROUP
             + " available. No Check possible, no provider hsrp group detected",
             0)

    #create Nagiosoutput and outputtext#
    result_text, result_code = evaluate_hsrp(provider_group, output_show_int_des)
    ende(result_text, result_code)


#programm call#
if __name__ == "__main__":
    main()
1 Like