#!/usr/bin/python # -*- encoding: utf-8; py-indent-offset: 4 -*- # +------------------------------------------------------------------+ # | ____ _ _ __ __ _ __ | # | / ___| |__ ___ ___| | __ | \/ | |/ / | # | | | | '_ \ / _ \/ __| |/ / | |\/| | ' / | # | | |___| | | | __/ (__| < | | | | . \ | # | \____|_| |_|\___|\___|_|\_\___|_| |_|_|\_\ | # | | # | Copyright Mathias Kettner 2014 mk@mathias-kettner.de | # +------------------------------------------------------------------+ # # This file is part of Check_MK. # The official homepage is at http://mathias-kettner.de/check_mk. # # check_mk is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by # the Free Software Foundation in version 2. check_mk is distributed # in the hope that it will be useful, but WITHOUT ANY WARRANTY; with- # out even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. See the GNU General Public License for more de- # tails. You should have received a copy of the GNU General Public # License along with GNU Make; see the file COPYING. If not, write # to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, # Boston, MA 02110-1301 USA. 
# First generation of agents output only the process command line:
# /usr/sbin/xinetd -pidfile /var/run/xinetd.pid -stayalive -inetd_compat -inetd_ipv6

# Second generation of agents output the user in brackets in the first columns:
# (root) /usr/sbin/xinetd -pidfile /var/run/xinetd.pid -stayalive -inetd_compat -inetd_ipv6

# Third generation (from 1.1.5) output also virtual memory, resident memory and %CPU:
# (class,122376,88128,0.0) /usr/jre1.6.0_13/bin/java -Dn=Cart_16TH13 -Dmcs.node=zbgh1ca -Dmcs.mdt.redundan

# Fourth generation (>=1.2.5), additional columns in bracket:
# (user, virtual_size, resident_size, %cpu, processID, pagefile_usage, usermodetime, kernelmodetime, openHandles, threadCount) name
# (\\KLAPPRECHNER\ab,29284,2948,0,3124,904,400576,901296,35,1) NOTEPAD.EXE

# Sixth generation (>=1.2.7) adds an optional etime, joined by "/" with the CPU time

# The plugin "psperf.bat" is deprecated. As of version 1.2.5 all of this information
# is reported by the windows agent itself. However, we still support sections from psperf.bat
# if the agent version is lower than 1.2.5.
# Windows agent now ships a plugin "psperf.bat" that adds a section from wmic
# to the output:
# <<>>
# [wmic process]
# ^M
# Node,KernelModeTime,Name,PageFileUsage,ThreadCount,UserModeTime,VirtualSize,WorkingSetSize^M
# WINDOWSXP,43478281250,System Idle Process,0,2,0,0,28672^M
# WINDOWSXP,155781250,System,0,59,0,1957888,253952^M
# WINDOWSXP,468750,smss.exe,176128,3,156250,3928064,442368^M
# WINDOWSXP,56406250,csrss.exe,1863680,12,11406250,25780224,3956736^M
# WINDOWSXP,18593750,winlogon.exe,6832128,19,4843750,59314176,2686976^M
# WINDOWSXP,167500000,services.exe,1765376,16,13750000,22601728,4444160^M
# WINDOWSXP,16875000,lsass.exe,3964928,21,3906250,43462656,6647808^M
# WINDOWSXP,8750000,VBoxService.exe,1056768,8,468750,26652672,3342336^M

# New since 1.2.1i2: WATO compatible syntax
#
# Holds a list of rules which are matching hosts by names or tags and
# where each rule holds a dictionary.
#
# Each of those entries defines the following options:
#
# 1. descr:    item name to be used for the service description
# 2. match:    matching-definition
# 3. user:     user definition
# 4. perfdata: monitor with perfdata
# 5. levels:   four numbers (thresholds)
inventory_processes_rules = []

# Deprecated option since 1.6. cmk_base creates a config warning when finding rules
# for this ruleset. Can be dropped with 1.7.
inventory_processes = []
inventory_processes_perf = []

ANY_USER = None


def ps_cleanup_counters(parsed):
    """Remove item states of processes that are missing from the agent output.

    parsed is the list of parsed process lines; the PIDs found in it are the
    ones whose counters are kept. Everything is handed to
    clear_item_states_by_full_keys() in one call.
    """
    # "last.cleared.ps_" is a legacy key that was used for some kind of
    # caching; it is always scheduled for removal.
    stale_keys = ["last.cleared.ps_"]

    # Add the counters of all processes that are not executing anymore, so
    # the item state does not keep growing with data of vanished processes.
    stale_keys.extend(ps_get_counters_to_delete(ps_get_current_pids(parsed)))

    clear_item_states_by_full_keys(stale_keys)


# Get the idents of the counters which can be deleted because the process id of
# the counter is not found anymore in the process table.
#
# Handle these formats of idents:
# Old string based keys: 'ps_stat.pcpu.669': (1448634267.875281, 1),
# New magic keys:        ('ps', None, 'ps_wmic.kernel.692'): (1448633487.573496, 1092007),
def ps_get_counters_to_delete(pids):
    """Return the item state idents whose trailing PID is not contained in pids."""
    counters_to_delete = []
    for ident in get_all_item_states():
        if isinstance(ident, tuple) and ident[0] in ["ps", "ps.perf", "winservice_ps"]:
            check_ident = ident[2]
        elif not isinstance(ident, tuple) and ident.startswith(("ps_stat", "ps_wmic")):
            check_ident = ident
        else:
            continue

        # The PID is the last dot separated component of the ident. Idents
        # that do not end in a number are never deleted here.
        pid = check_ident.split(".")[-1]
        if pid.isdigit() and pid not in pids:
            counters_to_delete.append(ident)

    return counters_to_delete


def ps_get_current_pids(parsed):
    """Collect the non-empty process_id values of all parsed process lines."""
    pids = set()
    for line in parsed:
        process_id = line[1].process_id
        if process_id:
            pids.add(process_id)
    return pids


# This function is only concerned with deprecated output from psperf.bat,
# in case of all other output it just returns info unmodified. But if it is
# a windows output it will extract the number of cpu cores
def ps_merge_wmic_info(info):
    """Return a tuple (cpu_cores, info) for the raw agent section info."""
    # Agent output version cmk>1.2.5
    # Assumes line = [CLUSTER, PS_INFO, COMMAND]
    has_wmic = False
    for line in info:
        if len(line) > 2 and line[2].lower() == "system idle process":
            # Tenth field of the bracketed process info is the thread count,
            # which for the idle process is taken as the number of CPU cores.
            cpu_cores = int(line[1][1:-1].split(",")[9])
            return cpu_cores, info
        if "wmic process" in line[-1]:
            has_wmic = True
            break

    # Data from other systems than windows
    if not has_wmic:
        return 1, info

    # Data from windows with wmic info, cmk<1.2.5
    return extract_wmic_info(info)


def extract_wmic_info(info):
    """Split info into plain process lines and wmic rows and merge them.

    Lines between '[wmic process]' and '[wmic process end]' are treated as
    wmic rows (the first one being the header row), all other lines as the
    plain process list. Returns the result of merge_wmic(): (cpu_cores, info).
    """
    ps_result = []
    wmic_info = {}
    # Must be bound even if no header row is ever found: the caller only
    # checks for the substring "wmic process", so a process command line
    # containing that text (or a marker on the very last line) gets us here
    # without any wmic header. Previously this raised an UnboundLocalError.
    wmic_headers = []
    is_wmic = False

    lines = iter(info)
    for line in lines:
        if line[-1] == '[wmic process]':
            is_wmic = True
            header_row = next(lines, None)
            if header_row is None:
                break  # Marker was the last line, finished with all lines
            wmic_headers = ["node"] + header_row[1:]
            continue
        if line[-1] == '[wmic process end]':
            is_wmic = False
            continue

        if is_wmic:
            row = dict(zip(wmic_headers, line))
            # Row might be damaged. I've seen this agent output:
            # Node - TILE-BUILDER02
            # ERROR:
            # Description = Quota violation
            #
            # Node,
            if "Name" in row and "ProcessId" in row:
                wmic_info.setdefault((row["node"], row["Name"]), []).append(row)
        else:
            ps_result.append(line)  # plain list of process names

    return merge_wmic(ps_result, wmic_info, wmic_headers)


def merge_wmic(ps_result, wmic_info, wmic_headers):
    """Enrich the plain process lines with the matching wmic rows.

    ps_result:    list of plain process lines, looked up by (line[0], line[1])
    wmic_info:    dict (node, name) -> list of wmic row dicts
    wmic_headers: list of wmic column names

    Lines with a matching, not yet seen wmic row get a process info column
    "(unknown,...)" inserted at index 1 (the lines are modified in place).
    Returns (cpu_cores, info).
    """
    info = []
    seen_pids = set()  # Remove duplicate entries
    cpu_cores = 1
    for line in ps_result:
        psinfos = wmic_info.get((line[0], line[1]), [])
        if psinfos:
            psinfo = psinfos.pop()  # each info is used only once!
            # Get number of CPU cores from system idle process
            if "ThreadCount" in wmic_headers and psinfo["Name"].lower() == "system idle process":
                cpu_cores = int(psinfo["ThreadCount"])
            pid = int(psinfo["ProcessId"])
            if pid not in seen_pids:
                seen_pids.add(pid)
                # Floor division keeps these integers regardless of the
                # interpreter's "/" semantics; they are formatted with %d.
                virt = int(psinfo["VirtualSize"]) // 1024  # Bytes -> KB
                resi = int(psinfo["WorkingSetSize"]) // 1024  # Bytes -> KB
                pagefile = int(psinfo["PageFileUsage"]) // 1024  # Bytes -> KB
                userc = int(psinfo["UserModeTime"])  # do not resolve counter here!
                kernelc = int(psinfo["KernelModeTime"])  # do not resolve counter here!
                handlec = int(psinfo.get("HandleCount", 0))  # Only in newer psperf.bat versions
                threadc = int(psinfo["ThreadCount"])  # do not resolve counter here!
                line[1:1] = [
                    "(unknown,%d,%d,0,%d,%d,%d,%d,%d,%d,)" %
                    (virt, resi, pid, pagefile, userc, kernelc, handlec, threadc)
                ]
        info.append(line)

    return cpu_cores, info


# This mainly formats the line[1] element which contains the process info (user,...)
def ps_parse_process_entries(pre_parsed):
    """Return the lines of pre_parsed as [node, process_info, command line...]."""
    parsed = []
    # line[0] = node
    # line[1] = process_info OR (if no process info available) = process name
    for line in pre_parsed:
        process_info = ps_info_tuple(line[1])
        if process_info:
            parsed_line = [line[0], process_info] + line[2:]
        else:
            # Make number of columns in line consistent for discovery/check
            parsed_line = [line[0], ps_info()] + line[1:]

        # Filter out any lines where no process command line is available, e.g.
        # [None, u'(,,,)']
        # [None, u'(,,,)', u'']
        # Lines whose command line is exactly 'svchost.exe' are dropped as well.
        if len(parsed_line) > 2 and parsed_line[2] and parsed_line[2] != 'svchost.exe':
            parsed.append(parsed_line)

    return parsed


# Produces a list of lists where each sub list is built as follows:
# [
#     [None, (u'root', u'35156', u'4372', u'00:00:05/2-14:14:49', u'1'), u'/sbin/init'],
# ]
# First element:  The node the data comes from in a cluster or None
# Second element: The process info tuple (see ps.include: check_ps_common() for details on the elements)
# Third element:  The process command line
def parse_ps(info):
    """Return (cpu_cores, parsed) for the raw ps section."""
    cpu_cores, parsed = ps_merge_wmic_info(info)
    parsed = ps_parse_process_entries(parsed)
    return cpu_cores, parsed


def inventory_ps(info):
    """Discovery function: parse the ps or ps_lnx section and apply the rules."""
    # Not named "ps_info" to avoid shadowing the ps_info() helper that is
    # used in ps_parse_process_entries().
    ps_section, ps_lnx_info = info[:2]
    if ps_lnx_info:
        parsed = parse_ps_lnx(ps_lnx_info)
    else:
        _cpu_cores, parsed = parse_ps(ps_section)

    return inventory_ps_common(inventory_processes_rules, parsed)


def check_ps(item, params, info):
    """Check function: info holds the ps section followed by the extra sections

    (ps_lnx, mem, solaris_mem, statgrab_mem, aix_memory, cpu), in that order.
    """
    # Not named "ps_info" to avoid shadowing the ps_info() helper that is
    # used in ps_parse_process_entries().
    ps_section, ps_lnx_info = info[:2]
    if ps_lnx_info:
        cpu_cores, parsed = 1, parse_ps_lnx(ps_lnx_info)
    else:
        cpu_cores, parsed = parse_ps(ps_section)

    # Cleanup counters of processes which do not exist anymore
    ps_cleanup_counters(parsed)

    mem_info, solaris_mem_info, statgrab_mem_info, aix_memory_info, cpu_info = info[2:]

    # cpu_info for non windows systems
    if cpu_info and len(cpu_info[0]) == 6:
        cpu_cores = int(cpu_info[0][5])

    # Use the first memory section that delivered data
    if mem_info:
        total_ram = parse_proc_meminfo_bytes(mem_info).get("MemTotal")
    elif solaris_mem_info:
        total_ram = solaris_mem_info.get("MemTotal") * 1024
    elif statgrab_mem_info:
        total_ram = statgrab_mem_info.get("MemTotal") * 1024
    elif aix_memory_info:
        total_ram = int(aix_memory_info[0][0]) * 4 * 1024
    else:
        total_ram = None

    return check_ps_common(item, params, parsed, cpu_cores=cpu_cores, total_ram=total_ram)


check_info['ps'] = {
    "inventory_function": inventory_ps,
    "check_function": check_ps,
    "service_description": "Process %s",
    "includes": ["ps_lnx.include", "ps.include", "mem.include"],
    "has_perfdata": True,
    "node_info": True,  # add first column with actual host name
    "group": "ps",
    "default_levels_variable": "ps_default_levels",
    "extra_sections": ["ps_lnx", "mem", "solaris_mem", "statgrab_mem", "aix_memory", "cpu"],
}

check_info['winservice_ps'] = {
    "inventory_function": inventory_ps,
    "check_function": check_ps,
    "service_description": "Process %s",
    "includes": ["ps_lnx.include", "ps.include", "mem.include"],
    "has_perfdata": True,
    "node_info": True,  # add first column with actual host name
    "group": "ps",
    "default_levels_variable": "ps_default_levels",
    "extra_sections": ["ps_lnx", "mem", "solaris_mem", "statgrab_mem", "aix_memory", "cpu"],
}

# NOTE: This check is deprecated and will be removed any decade now. ps now
# does always performance data.
check_info['ps.perf'] = {
    "check_function": check_ps,
    "service_description": "Process %s",
    "includes": ["ps_lnx.include", "ps.include", "mem.include"],
    "has_perfdata": True,
    "node_info": True,  # add first column with actual host name
    "group": "ps",
    "default_levels_variable": "ps_default_levels",
    "extra_sections": ["ps_lnx", "mem", "solaris_mem", "statgrab_mem", "aix_memory", "cpu"],
}