#!/usr/bin/python
# -*- encoding: utf-8; py-indent-offset: 4 -*-
# +------------------------------------------------------------------+
# |             ____ _               _        __  __ _  __           |
# |            / ___| |__   ___  ___| | __   |  \/  | |/ /           |
# |           | |   | '_ \ / _ \/ __| |/ /   | |\/| | ' /            |
# |           | |___| | | |  __/ (__|   <    | |  | | . \            |
# |            \____|_| |_|\___|\___|_|\_\___|_|  |_|_|\_\           |
# |                                                                  |
# | Copyright Mathias Kettner 2016             mk@mathias-kettner.de |
# +------------------------------------------------------------------+
#
# This file is part of Check_MK.
# The official homepage is at http://mathias-kettner.de/check_mk.
#
# check_mk is free software;  you can redistribute it and/or modify it
# under the  terms of the  GNU General Public License  as published by
# the Free Software Foundation in version 2.  check_mk is  distributed
# in the hope that it will be useful, but WITHOUT ANY WARRANTY;  with-
# out even the implied warranty of  MERCHANTABILITY  or  FITNESS FOR A
# PARTICULAR PURPOSE. See the  GNU General Public License for more de-
# tails. You should have  received  a copy of the  GNU  General Public
# License along with GNU Make; see the file  COPYING.  If  not,  write
# to the Free Software Foundation, Inc., 51 Franklin St,  Fifth Floor,
# Boston, MA 02110-1301 USA.

# This plugin was sponsored by BenV. Thanks!
# https://notes.benv.junerules.com/mtr/

# Concept:
# Read config mtr.cfg (plus any mtr.d/*.cfg files)
# For every host:
# parse outstanding reports (and delete them)
# If current time > last check + config(time) seconds, start new mtr in background
#    MTR results are stored in $MK_VARDIR/mtr.report.<host name as filename slug>
# return previous host data

import sys, os, re, time, glob, ConfigParser, StringIO, distutils.spawn
from unicodedata import normalize
import subprocess

# Base directories; the environment variables override the built-in defaults.
mk_confdir = os.getenv("MK_CONFDIR") or "/etc/check_mk"
mk_vardir = os.getenv("MK_VARDIR") or "/var/lib/check_mk_agent"

config_filename = mk_confdir + "/mtr.cfg"       # main configuration file
config_dir      = mk_confdir + "/mtr.d/*.cfg"   # glob pattern for additional config files
status_filename = mk_vardir  + "/mtr.state"     # last known results of all hosts
report_filepre  = mk_vardir  + "/mtr.report."   # prefix of the per-host mtr report files


def is_debug_requested(argv):
    """Return True if '-d' or '--debug' is among the command line arguments.

    argv is the complete argument vector; the program name at index 0 is
    never treated as an option. Both spellings are looked up in every
    argument position (the former code skipped the first argument when
    looking for '-d').
    """
    args = argv[1:]
    return '-d' in args or '--debug' in args


debug = is_debug_requested(sys.argv)

# Locate the mtr executable via the search path. Without it the plugin
# terminates with exit code 0 (and a hint on stdout in debug mode only).
mtr_prog = distutils.spawn.find_executable('mtr')
if mtr_prog is None:
    if debug:
        sys.stdout.write("Could not find mtr binary\n")
    sys.exit(0)

def read_config():
    """Read mtr.cfg and all files matching mtr.d/*.cfg into one parser.

    Every section of the configuration describes one host. Options that a
    section does not set fall back to the defaults given below.

    Returns the populated SafeConfigParser. Terminates the process with
    exit code 0 if the main configuration file does not exist or if no
    section is defined after all files have been read. Files that cannot
    be read or parsed are reported on stdout and otherwise ignored.
    """
    if not os.path.exists(config_filename):
        if debug:
            sys.stdout.write("Not configured, %s missing\n" % config_filename)
        sys.exit(0)

    # Defaults for all sections. Options defaulting to None are only
    # handed to mtr when the configuration sets them.
    config = ConfigParser.SafeConfigParser({
        'type'      : 'icmp',
        'count'     : "10",
        'force_ipv4': "0",
        'force_ipv6': "0",
        'size'      : "64",
        'time'      : "0",
        'dns'       : "0",
        'port'      : None,
        'address'   : None,
        'interval'  : None,
        'timeout'   : None
    })

    # The main file comes first, followed by the drop-in files.
    candidates = [config_filename]
    candidates.extend(glob.glob(config_dir))
    for path in candidates:
        try:
            if config.read(path):
                continue
            sys.stdout.write("**ERROR** Failed to parse configuration file %s!\n" % path)
        except Exception as e:
            sys.stdout.write("**ERROR** Failed to parse config file %s: %s\n" % (path, repr(e)))

    if not config.sections():
        sys.stdout.write("**ERROR** Configuration defines no hosts!\n")
        sys.exit(0)

    return config

# structure of statusfile
# # HOST        |LASTTIME |HOPCOUNT|HOP1|Loss%|Snt|Last|Avg|Best|Wrst|StDev|HOP2|...|HOP8|...|StdDev
# www.google.com|145122481|8|192.168.1.1|0.0%|10|32.6|3.6|0.3|32.6|10.2|192.168.0.1|...|9.8
def read_status():
    """Load the results of earlier mtr runs from the status file.

    Each line has the form HOST|LASTTIME|HOPCOUNT followed by eight
    '|'-separated fields per hop (hop name, loss, sent, last, avg, best,
    worst, stddev).

    Returns a dict mapping the host name to
    {'hops': {hop number: {field name: string}}, 'lasttime': int}, with
    hops numbered from 1 in file order. An empty dict is returned if the
    status file does not exist. Lines that cannot be parsed are reported
    on stdout; whatever had been stored for that host before the error
    occurred is kept.
    """
    status = {}
    if not os.path.exists(status_filename):
        return status

    # The with-statement guarantees the file handle is closed again.
    with open(status_filename) as statusfile:
        for line in statusfile:
            try:
                parts = line.split('|')
                if len(parts) < 2:
                    sys.stdout.write("**ERROR** (BUG) Status has less than 2 parts:\n")
                    sys.stdout.write("%s\n" % parts)
                    continue
                host = parts[0]
                lasttime = int(float(parts[1]))
                status[host] = {'hops': {}, 'lasttime': lasttime}
                hops = int(parts[2])
                for i in range(hops):
                    # Hop i occupies eight consecutive fields after the
                    # three leading host fields. The last field of a line
                    # still carries the newline, hence the rstrip().
                    base = i * 8 + 3
                    status[host]["hops"][i + 1] = {
                        'hopname': parts[base].rstrip(),
                        'loss'   : parts[base + 1].rstrip(),
                        'snt'    : parts[base + 2].rstrip(),
                        'last'   : parts[base + 3].rstrip(),
                        'avg'    : parts[base + 4].rstrip(),
                        'best'   : parts[base + 5].rstrip(),
                        'wrst'   : parts[base + 6].rstrip(),
                        'stddev' : parts[base + 7].rstrip(),
                    }
            except Exception as e:
                # Same "**ERROR**" prefix as every other diagnostic of this
                # plugin (it used to be written as "*ERROR**" here).
                sys.stdout.write("**ERROR** (BUG) Could not parse status line: %s, reason: %s\n" % (line, repr(e)))
    return status

def save_status(status):
    """Write the status of all hosts to the status file, one line per host.

    status maps the host name to a dict with the keys 'lasttime' and
    'hops' (hop number -> dict of the eight hop fields); other keys of a
    host entry are not written. Each line has the form
    HOST|LASTTIME|HOPCOUNT followed by the eight fields of every hop.

    Hops are written in ascending hop number. The file format is purely
    positional, so the order must not depend on dict iteration order.
    """
    hop_fields = ('hopname', 'loss', 'snt', 'last', 'avg', 'best', 'wrst', 'stddev')
    # The with-statement makes sure the data is flushed and the handle closed.
    with open(status_filename, "w") as statusfile:
        for host, hostdict in status.items():
            hops = hostdict["hops"]
            fields = [host, hostdict["lasttime"], len(hops)]
            for hop in sorted(hops):
                hopinfo = hops[hop]
                fields.extend(hopinfo[name] for name in hop_fields)
            hoststring = "|".join("%s" % field for field in fields)
            statusfile.write("%s\n" % hoststring.rstrip())

_punct_re = re.compile(r'[\t !"#$%&\'()*\-/<=>?@\[\\\]^_`{|},.:]+')
def host_to_filename(host, delim=u'-'):
    """Turn a host name into an ASCII-only slug usable as a file name part.

    The name is lower-cased and split at every run of whitespace and
    punctuation characters matched by _punct_re. Each remaining word is
    reduced to its ASCII characters (after NFKD normalization, so accented
    letters keep their base letter) and the non-empty words are joined
    with delim. Approach borrowed from Django's slug generation.

    host and delim may be byte strings (decoded as UTF-8) or unicode
    strings; passing an already decoded host used to raise a TypeError.
    Returns a unicode string.
    """
    if isinstance(host, bytes):
        host = host.decode('UTF-8')
    if isinstance(delim, bytes):
        delim = delim.decode('UTF-8')

    words = []
    for word in _punct_re.split(host.lower()):
        # Drop everything that has no ASCII representation.
        word = normalize('NFKD', word).encode('ascii', 'ignore').decode('ascii')
        if word:
            words.append(word)
    return delim.join(words)

def check_mtr_pid(pid):
    """Check whether pid is a live process that looks like one of our mtr runs.

    Returns True only if the process can be signalled and its command line
    in /proc/<pid>/cmdline consists of an executable named "mtr" followed
    by the arguments "--report" and "--report-wide". Returns False if the
    process is gone, cannot be signalled, is a different program, or if
    its command line cannot be read.

    The executable name is compared by its base name: start_mtr launches
    mtr through the path found by find_executable, so argv[0] is usually a
    path such as /usr/bin/mtr rather than the bare word "mtr".
    """
    try:
        os.kill(pid, 0)  # signal 0 only probes, nothing is delivered
    except OSError:
        return False  # process does no longer exist

    try:
        # Binary mode: the arguments are separated by NUL bytes.
        with open("/proc/%d/cmdline" % pid, "rb") as cmdline_file:
            args = cmdline_file.read().split(b"\x00")
    except (IOError, OSError):
        return False  # cmdline vanished (race condition) or is unreadable

    return (os.path.basename(args[0]) == b"mtr"
            and args[1:3] == [b"--report", b"--report-wide"])

# A hop line starts with the hop number followed by a dot, e.g.
#  10.|-- 129.250.2.147   0.0%    10  325.6 315.5 310.3 325.6   5.0
_hopline_re = re.compile(r'^\s*\d+\.')


def _empty_host_status():
    """Return a fresh status entry for a host without any known results."""
    return {'hops': {}, 'lasttime': 0}


def _parse_hops(lines):
    """Extract the hop table from the lines of an mtr report.

    lines are the report lines following the timestamp line. Everything up
    to and including the first "HOST:" header line is skipped. Returns a
    dict mapping the hop number (counted from 1 in order of appearance) to
    a dict with the eight columns of that hop as strings. The dict is
    empty if there is no header or no hop line. Hop lines with too few
    columns are reported on stdout and skipped; their number stays unused.
    """
    hops = {}
    hopcount = 0
    header_seen = False
    for line in lines:
        if not header_seen:
            header_seen = line.startswith("HOST:")
            continue
        if not _hopline_re.match(line):
            continue  # continuation line such as "    |  `|-- 129.250.2.159"
        hopcount += 1
        parts = line.split()
        # parts[0] is the hop number ("1.|--"), followed by eight columns.
        if len(parts) < 9:
            sys.stdout.write("**ERROR** Bug parsing host/hop, "
                             "line has less than 9 parts: %s\n" % line)
            continue
        hops[hopcount] = {
            'hopname': parts[1],
            'loss'   : parts[2],
            'snt'    : parts[3],
            'last'   : parts[4],
            'avg'    : parts[5],
            'best'   : parts[6],
            'wrst'   : parts[7],
            'stddev' : parts[8],
        }
    return hops


def parse_report(host):
    """Fold a finished mtr report of host into the global status dict.

    The report file consists of a line with the start time (seconds since
    the epoch, written by start_mtr) followed by the output of mtr:

      1451228358
      Start: Sun Dec 27 14:35:18 2015
      HOST: purple         Loss%   Snt   Last   Avg  Best  Wrst StDev
        1.|-- 80.69.76.120    0.0%    10    0.3   0.4   0.3   0.6   0.0
        2.|-- 80.249.209.100  0.0%    10    1.0   1.1   0.8   1.4   0.0
        7.|-- ???            100.0    10    0.0   0.0   0.0   0.0   0.0
        8.|-- 74.125.136.147  0.0%    10    4.5   4.6   4.3   5.2   0.0

    Outcomes:
    - No report file: the existing status entry is kept (an empty one is
      created for a new host).
    - mtr still running according to the pid file: the entry is kept and
      flagged with 'running'.
    - Report with less than three lines: it is deleted and the entry kept.
    - Otherwise the entry is replaced by the parsed report and the report
      file is deleted. If parsing fails the host gets an empty entry, so
      that status[host] always exists when this function returns.
    """
    reportfile = report_filepre + host_to_filename(host)
    pidfile = reportfile + ".pid"

    if not os.path.exists(reportfile):
        status.setdefault(host, _empty_host_status())
        return

    # See if pidfile exists and if mtr is still running
    if os.path.exists(pidfile):
        try:
            with open(pidfile) as handle:
                pid = int(handle.readline().rstrip())
            if check_mtr_pid(pid):
                # Still running, we're done.
                status.setdefault(host, _empty_host_status())['running'] = True
                return
        except ValueError:
            # Pid file is broken. Process probably crashed..
            pass
        # Done running, get rid of pid file
        os.unlink(pidfile)

    # Parse the existing report
    with open(reportfile) as handle:
        lines = handle.readlines()
    if len(lines) < 3:
        sys.stdout.write("**ERROR** Report file %s has less than 3 lines, "
                         "expecting at least 1 hop! Throwing away invalid report\n" % reportfile)
        os.unlink(reportfile)
        status.setdefault(host, _empty_host_status())
        return

    try:
        lasttime = int(float(lines[0]))
        status[host] = {'hops': _parse_hops(lines[1:]), 'lasttime': lasttime}
    except Exception as e:
        sys.stdout.write("**ERROR** Could not parse report file %s, "
                         "tossing away invalid data %s\n" % (reportfile, e))
        # Keep an (empty) entry instead of deleting it: start_mtr looks the
        # host up in status right after this function has been called.
        status[host] = _empty_host_status()
    # The report has been consumed, also if it contained no hop at all.
    os.unlink(reportfile)

def output_report(host):
    """Write the last known mtr result of host to stdout as a single line.

    The line has the form HOST|LASTTIME|HOPCOUNT followed by the eight
    fields of every hop, all separated by '|'. Nothing is written if the
    global status dict has no (or an empty) entry for the host.

    Hops are written in ascending hop number. The format is purely
    positional, so the order must not depend on dict iteration order.
    """
    hostdict = status.get(host)
    if not hostdict:
        return

    hop_fields = ('hopname', 'loss', 'snt', 'last', 'avg', 'best', 'wrst', 'stddev')
    hops = hostdict["hops"]
    fields = [host, hostdict["lasttime"], len(hops)]
    for hop in sorted(hops):
        hopinfo = hops[hop]
        fields.extend(hopinfo[name] for name in hop_fields)
    sys.stdout.write("%s\n" % "|".join("%s" % field for field in fields))

def _build_mtr_command(host):
    """Return the mtr argument vector for host, built from its config section.

    Evaluates the options type, port, force_ipv4, force_ipv6, size, count,
    dns, address, interval and timeout of the section. Options whose value
    is None (port, address, interval, timeout by default) are not passed
    to mtr. Raises whatever the config parser raises for invalid values.
    """
    pingtype = config.get(host, "type")
    port     = config.get(host, "port")
    address  = config.get(host, "address")
    interval = config.get(host, "interval")
    timeout  = config.get(host, "timeout")

    # check_mtr_pid identifies our processes by these leading arguments.
    options = [mtr_prog, '--report', '--report-wide']
    if pingtype == 'tcp':
        options.append("--tcp")
    if pingtype == 'udp':
        options.append("--udp")
    if port is not None:
        options += ["--port", str(port)]
    if config.getboolean(host, "force_ipv4"):
        options.append("-4")
    if config.getboolean(host, "force_ipv6"):
        options.append("-6")
    options += ["-s", str(config.getint(host, "size"))]
    options += ["-c", str(config.getint(host, "count"))]
    if not config.getboolean(host, "dns"):
        options.append("--no-dns")
    if address is not None:
        options += ["--address", str(address)]
    if interval is not None:
        options += ["-i", str(interval)]
    if timeout is not None:
        options += ["--timeout", str(timeout)]
    options.append(str(host))
    return options


def start_mtr(host):
    """Start a background mtr run for host if one is due.

    Nothing is started while a previous run is still flagged as running in
    the global status dict, or while less than the configured "time"
    (seconds) has passed since the last report of the host.

    Otherwise the process forks. The parent returns immediately. The
    child detaches into its own session, starts mtr with its output
    redirected to the report file (whose first line is the start time),
    records the pid of mtr in <reportfile>.pid and terminates through
    os._exit() without ever returning to the caller.
    """
    # Evaluate the configuration first, so invalid values are reported by
    # the calling process no matter whether a run is started.
    options = _build_mtr_command(host)
    min_interval = config.getint(host, "time")

    # Tolerate a host without status entry instead of raising KeyError.
    hoststatus = status.get(host, {'hops': {}, 'lasttime': 0})

    if "running" in hoststatus:
        if debug:
            sys.stdout.write("MTR for host still running, not restarting MTR!\n")
        return

    now = time.time()
    if now - hoststatus["lasttime"] < min_interval:
        if debug:
            sys.stdout.write("%s - %s = %s is smaller than %s => mtr run not needed yet.\n" %
                (now, hoststatus["lasttime"], now - hoststatus["lasttime"], min_interval))
        return

    # Flush before forking: otherwise the child inherits a copy of the
    # not yet written output and may write it a second time.
    sys.stdout.flush()

    pid = os.fork()
    if pid > 0:
        # Parent process, return and keep running
        return

    # Child process from here on. It must always leave through os._exit(),
    # never by returning or by an exception reaching the caller.
    exitcode = 1
    try:
        os.chdir("/")
        # NOTE(review): with umask 0 the report and pid files created below
        # get mode 0666 (world-writable) -- confirm this is intended.
        os.umask(0)
        os.setsid()

        # Close all fd except stdin,out,err
        for fd in range(3, 256):
            try:
                os.close(fd)
            except OSError:
                pass

        if debug:
            sys.stdout.write("Startin MTR: %s\n" % (" ".join(options)))
            sys.stdout.flush()  # os._exit() would discard buffered output

        reportfile = report_filepre + host_to_filename(host)
        if os.path.exists(reportfile):
            os.unlink(reportfile)
        report = open(reportfile, 'a+')
        # First line of the report: start time, read back by parse_report.
        report.write(str(int(time.time())) + "\n")
        report.flush()
        process = subprocess.Popen(options, stdout=report, stderr=report)

        # Write pid to report.pid
        with open(reportfile + ".pid", 'w') as pidfile:
            pidfile.write("%d\n" % process.pid)
        exitcode = os.EX_OK
    except Exception as e:
        sys.stderr.write("**ERROR** Failed to start mtr for %s: %r\n" % (host, e))
        sys.stderr.flush()
    finally:
        os._exit(exitcode)

# Main program. The section header is written first, so everything printed
# afterwards (including **ERROR** lines) is part of the mtr section. Fields
# are separated by '|', which is ASCII code 124.
sys.stdout.write("<<<mtr:sep(124)>>>\n")
config = read_config()
status = read_status()
for hostname in config.sections():
    parse_report(hostname)   # fold in an outstanding report, if there is one
    output_report(hostname)  # print the last known values
    start_mtr(hostname)      # launch a new run if one is due
save_status(status)
