# Copyright: (c) OpenSpug Organization. https://github.com/openspug/spug
# Copyright: (c) <spug.dev@gmail.com>
# Released under the AGPL-3.0 License.
from django.db import close_old_connections
from django_redis import get_redis_connection
from apps.host.models import Host
from apps.monitor.utils import handle_notify
from socket import socket
import subprocess
import platform
import requests
import logging
import json
import time

logging.captureWarnings(True)


def site_check(url, limit):
    """Check a site with an HTTP GET request.

    :param url: address passed to ``requests.get`` (10 second timeout).
    :param limit: optional maximum response time in milliseconds; a falsy
        value disables the response-time check.
    :return: ``(is_ok, message)``. The check fails when the elapsed time
        exceeds ``limit``, when the status code is outside 200-399, or when
        the request raises.
    """
    try:
        res = requests.get(url, timeout=10)
        if limit:
            duration = int(res.elapsed.total_seconds() * 1000)
            if duration > int(limit):
                return False, f'响应时间:{duration}ms'
        return 200 <= res.status_code < 400, f'返回状态码:{res.status_code}'
    except Exception as e:
        return False, f'异常信息:{e}'


def port_check(addr, port):
    """Check that a TCP connection to ``addr:port`` can be opened.

    :param addr: host name or IP address to connect to.
    :param port: port number; converted with ``int()``.
    :return: ``(is_ok, message)``. Any exception (refused connection,
        timeout after 5 seconds, invalid port, ...) yields a failure.
    """
    try:
        # The context manager closes the socket even when connect() raises,
        # so a failing check does not leak a file descriptor.
        with socket() as sock:
            sock.settimeout(5)
            sock.connect((addr, int(port)))
        return True, '端口状态检测正常'
    except Exception as e:
        return False, f'异常信息:{e}'


def ping_check(addr):
    """Send a single ping to ``addr`` using the system ``ping`` command.

    :param addr: host name or IP address to ping.
    :return: ``(is_ok, message)``; success means ``ping`` exited with 0.
    """
    try:
        target = str(addr).strip()
        # An empty address cannot succeed, and one starting with '-' would be
        # parsed by ping as an option rather than as a destination.
        if not target or target.startswith('-'):
            return False, 'Ping检测失败'
        # The command is an argument list executed without a shell, so shell
        # metacharacters in the address are never interpreted.
        if platform.system().lower() == 'windows':
            command = ['ping', '-n', '1', '-w', '3000', target]
        else:
            command = ['ping', '-c', '1', '-W', '3', target]
        task = subprocess.run(command, stdout=subprocess.PIPE)
        if task.returncode == 0:
            return True, 'Ping检测正常'
        else:
            return False, 'Ping检测失败'
    except Exception as e:
        return False, f'异常信息:{e}'


def host_executor(host, command):
    """Run ``command`` on ``host`` over SSH and report the outcome.

    :param host: object providing ``get_ssh()``, used as a context manager.
    :param command: command string passed to ``ssh.exec_command_raw``.
    :return: ``(is_ok, message)``. ``is_ok`` is True when the exit code is 0;
        the message is the command output, or a default text when the output
        is empty. Any exception yields a failure.
    """
    try:
        with host.get_ssh() as ssh:
            exit_code, out = ssh.exec_command_raw(command)
        if exit_code == 0:
            return True, out or '检测状态正常'
        else:
            return False, out or f'退出状态码:{exit_code}'
    except Exception as e:
        return False, f'异常信息:{e}'


def monitor_worker_handler(job):
    """Run one monitoring job and send alarm / recovery notifications.

    :param job: JSON text of a six-item array
        ``[task_id, tp, addr, extra, threshold, quiet]``. ``tp`` selects the
        check: '1' site, '2' port, '5' ping, '3' process lookup on a host,
        '4' custom command on a host. For '3' and '4', ``addr`` is used as
        the primary key of a ``Host`` record.

    Failure state is kept in the Redis hash ``spug:det:<task_id>`` with two
    fields per address: ``c_<addr>`` (failure counter) and ``t_<addr>``
    (timestamp of the last alarm).
    """
    task_id, tp, addr, extra, threshold, quiet = json.loads(job)
    target = addr
    if tp == '1':
        is_ok, message = site_check(addr, extra)
    elif tp == '2':
        is_ok, message = port_check(addr, extra)
    elif tp == '5':
        is_ok, message = ping_check(addr)
    elif tp not in ('3', '4'):
        is_ok, message = False, f'invalid monitor type for {tp!r}'
    else:
        close_old_connections()
        command = f'ps -ef|grep -v grep|grep {extra!r}' if tp == '3' else extra
        host = Host.objects.filter(pk=addr).first()
        if not host:
            is_ok, message = False, f'unknown host id for {addr!r}'
        else:
            is_ok, message = host_executor(host, command)
            target = f'{host.name}({host.hostname})'

    rds, key, f_count, f_time = get_redis_connection(), f'spug:det:{task_id}', f'c_{addr}', f't_{addr}'
    v_count, v_time = rds.hmget(key, f_count, f_time)
    if is_ok:
        # A passing check clears the recorded failure state.
        if v_count:
            rds.hdel(key, f_count, f_time)
        # A stored alarm timestamp means an alarm was sent earlier, so the
        # recovery is reported as well.
        if v_time:
            logging.warning('send recovery notification')
            handle_notify(task_id, target, is_ok, message, int(v_count) + 1)
        return
    v_count = rds.hincrby(key, f_count)
    if v_count >= threshold:
        # Alarm on the first qualifying failure, then at most once per
        # `quiet` minutes while the failure persists.
        if not v_time or int(time.time()) - int(v_time) >= quiet * 60:
            rds.hset(key, f_time, int(time.time()))
            logging.warning('send fault alarm notification')
            handle_notify(task_id, target, is_ok, message, v_count)


def dispatch(tp, addr, extra):
    """Run a single check immediately and return its result.

    :param tp: monitor type: '1' site, '2' port, '5' ping, '3' process
        lookup on a host, '4' custom command on a host.
    :param addr: check target; for types '3' and '4' the primary key of a
        ``Host`` record.
    :param extra: type-specific argument (response-time limit, port,
        process name or command).
    :return: ``(is_ok, message)``.
    :raises TypeError: when ``tp`` is not a known monitor type.
    """
    if tp == '1':
        return site_check(addr, extra)
    elif tp == '2':
        return port_check(addr, extra)
    elif tp == '5':
        return ping_check(addr)
    elif tp == '3':
        command = f'ps -ef|grep -v grep|grep {extra!r}'
    elif tp == '4':
        command = extra
    else:
        raise TypeError(f'invalid monitor type: {tp!r}')
    host = Host.objects.filter(pk=addr).first()
    if not host:
        # Same message as monitor_worker_handler, instead of the opaque
        # AttributeError text produced by calling get_ssh() on None.
        return False, f'unknown host id for {addr!r}'
    return host_executor(host, command)