executors.py 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121
  1. # Copyright: (c) OpenSpug Organization. https://github.com/openspug/spug
  2. # Copyright: (c) <spug.dev@gmail.com>
  3. # Released under the AGPL-3.0 License.
  4. from django.db import close_old_connections
  5. from django_redis import get_redis_connection
  6. from apps.host.models import Host
  7. from apps.monitor.utils import handle_notify
  8. from socket import socket
  9. import subprocess
  10. import platform
  11. import requests
  12. import logging
  13. import json
  14. import time
# Route warnings issued through the standard `warnings` module into the
# logging system instead of having them written straight to stderr.
logging.captureWarnings(True)
  16. def site_check(url, limit):
  17. try:
  18. res = requests.get(url, timeout=10)
  19. if limit:
  20. duration = int(res.elapsed.total_seconds() * 1000)
  21. if duration > int(limit):
  22. return False, f'响应时间:{duration}ms'
  23. return 200 <= res.status_code < 400, f'返回状态码:{res.status_code}'
  24. except Exception as e:
  25. return False, f'异常信息:{e}'
  26. def port_check(addr, port):
  27. try:
  28. sock = socket()
  29. sock.settimeout(5)
  30. sock.connect((addr, int(port)))
  31. sock.close()
  32. return True, '端口状态检测正常'
  33. except Exception as e:
  34. return False, f'异常信息:{e}'
  35. def ping_check(addr):
  36. try:
  37. if platform.system().lower() == 'windows':
  38. command = f'ping -n 1 -w 3000 {addr}'
  39. else:
  40. command = f'ping -c 1 -W 3 {addr}'
  41. task = subprocess.run(command, shell=True, stdout=subprocess.PIPE)
  42. if task.returncode == 0:
  43. return True, 'Ping检测正常'
  44. else:
  45. return False, 'Ping检测失败'
  46. except Exception as e:
  47. return False, f'异常信息:{e}'
  48. def host_executor(host, command):
  49. try:
  50. with host.get_ssh() as ssh:
  51. exit_code, out = ssh.exec_command_raw(command)
  52. if exit_code == 0:
  53. return True, out or '检测状态正常'
  54. else:
  55. return False, out or f'退出状态码:{exit_code}'
  56. except Exception as e:
  57. return False, f'异常信息:{e}'
  58. def monitor_worker_handler(job):
  59. task_id, tp, addr, extra, threshold, quiet = json.loads(job)
  60. target = addr
  61. if tp == '1':
  62. is_ok, message = site_check(addr, extra)
  63. elif tp == '2':
  64. is_ok, message = port_check(addr, extra)
  65. elif tp == '5':
  66. is_ok, message = ping_check(addr)
  67. elif tp not in ('3', '4'):
  68. is_ok, message = False, f'invalid monitor type for {tp!r}'
  69. else:
  70. close_old_connections()
  71. command = f'ps -ef|grep -v grep|grep {extra!r}' if tp == '3' else extra
  72. host = Host.objects.filter(pk=addr).first()
  73. if not host:
  74. is_ok, message = False, f'unknown host id for {addr!r}'
  75. else:
  76. is_ok, message = host_executor(host, command)
  77. target = f'{host.name}({host.hostname})'
  78. rds, key, f_count, f_time = get_redis_connection(), f'spug:det:{task_id}', f'c_{addr}', f't_{addr}'
  79. v_count, v_time = rds.hmget(key, f_count, f_time)
  80. if is_ok:
  81. if v_count:
  82. rds.hdel(key, f_count, f_time)
  83. if v_time:
  84. logging.warning('send recovery notification')
  85. handle_notify(task_id, target, is_ok, message, int(v_count) + 1)
  86. return
  87. v_count = rds.hincrby(key, f_count)
  88. if v_count >= threshold:
  89. if not v_time or int(time.time()) - int(v_time) >= quiet * 60:
  90. rds.hset(key, f_time, int(time.time()))
  91. logging.warning('send fault alarm notification')
  92. handle_notify(task_id, target, is_ok, message, v_count)
  93. def dispatch(tp, addr, extra):
  94. if tp == '1':
  95. return site_check(addr, extra)
  96. elif tp == '2':
  97. return port_check(addr, extra)
  98. elif tp == '5':
  99. return ping_check(addr)
  100. elif tp == '3':
  101. command = f'ps -ef|grep -v grep|grep {extra!r}'
  102. elif tp == '4':
  103. command = extra
  104. else:
  105. raise TypeError(f'invalid monitor type: {tp!r}')
  106. host = Host.objects.filter(pk=addr).first()
  107. return host_executor(host, command)