342 lines
14 KiB
Python
Executable File
342 lines
14 KiB
Python
Executable File
import time
|
|
from threading import Thread
|
|
|
|
import psutil
|
|
from prometheus_client import Gauge, Enum, Counter, REGISTRY
|
|
|
|
import app_config
|
|
|
|
# State names shared by the up/down Enum metrics in this module: index 0
# is used when a check is up, index 1 when it is down.
ENUM_UP_DN_STATES = ['up', 'dn']
|
|
|
|
def get_metric(name):
    """Return the collector registered under *name*, or ``None`` if absent.

    The lookup goes through the default registry's name-to-collector
    mapping.
    """
    # NOTE(review): ``_names_to_collectors`` is a private attribute of the
    # registry object; confirm it is still present when upgrading
    # prometheus_client.
    registered = REGISTRY._names_to_collectors
    return registered.get(name)
|
|
|
|
def get_gauge_metric(metric_name, descr, labels=None):
    """Return the Gauge called *metric_name*, creating it on first use.

    :param metric_name: metric name used for the lookup and for creation.
    :param descr: help text passed to ``Gauge`` when it is created.
    :param labels: optional label names; ``None`` or an empty sequence
        creates a Gauge without label names.
    :return: the collector found by ``get_metric`` or a new ``Gauge``.
    """
    existing = get_metric(metric_name)
    if existing is not None:
        # Reuse whatever is already stored under this name.
        return existing
    if not labels:
        return Gauge(metric_name, descr)
    return Gauge(metric_name, descr, labelnames=labels)
|
|
|
|
def get_counter_metric(metric_name, descr, labels=None):
    """Return the Counter called *metric_name*, creating it on first use.

    :param metric_name: metric name used for the lookup and for creation.
    :param descr: help text passed to ``Counter`` when it is created.
    :param labels: optional label names; a falsy value creates a Counter
        without label names.
    :return: the collector found by ``get_metric`` or a new ``Counter``.
    """
    existing = get_metric(metric_name)
    if existing is not None:
        # Reuse whatever is already stored under this name.
        return existing
    if not labels:
        return Counter(metric_name, descr)
    return Counter(metric_name, descr, labelnames=labels)
|
|
|
|
def get_enum_metric(metric_name, descr, states, labels=None):
    """Return the Enum called *metric_name*, creating it on first use.

    :param metric_name: metric name used for the lookup and for creation.
    :param descr: help text passed to ``Enum`` when it is created.
    :param states: state names passed to ``Enum`` when it is created.
    :param labels: optional label names; a falsy value creates an Enum
        without label names.
    :return: the collector found by ``get_metric`` or a new ``Enum``.
    """
    existing = get_metric(metric_name)
    if existing is not None:
        # Reuse whatever is already stored under this name.
        return existing
    if not labels:
        return Enum(metric_name, descr, states=states)
    return Enum(metric_name, descr, states=states, labelnames=labels)
|
|
|
|
def get_time_millis():
    """Return the current ``time.time()`` value as rounded milliseconds."""
    seconds_now = time.time()
    return round(seconds_now * 1000)
|
|
|
|
|
|
class AbstractData:
    """Base class for metric holders that are refreshed on an interval.

    Keeps the holder's name, refresh interval, server prefix and the time
    of the last update, and owns the shared ``das_collect_time_ms`` gauge
    that subclasses fill in through :meth:`set_collect_time`.
    """

    g_collect: Gauge

    def __init__(self, name, interval, prefix=''):
        """Store the identity of this holder and register its collect gauge.

        :param name: value of the ``name`` label on the collect gauge.
        :param interval: minimum number of seconds between updates (it is
            compared against ``int(time.time())`` in
            :meth:`is_need_to_update`).
        :param prefix: value of the ``server`` label.
        """
        self.name = name
        self.interval = interval
        self.instance_prefix = prefix
        self.updated_at = int(time.time())
        self.g_collect = get_gauge_metric(
            'das_collect_time_ms',
            'Total time spent collecting metrics [name] on [server] in milliseconds',
            ['server', 'name'])
        # A single call creates the labelled child; repeating it with the
        # same label values has no further effect.
        self.g_collect.labels(server=prefix, name=name)

    def set_update_time(self):
        """Record the current time (whole seconds) as the last update."""
        self.updated_at = int(time.time())

    def is_need_to_update(self):
        """Return True once at least ``interval`` seconds passed since the last update."""
        return self.updated_at + self.interval <= int(time.time())

    def set_collect_time(self, value=0):
        """Set this holder's ``das_collect_time_ms`` sample to *value*."""
        self.g_collect.labels(server=self.instance_prefix, name=self.name).set(value)

    def print_trigger_info(self):
        """Print a timestamped info line when ``app_config.IS_PRINT_INFO`` is truthy."""
        if app_config.IS_PRINT_INFO:
            print(f'{time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())} [INFO]: Touch "{self.name}"')
|
|
|
|
|
|
class DiskData(AbstractData):
    """Total/used/free byte figures for one mount point, exported as a gauge.

    All three figures share the ``das_disk_bytes`` gauge and are told
    apart by the ``metric`` label.
    """

    g_all: Gauge

    def __init__(self, mount_point='/', total=0, used=0, free=0, interval=60, name='', prefix=''):
        """Register the gauge children and publish the initial figures.

        :param mount_point: value of the ``mount`` label.
        :param total: initial value for the ``total`` sample.
        :param used: initial value for the ``used`` sample.
        :param free: initial value for the ``free`` sample.
        :param interval: seconds between updates (see ``AbstractData``).
        :param name: value of the ``name`` label.
        :param prefix: value of the ``server`` label.
        """
        super().__init__(name, interval, prefix)
        self.mount_point = mount_point
        self.total = total
        self.used = used
        self.free = free
        self.g_all = get_gauge_metric(
            'das_disk_bytes',
            'Bytes [total, used, free] on [mount_point] for [server]',
            ['name', 'mount', 'server', 'metric'])
        for kind in ('total', 'used', 'free'):
            self.g_all.labels(name=name, mount=mount_point, server=self.instance_prefix, metric=kind)
        self.set_data(total, used, free)

    def set_data(self, total, used, free):
        """Publish new figures and remember them on the instance.

        Also records how long the update took, stamps the update time and
        prints the optional trigger line.
        """
        time_ms = get_time_millis()
        # Keep the attributes in step with the exported samples, as the
        # other data classes in this module do with their own values.
        self.total = total
        self.used = used
        self.free = free
        for kind, amount in (('total', total), ('used', used), ('free', free)):
            self.g_all.labels(name=self.name, mount=self.mount_point,
                              server=self.instance_prefix, metric=kind).set(amount)
        self.set_collect_time(get_time_millis() - time_ms)
        self.set_update_time()
        self.print_trigger_info()
|
|
|
|
|
|
class HealthData(AbstractData):
    """Up/down state of a service endpoint, exported as an Enum metric.

    The request settings (url, method, timeout, credentials, headers) are
    only stored here; this class does not perform the request itself.
    """

    e_state: Enum

    def __init__(self, name, url, interval, timeout, is_up=False, method='GET', user=None, password=None, headers=None, prefix=''):
        """Store the check settings and publish the initial state.

        :param name: value of the ``name`` label.
        :param url: value of the ``url`` label.
        :param interval: seconds between updates (see ``AbstractData``).
        :param timeout: stored as ``self.timeout``.
        :param is_up: initial state; truthy means up.
        :param method: HTTP method name; stored upper-cased.
        :param user: stored as ``self.user``.
        :param password: stored as ``self.password``.
        :param headers: stored as ``self.headers``; ``None`` becomes ``{}``.
        :param prefix: value of the ``server`` label.
        """
        super().__init__(name, interval, prefix)
        if headers is None:
            headers = {}
        self.url = url
        self.timeout = timeout
        self.is_up = is_up
        self.method = method.upper()
        self.user = user
        self.password = password
        self.headers = headers
        self.e_state = get_enum_metric(
            'das_service_health',
            'Service [name, url, method, server] health',
            ENUM_UP_DN_STATES, ['name', 'url', 'method', 'server'])
        # Use the upper-cased method here as well: set_data() labels with
        # self.method, so the raw argument (e.g. 'get') would create a
        # second series that is never updated afterwards.
        self.e_state.labels(name=name, url=url, method=self.method, server=self.instance_prefix)
        self.set_data(is_up)

    def set_data(self, is_up):
        """Store *is_up* and set the Enum sample to the matching state.

        Also records how long the update took, stamps the update time and
        prints the optional trigger line.
        """
        time_ms = get_time_millis()
        self.is_up = is_up
        state = ENUM_UP_DN_STATES[0] if is_up else ENUM_UP_DN_STATES[1]
        self.e_state.labels(name=self.name, url=self.url, method=self.method,
                            server=self.instance_prefix).state(state)
        self.set_collect_time(get_time_millis() - time_ms)
        self.set_update_time()
        self.print_trigger_info()
|
|
|
|
|
|
class RestValueData(AbstractData):
    """Integer value obtained from a REST endpoint, exported as a gauge.

    The request settings and result-extraction hints are only stored
    here; this class does not perform the request itself.
    """

    g_value: Gauge

    def __init__(self, name, url, interval, timeout, value=None, method='GET', user=None, password=None, headers=None, prefix='',
                 result_type='single', result_path=''):
        """Store the request settings and publish the initial value.

        :param name: value of the ``name`` label.
        :param url: value of the ``url`` label.
        :param interval: seconds between updates (see ``AbstractData``).
        :param timeout: stored as ``self.timeout``.
        :param value: initial value handed to :meth:`set_data`.
        :param method: HTTP method name; stored upper-cased.
        :param user: stored as ``self.user``.
        :param password: stored as ``self.password``.
        :param headers: stored as ``self.headers``; ``None`` becomes ``{}``.
        :param prefix: value of the ``server`` label.
        :param result_type: stored as ``self.type``.
        :param result_path: stored as ``self.path``.
        """
        super().__init__(name, interval, prefix)
        if headers is None:
            headers = {}
        self.url = url
        self.timeout = timeout
        self.method = method.upper()
        self.user = user
        self.password = password
        self.headers = headers
        self.value = value
        self.type = result_type
        self.path = result_path
        self.g_value = get_gauge_metric(
            'das_rest_value',
            'Remote REST API [name, url, method, server] Value',
            ['name', 'url', 'method', 'server'])
        # Use the upper-cased method here as well: set_data() labels with
        # self.method, so the raw argument (e.g. 'get') would create a
        # second series that is never updated afterwards.
        self.g_value.labels(name=name, url=url, method=self.method, server=self.instance_prefix)
        self.set_data(value)

    def set_data(self, value):
        """Store *value* and export ``int(value)``, or 0 if it cannot be converted.

        Also records how long the update took, stamps the update time and
        prints the optional trigger line.
        """
        time_ms = get_time_millis()
        self.value = value
        try:
            numeric = int(value)
        except (TypeError, ValueError, OverflowError):
            # None, non-numeric text, NaN or infinity: export 0 instead.
            numeric = 0
        self.g_value.labels(name=self.name, url=self.url, method=self.method,
                            server=self.instance_prefix).set(numeric)
        self.set_collect_time(get_time_millis() - time_ms)
        self.set_update_time()
        self.print_trigger_info()
|
|
|
|
|
|
class ShellValueData(AbstractData):
    """Integer value associated with a shell command, exported as a gauge.

    The command and its arguments are only stored here; this class does
    not run the command itself.
    """

    g_value: Gauge

    def __init__(self, name, interval, command, value=None, args=None, prefix=''):
        """Store the command settings and publish the initial value.

        :param name: value of the ``name`` label.
        :param interval: seconds between updates (see ``AbstractData``).
        :param command: value of the ``command`` label.
        :param value: initial value handed to :meth:`set_data`.
        :param args: stored as ``self.args``; ``None`` becomes ``{}``.
        :param prefix: value of the ``server`` label.
        """
        super().__init__(name, interval, prefix)
        if args is None:
            args = {}
        self.command = command
        self.value = value
        self.args = args
        self.g_value = get_gauge_metric(
            'das_shell_value',
            'Shell [name, command, server] Value ',
            ['name', 'command', 'server'])
        self.g_value.labels(name=name, command=command, server=self.instance_prefix)
        self.set_data(value)

    def set_data(self, value):
        """Store *value* and export ``int(value)``, or 0 if it cannot be converted.

        Also records how long the update took, stamps the update time and
        prints the optional trigger line.
        """
        time_ms = get_time_millis()
        self.value = value
        try:
            numeric = int(value)
        except (TypeError, ValueError, OverflowError):
            # None, non-numeric text, NaN or infinity: export 0 instead.
            numeric = 0
        self.g_value.labels(name=self.name, command=self.command,
                            server=self.instance_prefix).set(numeric)
        self.set_collect_time(get_time_millis() - time_ms)
        self.set_update_time()
        self.print_trigger_info()
|
|
|
|
|
|
class IcmpData(AbstractData):
    """Up/down availability of a host, exported as an Enum metric.

    The address and count are only stored here; this class does not send
    any probe itself.
    """

    e_state: Enum

    def __init__(self, name, ip, count, interval, is_up=False, prefix=''):
        """Store the probe settings and publish the initial state.

        :param name: value of the ``name`` label.
        :param ip: value of the ``ip`` label.
        :param count: stored as ``self.count``.
        :param interval: seconds between updates (see ``AbstractData``).
        :param is_up: initial state; truthy means up.
        :param prefix: value of the ``server`` label.
        """
        super().__init__(name, interval, prefix)
        self.ip = ip
        self.count = count
        self.is_up = is_up
        label_names = ['name', 'ip', 'server']
        self.e_state = get_enum_metric(
            'das_host_available',
            'Host [name, ip, server] availability',
            ENUM_UP_DN_STATES,
            label_names,
        )
        self.e_state.labels(name=name, ip=ip, server=self.instance_prefix)
        self.set_data(is_up)

    def set_data(self, is_up):
        """Store *is_up* and set the Enum sample to the matching state.

        Also records how long the update took, stamps the update time and
        prints the optional trigger line.
        """
        started_ms = get_time_millis()
        self.is_up = is_up
        state_index = 0 if is_up else 1
        child = self.e_state.labels(name=self.name, ip=self.ip, server=self.instance_prefix)
        child.state(ENUM_UP_DN_STATES[state_index])
        elapsed_ms = get_time_millis() - started_ms
        self.set_collect_time(elapsed_ms)
        self.set_update_time()
        self.print_trigger_info()
|
|
|
|
|
|
class InterfaceData(AbstractData):
    """Sent/received byte totals of a network interface, exported as a counter.

    Both directions share the ``das_net_interface_bytes`` counter and are
    told apart by the ``metric`` label. The interface name is stored as
    ``self.iface`` but is not used as a label.
    """

    g_all: Counter

    def __init__(self, name, iface, interval, sent, receive, prefix=''):
        """Store the baseline readings and register the counter children.

        :param name: value of the ``name`` label.
        :param iface: stored as ``self.iface``.
        :param interval: seconds between updates (see ``AbstractData``).
        :param sent: baseline reading for the ``sent`` direction.
        :param receive: baseline reading for the ``receive`` direction.
        :param prefix: value of the ``server`` label.
        """
        super().__init__(name, interval, prefix)
        self.iface = iface
        self.sent = sent
        self.receive = receive
        self.g_all = get_counter_metric(
            'das_net_interface_bytes',
            'Network Interface [name, server, metric=[sent,receive]] bytes',
            ['name', 'server', 'metric'])
        self.g_all.labels(name=name, server=self.instance_prefix, metric='sent')
        self.g_all.labels(name=name, server=self.instance_prefix, metric='receive')
        self.set_data(sent, receive)

    def set_data(self, sent, receive):
        """Add the growth since the previous readings to the counter.

        Also records how long the update took, stamps the update time and
        prints the optional trigger line.
        """
        time_ms = get_time_millis()
        sent_delta = self._counter_delta(sent, self.sent)
        recv_delta = self._counter_delta(receive, self.receive)
        self.sent = sent
        self.receive = receive
        self.g_all.labels(name=self.name, server=self.instance_prefix, metric='sent').inc(sent_delta)
        self.g_all.labels(name=self.name, server=self.instance_prefix, metric='receive').inc(recv_delta)
        self.set_collect_time(get_time_millis() - time_ms)
        self.set_update_time()
        self.print_trigger_info()

    @staticmethod
    def _counter_delta(current, previous):
        """Return a non-negative increment from *previous* to *current*."""
        delta = current - previous
        if delta >= 0:
            return delta
        # The reading went backwards, and Counter.inc() rejects negative
        # amounts. Treat it as a source counter that restarted from zero
        # and count what it has accumulated since then.
        return max(current, 0)
|
|
|
|
|
|
class UptimeData(AbstractData):
    """Seconds elapsed since this class was defined, exported as a counter."""

    # Captured once, when the class body is executed.
    START_TIME = int(time.time())

    c_uptime: Counter

    def __init__(self, interval, prefix=''):
        """Register the ``das_exporter`` counter child and publish the uptime.

        :param interval: seconds between updates (see ``AbstractData``).
        :param prefix: value of the ``server`` label.
        """
        super().__init__('uptime', interval, prefix)
        self.uptime = 0
        self.c_uptime = get_counter_metric(
            'das_exporter',
            'Exporter Uptime for [server] in seconds',
            ['server'])
        self.c_uptime.labels(server=self.instance_prefix)
        self.set_data()

    def set_data(self):
        """Advance the counter by the seconds gained since the last call.

        Also records how long the update took, stamps the update time and
        prints the optional trigger line.
        """
        time_ms = get_time_millis()
        uptime = int(time.time()) - self.START_TIME
        # time.time() can step backwards (clock adjustment), and
        # Counter.inc() rejects negative amounts; only move forward and
        # keep the previous high-water mark otherwise.
        if uptime > self.uptime:
            self.c_uptime.labels(server=self.instance_prefix).inc(uptime - self.uptime)
            self.uptime = uptime
        self.set_collect_time(get_time_millis() - time_ms)
        self.set_update_time()
        self.print_trigger_info()
|
|
|
|
|
|
class SystemData(AbstractData):
    """Host uptime, CPU, memory and temperature readings taken with psutil."""

    # Captured once, when the class body is executed.
    BOOT_TIME = int(psutil.boot_time())

    c_uptime: Counter
    g_cpu: Gauge
    g_memory: Gauge
    g_chassis_temp: Gauge
    g_cpu_temp: Gauge

    def __init__(self, interval, prefix=''):
        """Register all system metrics and publish a first set of readings.

        :param interval: seconds between updates (see ``AbstractData``).
        :param prefix: value of the ``server`` label.
        """
        super().__init__('system', interval, prefix)
        self.cpu, self.memory, self.uptime, self.ch_temp, self.cpu_temp = 0, 0, 0, 0, 0
        self.init_metrics()
        self.set_data()

    def init_metrics(self):
        """Look up or create every metric and its child for this server."""
        self.c_uptime = get_counter_metric('das_uptime_seconds', 'System uptime on [server]', ['server'])
        self.c_uptime.labels(server=self.instance_prefix)
        self.g_cpu = get_gauge_metric('das_cpu_percent', 'CPU used percent on [server]', ['server'])
        self.g_cpu.labels(server=self.instance_prefix)
        self.g_memory = get_gauge_metric('das_memory_percent', 'Memory used percent on [server]', ['server'])
        self.g_memory.labels(server=self.instance_prefix)
        self.g_chassis_temp = get_gauge_metric('das_ChassisTemperature_current', 'Current Chassis Temperature overall on [server]', ['server'])
        self.g_chassis_temp.labels(server=self.instance_prefix)
        self.g_cpu_temp = get_gauge_metric('das_CpuTemperature_current', 'Current CPU Temperature overall on [server]', ['server'])
        self.g_cpu_temp.labels(server=self.instance_prefix)

    def set_data(self):
        """Refresh uptime, memory, CPU and temperature metrics.

        The CPU reading is started in a background thread because it
        blocks for its one-second sampling window. Temperatures fall back
        to -500 when they cannot be read. Also records how long the
        update took, stamps the update time and prints the optional
        trigger line.
        """
        time_ms = get_time_millis()

        uptime = int(time.time()) - self.BOOT_TIME
        # time.time() can step backwards (clock adjustment), and
        # Counter.inc() rejects negative amounts; only move forward.
        if uptime > self.uptime:
            self.c_uptime.labels(server=self.instance_prefix).inc(uptime - self.uptime)
            self.uptime = uptime

        self.memory = psutil.virtual_memory().percent
        self.g_memory.labels(server=self.instance_prefix).set(self.memory)

        # Pass the bound method itself (not its result) and start() the
        # thread so the sampling really runs in the background; daemon so
        # a pending sample never delays interpreter exit.
        Thread(target=self.set_cpu_percent, daemon=True).start()

        try:
            self.cpu_temp, self.ch_temp = self._read_temperatures()
        except Exception:
            # Sensors missing, unsupported on this platform, or empty:
            # publish the sentinel value for both temperatures.
            self.ch_temp = -500
            self.cpu_temp = -500
        self.g_chassis_temp.labels(server=self.instance_prefix).set(self.ch_temp)
        self.g_cpu_temp.labels(server=self.instance_prefix).set(self.cpu_temp)

        self.set_collect_time(get_time_millis() - time_ms)
        self.set_update_time()
        self.print_trigger_info()

    @staticmethod
    def _read_temperatures():
        """Return ``(cpu_temp, chassis_temp)`` from psutil's sensor table.

        Raises an exception when no usable sensor reading exists.
        """
        temps = psutil.sensors_temperatures()
        if 'coretemp' in temps:
            cpu_temp = temps['coretemp'][0].current
        elif 'cpu_thermal' in temps:
            cpu_temp = temps['cpu_thermal'][0].current
        else:
            # No dedicated CPU sensor: average the first reading of every
            # sensor group except 'acpitz' (floor division, as before).
            readings = [entries[0].current
                        for sensor, entries in temps.items()
                        if sensor != 'acpitz']
            if not readings:
                raise LookupError('no temperature sensors available')
            cpu_temp = sum(readings) // len(readings)

        # 'acpitz' supplies the chassis value when present; otherwise the
        # CPU value is reused.
        ch_temp = temps['acpitz'][0].current if 'acpitz' in temps else cpu_temp
        return cpu_temp, ch_temp

    def set_cpu_percent(self):
        """Sample CPU usage over one second and publish it."""
        self.cpu = psutil.cpu_percent(1)
        self.g_cpu.labels(server=self.instance_prefix).set(self.cpu)
|
|
|
|
|
|
# Running this module directly does nothing; it only provides definitions.
if __name__ == '__main__':
    pass
|