initial commit

This commit is contained in:
Anry Das
2025-02-16 12:38:35 +02:00
commit 954f607c5a
21 changed files with 1793 additions and 0 deletions

292
metrics/DataStructures.py Executable file
View File

@@ -0,0 +1,292 @@
import time
from threading import Thread
import psutil
from prometheus_client import Gauge, Enum, Counter, REGISTRY
import app_config
# State labels shared by the two-state Enum metrics in this module:
# index 0 is used when a target is up, index 1 when it is down.
ENUM_UP_DN_STATES = ['up', 'dn']
def get_metric(name):
    """Look up an already-registered collector by metric name.

    Returns whatever the default registry's name-to-collector mapping holds
    under ``name``, or ``None`` when there is no such entry.
    """
    # NOTE: ``_names_to_collectors`` is a private attribute of the registry.
    registered = REGISTRY._names_to_collectors
    return registered.get(name)
def get_gauge_metric(metric_name, descr):
    """Return the collector registered as ``metric_name``, creating a Gauge on first use.

    :param metric_name: full metric name to look up or create.
    :param descr: help text, used only when a new Gauge is created.
    """
    existing = get_metric(metric_name)
    if existing is not None:
        return existing
    return Gauge(metric_name, descr)
def get_counter_metric(metric_name, descr):
    """Return the collector registered as ``metric_name``, creating a Counter on first use.

    :param metric_name: full metric name to look up or create.
    :param descr: help text, used only when a new Counter is created.
    """
    existing = get_metric(metric_name)
    if existing is not None:
        return existing
    return Counter(metric_name, descr)
def get_enum_metric(metric_name, descr, states):
    """Return the collector registered as ``metric_name``, creating an Enum on first use.

    :param metric_name: full metric name to look up or create.
    :param descr: help text, used only when a new Enum is created.
    :param states: allowed state labels, used only when a new Enum is created.
    """
    existing = get_metric(metric_name)
    if existing is not None:
        return existing
    return Enum(metric_name, descr, states=states)
class AbstractData:
    """Base record for one monitored item: name, refresh interval and last-update time."""

    # Every metric name built by get_metric_name() starts with this prefix.
    METRIC_NAME_PREFIX = 'das_'

    def __init__(self, name, interval, prefix=''):
        """Store the identity of the item and stamp it as updated now.

        :param name: item name, also used as the metric name suffix by subclasses.
        :param interval: number of seconds between refreshes.
        :param prefix: optional instance prefix inserted into metric names.
        """
        self.name = name
        self.interval = interval
        self.instance_prefix = prefix
        self.updated_at = int(time.time())

    def set_update_time(self):
        """Record the current time (whole seconds) as the last update."""
        self.updated_at = int(time.time())

    def is_need_to_update(self):
        """Return True once ``interval`` seconds have passed since the last update."""
        due_at = self.updated_at + self.interval
        return int(time.time()) >= due_at

    def get_metric_name(self, metric_text, name):
        """Build ``<prefix><metric_text>_[<instance_prefix>_]<name>``."""
        pieces = [self.METRIC_NAME_PREFIX, metric_text, '_']
        if self.instance_prefix:
            pieces.append(self.instance_prefix + '_')
        pieces.append(name)
        return ''.join(pieces)

    def print_trigger_info(self):
        """Print a timestamped "Touch" line when info output is enabled in app_config."""
        if not app_config.IS_PRINT_INFO:
            return
        stamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        print(f'{stamp} [INFO]: Touch "{self.name}"')
class DiskData(AbstractData):
    """Disk-usage record for one mount point, published through three gauges."""

    g_total: Gauge
    g_used: Gauge
    g_free: Gauge

    def __init__(self, mount_point='/', total=0, used=0, free=0, interval=60, name='', prefix=''):
        """Create (or reuse) the gauges and publish the initial figures.

        :param mount_point: path whose usage is tracked.
        :param total: initial total size in bytes.
        :param used: initial used size in bytes.
        :param free: initial free size in bytes.
        :param interval: seconds between refreshes.
        :param name: metric name suffix.
        :param prefix: optional instance prefix for metric names.
        """
        super().__init__(name, interval, prefix)
        self.mount_point = mount_point
        self.total, self.used, self.free = total, used, free

        total_name = self.get_metric_name('disk_total_bytes', name)
        self.g_total = get_gauge_metric(total_name, 'Total bytes on disk')
        used_name = self.get_metric_name('disk_used_bytes', name)
        self.g_used = get_gauge_metric(used_name, 'Used bytes on disk')
        free_name = self.get_metric_name('disk_free_bytes', name)
        self.g_free = get_gauge_metric(free_name, 'Free bytes on disk')

        self.set_data(total, used, free)

    def set_data(self, total, used, free):
        """Push the three figures to their gauges and mark the record as refreshed."""
        readings = ((self.g_total, total), (self.g_used, used), (self.g_free, free))
        for gauge, amount in readings:
            gauge.set(amount)
        self.set_update_time()
        self.print_trigger_info()
class HealthData(AbstractData):
    """Health-check record for one HTTP endpoint, published as an up/dn Enum."""

    e_state: Enum

    def __init__(self, name, url, interval, timeout, is_up=False, method='GET', user=None, password=None, headers=None, prefix=''):
        """Store the request parameters and publish the initial state.

        :param name: metric name suffix.
        :param url: endpoint to probe.
        :param interval: seconds between probes.
        :param timeout: request timeout handed to the probe.
        :param is_up: initial state.
        :param method: HTTP method; stored upper-cased.
        :param user: optional user name.
        :param password: optional password.
        :param headers: optional request headers; ``None`` becomes an empty dict.
        :param prefix: optional instance prefix for metric names.
        """
        super().__init__(name, interval, prefix)
        self.url = url
        self.timeout = timeout
        self.is_up = is_up
        self.method = method.upper()
        self.user = user
        self.password = password
        self.headers = {} if headers is None else headers
        self.e_state = get_enum_metric(
            self.get_metric_name('service_health', name),
            'Service health',
            ENUM_UP_DN_STATES,
        )
        self.set_status(is_up)

    def set_status(self, is_up):
        """Remember the new state, publish it and mark the record as refreshed."""
        self.is_up = is_up
        label = ENUM_UP_DN_STATES[0 if is_up else 1]
        self.e_state.state(label)
        self.set_update_time()
        self.print_trigger_info()
class RestValueData(AbstractData):
    """Record for a numeric value fetched from a REST endpoint, published as a gauge."""

    g_value: Gauge

    def __init__(self, name, url, interval, timeout, value=None, method='GET', user=None, password=None, headers=None, prefix='',
                 result_type='single', result_path=''):
        """Store the request parameters and publish the initial value.

        :param name: metric name suffix (also appended to the gauge help text).
        :param url: endpoint to query.
        :param interval: seconds between queries.
        :param timeout: request timeout handed to the fetcher.
        :param value: initial value.
        :param method: HTTP method; stored upper-cased.
        :param user: optional user name.
        :param password: optional password.
        :param headers: optional request headers; ``None`` becomes an empty dict.
        :param prefix: optional instance prefix for metric names.
        :param result_type: stored as ``self.type``.
        :param result_path: stored as ``self.path``.
        """
        super().__init__(name, interval, prefix)
        if headers is None:
            headers = {}
        self.url = url
        self.timeout = timeout
        self.method = method.upper()
        self.user = user
        self.password = password
        self.headers = headers
        self.value = value
        self.type = result_type
        self.path = result_path
        metric_name = self.get_metric_name('rest_value', name)
        self.g_value = get_gauge_metric(metric_name, 'Remote REST API Value ' + name)
        self.set_value(value)

    def set_value(self, value):
        """Remember ``value`` and publish its integer form (0 when it is not convertible)."""
        self.value = value
        try:
            numeric = int(value)
        except (TypeError, ValueError, OverflowError):
            # Only conversion failures fall back to 0; anything else
            # (e.g. KeyboardInterrupt) is no longer swallowed.
            numeric = 0
        self.g_value.set(numeric)
        self.set_update_time()
        self.print_trigger_info()
class ShellValueData(AbstractData):
    """Record for a numeric value produced by a local command, published as a gauge."""

    g_value: Gauge

    def __init__(self, name, interval, command, value=None, args=None, prefix=''):
        """Store the command description and publish the initial value.

        :param name: metric name suffix (also appended to the gauge help text).
        :param interval: seconds between runs.
        :param command: executable to run.
        :param value: initial value.
        :param args: command arguments; ``None`` becomes an empty dict.
        :param prefix: optional instance prefix for metric names.
        """
        super().__init__(name, interval, prefix)
        if args is None:
            args = {}
        self.command = command
        self.value = value
        self.args = args
        metric_name = self.get_metric_name('shell_value', name)
        self.g_value = get_gauge_metric(metric_name, 'Shell Value ' + name)
        self.set_value(value)

    def set_value(self, value):
        """Remember ``value`` and publish its integer form (0 when it is not convertible)."""
        self.value = value
        try:
            numeric = int(value)
        except (TypeError, ValueError, OverflowError):
            # Only conversion failures fall back to 0; anything else
            # (e.g. KeyboardInterrupt) is no longer swallowed.
            numeric = 0
        self.g_value.set(numeric)
        self.set_update_time()
        self.print_trigger_info()
class IcmpData(AbstractData):
    """Ping record for one host, published as an up/dn Enum."""

    e_state: Enum

    def __init__(self, name, ip, count, interval, is_up=False, prefix=''):
        """Store the ping parameters and publish the initial state.

        :param name: metric name suffix.
        :param ip: address to ping.
        :param count: number of echo requests per check.
        :param interval: seconds between checks.
        :param is_up: initial state.
        :param prefix: optional instance prefix for metric names.
        """
        super().__init__(name, interval, prefix)
        self.ip = ip
        self.count = count
        self.is_up = is_up
        self.e_state = get_enum_metric(
            self.get_metric_name('host_available', name),
            'Host availability',
            ENUM_UP_DN_STATES,
        )
        self.set_status(is_up)

    def set_status(self, is_up):
        """Remember the new state, publish it and mark the record as refreshed."""
        self.is_up = is_up
        label = ENUM_UP_DN_STATES[0 if is_up else 1]
        self.e_state.state(label)
        self.set_update_time()
        self.print_trigger_info()
class InterfaceData(AbstractData):
    """Traffic record for one network interface, published as two counters."""

    g_sent: Counter
    g_receive: Counter

    def __init__(self, name, iface, interval, sent, receive, prefix=''):
        """Create (or reuse) the counters and take ``sent``/``receive`` as the baseline.

        :param name: metric name suffix.
        :param iface: interface name.
        :param interval: seconds between refreshes.
        :param sent: bytes-sent reading at start-up.
        :param receive: bytes-received reading at start-up.
        :param prefix: optional instance prefix for metric names.
        """
        super().__init__(name, interval, prefix)
        self.iface = iface
        self.sent = sent
        self.receive = receive
        sent_metric_name = self.get_metric_name('net_interface_sent_bytes', name)
        self.g_sent = get_counter_metric(sent_metric_name, 'Network Interface bytes sent')
        receive_metric_name = self.get_metric_name('net_interface_receive_bytes', name)
        self.g_receive = get_counter_metric(receive_metric_name, 'Network Interface bytes receive')
        # The baseline equals the readings just stored, so this first call adds 0.
        self.set_data(sent, receive)

    @staticmethod
    def _delta(previous, current):
        """Return the non-negative growth between two readings.

        A reading lower than the previous one is treated as a reset of the
        underlying counter, so the new reading itself is the growth.
        """
        if current >= previous:
            return current - previous
        return current

    def set_data(self, sent, receive):
        """Add the growth since the previous readings to both counters."""
        # Counter.inc() rejects negative amounts, so a raw difference would
        # raise after the source counters restart from a lower value.
        sent_delta = self._delta(self.sent, sent)
        recv_delta = self._delta(self.receive, receive)
        self.sent = sent
        self.receive = receive
        self.g_sent.inc(sent_delta)
        self.g_receive.inc(recv_delta)
        self.set_update_time()
        self.print_trigger_info()
class UptimeData(AbstractData):
    """Exporter uptime record, published as a counter of seconds."""

    # Captured once, when the class body is executed.
    START_TIME = int(time.time())
    c_uptime: Counter

    def __init__(self, interval, prefix=''):
        """Create (or reuse) the uptime counter and publish the first reading.

        :param interval: seconds between refreshes.
        :param prefix: optional instance prefix for the metric name.
        """
        super().__init__('uptime', interval, prefix)
        self.uptime = 0
        self.c_uptime = get_counter_metric(
            self.get_metric_name('exporter', self.name),
            'Exporter Uptime in seconds',
        )
        self.set_data()

    def set_data(self):
        """Advance the counter by the seconds elapsed since the previous reading."""
        elapsed = int(time.time()) - self.START_TIME
        step = elapsed - self.uptime
        self.c_uptime.inc(step)
        self.uptime = elapsed
        self.set_update_time()
        self.print_trigger_info()
class SystemData(AbstractData):
    """Host-level record: an uptime counter plus CPU, memory and temperature gauges."""

    # Captured once, when the class body is executed.
    BOOT_TIME = int(psutil.boot_time())
    # Published on both temperature gauges when no reading can be obtained.
    TEMP_UNAVAILABLE = -500
    c_uptime: Counter
    g_cpu: Gauge
    g_memory: Gauge
    g_chassis_temp: Gauge
    g_cpu_temp: Gauge

    def __init__(self, interval, prefix=''):
        """Create (or reuse) the metrics and publish the first readings.

        :param interval: seconds between refreshes.
        :param prefix: optional instance prefix for metric names.
        """
        super().__init__('system', interval, prefix)
        self.cpu, self.memory, self.uptime, self.ch_temp, self.cpu_temp = 0, 0, 0, 0, 0
        self.init_metrics()
        self.set_data()

    def init_metrics(self):
        """Create (or reuse) every metric this record publishes."""
        uptime_metric_name = self.get_metric_name(self.name, 'uptime_seconds')
        self.c_uptime = get_counter_metric(uptime_metric_name, 'System uptime')
        cpu_metric_name = self.get_metric_name(self.name, 'cpu_percent')
        self.g_cpu = get_gauge_metric(cpu_metric_name, 'CPU used percent')
        mem_metric_name = self.get_metric_name(self.name, 'memory_percent')
        self.g_memory = get_gauge_metric(mem_metric_name, 'Memory used percent')
        chassis_temp_metric_name = self.get_metric_name(self.name, 'ChassisTemperature_current')
        self.g_chassis_temp = get_gauge_metric(chassis_temp_metric_name, 'Current Chassis Temperature overall')
        cpu_temp_metric_name = self.get_metric_name(self.name, 'CpuTemperature_current')
        self.g_cpu_temp = get_gauge_metric(cpu_temp_metric_name, 'Current CPU Temperature overall')

    def set_data(self):
        """Refresh uptime, memory and temperatures; CPU usage is sampled in the background."""
        uptime = int(time.time()) - self.BOOT_TIME
        self.c_uptime.inc(uptime - self.uptime)
        self.uptime = uptime

        self.memory = psutil.virtual_memory().percent
        self.g_memory.set(self.memory)

        # set_cpu_percent() blocks for its one-second sampling window, so it
        # runs on its own thread. Pass the bound method itself as the target:
        # calling it here would block and hand Thread a ``None`` target.
        Thread(target=self.set_cpu_percent, daemon=True).start()

        self.cpu_temp, self.ch_temp = self._read_temperatures()
        self.g_chassis_temp.set(self.ch_temp)
        self.g_cpu_temp.set(self.cpu_temp)

        self.set_update_time()
        self.print_trigger_info()

    def _read_temperatures(self):
        """Return ``(cpu_temp, chassis_temp)``, or the sentinel pair when nothing is readable."""
        unavailable = (self.TEMP_UNAVAILABLE, self.TEMP_UNAVAILABLE)
        try:
            temps = psutil.sensors_temperatures()
        except (AttributeError, OSError):
            # AttributeError: this psutil build does not expose sensors_temperatures.
            return unavailable

        # First reading of every sensor group that reports at least one entry.
        first = {group: entries[0].current for group, entries in temps.items() if entries}

        if 'coretemp' in first:
            cpu_temp = first['coretemp']
        elif 'cpu_thermal' in first:
            cpu_temp = first['cpu_thermal']
        else:
            # No dedicated CPU sensor: average every group except 'acpitz'.
            others = [value for group, value in first.items() if group != 'acpitz']
            if not others:
                return unavailable
            cpu_temp = sum(others) // len(others)

        # Without an 'acpitz' reading the chassis gauge mirrors the CPU value.
        return cpu_temp, first.get('acpitz', cpu_temp)

    def set_cpu_percent(self):
        """Sample CPU usage over one second and publish it."""
        self.cpu = psutil.cpu_percent(1)
        self.g_cpu.set(self.cpu)
# Running this module directly does nothing.
if __name__ == '__main__':
    pass

300
metrics/MetricClasses.py Executable file
View File

@@ -0,0 +1,300 @@
import json
import shutil
from abc import abstractmethod
import time
import platform
import subprocess
import requests
import psutil
import app_config
from threading import Thread
from metrics.DataStructures import DiskData, HealthData, IcmpData, ENUM_UP_DN_STATES, InterfaceData, UptimeData, \
SystemData, RestValueData, ShellValueData
class AbstractMetric:
    """Base class for a metric group: holds its config section and its data records."""

    metric_key = ""
    config = {}

    def __init__(self, key, config):
        """Pick this group's section out of the full configuration.

        :param key: name of the section inside ``config``; falsy for groups
            that have no section.
        :param config: full configuration mapping (may be empty or ``None``).
        """
        self.metric_key = key
        if key and config and key in config:
            self.config = config[key]
        else:
            # Give every instance its own empty section instead of sharing
            # the mutable class-level default between instances.
            self.config = {}
        self.data_array = []

    @abstractmethod
    def proceed_metric(self):
        """Refresh every record that is due; implemented by subclasses."""
        pass

    @abstractmethod
    def print_debug_info(self):
        """Print the current state of every record; implemented by subclasses."""
        pass
def is_health_check(url, timeout, method, user, pwd, headers, callback=None):
    """Probe ``url`` and report whether it answered with HTTP 200.

    :param url: endpoint to request.
    :param timeout: timeout passed to the request.
    :param method: HTTP method.
    :param user: user name; basic auth is used only when both ``user`` and
        ``pwd`` are truthy.
    :param pwd: password.
    :param headers: request headers; any falsy value means "no extra headers".
    :param callback: optional callable that receives the boolean result.
    :return: the boolean result when no callback is given, otherwise ``None``.
    """
    try:
        # The context manager closes the session once the request is done.
        with requests.Session() as session:
            if user and pwd:
                session.auth = (user, pwd)
            response = session.request(
                url=url,
                timeout=timeout,
                method=method,
                headers=headers or None,
            )
        result = response.status_code == 200
    except requests.RequestException:
        # Any request failure (connection error, timeout, ...) counts as "down".
        result = False

    # Report failures through the callback too, so a background refresh can
    # actually flip the state to "down".
    if callback is not None:
        callback(result)
        return None
    return result
def get_rest_value(url, timeout, method, user, pwd, headers, callback=None, result_type='single', path=''):
    """Fetch a JSON document from ``url`` and extract the value at ``path``.

    :param url: endpoint to request.
    :param timeout: timeout passed to the request.
    :param method: HTTP method.
    :param user: user name; basic auth is used only when both ``user`` and
        ``pwd`` are truthy.
    :param pwd: password.
    :param headers: request headers; any falsy value means "no extra headers".
    :param callback: optional callable that receives the result.
    :param result_type: accepted for interface compatibility; not used here.
    :param path: lookup path handed to ``parse_response``.
    :return: when no callback is given: the extracted value if it is a number
        or an alphanumeric string, otherwise ``0``. ``None`` when a callback
        is given.
    """
    try:
        # The context manager closes the session once the request is done.
        with requests.Session() as session:
            if user and pwd:
                session.auth = (user, pwd)
            response = session.request(
                url=url,
                timeout=timeout,
                method=method,
                headers=headers or None,
            )
        # Single quotes are swapped for double quotes before parsing, so
        # bodies quoted that way still load as JSON.
        payload = json.loads(response.content.decode().replace("'", '"'))
        result = parse_response(payload, path)
    except (requests.RequestException, ValueError, TypeError):
        # Request failures, undecodable bodies and invalid JSON all yield 0.
        result = 0

    if isinstance(result, str):
        if not result.isalnum():
            result = 0
    elif not isinstance(result, (int, float)):
        # Objects, arrays and null cannot be published as a single value.
        result = 0

    # Report failures through the callback too, so a background refresh
    # does not keep a stale value forever.
    if callback is not None:
        callback(result)
        return None
    return result
def parse_response(resp, path):
    """Walk ``resp`` along ``path`` and return the value found there.

    ``path`` is split on ``app_config.RESPONSE_PATH_SEPARATOR``; each segment
    is looked up as a key of the current (nested) dict.

    :param resp: decoded JSON document.
    :param path: key, or separator-joined chain of keys.
    :return: the value at ``path``, or ``''`` when a segment is missing or an
        intermediate value is not a dict.
    """
    separator = app_config.RESPONSE_PATH_SEPARATOR
    segments = path.split(separator) if separator and separator in path else [path]

    node = resp
    for segment in segments:
        # Descend only through dicts: a null, list, string or number in the
        # middle of the path means the path does not exist.
        if not isinstance(node, dict) or segment not in node:
            return ''
        node = node[segment]
    return node
def get_shell_value(command, args, callback=None):
    """Run ``command`` with ``args`` and parse its standard output as an integer.

    :param command: executable to run (no shell is involved).
    :param args: iterable of arguments, each passed as its own argv entry;
        a plain string is split on whitespace; a falsy value means no
        arguments.
    :param callback: optional callable that receives the integer result.
    :return: the parsed integer when no callback is given, otherwise ``None``.
        The result is ``0`` when the command cannot be run, exits with a
        non-zero status, or prints something that is not an integer.
    """
    if isinstance(args, str):
        arg_list = args.split()
    else:
        arg_list = [str(item) for item in (args or ())]

    try:
        output = subprocess.check_output([command, *arg_list])
        # Strip the trailing newline that commands normally print.
        result = int(output.decode(errors='replace').strip())
    except (OSError, subprocess.SubprocessError, ValueError, TypeError):
        result = 0

    if callback is not None:
        callback(result)
        return None
    return result
def is_ping(ip, count, callback=None):
    """Ping ``ip`` ``count`` times and report whether the host answered.

    :param ip: address or host name handed to ``ping``.
    :param count: number of echo requests.
    :param callback: optional callable that receives the boolean result.
    :return: the boolean result when no callback is given, otherwise ``None``.
    """
    count_flag = '-n' if platform.system().lower() == 'windows' else '-c'
    command = ['ping', count_flag, str(count), ip]
    failure_markers = ('unreachable', 'could not find', 'time out')

    try:
        # run() does not raise on a non-zero exit status, so an unreachable
        # host is reported as "down" instead of raising.
        completed = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    except OSError:
        # The ping executable could not be started.
        result = False
    else:
        output = completed.stdout.decode(errors='replace')
        result = completed.returncode == 0 and not any(marker in output for marker in failure_markers)

    if callback is not None:
        callback(result)
        return None
    return result
def get_net_iface_stat(name):
    """Return the I/O counters of interface ``name``, or ``None`` if it is not listed."""
    per_interface = psutil.net_io_counters(pernic=True)
    return per_interface.get(name)
def get_next_update_time(d):
    """Format the local time at which record ``d`` becomes due again.

    :param d: object exposing ``updated_at`` and ``interval`` (both in seconds).
    :return: ``YYYY-MM-DD HH:MM:SS`` string in local time.
    """
    due_at = d.updated_at + d.interval
    return time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(due_at))
class DiskMetric(AbstractMetric):
    """Metric group for the 'disk' config section: one DiskData per configured path."""

    def __init__(self, config, prefix=''):
        """Build a record for every entry, seeded with its current disk usage."""
        super().__init__('disk', config)
        for entry in self.config:
            mount_point, interval, name = entry['path'], entry['interval'], entry['name']
            usage = shutil.disk_usage(mount_point)
            # disk_usage() yields (total, used, free) in exactly the order DiskData expects.
            record = DiskData(mount_point, *usage, interval, name, prefix)
            self.data_array.append(record)

    def proceed_metric(self):
        """Re-read disk usage for every record that is due."""
        for record in self.data_array:
            if not record.is_need_to_update():
                continue
            record.total, record.used, record.free = shutil.disk_usage(record.mount_point)
            record.set_data(record.total, record.used, record.free)

    def print_debug_info(self):
        """Print usage figures (in whole GiB) and the next refresh time of every record."""
        gib = 2 ** 30
        for d in self.data_array:
            due = get_next_update_time(d)
            total, used, free = d.total // gib, d.used // gib, d.free // gib
            print(f'[DEBUG] (next update at {due}) {d.mount_point}: total={total} Gb, used={used} Gb, free={free} Gb')
class HealthMetric(AbstractMetric):
    """Metric group for the 'health' config section: one HealthData per endpoint."""

    def __init__(self, config, prefix=''):
        """Probe every configured endpoint once and build its record.

        Each entry must provide ``name``, ``url``, ``interval``, ``timeout``
        and ``method``; ``auth`` (with ``user``/``pass``) and ``headers`` are
        optional.
        """
        super().__init__('health', config)
        for d in self.config:
            name, url, interval, timeout, method = d['name'], d['url'], d['interval'], d['timeout'], d['method']
            # Optional settings live on the entry itself, not on the section list.
            auth = d.get('auth') or {}
            user = auth.get('user', '')
            pwd = auth.get('pass', '')
            headers = d.get('headers') or {}
            result = is_health_check(url, timeout, method, user, pwd, headers)
            self.data_array.append(HealthData(name, url, interval, timeout, result, method, user, pwd, headers, prefix))

    def proceed_metric(self):
        """Start a background probe for every record that is due."""
        for d in self.data_array:
            if d.is_need_to_update():
                # The probe reports back through d.set_status.
                thread = Thread(target=is_health_check, args=(d.url, d.timeout, d.method, d.user, d.password, d.headers, d.set_status))
                thread.start()

    def print_debug_info(self):
        """Print the state and next refresh time of every record."""
        for d in self.data_array:
            print(f'[DEBUG] (next update at {get_next_update_time(d)}) {d.url}: {ENUM_UP_DN_STATES[0].upper() if d.is_up else ENUM_UP_DN_STATES[1].upper()}')
class IcmpMetric(AbstractMetric):
    """Metric group for the 'ping' config section: one IcmpData per host."""

    def __init__(self, config, prefix=''):
        """Ping every configured host once and build its record."""
        super().__init__('ping', config)
        for entry in self.config:
            name, ip, count, interval = entry['name'], entry['ip'], entry['count'], entry['interval']
            reachable = is_ping(ip, count)
            record = IcmpData(name, ip, count, interval, reachable, prefix)
            self.data_array.append(record)

    def proceed_metric(self):
        """Start a background ping for every record that is due."""
        for record in self.data_array:
            if not record.is_need_to_update():
                continue
            # The ping reports back through record.set_status.
            Thread(target=is_ping, args=(record.ip, record.count, record.set_status)).start()

    def print_debug_info(self):
        """Print the state and next refresh time of every record."""
        for d in self.data_array:
            state = "UP" if d.is_up else "DN"
            print(f'[DEBUG] (next update at {get_next_update_time(d)}) {d.ip}: {state}')
class InterfaceMetric(AbstractMetric):
    """Metric group for the 'iface' config section: one InterfaceData per interface."""

    def __init__(self, config, prefix=''):
        """Read the current counters of every configured interface and build its record.

        An interface that is not present at start-up still fails here
        (attribute access on ``None``), as before.
        """
        super().__init__('iface', config)
        for d in self.config:
            name, iface, interval = d['name'], d['iface'], d['interval']
            result = get_net_iface_stat(iface)
            self.data_array.append(InterfaceData(name, iface, interval, result.bytes_sent, result.bytes_recv, prefix))

    def proceed_metric(self):
        """Re-read the counters for every record that is due."""
        for d in self.data_array:
            if not d.is_need_to_update():
                continue
            result = get_net_iface_stat(d.iface)
            if result is None:
                # The interface is currently not listed; leave the record
                # untouched so it is retried on the next pass.
                continue
            d.set_data(result.bytes_sent, result.bytes_recv)

    def print_debug_info(self):
        """Print the last counters and next refresh time of every record."""
        for d in self.data_array:
            print(f'[DEBUG] (next update at {get_next_update_time(d)}) {d.iface}: sent={d.sent}, receive={d.receive}')
class RestValueMetric(AbstractMetric):
    """Metric group for the 'rest_value' config section: one RestValueData per endpoint."""

    def __init__(self, config, prefix=''):
        """Query every configured endpoint once and build its record.

        Each entry must provide ``name``, ``url``, ``interval``, ``timeout``,
        ``method``, ``result_type`` and ``result_path``; ``auth`` (with
        ``user``/``pass``) and ``headers`` are optional.
        """
        super().__init__('rest_value', config)
        for d in self.config:
            name, url, interval, timeout, method = d['name'], d['url'], d['interval'], d['timeout'], d['method']
            # Optional settings live on the entry itself, not on the section list.
            auth = d.get('auth') or {}
            user = auth.get('user', '')
            pwd = auth.get('pass', '')
            headers = d.get('headers') or {}
            result_type, result_path = d['result_type'], d['result_path']
            result = get_rest_value(url=url, timeout=timeout, method=method, user=user, pwd=pwd, headers=headers,
                                    result_type=result_type, path=result_path)
            self.data_array.append(RestValueData(name, url, interval, timeout, result, method, user, pwd, headers, prefix, result_type, result_path))

    def proceed_metric(self):
        """Start a background query for every record that is due."""
        for d in self.data_array:
            if d.is_need_to_update():
                # The query reports back through d.set_value.
                thread = Thread(target=get_rest_value, args=(d.url, d.timeout, d.method, d.user, d.password, d.headers,
                                                             d.set_value, d.type, d.path))
                thread.start()

    def print_debug_info(self):
        """Print the last value and next refresh time of every record."""
        for d in self.data_array:
            print(f'[DEBUG] (next update at {get_next_update_time(d)}) on {d.url}: by {d.method} in {d.path} got value="{d.value}"')
class ShellValueMetric(AbstractMetric):
    """Metric group for the 'shell_value' config section: one ShellValueData per command."""

    def __init__(self, config, prefix=''):
        """Run every configured command once and build its record."""
        super().__init__('shell_value', config)
        for entry in self.config:
            name, command, interval, args = entry['name'], entry['command'], entry['interval'], entry['args']
            value = get_shell_value(command, args)
            record = ShellValueData(name, interval, command, value, args, prefix)
            self.data_array.append(record)

    def proceed_metric(self):
        """Start a background run for every record that is due."""
        for record in self.data_array:
            if not record.is_need_to_update():
                continue
            # The run reports back through record.set_value.
            Thread(target=get_shell_value, args=(record.command, record.args, record.set_value)).start()

    def print_debug_info(self):
        """Print the last value and next refresh time of every record."""
        for d in self.data_array:
            due = get_next_update_time(d)
            print(f'[DEBUG] (next update at {due}) on local shell: by command {d.command} with args="{d.args}" got value="{d.value}"')
class UptimeMetric(AbstractMetric):
    """Metric group holding a single UptimeData record; it has no config section."""

    def __init__(self, interval):
        """Create the uptime record with the given refresh interval."""
        super().__init__(None, {})
        record = UptimeData(interval)
        self.data_array.append(record)

    def proceed_metric(self):
        """Refresh every record that is due."""
        for record in self.data_array:
            if not record.is_need_to_update():
                continue
            record.set_data()

    def print_debug_info(self):
        """Print the uptime and next refresh time of every record."""
        for d in self.data_array:
            due = get_next_update_time(d)
            print(f'[DEBUG] (next update at {due}) Uptime: {d.uptime}')
class SystemMetric(AbstractMetric):
    """Metric group holding a single SystemData record; it has no config section."""

    def __init__(self, interval):
        """Create the system record, using the instance prefix from app_config."""
        super().__init__(None, {})
        record = SystemData(interval, app_config.INSTANCE_PREFIX)
        self.data_array.append(record)

    def proceed_metric(self):
        """Refresh every record that is due."""
        for record in self.data_array:
            if not record.is_need_to_update():
                continue
            record.set_data()

    def print_debug_info(self):
        """Print the last readings and next refresh time of every record."""
        for d in self.data_array:
            due = get_next_update_time(d)
            usage = f'[DEBUG] (next update at {due}) CPU: {d.cpu}% Mem: {d.memory}% Uptime: {d.uptime}s '
            temperatures = f'CPU temperature: {d.cpu_temp} Chassis temperature: {d.ch_temp}'
            print(usage + temperatures)
# Running this module directly does nothing.
if __name__ == '__main__':
    pass

0
metrics/__init__.py Executable file
View File

Binary file not shown.

Binary file not shown.

Binary file not shown.