Compare commits

...

10 Commits

Author SHA1 Message Date
d828afdb53 Merge pull request 'Collect time procedure improved' (#7) from dev into main
Reviewed-on: #7
2025-05-31 08:35:31 +03:00
Anry Das
11d497b4e3 Collect time procedure improved 2025-05-31 08:31:41 +03:00
5fdca2c30d Merge pull request 'Some improvements' (#6) from dev into main
Improved Metric Classes
Reviewed-on: #6
2025-04-19 16:05:37 +03:00
Anry Das
631fdd9389 Some improvements 2025-04-19 16:04:10 +03:00
902d60bbea Merge pull request 'Refactored Temperature - created single metric with labels' (#5) from dev into main
Reviewed-on: #5
2025-04-06 11:54:48 +03:00
Anry Das
efc028c511 Refactored Temperature - created single metric with labels 2025-04-06 11:52:17 +03:00
0e24bc09a0 Merge pull request 'Version 2.0' (#4) from dev into main
Reviewed-on: #4
v.2.0:
Refactored all metrics
Added labels
Added self measured metrics
Updated README.md
2025-04-06 09:58:34 +03:00
das
7d8afee839 Merge pull request 'dev to main' (#3) from dev into main
Reviewed-on: #3
2025-02-23 10:44:25 +02:00
das
78b7ea7146 Merge pull request 'Dev to Main: corrections' (#2) from dev into main
Reviewed-on: #2
Removed unnecessary folder
Corrected ICMP function
Tested YAML files processing
Updated .gitignore file
2025-02-18 10:25:39 +02:00
das
f24181ff79 Merge pull request 'Dev to Main' (#1) from dev into main
Reviewed-on: #1
2025-02-16 15:16:23 +02:00
5 changed files with 69 additions and 46 deletions

View File

@@ -203,13 +203,12 @@ From version 2.0 there are following metric names used
- `das_rest_value` - Remote REST API Value; Labels **name, url, method, server**
- `das_shell_value` - Shell Value; Labels: **name, command, server**
- `das_host_available` - Host availability; Labels **name, ip, server**
- `das_net_interface_bytes` - Network Interface bytes; Labels: **name, server, metric**=(sent|receive)
- `das_net_interface_bytes` - Network Interface bytes; Labels: **name, server, metric=(sent|receive)**
- `das_exporter_uptime` - Exporter Uptime for **server** in seconds
- `das_uptime_seconds` - System uptime on **server**
- `das_cpu_percent` - CPU used percent on **server**
- `das_memory_percent` - Memory used percent on **server**
- `das_ChassisTemperature_current` - Current Chassis Temperature overall on **server**
- `das_CpuTemperature_current` - Current CPU Temperature overall on **server**
- `das_temperature` - Temperature overall; Labels: **server, metric=(CPU|Chassis)**
**Note:** Prometheus does not support duplicate metric names. If a duplicate name is registered, an exception occurs and the application is stopped.
### 🚀 Launching the application

View File

@@ -1,6 +1,6 @@
import os
APP_VERSION="2.0"
APP_VERSION="2.3"
SCRIPT_PATH = os.path.dirname(__file__)
CONFIGS_DIR = SCRIPT_PATH + "/configs"
CONFIG_FILE_NAME = CONFIGS_DIR + "/config.json"

12
main.py
View File

@@ -44,12 +44,12 @@ def parse_config(cfg):
def init_metric_entities(data):
return {
M.DiskMetric(data, app_config.INSTANCE_PREFIX),
M.HealthMetric(data, app_config.INSTANCE_PREFIX),
M.IcmpMetric(data, app_config.INSTANCE_PREFIX),
M.InterfaceMetric(data, app_config.INSTANCE_PREFIX),
M.RestValueMetric(data, app_config.INSTANCE_PREFIX),
M.ShellValueMetric(data, app_config.INSTANCE_PREFIX),
M.DiskMetric(data),
M.HealthMetric(data),
M.IcmpMetric(data),
M.InterfaceMetric(data),
M.RestValueMetric(data),
M.ShellValueMetric(data),
M.UptimeMetric(app_config.UPTIME_UPDATE_SECONDS),
M.SystemMetric(app_config.SYSTEM_UPDATE_SECONDS)
}

View File

@@ -122,13 +122,16 @@ class HealthData(AbstractData):
self.e_state.labels(name=name, url=url, method=method, server=self.instance_prefix)
self.set_data(is_up)
def set_data(self, is_up):
def set_data(self, is_up, working_time = None):
time_ms = get_time_millis()
self.is_up = is_up
self.e_state.labels(name=self.name, url=self.url, method=self.method, server=self.instance_prefix).state(ENUM_UP_DN_STATES[0] if is_up else ENUM_UP_DN_STATES[1])
self.set_collect_time(get_time_millis() - time_ms)
self.set_update_time()
self.print_trigger_info()
if working_time:
self.set_collect_time(working_time)
else:
self.set_collect_time(get_time_millis() - time_ms)
class RestValueData(AbstractData):
@@ -153,7 +156,7 @@ class RestValueData(AbstractData):
self.g_value.labels(name=name, url=url, method=method, server=self.instance_prefix)
self.set_data(value)
def set_data(self, value):
def set_data(self, value, working_time = None):
time_ms = get_time_millis()
self.value = value
try:
@@ -161,9 +164,12 @@ class RestValueData(AbstractData):
except:
self.g_value.labels(name=self.name, url=self.url, method=self.method, server=self.instance_prefix).set(0)
self.set_collect_time(get_time_millis() - time_ms)
self.set_update_time()
self.print_trigger_info()
if working_time:
self.set_collect_time(working_time)
else:
self.set_collect_time(get_time_millis() - time_ms)
class ShellValueData(AbstractData):
@@ -181,7 +187,7 @@ class ShellValueData(AbstractData):
self.g_value.labels(name=name, command=command, server=self.instance_prefix)
self.set_data(value)
def set_data(self, value):
def set_data(self, value, working_time = None):
time_ms = get_time_millis()
self.value = value
try:
@@ -189,9 +195,12 @@ class ShellValueData(AbstractData):
except:
self.g_value.labels(name=self.name, command=self.command, server=self.instance_prefix).set(0)
self.set_collect_time(get_time_millis() - time_ms)
self.set_update_time()
self.print_trigger_info()
if working_time:
self.set_collect_time(working_time)
else:
self.set_collect_time(get_time_millis() - time_ms)
class IcmpData(AbstractData):
@@ -207,13 +216,16 @@ class IcmpData(AbstractData):
self.e_state.labels(name=name, ip=ip, server=self.instance_prefix)
self.set_data(is_up)
def set_data(self, is_up):
def set_data(self, is_up, working_time = None):
time_ms = get_time_millis()
self.is_up = is_up
self.e_state.labels(name=self.name, ip=self.ip, server=self.instance_prefix).state(ENUM_UP_DN_STATES[0] if is_up else ENUM_UP_DN_STATES[1])
self.set_collect_time(get_time_millis() - time_ms)
self.set_update_time()
self.print_trigger_info()
if working_time:
self.set_collect_time(working_time)
else:
self.set_collect_time(get_time_millis() - time_ms)
class InterfaceData(AbstractData):
@@ -249,7 +261,7 @@ class UptimeData(AbstractData):
def __init__(self, interval, prefix=''):
super().__init__('uptime', interval, prefix)
self.uptime = 0
self.c_uptime = get_counter_metric('das_exporter',
self.c_uptime = get_counter_metric('das_exporter_uptime',
'Exporter Uptime for [server] in seconds',
['server'])
self.c_uptime.labels(server=self.instance_prefix)
@@ -270,7 +282,7 @@ class SystemData(AbstractData):
c_uptime: Counter
g_cpu: Gauge
g_memory: Gauge
g_chassis_temp: Gauge
g_tempr: Gauge
g_cpu_temp: Gauge
def __init__(self, interval, prefix=''):
super().__init__('system', interval, prefix)
@@ -285,10 +297,9 @@ class SystemData(AbstractData):
self.g_cpu.labels(server=self.instance_prefix)
self.g_memory = get_gauge_metric('das_memory_percent', 'Memory used percent on [server]', ['server'])
self.g_memory.labels(server=self.instance_prefix)
self.g_chassis_temp = get_gauge_metric('das_ChassisTemperature_current', 'Current Chassis Temperature overall on [server]', ['server'])
self.g_chassis_temp.labels(server=self.instance_prefix)
self.g_cpu_temp = get_gauge_metric('das_CpuTemperature_current', 'Current CPU Temperature overall on [server]', ['server'])
self.g_cpu_temp.labels(server=self.instance_prefix)
self.g_tempr = get_gauge_metric('das_temperature', 'Temperature of [type] overall on [server]', ['metric', 'server'])
self.g_tempr.labels(server=self.instance_prefix, metric='CPU')
self.g_tempr.labels(server=self.instance_prefix, metric='Chassis')
def set_data(self):
time_ms = get_time_millis()
@@ -320,13 +331,13 @@ class SystemData(AbstractData):
else:
self.ch_temp = self.cpu_temp
self.g_chassis_temp.labels(server=self.instance_prefix).set(self.ch_temp)
self.g_cpu_temp.labels(server=self.instance_prefix).set(self.cpu_temp)
self.g_tempr.labels(server=self.instance_prefix, metric='Chassis').set(self.ch_temp)
self.g_tempr.labels(server=self.instance_prefix, metric='CPU').set(self.cpu_temp)
except:
self.ch_temp = -500
self.cpu_temp = -500
self.g_chassis_temp.labels(server=self.instance_prefix).set(self.ch_temp)
self.g_cpu_temp.labels(server=self.instance_prefix).set(self.cpu_temp)
self.g_tempr.labels(server=self.instance_prefix, metric='Chassis').set(self.ch_temp)
self.g_tempr.labels(server=self.instance_prefix, metric='CPU').set(self.cpu_temp)
self.set_collect_time(get_time_millis() - time_ms)
self.set_update_time()

View File

@@ -18,8 +18,10 @@ from metrics.DataStructures import DiskData, HealthData, IcmpData, ENUM_UP_DN_ST
class AbstractMetric:
metric_key = ""
config = {}
prefix = ""
def __init__(self, key, config):
self.metric_key = key
self.prefix = app_config.INSTANCE_PREFIX
if key and key in config:
self.config = config[key]
self.data_array = []
@@ -34,6 +36,7 @@ class AbstractMetric:
def is_health_check(url, timeout, method, user, pwd, headers, callback=None):
time_ms = get_time_millis()
session = requests.Session()
if user and pwd:
session.auth = (user, pwd)
@@ -46,13 +49,15 @@ def is_health_check(url, timeout, method, user, pwd, headers, callback=None):
)
result = response.status_code == 200
if callback is not None:
callback(result)
working_time = get_time_millis() - time_ms
callback(result, working_time)
else:
return result
except (requests.ConnectTimeout, requests.exceptions.ConnectionError) as e:
return False
def get_rest_value(url, timeout, method, user, pwd, headers, callback=None, result_type='single', path=''):
time_ms = get_time_millis()
session = requests.Session()
if user and pwd:
session.auth = (user, pwd)
@@ -68,7 +73,8 @@ def get_rest_value(url, timeout, method, user, pwd, headers, callback=None, resu
if not result.isalnum():
result = 0
if callback is not None:
callback(result)
working_time = get_time_millis() - time_ms
callback(result, working_time)
else:
return result
except (requests.ConnectTimeout, requests.exceptions.ConnectionError) as e:
@@ -96,6 +102,7 @@ def parse_response(resp, path):
return ''
def get_shell_value(command, args, callback=None):
time_ms = get_time_millis()
cmd = [command, ' '.join(str(s) for s in args)]
try:
output = subprocess.check_output(cmd)
@@ -107,11 +114,13 @@ def get_shell_value(command, args, callback=None):
result = 0
if callback is not None:
callback(result)
working_time = get_time_millis() - time_ms
callback(result, working_time)
else:
return result
def is_ping(ip, count, callback=None):
time_ms = get_time_millis()
param = '-n' if platform.system().lower() == 'windows' else '-c'
command = ['ping', param, str(count), ip]
try:
@@ -121,8 +130,10 @@ def is_ping(ip, count, callback=None):
'time out'.upper() not in str(output).upper())
except:
result = False
if callback is not None:
callback(result)
working_time = get_time_millis() - time_ms
callback(result, working_time)
else:
return result
@@ -132,14 +143,16 @@ def get_net_iface_stat(name):
def get_next_update_time(d):
return time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(d.updated_at + d.interval))
def get_time_millis():
return round(time.time() * 1000)
class DiskMetric(AbstractMetric):
def __init__(self, config, prefix=''):
def __init__(self, config):
super().__init__('disk', config)
for d in self.config:
mount_point, interval, name = d['path'], d['interval'], d['name']
total, used, free = shutil.disk_usage(mount_point)
self.data_array.append(DiskData(mount_point, total, used, free, interval, name, prefix))
self.data_array.append(DiskData(mount_point, total, used, free, interval, name, self.prefix))
def proceed_metric(self):
for d in self.data_array:
@@ -154,7 +167,7 @@ class DiskMetric(AbstractMetric):
class HealthMetric(AbstractMetric):
def __init__(self, config, prefix=''):
def __init__(self, config):
super().__init__('health', config)
for d in self.config:
name, url, interval, timeout, method = d['name'], d['url'], d['interval'], d['timeout'], d['method']
@@ -169,7 +182,7 @@ class HealthMetric(AbstractMetric):
else:
headers = ''
result = is_health_check(url, timeout, method, user, pwd, headers)
self.data_array.append(HealthData(name, url, interval, timeout, result, method, user, pwd, headers, prefix))
self.data_array.append(HealthData(name, url, interval, timeout, result, method, user, pwd, headers, self.prefix))
def proceed_metric(self):
for d in self.data_array:
@@ -183,12 +196,12 @@ class HealthMetric(AbstractMetric):
class IcmpMetric(AbstractMetric):
def __init__(self, config, prefix=''):
def __init__(self, config):
super().__init__('ping', config)
for d in self.config:
name, ip, count, interval = d['name'], d['ip'], d['count'], d['interval']
result = is_ping(ip, count)
self.data_array.append(IcmpData(name, ip, count, interval, result, prefix))
self.data_array.append(IcmpData(name, ip, count, interval, result, self.prefix))
def proceed_metric(self):
for d in self.data_array:
@@ -202,12 +215,12 @@ class IcmpMetric(AbstractMetric):
class InterfaceMetric(AbstractMetric):
def __init__(self, config, prefix=''):
def __init__(self, config):
super().__init__('iface', config)
for d in self.config:
name, iface, interval = d['name'], d['iface'], d['interval']
result = get_net_iface_stat(iface)
self.data_array.append(InterfaceData(name, iface, interval, result.bytes_sent, result.bytes_recv, prefix))
self.data_array.append(InterfaceData(name, iface, interval, result.bytes_sent, result.bytes_recv, self.prefix))
def proceed_metric(self):
for d in self.data_array:
@@ -221,7 +234,7 @@ class InterfaceMetric(AbstractMetric):
class RestValueMetric(AbstractMetric):
def __init__(self, config, prefix=''):
def __init__(self, config):
super().__init__('rest_value', config)
for d in self.config:
name, url, interval, timeout, method = d['name'], d['url'], d['interval'], d['timeout'], d['method']
@@ -238,7 +251,7 @@ class RestValueMetric(AbstractMetric):
result_type, result_path = d['result_type'], d['result_path']
result = get_rest_value(url=url, timeout=timeout, method=method, user=user, pwd=pwd, headers=headers,
result_type=result_type, path=result_path)
self.data_array.append(RestValueData(name, url, interval, timeout, result, method, user, pwd, headers, prefix, result_type, result_path))
self.data_array.append(RestValueData(name, url, interval, timeout, result, method, user, pwd, headers, self.prefix, result_type, result_path))
def proceed_metric(self):
for d in self.data_array:
@@ -253,12 +266,12 @@ class RestValueMetric(AbstractMetric):
class ShellValueMetric(AbstractMetric):
def __init__(self, config, prefix=''):
def __init__(self, config):
super().__init__('shell_value', config)
for d in self.config:
name, command, interval, args = d['name'], d['command'], d['interval'], d['args']
result = get_shell_value(command, args)
self.data_array.append(ShellValueData(name, interval, command, result, args, prefix))
self.data_array.append(ShellValueData(name, interval, command, result, args, self.prefix))
def proceed_metric(self):
for d in self.data_array:
@@ -274,7 +287,7 @@ class ShellValueMetric(AbstractMetric):
class UptimeMetric(AbstractMetric):
def __init__(self, interval):
super().__init__(None, {})
self.data_array.append(UptimeData(interval))
self.data_array.append(UptimeData(interval, self.prefix))
def proceed_metric(self):
for d in self.data_array:
@@ -289,7 +302,7 @@ class UptimeMetric(AbstractMetric):
class SystemMetric(AbstractMetric):
def __init__(self, interval):
super().__init__(None, {})
self.data_array.append(SystemData(interval, app_config.INSTANCE_PREFIX))
self.data_array.append(SystemData(interval, self.prefix))
def proceed_metric(self):
for d in self.data_array: