...
 
Commits (11)
# Collector for X-tee subsystems and methods catalogue
This application will collect data for X-tee subsystems and methods catalogue. Collector will output collected data to a directory that is ready to be served by a web server (like Apache or Nginx). Subsequent executions will create new versions of catalogue while preserving old versions.
## Configuration
Create a configuration file for your X-Road instance using an example configuration file: [example-config.json](example-config.json). If you need to provide catalogue data for multiple X-Road instances then you will need separate configurations for each X-Road instance.
Configuration parameters:
* `output_path` - output directory for collected data;
* `server_url` - address of your security server;
* `client` - array of X-Road client identifiers;
* `instance` - X-Road instance to collect data from;
* `timeout` - X-Road query timeout;
* `server_cert` - optional TLS certificate of your security server for verification;
* `client_cert` - optional application TLS certificate for authentication with security server;
* `client_key` - optional application key for authentication with security server;
* `thread_count` - amount of parallel threads to use;
* `wsdl_replaces` - replace metadata like creation timestamp in WSDLs to avoid duplicates;
* `excluded_member_codes` - exclude certain members who are permanently in faulty state or should not be queried for any other reasons;
* `excluded_subsystem_codes` - exclude certain subsystems that are permanently in faulty state or should not be queried for any other reasons;
* `logging-config` - logging configuration passed to logging.config.dictConfig(). You can read more about Python3 logging here: [https://docs.python.org/3/library/logging.config.html](https://docs.python.org/3/library/logging.config.html).
## Installing python venv
Python virtual environment is an easy way to manage application dependencies. First you will need to install support for python venv:
```bash
sudo apt-get install python3-venv
```
Then install required python modules into venv:
```bash
python3 -m venv venv
source venv/bin/activate
pip install -r requirements.txt
```
## Running
You can run the collector by issuing command (with activated venv):
```bash
python catalogue-collector.py config-instance1.json
```
## Systemd timer
Systemd timer can be used as a more advanced version of cron. You can use provided example timer and service definitions to perform scheduled collection of data from your instances.
Add service description `systemd/catalogue-collector.service` to `/lib/systemd/system/catalogue-collector.service` and timer description `systemd/catalogue-collector.timer` to `/lib/systemd/system/catalogue-collector.timer`.
Then start and enable automatic startup:
```bash
sudo systemctl daemon-reload
sudo systemctl start catalogue-collector.timer
sudo systemctl enable catalogue-collector.timer
```
## Helper scripts
* `recreate_history.py` - This script can be used to update history.json file when it was corrupted or when some of the reports were deleted. Usage: `python3 recreate_history.py <path to catalogue>`
* `remove_unused.py` - This script can be used to remove WSDL files that are no longer used in X-tee catalogue. For example due to deletion of older catalogue reports. Usage: `python3 remove_unused.py <path to catalogue>`. Or to simply list unused WSDLs: `python3 remove_unused.py --only-list <path to catalogue>`
If after usage of `remove_unused.py` you need to also delete empty directories then execute the following command inside catalogue directory:
```bash
find . -type d -empty -delete
```
......@@ -315,14 +315,27 @@ def sort_by_time(item):
return item['reportTime']
def all_results_failed(results):
    """Tell whether every collected result is marked as failed.

    `results` is a mapping whose values are dicts carrying an 'ok' flag.
    Returns True when no value has a truthy 'ok' (an empty mapping therefore
    also yields True), and False as soon as one truthy 'ok' is found.
    """
    return not any(entry['ok'] for entry in results.values())
def process_results(params):
"""Process results collected by worker threads"""
results = params['results']
card_nr = 0
if all_results_failed(results):
# Skipping this version
LOGGER.error('All subsystems failed, skipping this catalogue version!')
return
json_data = []
for subsystem_key in sorted(results.keys()):
card_nr += 1
subsystem_result = results[subsystem_key] # type: dict
methods = subsystem_result['methods']
if subsystem_result['ok'] and methods:
......
{
"output_path": "/var/www/catalogue-data/",
"server_url": "security-server.domain.local",
"server_url": "https://security-server.domain.local",
"client": [
"INST",
"CLASS",
......
#!/usr/bin/python3
import argparse
import json
import re
import os
def sort_by_time(item):
    """Sort key: return the 'reportTime' value of a report entry."""
    report_time = item['reportTime']
    return report_time
def get_catalogue_reports(path):
    """Collect catalogue report entries from the directory `path`.

    Every directory entry named index_<14 digits>.json becomes a dict with
    'reportTime' (the digits formatted as 'YYYY-MM-DD hh:mm:ss') and
    'reportPath' (the bare file name). The list is returned sorted by
    'reportTime' in descending order, i.e. newest report first.
    """
    name_pattern = re.compile(
        '^index_(\\d{4})(\\d{2})(\\d{2})(\\d{2})(\\d{2})(\\d{2})\\.json$')
    found = []
    for entry in os.listdir(path):
        match = name_pattern.search(entry)
        if match is None:
            # Not a report file, ignore it
            continue
        found.append({
            'reportTime': '{}-{}-{} {}:{}:{}'.format(*match.groups()),
            'reportPath': entry})
    return sorted(found, key=sort_by_time, reverse=True)
def main():
    """Command-line entry point: recreate history.json for a catalogue.

    Takes one positional argument, the path to the catalogue directory.
    Lists the report files found there and writes them to
    <path>/history.json. When no report is found nothing is written.

    Exits with status 1 when the given path is not a directory.
    """
    # Local import: sys is not part of this script's import block.
    import sys

    parser = argparse.ArgumentParser(
        description='Recreate history.json file for X-tee catalogue',
        formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument('path', metavar='PATH', help='Path to the catalogue')
    args = parser.parse_args()

    # isdir() rather than exists(): a regular file would pass exists() and
    # then make os.listdir() fail with an unhandled NotADirectoryError.
    if not os.path.isdir(args.path):
        print('Directory not found "{}"'.format(args.path))
        # sys.exit() instead of the site-provided exit(), which is meant for
        # the interactive shell and is absent when site is not loaded.
        sys.exit(1)

    reports = get_catalogue_reports(args.path)
    if not reports:
        print('No JSON reports found in directory: {}'.format(args.path))
        return

    history_path = os.path.join(args.path, 'history.json')
    # ensure_ascii=False may emit non-ASCII characters, so pin the encoding
    # instead of depending on the locale default.
    with open(history_path, 'w', encoding='utf-8') as f:
        json.dump(reports, f, indent=2, ensure_ascii=False)
    print('Writing {} reports to {}'.format(len(reports), history_path))


if __name__ == '__main__':
    main()
#!/usr/bin/python3
import argparse
import json
import re
import os
def get_report_files(path):
    """Return the set of report file names found directly in `path`.

    A directory entry counts as a report when its name has the form
    index_<14 digits>.json. Only bare names are returned, not full paths.
    """
    report_name = re.compile(
        '^index_(\\d{4})(\\d{2})(\\d{2})(\\d{2})(\\d{2})(\\d{2})\\.json$')
    return {entry for entry in os.listdir(path) if report_name.search(entry)}
def get_wsdls_in_report(path, report_file):
    """Return the set of WSDL paths referenced by one catalogue report.

    The report `report_file` inside directory `path` is parsed as JSON. It is
    iterated as a sequence of systems, each with a 'methods' list whose items
    carry a 'wsdl' value. Every non-empty 'wsdl' value is joined onto `path`
    and added to the result.

    Raises KeyError when a system has no 'methods' key or a method has no
    'wsdl' key.
    """
    # Read explicitly as UTF-8 so that parsing does not depend on the locale
    # default encoding when the report contains non-ASCII text.
    with open(os.path.join(path, report_file), 'r', encoding='utf-8') as fh:
        report_data = json.load(fh)

    return {
        os.path.join(path, method['wsdl'])
        for system in report_data
        for method in system['methods']
        if method['wsdl']
    }
def get_available_wsdls(path):
    """Return the set of WSDL file paths found anywhere under `path`.

    The directory tree is walked recursively. A file counts as a WSDL when
    its name is one or more digits followed by '.wsdl'. Each returned path is
    the walked directory joined with the file name.
    """
    wsdl_name = re.compile('^\\d+\\.wsdl$')
    return {
        os.path.join(folder, entry)
        for folder, _, entries in os.walk(path)
        for entry in entries
        if wsdl_name.search(entry)
    }
def get_unused_wsdls(path):
    """Return the WSDL files under `path` that no catalogue report references.

    All report files in `path` are read and the WSDL paths they reference are
    merged. The result is the set of WSDL files found on disk minus the
    referenced ones.

    Exits with status 1 when there are no reports at all, or when the reports
    reference no WSDL whatsoever. Both cases are treated as a probable error
    so the caller does not end up with every WSDL reported as unused.
    """
    # Local import: sys is not part of this script's import block.
    import sys

    reports = get_report_files(path)
    if not reports:
        print('No catalogue reports found, exiting!')
        # sys.exit() instead of the site-provided exit(), which is meant for
        # the interactive shell and is absent when site is not loaded.
        sys.exit(1)

    used_wsdls = set()
    for report_file in reports:
        used_wsdls.update(get_wsdls_in_report(path, report_file))
    if not used_wsdls:
        print('Catalogue does not use any WSDLs? Possibly this is an error, exiting!')
        sys.exit(1)

    return get_available_wsdls(path) - used_wsdls
def main():
    """Command-line entry point: remove or list unused catalogue WSDLs.

    Takes the positional catalogue path and the optional --only-list flag.
    With --only-list the unused WSDL paths are only printed. Otherwise each
    one is printed and then deleted from disk.

    Exits with status 1 when the given path is not a directory.
    """
    # Local import: sys is not part of this script's import block.
    import sys

    parser = argparse.ArgumentParser(
        description='Remove WSDLs that are no longer used by X-tee catalogue',
        formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument('path', metavar='PATH', help='Path to the catalogue')
    parser.add_argument(
        '--only-list', action='store_true', help='Only list unused WSDLs, do not remove anything')
    args = parser.parse_args()

    # isdir() rather than exists(): a regular file would pass exists() and
    # then make os.listdir() fail with an unhandled NotADirectoryError.
    if not os.path.isdir(args.path):
        print('Directory not found "{}"'.format(args.path))
        # sys.exit() instead of the site-provided exit(), which is meant for
        # the interactive shell and is absent when site is not loaded.
        sys.exit(1)

    # Sorted so the listing is reproducible; set iteration order is arbitrary.
    unused_wsdls = sorted(get_unused_wsdls(args.path))

    if args.only_list:
        for wsdl_path in unused_wsdls:
            print(wsdl_path)
        return

    if not unused_wsdls:
        print('No unused WSDLs found')
        return

    print('Removing {} unused WSDLs:'.format(len(unused_wsdls)))
    for wsdl_path in unused_wsdls:
        print(wsdl_path)
        os.remove(wsdl_path)


if __name__ == '__main__':
    main()