From 0296d622435e02166c46c9df36a703840b794775 Mon Sep 17 00:00:00 2001 From: Daniel Trujillo Viedma Date: Tue, 12 May 2020 11:40:11 +0200 Subject: [PATCH] First draft implementation --- dqc_wrapper/scripts/json2txt_cdm.py | 127 ++++++++++++++++++++ dqc_wrapper/scripts/json2txt_integration.py | 72 +++++++++++ dqc_wrapper/scripts/json2txt_unit.py | 61 ++++++++++ dqc_wrapper/scripts/txt2pdf_customTitle.py | 21 ++++ dqc_wrapper/utils/utils_pdf.py | 12 +- 5 files changed, 287 insertions(+), 6 deletions(-) create mode 100644 dqc_wrapper/scripts/json2txt_cdm.py create mode 100644 dqc_wrapper/scripts/json2txt_integration.py create mode 100644 dqc_wrapper/scripts/json2txt_unit.py create mode 100644 dqc_wrapper/scripts/txt2pdf_customTitle.py diff --git a/dqc_wrapper/scripts/json2txt_cdm.py b/dqc_wrapper/scripts/json2txt_cdm.py new file mode 100644 index 000000000..1cf8198f3 --- /dev/null +++ b/dqc_wrapper/scripts/json2txt_cdm.py @@ -0,0 +1,127 @@ +''' +Created on 29 abr. 2020 + +@author: daniel +''' + +import json +import sys +import dateutil.parser +from _functools import * +from textwrap import wrap +import _functools + +if len(sys.argv) != 4: + print('usage: python json2txt_cdm.py ') + sys.exit() + +with open(sys.argv[1], 'r') as f: + input_read = f.read() + +json_parsed = json.loads(input_read) + +########## +## Extract preliminary data +########## + +# Any +tests_data = json_parsed["results"] +# Any -> [String] +retrieve_cdm_data = lambda result: result["output"]["cdm_compliance"][result["output"]["cdm_compliance"].find("\n")+1:].strip().split("\n") +# List[List[String]] +cdm_data = list(map(retrieve_cdm_data,tests_data)) +# String +tool_name = json_parsed["info"]["tool_name"] +# String +test_timestamp = dateutil.parser.isoparse(json_parsed["info"]["check_run_time"]).strftime('%d/%m/%Y') + +########## +## First, build the summary information +########## + +# List[String] -> Bool +is_test_failed = lambda result: reduce(lambda x,y: x|y, map(lambda x: 
x.startswith("ERROR"), result)) +# Int +number_of_tests_failed = len(list(filter(is_test_failed, cdm_data))) +# Int +percentage_failed = 100 * number_of_tests_failed / len(cdm_data) + +# List[String] -> Bool +is_test_warning = lambda result: reduce(lambda x,y: x&y, map(lambda x: x.startswith("WARN"), result)) +# Int +number_of_tests_warning = len(list(filter(is_test_warning, cdm_data))) +# Int +percentage_warning = 100 * number_of_tests_warning / len(cdm_data) + +# Int +number_of_test_passed = len(cdm_data) - number_of_tests_warning - number_of_tests_failed +# Int +percentage_passed = 100 - percentage_warning - percentage_failed + +# (All of the following are) String +results = "res = {}% passed, {}% passed with warnings, {}% not passed".format(percentage_passed, percentage_warning, percentage_failed) +message = "msg = The tool " + tool_name + (" has " if (percentage_failed == 0) else " hasn't ") + "passed all the CDM compliance checks" +number_of_tests = "number of tests = {}".format(len(tests_data)) +generation_time = "Generated on " + test_timestamp + +# Any -> String +get_dataset_name = lambda result: result["input"][0] +# List[String] +dataset_names = map(get_dataset_name, tests_data) +# String -> String +get_dataset_type = lambda dataset_name: dataset_name[0:dataset_name.find("-")] +# List[String] +dataset_types = list(set(map(get_dataset_type, dataset_names))) +# String +dataset_types_message = "dataset types considered = " + reduce(lambda msg,new_type: "{}, {}".format(msg, new_type), dataset_types[1:], dataset_types[0]) + + +########## +## Now extract relevant data for the report +########## + +# Any -> String +def dictToString(obj): + return reduce(lambda a,b : "{}\n {}: {}".format(a, b[0],b[1]), obj.items(), "") + +# List[String] -> String +def test_outcome(cdm_check_output:[str]): + return "With warnings" if(is_test_warning(cdm_check_output)) else ("False" if(is_test_failed(cdm_check_output)) else "True") + +# List[String] -> String +def 
format_CDM_check_results(cdm_check_output:[str]): + return reduce(lambda a,b : "{}\n {}".format(a, b), cdm_check_output, "") + +# Any -> String +extract_data = lambda result: "** CDM check number {} **\nPassed: {}\nDataset: {}\nParameters applied: {}\nResults:{}\n \n \n \n".format(result[1], test_outcome(result[0][1]), result[0][0]["input"][0], dictToString(result[0][0]["input"][1]), format_CDM_check_results(result[0][1])) + +# List[T] -> List[(T, Int)] +def zipWithIndex(array): + return list(zip(array, range(0, len(array)))) + +# List[(Any, +combined_data = list(zip(tests_data, cdm_data)) +combined_indexed_data = zipWithIndex(combined_data) + +relevant_data = list(map(extract_data, combined_indexed_data)) + +#print(tests_data) + +#sys.exit() + +# The final step is to format the relevant data +format_log = lambda data: "{}\n\n".format(data) + +relevant_data_formatted = "{}\n{}\n{}\n{}\n{}".format(results, message, number_of_tests, dataset_types_message, generation_time) +relevant_log_formatted = relevant_data + +# And write the files +with open(sys.argv[2], 'w') as f: + list(map(lambda s: f.write(s), relevant_data_formatted)) + +with open(sys.argv[3], 'w') as f: + list(map(lambda s: f.write(s), relevant_log_formatted)) + + + + diff --git a/dqc_wrapper/scripts/json2txt_integration.py b/dqc_wrapper/scripts/json2txt_integration.py new file mode 100644 index 000000000..327f4a568 --- /dev/null +++ b/dqc_wrapper/scripts/json2txt_integration.py @@ -0,0 +1,72 @@ +''' +Created on 29 abr. 
'''
json2txt_integration.py - convert a DQC JSON integration-test results file
into two plain-text files: a summary report and a detailed log.

Usage: python json2txt_integration.py <input json> <output summary txt> <output log txt>

Created on 29 abr. 2020

@author: daniel
'''

import json
import sys


def dict_to_string(mapping):
    """Render a dict as one newline-led ' key: value' entry per item."""
    return "".join("\n {}: {}".format(key, value) for key, value in mapping.items())


def format_test_report(index, result):
    """Human-readable report section for one integration test."""
    return ("** Integration test number {} **\nPassed: {}\nInput: {}\n"
            "Parameters applied:{}\nResults:{}\n \n \n \n").format(
        index,
        result["output"]["passed"],
        result["input"][0],
        dict_to_string(result["input"][1]),
        dict_to_string(result["output"]["result"][0]))


def main(argv):
    """Entry point: argv is [script, input json, summary txt, log txt]."""
    # Third-party dependency imported lazily so the pure helpers above can be
    # imported (and unit-tested) without python-dateutil installed.
    import dateutil.parser

    if len(argv) != 4:
        print('usage: python json2txt_integration.py <input json> <output summary txt> <output log txt>')
        sys.exit()

    with open(argv[1], 'r') as f:
        json_parsed = json.load(f)

    tests_data = json_parsed["results"]
    tool_name = json_parsed["info"]["tool_name"]
    test_timestamp = dateutil.parser.isoparse(
        json_parsed["info"]["check_run_time"]).strftime('%d/%m/%Y')

    # Summary statistics.
    number_passed = sum(1 for r in tests_data if r["output"]["passed"] == True)
    percentage_passed = 100 * number_passed / len(tests_data)

    results = "res = {}% passed, {}% not passed".format(
        percentage_passed, 100 - percentage_passed)
    message = ("msg = The tool " + tool_name
               + (" has " if percentage_passed == 100 else " hasn't ")
               + "passed all integration tests")
    number_of_tests = "number of tests = {}".format(len(tests_data))
    generation_time = "Generated on " + test_timestamp

    # Dataset type = prefix of the dataset name up to the first '-'.
    # Sorted so the report is deterministic (sets have arbitrary order).
    dataset_types = sorted({r["input"][0][:r["input"][0].find("-")]
                            for r in tests_data})
    dataset_types_message = "dataset types considered = " + ", ".join(dataset_types)

    summary = "{}\n{}\n{}\n{}\n{}".format(results, message, number_of_tests,
                                          dataset_types_message, generation_time)
    log_entries = [format_test_report(i, r) for i, r in enumerate(tests_data)]

    # One write per file (the original mapped f.write over the summary
    # string, writing it one character at a time).
    with open(argv[2], 'w') as f:
        f.write(summary)
    with open(argv[3], 'w') as f:
        f.writelines(log_entries)


if __name__ == '__main__':
    main(sys.argv)
'''
json2txt_unit.py - convert a pytest JSON-lines (one JSON object per line)
report into two plain-text files: a summary report and a detailed log.

Usage: python json2txt_unit.py <input json> <output summary txt> <output log txt>

Created on 29 abr. 2020

@author: daniel
'''

import json
import sys
from datetime import datetime


def extract_data(result):
    """Map one pytest TestReport record to a 5-tuple of report lines.

    Returns (result line, message line, unit-test id line, generation-time
    line, captured-log line).
    """
    test_name = result["location"][2]
    passed = result["outcome"] == "passed"
    if passed:
        log_line = "No log output captured"
    else:
        # Location of the first traceback entry of the failure.
        loc = result["longrepr"]["reprtraceback"]["reprentries"][0]["data"]["reprfileloc"]
        log_line = "{}:{} - {}".format(loc["path"], loc["lineno"], loc["message"])
    return (
        "res = {outcome}".format(**result),
        # test_name[5:] drops the 'test_' prefix -- assumes pytest's default
        # naming convention; TODO confirm against the actual test suite.
        ("msg = the tool " + test_name[5:] + " has "
         + ("" if passed else "not ")  # fixed: original emitted "notpassed"
         + "passed the unit test " + test_name),
        "unit test applied = " + result["nodeid"],
        "Generated on " + datetime.today().strftime('%d/%m/%Y'),
        log_line,
    )


def format_data(data):
    """Summary block for one test: the first four fields, newline-ended."""
    return "{}\n{}\n{}\n{}\n".format(*data[:4])


def format_log(data):
    """Log line for one test: the fifth field, newline-ended."""
    return "{}\n".format(data[4])


def main(argv):
    """Entry point: argv is [script, input json, summary txt, log txt]."""
    if len(argv) != 4:
        # Fixed: the original usage line named a non-existent json2txt_util.py.
        print('usage: python json2txt_unit.py <input json> <output summary txt> <output log txt>')
        sys.exit()

    with open(argv[1], 'r') as f:
        input_read = f.read()

    # The report is one JSON object per line; wrap it into a JSON array.
    json_parsed = json.loads("[" + input_read.rstrip("\n").replace("\n", ",") + "]")

    # Keep only TestReport entries for the actual test call, skipping
    # setup/teardown phases and non-test records (e.g. CollectReport).
    relevant_data = [extract_data(r) for r in json_parsed
                     if r["$report_type"] == "TestReport" and r["when"] == "call"]

    with open(argv[2], 'w') as f:
        f.writelines(map(format_data, relevant_data))

    with open(argv[3], 'w') as f:
        f.writelines(map(format_log, relevant_data))


if __name__ == '__main__':
    main(sys.argv)
+42,7 @@ def line_breaks(text, chars_per_line=125): """ Convert a txt to a pdf file (reporting) """ -def save_txt_report(txt1_file, txt2_file, pdf_file): +def save_txt_report(txt1_file, txt2_file, pdf_file, fstTitle='Summary of data checks', drop_whitespace = True): # PDF report name print('INFO: Saving report to {}'.format(pdf_file)) @@ -54,14 +54,14 @@ def save_txt_report(txt1_file, txt2_file, pdf_file): txt_str2 = f.read() # Line breaks - txt_str1 = line_breaks(txt_str1) - txt_str2 = line_breaks(txt_str2) + txt_str1 = line_breaks(txt_str1, drop_whitespace = drop_whitespace) + txt_str2 = line_breaks(txt_str2, drop_whitespace = drop_whitespace) # Write PDF try: # Init c = canvas.Canvas(pdf_file) - break_into_pages(txt_str1, c, 58, 'Summary of data checks', False, 15) + break_into_pages(txt_str1, c, 58, fstTitle, False, 15) break_into_pages(txt_str2, c, 58, 'Detailed log output', True, 10) # Save c.save() -- GitLab