From 0296d622435e02166c46c9df36a703840b794775 Mon Sep 17 00:00:00 2001 From: Daniel Trujillo Viedma Date: Tue, 12 May 2020 11:40:11 +0200 Subject: [PATCH] First draft implementation --- dqc_wrapper/scripts/json2txt_cdm.py | 127 ++++++++++++++++++++ dqc_wrapper/scripts/json2txt_integration.py | 72 +++++++++++ dqc_wrapper/scripts/json2txt_unit.py | 61 ++++++++++ dqc_wrapper/scripts/txt2pdf_customTitle.py | 21 ++++ dqc_wrapper/utils/utils_pdf.py | 12 +- 5 files changed, 287 insertions(+), 6 deletions(-) create mode 100644 dqc_wrapper/scripts/json2txt_cdm.py create mode 100644 dqc_wrapper/scripts/json2txt_integration.py create mode 100644 dqc_wrapper/scripts/json2txt_unit.py create mode 100644 dqc_wrapper/scripts/txt2pdf_customTitle.py diff --git a/dqc_wrapper/scripts/json2txt_cdm.py b/dqc_wrapper/scripts/json2txt_cdm.py new file mode 100644 index 000000000..1cf8198f3 --- /dev/null +++ b/dqc_wrapper/scripts/json2txt_cdm.py @@ -0,0 +1,127 @@ +''' +Created on 29 abr. 2020 + +@author: daniel +''' + +import json +import sys +import dateutil.parser +from _functools import * +from textwrap import wrap +import _functools + +if len(sys.argv) != 4: + print('usage: python json2txt_cdm.py ') + sys.exit() + +with open(sys.argv[1], 'r') as f: + input_read = f.read() + +json_parsed = json.loads(input_read) + +########## +## Extract preliminary data +########## + +# Any +tests_data = json_parsed["results"] +# Any -> [String] +retrieve_cdm_data = lambda result: result["output"]["cdm_compliance"][result["output"]["cdm_compliance"].find("\n")+1:].strip().split("\n") +# List[List[String]] +cdm_data = list(map(retrieve_cdm_data,tests_data)) +# String +tool_name = json_parsed["info"]["tool_name"] +# String +test_timestamp = dateutil.parser.isoparse(json_parsed["info"]["check_run_time"]).strftime('%d/%m/%Y') + +########## +## First, build the summary information +########## + +# List[String] -> Bool +is_test_failed = lambda result: reduce(lambda x,y: x|y, map(lambda x: 
x.startswith("ERROR"), result)) +# Int +number_of_tests_failed = len(list(filter(is_test_failed, cdm_data))) +# Int +percentage_failed = 100 * number_of_tests_failed / len(cdm_data) + +# List[String] -> Bool +is_test_warning = lambda result: reduce(lambda x,y: x&y, map(lambda x: x.startswith("WARN"), result)) +# Int +number_of_tests_warning = len(list(filter(is_test_warning, cdm_data))) +# Int +percentage_warning = 100 * number_of_tests_warning / len(cdm_data) + +# Int +number_of_test_passed = len(cdm_data) - number_of_tests_warning - number_of_tests_failed +# Int +percentage_passed = 100 - percentage_warning - percentage_failed + +# (All of the following are) String +results = "res = {}% passed, {}% passed with warnings, {}% not passed".format(percentage_passed, percentage_warning, percentage_failed) +message = "msg = The tool " + tool_name + (" has " if (percentage_failed == 0) else " hasn't ") + "passed all the CDM compliance checks" +number_of_tests = "number of tests = {}".format(len(tests_data)) +generation_time = "Generated on " + test_timestamp + +# Any -> String +get_dataset_name = lambda result: result["input"][0] +# List[String] +dataset_names = map(get_dataset_name, tests_data) +# String -> String +get_dataset_type = lambda dataset_name: dataset_name[0:dataset_name.find("-")] +# List[String] +dataset_types = list(set(map(get_dataset_type, dataset_names))) +# String +dataset_types_message = "dataset types considered = " + reduce(lambda msg,new_type: "{}, {}".format(msg, new_type), dataset_types[1:], dataset_types[0]) + + +########## +## Now extract relevant data for the report +########## + +# Any -> String +def dictToString(obj): + return reduce(lambda a,b : "{}\n {}: {}".format(a, b[0],b[1]), obj.items(), "") + +# List[String] -> String +def test_outcome(cdm_check_output:[str]): + return "With warnings" if(is_test_warning(cdm_check_output)) else ("False" if(is_test_failed(cdm_check_output)) else "True") + +# List[String] -> String +def 
format_CDM_check_results(cdm_check_output:[str]): + return reduce(lambda a,b : "{}\n {}".format(a, b), cdm_check_output, "") + +# Any -> String +extract_data = lambda result: "** CDM check number {} **\nPassed: {}\nDataset: {}\nParameters applied: {}\nResults:{}\n \n \n \n".format(result[1], test_outcome(result[0][1]), result[0][0]["input"][0], dictToString(result[0][0]["input"][1]), format_CDM_check_results(result[0][1])) + +# List[T] -> List[(T, Int)] +def zipWithIndex(array): + return list(zip(array, range(0, len(array)))) + +# List[(Any, +combined_data = list(zip(tests_data, cdm_data)) +combined_indexed_data = zipWithIndex(combined_data) + +relevant_data = list(map(extract_data, combined_indexed_data)) + +#print(tests_data) + +#sys.exit() + +# The final step is to format the relevant data +format_log = lambda data: "{}\n\n".format(data) + +relevant_data_formatted = "{}\n{}\n{}\n{}\n{}".format(results, message, number_of_tests, dataset_types_message, generation_time) +relevant_log_formatted = relevant_data + +# And write the files +with open(sys.argv[2], 'w') as f: + list(map(lambda s: f.write(s), relevant_data_formatted)) + +with open(sys.argv[3], 'w') as f: + list(map(lambda s: f.write(s), relevant_log_formatted)) + + + + diff --git a/dqc_wrapper/scripts/json2txt_integration.py b/dqc_wrapper/scripts/json2txt_integration.py new file mode 100644 index 000000000..327f4a568 --- /dev/null +++ b/dqc_wrapper/scripts/json2txt_integration.py @@ -0,0 +1,72 @@ +''' +Created on 29 abr. 
'''
json2txt_integration.py - convert a DQC JSON integration-test results file
into two plain-text files: a summary report and a detailed log.

Usage: python json2txt_integration.py <input json> <output summary txt> <output log txt>

Created on 29 abr. 2020

@author: daniel
'''

import json
import sys


def dict_to_string(mapping):
    """Render a dict as one newline-led ' key: value' entry per item."""
    return "".join("\n {}: {}".format(key, value) for key, value in mapping.items())


def format_test_report(index, result):
    """Human-readable report section for one integration test."""
    return ("** Integration test number {} **\nPassed: {}\nInput: {}\n"
            "Parameters applied:{}\nResults:{}\n \n \n \n").format(
        index,
        result["output"]["passed"],
        result["input"][0],
        dict_to_string(result["input"][1]),
        dict_to_string(result["output"]["result"][0]))


def main(argv):
    """Entry point: argv is [script, input json, summary txt, log txt]."""
    # Third-party dependency imported lazily so the pure helpers above can be
    # imported (and unit-tested) without python-dateutil installed.
    import dateutil.parser

    if len(argv) != 4:
        print('usage: python json2txt_integration.py <input json> <output summary txt> <output log txt>')
        sys.exit()

    with open(argv[1], 'r') as f:
        json_parsed = json.load(f)

    tests_data = json_parsed["results"]
    tool_name = json_parsed["info"]["tool_name"]
    test_timestamp = dateutil.parser.isoparse(
        json_parsed["info"]["check_run_time"]).strftime('%d/%m/%Y')

    # Summary statistics.
    number_passed = sum(1 for r in tests_data if r["output"]["passed"] == True)
    percentage_passed = 100 * number_passed / len(tests_data)

    results = "res = {}% passed, {}% not passed".format(
        percentage_passed, 100 - percentage_passed)
    message = ("msg = The tool " + tool_name
               + (" has " if percentage_passed == 100 else " hasn't ")
               + "passed all integration tests")
    number_of_tests = "number of tests = {}".format(len(tests_data))
    generation_time = "Generated on " + test_timestamp

    # Dataset type = prefix of the dataset name up to the first '-'.
    # Sorted so the report is deterministic (sets have arbitrary order).
    dataset_types = sorted({r["input"][0][:r["input"][0].find("-")]
                            for r in tests_data})
    dataset_types_message = "dataset types considered = " + ", ".join(dataset_types)

    summary = "{}\n{}\n{}\n{}\n{}".format(results, message, number_of_tests,
                                          dataset_types_message, generation_time)
    log_entries = [format_test_report(i, r) for i, r in enumerate(tests_data)]

    # One write per file (the original mapped f.write over the summary
    # string, writing it one character at a time).
    with open(argv[2], 'w') as f:
        f.write(summary)
    with open(argv[3], 'w') as f:
        f.writelines(log_entries)


if __name__ == '__main__':
    main(sys.argv)
'''
json2txt_unit.py - convert a pytest JSON-lines (one JSON object per line)
report into two plain-text files: a summary report and a detailed log.

Usage: python json2txt_unit.py <input json> <output summary txt> <output log txt>

Created on 29 abr. 2020

@author: daniel
'''

import json
import sys
from datetime import datetime


def extract_data(result):
    """Map one pytest TestReport record to a 5-tuple of report lines.

    Returns (result line, message line, unit-test id line, generation-time
    line, captured-log line).
    """
    test_name = result["location"][2]
    passed = result["outcome"] == "passed"
    if passed:
        log_line = "No log output captured"
    else:
        # Location of the first traceback entry of the failure.
        loc = result["longrepr"]["reprtraceback"]["reprentries"][0]["data"]["reprfileloc"]
        log_line = "{}:{} - {}".format(loc["path"], loc["lineno"], loc["message"])
    return (
        "res = {outcome}".format(**result),
        # test_name[5:] drops the 'test_' prefix -- assumes pytest's default
        # naming convention; TODO confirm against the actual test suite.
        ("msg = the tool " + test_name[5:] + " has "
         + ("" if passed else "not ")  # fixed: original emitted "notpassed"
         + "passed the unit test " + test_name),
        "unit test applied = " + result["nodeid"],
        "Generated on " + datetime.today().strftime('%d/%m/%Y'),
        log_line,
    )


def format_data(data):
    """Summary block for one test: the first four fields, newline-ended."""
    return "{}\n{}\n{}\n{}\n".format(*data[:4])


def format_log(data):
    """Log line for one test: the fifth field, newline-ended."""
    return "{}\n".format(data[4])


def main(argv):
    """Entry point: argv is [script, input json, summary txt, log txt]."""
    if len(argv) != 4:
        # Fixed: the original usage line named a non-existent json2txt_util.py.
        print('usage: python json2txt_unit.py <input json> <output summary txt> <output log txt>')
        sys.exit()

    with open(argv[1], 'r') as f:
        input_read = f.read()

    # The report is one JSON object per line; wrap it into a JSON array.
    json_parsed = json.loads("[" + input_read.rstrip("\n").replace("\n", ",") + "]")

    # Keep only TestReport entries for the actual test call, skipping
    # setup/teardown phases and non-test records (e.g. CollectReport).
    relevant_data = [extract_data(r) for r in json_parsed
                     if r["$report_type"] == "TestReport" and r["when"] == "call"]

    with open(argv[2], 'w') as f:
        f.writelines(map(format_data, relevant_data))

    with open(argv[3], 'w') as f:
        f.writelines(map(format_log, relevant_data))


if __name__ == '__main__':
    main(sys.argv)
+42,7 @@ def line_breaks(text, chars_per_line=125): """ Convert a txt to a pdf file (reporting) """ -def save_txt_report(txt1_file, txt2_file, pdf_file): +def save_txt_report(txt1_file, txt2_file, pdf_file, fstTitle='Summary of data checks', drop_whitespace = True): # PDF report name print('INFO: Saving report to {}'.format(pdf_file)) @@ -54,14 +54,14 @@ def save_txt_report(txt1_file, txt2_file, pdf_file): txt_str2 = f.read() # Line breaks - txt_str1 = line_breaks(txt_str1) - txt_str2 = line_breaks(txt_str2) + txt_str1 = line_breaks(txt_str1, drop_whitespace = drop_whitespace) + txt_str2 = line_breaks(txt_str2, drop_whitespace = drop_whitespace) # Write PDF try: # Init c = canvas.Canvas(pdf_file) - break_into_pages(txt_str1, c, 58, 'Summary of data checks', False, 15) + break_into_pages(txt_str1, c, 58, fstTitle, False, 15) break_into_pages(txt_str2, c, 58, 'Detailed log output', True, 10) # Save c.save() -- GitLab