diff --git a/dqc_wrapper/scripts/json2txt_cdm.py b/dqc_wrapper/scripts/json2txt_cdm.py
new file mode 100644
index 0000000000000000000000000000000000000000..1cf8198f3204dcfeacf0ab8b1ea3aecd51721316
--- /dev/null
+++ b/dqc_wrapper/scripts/json2txt_cdm.py
@@ -0,0 +1,104 @@
+'''
+Created on 29 abr. 2020
+
+@author: daniel
+'''
+
+import json
+import sys
+import dateutil.parser
+from functools import reduce
+
+if len(sys.argv) != 4:
+    print('usage: python json2txt_cdm.py <input_json> <report_txt> <log_txt>')
+    sys.exit(1)
+
+with open(sys.argv[1], 'r') as f:
+    input_read = f.read()
+
+json_parsed = json.loads(input_read)
+
+##########
+## Extract preliminary data
+##########
+
+# List of per-test result objects
+tests_data = json_parsed["results"]
+# Any -> List[String]: drop the header line of cdm_compliance and split the rest
+retrieve_cdm_data = lambda result: result["output"]["cdm_compliance"][result["output"]["cdm_compliance"].find("\n")+1:].strip().split("\n")
+# List[List[String]]
+cdm_data = list(map(retrieve_cdm_data, tests_data))
+# String
+tool_name = json_parsed["info"]["tool_name"]
+# String (dd/mm/YYYY)
+test_timestamp = dateutil.parser.isoparse(json_parsed["info"]["check_run_time"]).strftime('%d/%m/%Y')
+
+##########
+## First, build the summary information
+##########
+
+# List[String] -> Bool: a test failed when any of its output lines is an ERROR
+is_test_failed = lambda result: any(line.startswith("ERROR") for line in result)
+number_of_tests_failed = len(list(filter(is_test_failed, cdm_data)))
+percentage_failed = 100 * number_of_tests_failed / len(cdm_data)
+
+# List[String] -> Bool: a test is a warning when every output line is a WARN
+is_test_warning = lambda result: all(line.startswith("WARN") for line in result)
+number_of_tests_warning = len(list(filter(is_test_warning, cdm_data)))
+percentage_warning = 100 * number_of_tests_warning / len(cdm_data)
+
+number_of_test_passed = len(cdm_data) - number_of_tests_warning - number_of_tests_failed
+percentage_passed = 100 - percentage_warning - percentage_failed
+
+# (All of the following are) String
+results = "res = {}% passed, {}% passed with warnings, {}% not passed".format(percentage_passed, percentage_warning, percentage_failed)
+message = "msg = The tool " + tool_name + (" has " if (percentage_failed == 0) else " hasn't ") + "passed all the CDM compliance checks"
+number_of_tests = "number of tests = {}".format(len(tests_data))
+generation_time = "Generated on " + test_timestamp
+
+# Any -> String
+get_dataset_name = lambda result: result["input"][0]
+dataset_names = map(get_dataset_name, tests_data)
+# String -> String: dataset type is the prefix before the first '-'
+get_dataset_type = lambda dataset_name: dataset_name[0:dataset_name.find("-")]
+dataset_types = list(set(map(get_dataset_type, dataset_names)))
+# ", ".join also copes with an empty list, unlike a reduce over dataset_types[1:]
+dataset_types_message = "dataset types considered = " + ", ".join(dataset_types)
+
+##########
+## Now extract relevant data for the report
+##########
+
+# Dict -> String: render a dict as "key: value" lines
+def dictToString(obj):
+    return reduce(lambda a,b : "{}\n {}: {}".format(a, b[0],b[1]), obj.items(), "")
+
+# List[String] -> String: overall outcome label for one CDM check
+def test_outcome(cdm_check_output):
+    return "With warnings" if(is_test_warning(cdm_check_output)) else ("False" if(is_test_failed(cdm_check_output)) else "True")
+
+# List[String] -> String: one indented line per CDM check output line
+def format_CDM_check_results(cdm_check_output):
+    return reduce(lambda a,b : "{}\n {}".format(a, b), cdm_check_output, "")
+
+# ((Any, List[String]), Int) -> String: one report section per CDM check
+extract_data = lambda result: "** CDM check number {} **\nPassed: {}\nDataset: {}\nParameters applied: {}\nResults:{}\n \n \n \n".format(result[1], test_outcome(result[0][1]), result[0][0]["input"][0], dictToString(result[0][0]["input"][1]), format_CDM_check_results(result[0][1]))
+
+# List[T] -> List[(T, Int)]
+def zipWithIndex(array):
+    return list(zip(array, range(0, len(array))))
+
+combined_data = list(zip(tests_data, cdm_data))
+combined_indexed_data = zipWithIndex(combined_data)
+
+relevant_data = list(map(extract_data, combined_indexed_data))
+
+# The final step is to format the relevant data
+relevant_data_formatted = "{}\n{}\n{}\n{}\n{}".format(results, message, number_of_tests, dataset_types_message, generation_time)
+
+# And write the files
+with open(sys.argv[2], 'w') as f:
+    f.write(relevant_data_formatted)
+
+with open(sys.argv[3], 'w') as f:
+    f.writelines(relevant_data)
diff --git a/dqc_wrapper/scripts/json2txt_integration.py b/dqc_wrapper/scripts/json2txt_integration.py
new file mode 100644
index 0000000000000000000000000000000000000000..327f4a5685c0c7d54325fdb3302639a25e793169
--- /dev/null
+++ b/dqc_wrapper/scripts/json2txt_integration.py
@@ -0,0 +1,64 @@
+'''
+Created on 29 abr. 2020
+
+@author: daniel
+'''
+
+import json
+import sys
+import dateutil.parser
+from functools import reduce
+
+if len(sys.argv) != 4:
+    print('usage: python json2txt_integration.py <input_json> <report_txt> <log_txt>')
+    sys.exit(1)
+
+with open(sys.argv[1], 'r') as f:
+    input_read = f.read()
+
+json_parsed = json.loads(input_read)
+
+# Extract preliminary data
+tests_data = json_parsed["results"]
+tool_name = json_parsed["info"]["tool_name"]
+test_timestamp = dateutil.parser.isoparse(json_parsed["info"]["check_run_time"]).strftime('%d/%m/%Y')
+
+# First, build the summary information
+passed_test = lambda result: result["output"]["passed"] == True
+number_of_tests_passed = len(list(filter(passed_test, tests_data)))
+percentage_passed = 100 * number_of_tests_passed / len(tests_data)
+
+results = "res = {}% passed, {}% not passed".format(percentage_passed, (100-percentage_passed))
+message = "msg = The tool " + tool_name + (" has " if (percentage_passed == 100) else " hasn't ") + "passed all integration tests"
+number_of_tests = "number of tests = {}".format(len(tests_data))
+generation_time = "Generated on " + test_timestamp
+
+get_dataset_name = lambda result: result["input"][0]
+dataset_names = map(get_dataset_name, tests_data)
+get_dataset_type = lambda dataset_name: dataset_name[0:dataset_name.find("-")]
+dataset_types = list(set(map(get_dataset_type, dataset_names)))
+# ", ".join also copes with an empty list, unlike a reduce over dataset_types[1:]
+dataset_types_message = "dataset types considered = " + ", ".join(dataset_types)
+
+
+# Now extract relevant data for the report
+def dictToString(obj):
+    return reduce(lambda a,b : "{}\n {}: {}".format(a, b[0],b[1]), obj.items(), "")
+
+extract_data = lambda result: "** Integration test number {} **\nPassed: {}\nInput: {}\nParameters applied:{}\nResults:{}\n \n \n \n".format(result[1], result[0]["output"]["passed"], result[0]["input"][0], dictToString(result[0]["input"][1]), dictToString(result[0]["output"]["result"][0]))
+
+# List[T] -> List[(T, Int)]: pair every element with its index
+def zipWithIndex(array):
+    return list(zip(array, range(0, len(array))))
+
+relevant_data = list(map(extract_data, zipWithIndex(tests_data)))
+
+# The final step is to format the relevant data
+relevant_data_formatted = "{}\n{}\n{}\n{}\n{}".format(results, message, number_of_tests, dataset_types_message, generation_time)
+
+# And write the files
+with open(sys.argv[2], 'w') as f:
+    f.write(relevant_data_formatted)
+
+with open(sys.argv[3], 'w') as f:
+    f.writelines(relevant_data)
diff --git a/dqc_wrapper/scripts/json2txt_unit.py b/dqc_wrapper/scripts/json2txt_unit.py
new file mode 100644
index 0000000000000000000000000000000000000000..e82d987fd8370a6cf884c3a019da096826bbf763
--- /dev/null
+++ b/dqc_wrapper/scripts/json2txt_unit.py
@@ -0,0 +1,57 @@
+'''
+Created on 29 abr. 2020
+
+@author: daniel
+'''
+
+import json
+import sys
+from datetime import datetime
+
+if len(sys.argv) != 4:
+    print('usage: python json2txt_unit.py <input_json> <report_txt> <log_txt>')
+    sys.exit(1)
+
+with open(sys.argv[1], 'r') as f:
+    input_read = f.read()
+
+# The input is a JSON-lines report: one JSON object per line, joined into a JSON array
+json_string = "[" + (input_read.rstrip("\n").replace("\n", ",")) + "]"
+
+json_parsed = json.loads(json_string)
+
+# Extract preliminary data
+is_test_report = lambda report: report["$report_type"] == "TestReport"
+tests_data = filter(is_test_report, json_parsed)
+
+# Filter out non-relevant setup and teardown entries
+is_call_result = lambda result: result["when"] == "call"
+tests_call_data = filter(is_call_result, tests_data)
+
+# Now extract relevant data for the report
+# Creates tuples (res, msg, unit test, generation time, log line)
+extract_data = (lambda result:
+    ("res = {outcome}".format(**result),
+     "msg = the tool " + result["location"][2][5:] + " has " + ("" if (result["outcome"]=="passed") else "not ") + "passed the unit test " + result["location"][2],
+     "unit test applied = " + result["nodeid"],
+     "Generated on " + datetime.today().strftime('%d/%m/%Y'),
+     "No log output captured" if (result["outcome"]=="passed") else "{}:{} - {}".format(result["longrepr"]["reprtraceback"]["reprentries"][0]["data"]["reprfileloc"]["path"], result["longrepr"]["reprtraceback"]["reprentries"][0]["data"]["reprfileloc"]["lineno"], result["longrepr"]["reprtraceback"]["reprentries"][0]["data"]["reprfileloc"]["message"]))
+    )
+
+# Materialized in a list because relevant_data is iterated twice below;
+# a bare map iterator would be consumed by the first pass.
+relevant_data = list(map(extract_data, tests_call_data))
+
+# The final step is to format the relevant data
+format_data = lambda data: "{}\n{}\n{}\n{}\n".format(data[0], data[1], data[2], data[3])
+format_log = lambda data: "{}\n".format(data[4])
+
+relevant_data_formatted = map(format_data, relevant_data)
+relevant_log_formatted = map(format_log, relevant_data)
+
+# And write the files
+with open(sys.argv[2], 'w') as f:
+    f.writelines(relevant_data_formatted)
+
+with open(sys.argv[3], 'w') as f:
+    f.writelines(relevant_log_formatted)
diff --git a/dqc_wrapper/scripts/txt2pdf_customTitle.py b/dqc_wrapper/scripts/txt2pdf_customTitle.py
new file mode 100644
index 0000000000000000000000000000000000000000..de3051d6635973f5e1f66aa77e5a891c111385ac
--- /dev/null
+++ b/dqc_wrapper/scripts/txt2pdf_customTitle.py
@@ -0,0 +1,21 @@
+#!/usr/bin/env python
+#
+# Data Quality Checker also known as DQC
+#
+# Read the README.md for more detailed information
+#
+# Barcelona Supercomputing Center - Centro Nacional de Supercomputacion
+# Earth Sciences Department - Computational Earth Sciences
+#
+# Authors:
+# Joan Sala Calero (joan.sala@bsc.es)
+#
+
+import sys
+
+from dqc_wrapper.utils.utils_pdf import *
+
+if len(sys.argv) != 6:
+ print('usage: python txt2pdf_customTitle.py ')
+else:
+ save_txt_report(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4], sys.argv[5] == "True")
diff --git a/dqc_wrapper/utils/utils_pdf.py b/dqc_wrapper/utils/utils_pdf.py
index 1be8a1d106a9126f4154a731debb6b9bebd9a66a..368a566215f9ca62fc5c1e06095d423829b702ff 100644
--- a/dqc_wrapper/utils/utils_pdf.py
+++ b/dqc_wrapper/utils/utils_pdf.py
@@ -31,10 +31,10 @@ registerFont(TTFont('Calibri', os.path.join(fontdir, 'Calibri.ttf')))
"""
Convert a txt to a pdf file (reporting)
"""
-def line_breaks(text, chars_per_line=125):
+def line_breaks(text, chars_per_line=125, drop_whitespace = True):
ls = ''
for t in text.split('\n'):
- pieces = wrap(t, chars_per_line)
+ pieces = wrap(t, chars_per_line, drop_whitespace = drop_whitespace)
for p in pieces:
ls = ls + p + '\n'
return ls
@@ -42,7 +42,7 @@ def line_breaks(text, chars_per_line=125):
"""
Convert a txt to a pdf file (reporting)
"""
-def save_txt_report(txt1_file, txt2_file, pdf_file):
+def save_txt_report(txt1_file, txt2_file, pdf_file, fstTitle='Summary of data checks', drop_whitespace = True):
# PDF report name
print('INFO: Saving report to {}'.format(pdf_file))
@@ -54,14 +54,14 @@ def save_txt_report(txt1_file, txt2_file, pdf_file):
txt_str2 = f.read()
# Line breaks
- txt_str1 = line_breaks(txt_str1)
- txt_str2 = line_breaks(txt_str2)
+ txt_str1 = line_breaks(txt_str1, drop_whitespace = drop_whitespace)
+ txt_str2 = line_breaks(txt_str2, drop_whitespace = drop_whitespace)
# Write PDF
try:
# Init
c = canvas.Canvas(pdf_file)
- break_into_pages(txt_str1, c, 58, 'Summary of data checks', False, 15)
+ break_into_pages(txt_str1, c, 58, fstTitle, False, 15)
break_into_pages(txt_str2, c, 58, 'Detailed log output', True, 10)
# Save
c.save()