#!/usr/bin/python
# -*- coding: utf-8 -*-
# Author: Jan Koscielniak, (c) 2020
# Author: Hubert Kario, (c) 2020
# Released under Gnu GPL v2.0, see LICENSE file for details
"""Analysis of timing information."""
from __future__ import print_function
import csv
import getopt
import sys
import multiprocessing as mp
import shutil
from os.path import join
from collections import namedtuple
from itertools import chain, combinations, repeat
import os
import numpy as np
from scipy import stats
import pandas as pd
import matplotlib as mpl
from matplotlib.figure import Figure
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
from tlsfuzzer.utils.ordered_dict import OrderedDict
TestPair = namedtuple('TestPair', 'index1 index2')
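# use the non-interactive Agg backend so the plots can be rendered to files
# even when no display is available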
mpl.use('Agg')
VERSION = 5
_diffs = None
_DATA = None
def help_msg():
"""Print help message"""
print("""Usage: analysis [-o output]
-o output Directory where to place results (required)
and where timing.csv is located
--no-ecdf-plot Don't create the ecdf_plot.png file
--no-scatter-plot Don't create the scatter_plot.png file
--no-conf-interval-plot Don't create the conf_interval_plot.png file
--multithreaded-graph Create graph and calculate statistical tests at the
same time. Note: this increases memory usage of analysis by
a factor of 8.
--help Display this message""")
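# Illustrative invocation (the directory name is just an example); timing.csv
# must already exist in the directory passed with -o:
#
#   PYTHONPATH=. python tlsfuzzer/analysis.py -o out/ --no-scatter-plot
#
# The script exits with 0 when no timing difference was detected, 1 otherwise.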
def main():
"""Process arguments and start analysis."""
output = None
ecdf_plot = True
scatter_plot = True
conf_int_plot = True
multithreaded_graph = False
argv = sys.argv[1:]
opts, args = getopt.getopt(argv, "o:",
["help", "no-ecdf-plot", "no-scatter-plot",
"no-conf-interval-plot",
"multithreaded-graph"])
for opt, arg in opts:
if opt == '-o':
output = arg
elif opt == "--help":
help_msg()
sys.exit(0)
elif opt == "--no-ecdf-plot":
ecdf_plot = False
elif opt == "--no-scatter-plot":
scatter_plot = False
elif opt == "--no-conf-interval-plot":
conf_int_plot = False
elif opt == "--multithreaded-graph":
multithreaded_graph = True
if output:
analysis = Analysis(output, ecdf_plot, scatter_plot, conf_int_plot,
multithreaded_graph)
ret = analysis.generate_report()
return ret
else:
raise ValueError("Missing -o option!")
class Analysis(object):
"""Analyse extracted timing information from csv file."""
def __init__(self, output, draw_ecdf_plot=True, draw_scatter_plot=True,
draw_conf_interval_plot=True, multithreaded_graph=False):
self.output = output
data = self.load_data()
self.class_names = list(data)
self.draw_ecdf_plot = draw_ecdf_plot
self.draw_scatter_plot = draw_scatter_plot
self.draw_conf_interval_plot = draw_conf_interval_plot
self.multithreaded_graph = multithreaded_graph
def _convert_to_binary(self):
timing_bin_path = join(self.output, "timing.bin")
timing_csv_path = join(self.output, "timing.csv")
legend_csv_path = join(self.output, "legend.csv")
timing_bin_shape_path = join(self.output, "timing.bin.shape")
if os.path.isfile(timing_bin_path) and \
os.path.isfile(legend_csv_path) and \
os.path.isfile(timing_bin_shape_path) and \
os.path.getmtime(timing_csv_path) < \
os.path.getmtime(timing_bin_path):
return
for chunk in pd.read_csv(timing_csv_path, chunksize=1,
dtype=np.float64):
self.class_names = list(chunk)
self._write_legend()
break
ncol = len(self.class_names)
rows_written = 0
        # as we're dealing with 9 digits of precision (nanosecond range)
        # and the responses can be assumed to take less than a second,
        # we need to use double-precision IEEE floating point numbers
        # load 512000 rows at a time so that we don't use more than 2000 MiB
        # of memory (including pandas overhead) at a time to process a file
        # with 256 columns
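        # (a rough sanity check of that budget: 512000 rows * 256 columns *
        # 8 bytes per float64 is exactly 1000 MiB of raw values, so even with
        # about 2x of pandas overhead we stay near the 2000 MiB limit)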
csv_reader = pd.read_csv(timing_csv_path, chunksize=512000,
dtype=np.float64)
chunk = next(csv_reader)
timing_bin = np.memmap(timing_bin_path, dtype=np.float64,
mode="w+",
shape=(len(chunk.index), ncol),
order="C")
timing_bin[:, :] = chunk.iloc[:, :]
rows_written += len(chunk.index)
del timing_bin
for chunk in csv_reader:
timing_bin = np.memmap(timing_bin_path, dtype=np.float64,
mode="r+",
shape=(rows_written + len(chunk.index),
ncol),
order="C")
timing_bin[rows_written:, :] = chunk.iloc[:, :]
rows_written += len(chunk.index)
del timing_bin
with open(timing_bin_shape_path, "w") as f:
writer = csv.writer(f)
writer.writerow(["nrow", "ncol"])
writer.writerow([rows_written, ncol])
def load_data(self):
"""Loads data into pandas Dataframe for generating plots and stats."""
self._convert_to_binary()
timing_bin_path = join(self.output, "timing.bin")
legend_csv_path = join(self.output, "legend.csv")
timing_bin_shape_path = join(self.output, "timing.bin.shape")
with open(timing_bin_shape_path, "r") as f:
reader = csv.reader(f)
if next(reader) != ["nrow", "ncol"]:
raise ValueError("Malformed {0} file, delete it and try again"
.format(timing_bin_shape_path))
nrow, ncol = next(reader)
nrow = int(nrow)
ncol = int(ncol)
legend = pd.read_csv(legend_csv_path)
if len(legend.index) != ncol:
raise ValueError("Inconsistent {0} and {1} files, delete and try "
"again".format(legend_csv_path,
timing_bin_shape_path))
columns = list(legend.iloc[:, 1])
timing_bin = np.memmap(timing_bin_path, dtype=np.float64,
mode="r", shape=(nrow, ncol), order="C")
data = pd.DataFrame(timing_bin, columns=columns, copy=False)
return data
def _box_test(self, interval1, interval2, quantile_start, quantile_end):
"""
Internal configurable function to perform the box test.
:param int interval1: index to data representing first sample
:param int interval2: index to data representing second sample
:param float quantile_start: starting quantile of the box
:param float quantile_end: closing quantile of the box
:return: None on no difference, int index of smaller sample if there
is a difference
"""
data = self.load_data()
box1_start = np.quantile(data.iloc[:, interval1], quantile_start)
box1_end = np.quantile(data.iloc[:, interval1], quantile_end)
box2_start = np.quantile(data.iloc[:, interval2], quantile_start)
box2_end = np.quantile(data.iloc[:, interval2], quantile_end)
if box1_start == box2_start or box1_end == box2_end:
# can return early because the intervals overlap
return None
intervals = {interval1: (box1_start, box1_end),
interval2: (box2_start, box2_end)}
is_smaller = min(box1_start, box2_start) == box1_start
smaller = interval1 if is_smaller else interval2
bigger = interval2 if smaller == interval1 else interval1
if (intervals[smaller][0] < intervals[bigger][0] and
intervals[smaller][1] < intervals[bigger][0]):
return smaller, bigger
return None
def box_test(self):
"""Cross-test all classes with the box test"""
results = {}
comb = combinations(list(range(len(self.class_names))), 2)
for index1, index2 in comb:
result = self._box_test(index1, index2, 0.03, 0.04)
results[TestPair(index1, index2)] = result
return results
@staticmethod
def _wilcox_test(data1, data2):
return stats.wilcoxon(data1, data2)[1]
def wilcoxon_test(self):
"""Cross-test all classes with the Wilcoxon signed-rank test"""
return self.mt_process(self._wilcox_test)
@staticmethod
def _rel_t_test(data1, data2):
"""Calculate ttest statistic, return p-value."""
return stats.ttest_rel(data1, data2)[1]
def rel_t_test(self):
"""Cross-test all classes using the t-test for dependent, paired
samples."""
return self.mt_process(self._rel_t_test)
# skip the coverage for this method as it doesn't have conditional
# statements and is tested by mt_process() coverage (we don't see it
# because coverage can't handle multiprocessing)
def _mt_process_runner(self, params): # pragma: no cover
pair, sum_func, args = params
data = self.load_data()
index1, index2 = pair
data1 = data.iloc[:, index1]
data2 = data.iloc[:, index2]
ret = sum_func(data1, data2, *args)
return pair, ret
def mt_process(self, sum_func, args=()):
"""Calculate sum_func values for all pairs of classes in data.
Uses multiprocessing for calculation
sum_func needs to accept two parameters, the values from first
and second sample.
Returns a dictionary with keys being the pairs of values and
values being the returns from the sum_func
"""
comb = list(combinations(list(range(len(self.class_names))), 2))
job_size = max(len(comb) // os.cpu_count(), 1)
with mp.Pool() as pool:
pvals = list(pool.imap_unordered(
self._mt_process_runner,
zip(comb, repeat(sum_func), repeat(args)),
job_size))
results = dict(pvals)
return results
@staticmethod
def _sign_test(data1, data2, med, alternative):
diff = data2 - data1
return stats.binom_test([sum(diff < med), sum(diff > med)], p=0.5,
alternative=alternative)
def sign_test(self, med=0.0, alternative="two-sided"):
"""
Cross-test all classes using the sign test.
med: expected median value
        alternative: the alternative hypothesis, "two-sided" by default,
        can be "less" or "greater". If called with "less" and the returned
        p-value is much smaller than 0.05, then it's likely that the
        *second* sample in a pair is bigger than the first one. In other
        words, with "less" the p-value indicates how plausible it is that
        the second sample is smaller than the first one.
"""
return self.mt_process(self._sign_test, (med, alternative))
def friedman_test(self):
"""
Test all classes using Friedman chi-square test.
Note, as the scipy stats package uses a chisquare approximation, the
test results are valid only when we have more than 10 samples.
"""
data = self.load_data()
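        # scipy's friedmanchisquare() requires at least 3 related samples,
        # so with fewer classes report a p-value of 1 (no difference found)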
if len(self.class_names) < 3:
return 1
_, pval = stats.friedmanchisquare(
*(data.iloc[:, i] for i in range(len(self.class_names))))
return pval
def _calc_percentiles(self):
data = self.load_data()
try:
quantiles_file_name = join(self.output, ".quantiles.tmp")
shutil.copyfile(join(self.output, "timing.bin"),
quantiles_file_name)
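            # work on a throwaway copy of timing.bin: overwrite_input=True
            # below allows np.quantile to sort the data in place, which would
            # otherwise clobber the original measurements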
quant_in = np.memmap(quantiles_file_name,
dtype=np.float64,
mode="r+",
shape=data.shape)
percentiles = np.quantile(quant_in,
[0.05, 0.25, 0.5, 0.75, 0.95],
overwrite_input=True,
axis=0)
percentiles = pd.DataFrame(percentiles, columns=list(data),
copy=False)
return percentiles
finally:
del quant_in
os.remove(quantiles_file_name)
def box_plot(self):
"""Generate box plot for the test classes."""
fig = Figure(figsize=(16, 12))
canvas = FigureCanvas(fig)
ax = fig.add_subplot(1, 1, 1)
data = self.load_data()
# a simpler alternative would use data.boxplot() but that
        # copies the data to the matplotlib object
# which means it doesn't keep it in a neat array.array, blowing up
# the memory usage significantly
# so calculate the values externally and just provide the computed
# quantiles to the boxplot drawing function
percentiles = self._calc_percentiles()
boxes = []
for name in percentiles:
vals = [i for i in percentiles.loc[:, name]]
boxes += [{'label': name,
'whislo': vals[0],
'q1': vals[1],
'med': vals[2],
'q3': vals[3],
'whishi': vals[4],
'fliers': []}]
ax.bxp(boxes, showfliers=False)
ax.set_xticks(list(range(len(data.columns)+1)))
ax.set_xticklabels([''] + list(range(len(data.columns))))
ax.set_title("Box plot")
ax.set_ylabel("Time [s]")
ax.set_xlabel("Class index")
canvas.print_figure(join(self.output, "box_plot.png"),
bbox_inches="tight")
def scatter_plot(self):
"""Generate scatter plot showing how the measurement went."""
if not self.draw_scatter_plot:
return None
data = self.load_data()
fig = Figure(figsize=(16, 12))
canvas = FigureCanvas(fig)
ax = fig.add_subplot(1, 1, 1)
ax.plot(data, ".", fillstyle='none', alpha=0.6)
ax.set_title("Scatter plot")
ax.set_ylabel("Time [s]")
ax.set_xlabel("Sample index")
ax.set_yscale("log")
self.make_legend(ax)
canvas.print_figure(join(self.output, "scatter_plot.png"),
bbox_inches="tight")
quant = np.quantile(data, [0.005, 0.95])
# make sure the quantile point is visible on the graph
quant[0] *= 0.98
quant[1] *= 1.02
ax.set_ylim(quant)
canvas.print_figure(join(self.output, "scatter_plot_zoom_in.png"),
bbox_inches="tight")
def diff_scatter_plot(self):
"""Generate scatter plot showing differences between samples."""
if not self.draw_scatter_plot:
return
data = self.load_data()
fig = Figure(figsize=(16, 12))
canvas = FigureCanvas(fig)
axes = fig.add_subplot(1, 1, 1)
classnames = iter(data)
base = next(classnames)
base_data = data.loc[:, base]
values = pd.DataFrame()
for ctr, name in enumerate(classnames, start=1):
diff = data.loc[:, name] - base_data
values["{0}-0".format(ctr)] = diff
axes.plot(values, ".", fillstyle='none', alpha=0.6)
axes.set_title("Scatter plot of class differences")
axes.set_ylabel("Time [s]")
axes.set_xlabel("Sample index")
axes.legend(values, ncol=6, loc='upper center',
bbox_to_anchor=(0.5, -0.15))
canvas.print_figure(join(self.output, "diff_scatter_plot.png"),
bbox_inches="tight")
quant = np.quantile(values, [0.25, 0.75])
quant[0] *= 0.98
quant[1] *= 1.02
axes.set_ylim(quant)
canvas.print_figure(join(self.output, "diff_scatter_plot_zoom_in.png"),
bbox_inches="tight")
def ecdf_plot(self):
"""Generate ECDF plot comparing distributions of the test classes."""
if not self.draw_ecdf_plot:
return None
data = self.load_data()
fig = Figure(figsize=(16, 12))
canvas = FigureCanvas(fig)
ax = fig.add_subplot(1, 1, 1)
for classname in data:
values = data.loc[:, classname]
levels = np.linspace(1. / len(values), 1, len(values))
ax.step(sorted(values), levels, where='post')
self.make_legend(ax)
ax.set_title("Empirical Cumulative Distribution Function")
ax.set_xlabel("Time [s]")
ax.set_ylabel("Cumulative probability")
canvas.print_figure(join(self.output, "ecdf_plot.png"),
bbox_inches="tight")
quant = np.quantile(values, [0.01, 0.95])
quant[0] *= 0.98
quant[1] *= 1.02
ax.set_xlim(quant)
canvas.print_figure(join(self.output, "ecdf_plot_zoom_in.png"),
bbox_inches="tight")
def diff_ecdf_plot(self):
"""Generate ECDF plot of differences between test classes."""
if not self.draw_ecdf_plot:
return
data = self.load_data()
fig = Figure(figsize=(16, 12))
canvas = FigureCanvas(fig)
axes = fig.add_subplot(1, 1, 1)
classnames = iter(data)
base = next(classnames)
base_data = data.loc[:, base]
# parameters for the zoomed-in graphs of ecdf
zoom_params = OrderedDict([("98", (0.01, 0.99)),
("33", (0.33, 0.66)),
("10", (0.45, 0.55))])
zoom_values = OrderedDict((name, [float("inf"), float("-inf")])
for name in zoom_params.keys())
for classname in classnames:
# calculate the ECDF
values = data.loc[:, classname]
levels = np.linspace(1. / len(values), 1, len(values))
values = sorted(values-base_data)
axes.step(values, levels, where='post')
# calculate the bounds for the zoom positions
quantiles = np.quantile(values, list(chain(*zoom_params.values())))
quantiles = iter(quantiles)
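            # consume the iterator two values at a time: (low, high) for
            # every zoom window defined in zoom_params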
for low, high, name in \
zip(quantiles, quantiles, zoom_params.keys()):
zoom_values[name][0] = min(zoom_values[name][0], low)
zoom_values[name][1] = max(zoom_values[name][1], high)
fig.legend(list("{0}-0".format(i)
                        for i in range(1, len(list(data)))),
ncol=6,
loc='upper center',
bbox_to_anchor=(0.5, -0.05))
axes.set_title("Empirical Cumulative Distribution Function of "
"class differences")
axes.set_xlabel("Time")
axes.set_ylabel("Cumulative probability")
formatter = mpl.ticker.EngFormatter('s')
axes.get_xaxis().set_major_formatter(formatter)
canvas.print_figure(join(self.output, "diff_ecdf_plot.png"),
bbox_inches="tight")
# now graph progressive zooms of the central portion
for name, quantiles, values in \
zip(zoom_params.keys(), zoom_params.values(),
zoom_values.values()):
axes.set_ylim(quantiles)
            # make the bounds a little looser so that the extreme positions
            # are visible on the graph too
axes.set_xlim([values[0]*0.98, values[1]*1.02])
canvas.print_figure(join(self.output,
"diff_ecdf_plot_zoom_in_{0}.png"
.format(name)),
bbox_inches="tight")
def make_legend(self, fig):
"""Generate common legend for plots that need it."""
data = self.load_data()
header = list(range(len(list(data))))
fig.legend(header,
ncol=6,
loc='upper center',
bbox_to_anchor=(0.5, -0.15)
)
@staticmethod
def _cent_tend_of_random_sample(reps=100):
"""
Calculate mean, median, trimmed means (5% and 25%) and trimean with
bootstrapping.
"""
ret = []
global _diffs
diffs = _diffs
for _ in range(reps):
boot = np.random.choice(diffs, replace=True, size=len(diffs))
q1, median, q3 = np.quantile(boot, [0.25, 0.5, 0.75])
# use tuple instead of a dict because tuples are much quicker
# to instantiate
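            # the tuple order matches the "keys" used in
            # _bootstrap_differences(): mean, median, trimmed mean (5%),
            # trimmed mean (25%) and trimean ((Q1 + 2*median + Q3) / 4)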
ret.append((np.mean(boot, 0),
median,
stats.trim_mean(boot, 0.05, 0),
stats.trim_mean(boot, 0.25, 0),
(q1+2*median+q3)/4))
return ret
def _bootstrap_differences(self, pair, reps=5000):
"""Return a list of bootstrapped central tendencies of differences."""
# don't pickle the diffs as they are read-only, use a global to pass
# it to workers
global _diffs
# because the samples are not independent, we calculate mean of
# differences not a difference of means
data = self.load_data()
_diffs = data.iloc[:, pair.index2] -\
data.iloc[:, pair.index1]
job_size = os.cpu_count() * 10
keys = ("mean", "median", "trim_mean_05", "trim_mean_25", "trimean")
ret = dict((k, list()) for k in keys)
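        # split the requested repetitions into chunks of job_size (plus one
        # final chunk with the remainder) so the workers in the pool get
        # similarly sized pieces of work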
with mp.Pool() as pool:
cent_tend = pool.imap_unordered(
self._cent_tend_of_random_sample,
chain(repeat(job_size, reps//job_size), [reps % job_size]))
for values in cent_tend:
                # skip the empty chunk produced when reps % job_size == 0
if not values:
continue
# transpose the results so that they can be added to lists
chunk = list(map(list, zip(*values)))
for key, i in zip(keys, range(5)):
ret[key].extend(chunk[i])
_diffs = None
return ret
def calc_diff_conf_int(self, pair, reps=5000, ci=0.95):
"""
Bootstrap a confidence interval for the central tendencies of
differences.
:param TestPair pair: identification of samples to calculate the
confidence interval
        :param int reps: how many bootstrapping repetitions to perform
:param float ci: confidence interval for the low and high estimate.
0.95, i.e. "2 sigma", by default
:return: dictionary of tuples with low estimate, estimate, and high
estimate of mean, median, trimmed mean (5% and 25%) and trimean
of differences of observations
"""
cent_tend = self._bootstrap_differences(pair, reps)
data = self.load_data()
diff = data.iloc[:, pair.index2] - data.iloc[:, pair.index1]
mean = np.mean(diff)
q1, median, q3 = np.quantile(diff, [0.25, 0.5, 0.75])
trim_mean_05 = stats.trim_mean(diff, 0.05, 0)
trim_mean_25 = stats.trim_mean(diff, 0.25, 0)
trimean = (q1 + 2*median + q3)/4
quantiles = [(1-ci)/2, 1-(1-ci)/2]
exact_values = {"mean": mean, "median": median,
"trim_mean_05": trim_mean_05,
"trim_mean_25": trim_mean_25,
"trimean": trimean}
ret = {}
for key, value in exact_values.items():
calc_quant = np.quantile(cent_tend[key], quantiles)
ret[key] = (calc_quant[0], value, calc_quant[1])
return ret
def conf_interval_plot(self):
"""Generate the confidence inteval for differences between samples."""
if not self.draw_conf_interval_plot:
return
reps = 5000
boots = {"mean": pd.DataFrame(),
"median": pd.DataFrame(),
"trim mean (5%)": pd.DataFrame(),
"trim mean (25%)": pd.DataFrame(),
"trimean": pd.DataFrame()}
for i in range(1, len(self.class_names)):
pair = TestPair(0, i)
diffs = self._bootstrap_differences(pair, reps)
boots["mean"]['{}-0'.format(i)] = diffs["mean"]
boots["median"]['{}-0'.format(i)] = diffs["median"]
boots["trim mean (5%)"]['{}-0'.format(i)] = diffs["trim_mean_05"]
boots["trim mean (25%)"]['{}-0'.format(i)] = diffs["trim_mean_25"]
boots["trimean"]['{}-0'.format(i)] = diffs["trimean"]
for name, data in boots.items():
fig = Figure(figsize=(16, 12))
canvas = FigureCanvas(fig)
ax = fig.add_subplot(1, 1, 1)
ax.violinplot(data, widths=0.7, showmeans=True, showextrema=True)
ax.set_xticks(list(range(len(data.columns)+1)))
ax.set_xticklabels([' '] + list(data.columns))
formatter = mpl.ticker.EngFormatter('s')
ax.get_yaxis().set_major_formatter(formatter)
ax.set_title("Confidence intervals for {0} of differences"
.format(name))
ax.set_xlabel("Class pairs")
ax.set_ylabel("{0} of differences".format(name))
if name == "trim mean (5%)":
name = "trim_mean_05"
elif name == "trim mean (25%)":
name = "trim_mean_25"
with open(join(self.output,
"bootstrapped_{0}.csv".format(name)),
"w") as f:
writer = csv.writer(f)
writer.writerow(data.columns)
writer.writerows(data.itertuples(index=False))
canvas.print_figure(join(self.output,
"conf_interval_plot_{0}.png"
.format(name)),
bbox_inches="tight")
def desc_stats(self):
"""Calculate the descriptive statistics for sample differences."""
data = self.load_data()
results = {}
comb = combinations(list(range(len(self.class_names))), 2)
        for index1, index2 in comb:
data1 = data.iloc[:, index1]
data2 = data.iloc[:, index2]
diff = data2 - data1
diff_stats = {}
diff_stats["mean"] = np.mean(diff)
diff_stats["SD"] = np.std(diff)
quantiles = np.quantile(diff, [0.25, 0.5, 0.75])
diff_stats["median"] = quantiles[1]
diff_stats["IQR"] = quantiles[2] - quantiles[1]
diff_stats["MAD"] = stats.median_abs_deviation(diff)
results[TestPair(index1, index2)] = diff_stats
return results
@staticmethod
def _write_stats(name, low, med, high, txt_file):
txt = "{} of differences: {:.5e}s, 95% CI: {:.5e}s, {:5e}s (±{:.3e}s)"\
.format(name, med, low, high, (high-low)/2)
print(txt)
txt_file.write(txt + "\n")
def _write_individual_results(self):
"""Write results to report.csv"""
difference = 0
# create a report with statistical tests
box_results = self.box_test()
wilcox_results = self.wilcoxon_test()
sign_results = self.sign_test()
sign_less_results = self.sign_test(alternative="less")
sign_greater_results = self.sign_test(alternative="greater")
ttest_results = self.rel_t_test()
desc_stats = self.desc_stats()
report_filename = join(self.output, "report.csv")
p_vals = []
sign_p_vals = []
with open(report_filename, 'w') as file:
writer = csv.writer(file)
writer.writerow(["Class 1", "Class 2", "Box test",
"Wilcoxon signed-rank test",
"Sign test", "Sign test less",
"Sign test greater",
"paired t-test", "mean", "SD",
"median", "IQR", "MAD"])
worst_pair = None
worst_p = None
worst_median_difference = None
for pair, result in box_results.items():
index1 = pair.index1
index2 = pair.index2
diff_stats = desc_stats[pair]
box_write = "="
if result:
smaller, bigger = result
print("Box test {} vs {}: {} < {}".format(index1,
index2,
smaller,
bigger))
box_write = "<" if smaller == index1 else ">"
else:
print("Box test {} vs {}: No difference".format(index1,
index2))
print("Wilcoxon signed-rank test {} vs {}: {:.3}"
.format(index1, index2, wilcox_results[pair]))
print("Sign test {} vs {}: {:.3}"
.format(index1, index2, sign_results[pair]))
print("Sign test, probability that {1} < {0}: {2:.3}"
.format(index1, index2, sign_less_results[pair]))
print("Sign test, probability that {1} > {0}: {2:.3}"
.format(index1, index2, sign_greater_results[pair]))
if sign_results[pair] > 0.05:
sign_test_relation = "="
elif sign_less_results[pair] > sign_greater_results[pair]:
sign_test_relation = "<"
else:
sign_test_relation = ">"
print("Sign test interpretation: {} {} {}"
.format(index2, sign_test_relation, index1))
print("Dependent t-test for paired samples {} vs {}: {:.3}"
.format(index1, index2, ttest_results[pair]))
print("{} vs {} stats: mean: {:.3}, SD: {:.3}, median: {:.3}, "
"IQR: {:.3}, MAD: {:.3}".format(
index1, index2, diff_stats["mean"], diff_stats["SD"],
diff_stats["median"], diff_stats["IQR"],
diff_stats["MAD"]))
# if both tests or the sign test found a difference
# consider it a possible side-channel
                if (result and wilcox_results[pair] < 0.05) or \
                        sign_results[pair] < 0.05:
difference = 1
wilcox_p = wilcox_results[pair]
sign_p = sign_results[pair]
ttest_p = ttest_results[pair]
row = [self.class_names[index1],
self.class_names[index2],
box_write,
wilcox_p,
sign_p,
sign_less_results[pair],
sign_greater_results[pair],
ttest_p,
diff_stats["mean"],
diff_stats["SD"],
diff_stats["median"],
diff_stats["IQR"],
diff_stats["MAD"]
]
writer.writerow(row)
p_vals.append(wilcox_p)
sign_p_vals.append(sign_p)
median_difference = abs(diff_stats["median"])
if worst_pair is None or wilcox_p < worst_p or \
worst_median_difference is None or \
worst_median_difference < median_difference:
worst_pair = pair
worst_p = wilcox_p
worst_median_difference = median_difference
return difference, p_vals, sign_p_vals, worst_pair, worst_p
def _write_legend(self):
"""Write the legend.csv file."""
legend_filename = join(self.output, "legend.csv")
with open(legend_filename, "w") as csv_file:
writer = csv.writer(csv_file)
writer.writerow(['ID', 'Name'])
for num, name in enumerate(self.class_names):
writer.writerow([num, name])
def _write_sample_stats(self):
"""Write summary statistics of samples to sample_stats.csv file."""
data = self.load_data()
stats_filename = join(self.output, "sample_stats.csv")
with open(stats_filename, "w") as csv_file:
writer = csv.writer(csv_file)
writer.writerow(['Name', 'mean', 'median', 'MAD'])
for num, name in enumerate(self.class_names):
sample = data.iloc[:, num]
writer.writerow([
name,
np.mean(sample),
np.median(sample),
stats.median_abs_deviation(sample)])
def _write_summary(self, difference, p_vals, sign_p_vals, worst_pair,
worst_p, friedman_p):
"""Write the report.txt file and print summary."""
report_filename = join(self.output, "report.csv")
text_report_filename = join(self.output, "report.txt")
with open(text_report_filename, 'w') as txt_file:
txt_file.write(
"tlsfuzzer analyse.py version {0} analysis\n"
.format(VERSION))
_, p = stats.kstest(p_vals, 'uniform')
txt = ("KS-test for uniformity of p-values from Wilcoxon "
"signed-rank test")
print(txt)
txt_file.write(txt)
txt_file.write('\n')
txt = "p-value: {}".format(p)
print(txt)
txt_file.write(txt)
txt_file.write('\n')
if p < 0.05:
difference = 1
_, p = stats.kstest(sign_p_vals, 'uniform')
txt = "KS-test for uniformity of p-values from sign test "
print(txt)
txt_file.write(txt)
txt_file.write('\n')
txt = "p-value: {}".format(p)
print(txt)
txt_file.write(txt)
txt_file.write('\n')
txt = ("Sign test mean p-value: {}, median p-value: {}"
.format(np.mean(sign_p_vals), np.median(sign_p_vals)))
print(txt)
txt_file.write(txt)
txt_file.write('\n')
# fail the overall test only when p-values from sign test
# are not uniform AND are skewed to the left
if p < 0.05 and np.mean(sign_p_vals) < 0.5:
difference = 1
txt = "Friedman test (chisquare approximation) for all samples"
print(txt)
txt_file.write(txt)
txt_file.write('\n')
txt = "p-value: {}".format(friedman_p)
print(txt)
txt_file.write(txt)
txt_file.write('\n')
if friedman_p < 0.05:
difference = 1
txt = "Worst pair: {}({}), {}({})".format(
worst_pair.index1,
self.class_names[worst_pair.index1],
worst_pair.index2,
self.class_names[worst_pair.index2])
print(txt)
txt_file.write(txt)
txt_file.write('\n')
diff_conf_int = self.calc_diff_conf_int(worst_pair)
# use 95% CI as that translates to 2 standard deviations, making
# it easy to estimate higher CIs
for name, key in (("Mean", "mean"), ("Median", "median"),
("Trimmed mean (5%)", "trim_mean_05"),
("Trimmed mean (25%)", "trim_mean_25"),
("Trimean", "trimean")):
self._write_stats(
name,
diff_conf_int[key][0], diff_conf_int[key][1],
diff_conf_int[key][2], txt_file)
txt = "For detailed report see {}".format(report_filename)
print(txt)
txt_file.write(txt)
txt_file.write('\n')
return difference
def _start_thread(self, method, err_desc):
"""Start a thread, wait for end with self.multithreaded_graph set."""
proc = mp.Process(target=method)
proc.start()
if not self.multithreaded_graph:
self._stop_thread(proc, err_desc)
return (proc, err_desc)
@staticmethod
def _stop_thread(proc, err_desc):
"""Wait for thread completion, raise Exception on error."""
proc.join()
if proc.exitcode != 0:
raise Exception(err_desc)
def _stop_all_threads(self, threads):
"""Wait for completion of threads, raise Exception on error."""
if not self.multithreaded_graph:
return
for proc, err_desc in threads:
self._stop_thread(proc, err_desc)
def generate_report(self):
"""
Compiles a report consisting of statistical tests and plots.
:return: int 0 if no difference was detected, 1 otherwise
"""
# plot in separate processes so that the matplotlib memory leaks are
# not cumulative, see https://stackoverflow.com/q/28516828/462370
processes = []
processes.append(
self._start_thread(self.box_plot,
"Box plot graph generation failed"))
processes.append(
self._start_thread(self.scatter_plot,
"Scatter plot graph generation failed"))
processes.append(
self._start_thread(self.ecdf_plot,
"ECDF graph generation failed"))
processes.append(
self._start_thread(self.conf_interval_plot,
"Conf interval graph generation failed"))
processes.append(
self._start_thread(self.diff_ecdf_plot,
"Generation of ECDF graph of differences "
"failed"))
processes.append(
self._start_thread(self.diff_scatter_plot,
"Generation of scatter plot of differences "
"failed"))
self._write_legend()
self._write_sample_stats()
friedman_result = self.friedman_test()
difference, p_vals, sign_p_vals, worst_pair, worst_p = \
self._write_individual_results()
difference = self._write_summary(difference, p_vals, sign_p_vals,
worst_pair,
worst_p, friedman_result)
self._stop_all_threads(processes)
return difference
# exclude from coverage as it's a) trivial, and b) not easy to test
if __name__ == '__main__': # pragma: no cover
ret = main()
print("Analysis return value: {}".format(ret))
sys.exit(ret)