initial commit

2024-01-12 20:33:08 +01:00
commit 21f2314551
1567 changed files with 3078069 additions and 0 deletions

workflows/__init__.py (0 lines)

@@ -0,0 +1,52 @@
import os
from . import res
def get_encoding_time_percentage(filename, encoding_times_av1):
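    # Express the AV1 encoding time as a percentage of the HEVC encoding time
    # recorded for the same sample (100.0 means "as slow as the HEVC encode").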
encoding_times_hevc = res.get_encoding_time_hevc(filename)
return round(
encoding_times_av1 / (encoding_times_hevc / 100),
2
)
def start_workflow():
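    # Collect per-sample AV1 metrics (encoding time, file size percentage,
    # VMAF score) grouped by preset and CRF, then aggregate each preset/CRF
    # bucket and write the result to the AV1 aggregation JSON.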
res.bootstrap_folder_structure()
aggregated_metrics = {}
output_file = os.path.join(
res.get_path_results_aggregations(),
res.get_filename_results_aggregations_av1()
)
for f in res.get_all_encoded_files_av1():
filename = os.path.splitext(os.path.basename(f))[0]
sampleNumber = res.get_sample_number_from_filename(filename)
preset = res.get_preset_from_encode_filename(filename)
crf = res.get_crf_from_encode_filename(filename)
if not preset in aggregated_metrics.keys():
aggregated_metrics[preset] = {}
if not crf in aggregated_metrics[preset].keys():
aggregated_metrics[preset][crf] = {}
if not "samples" in aggregated_metrics[preset][crf].keys():
aggregated_metrics[preset][crf]["samples"] = {}
aggregated_metrics[preset][crf]["samples"][sampleNumber] = {
"encoding_time": res.get_encoding_time_av1(filename),
"filesize_percentage": res.get_filesize_percentage(f),
"vmaf_score": res.get_vmaf_score_of_encode(filename)
}
aggregated_metrics[preset][crf]["samples"][sampleNumber]["encoding_time_percentage"] = get_encoding_time_percentage(
filename,
aggregated_metrics[preset][crf]["samples"][sampleNumber]["encoding_time"]
)
for preset in aggregated_metrics.keys():
for crf in aggregated_metrics[preset].keys():
aggregated_metrics[preset][crf] = res.aggregated_metrics(
aggregated_metrics[preset][crf]
)
res.write_dict_to_json_file(output_file, aggregated_metrics)

@@ -0,0 +1,40 @@
import os
from . import res
def start_workflow():
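    # Same aggregation as for AV1, but for the single-configuration HEVC
    # encodes; no encoding-time percentage is computed here because HEVC is
    # the baseline.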
res.bootstrap_folder_structure()
aggregated_metrics = {}
output_file = os.path.join(
res.get_path_results_aggregations(),
res.get_filename_results_aggregations_hevc()
)
for f in res.get_all_encoded_files_hevc():
filename = os.path.splitext(os.path.basename(f))[0]
sampleNumber = res.get_sample_number_from_filename(filename)
preset = res.get_preset_from_encode_filename(filename)
crf = res.get_crf_from_encode_filename(filename)
if not preset in aggregated_metrics.keys():
aggregated_metrics[preset] = {}
if not crf in aggregated_metrics[preset].keys():
aggregated_metrics[preset][crf] = {}
if not "samples" in aggregated_metrics[preset][crf].keys():
aggregated_metrics[preset][crf]["samples"] = {}
aggregated_metrics[preset][crf]["samples"][sampleNumber] = {
"encoding_time": res.get_encoding_time_hevc(filename),
"filesize_percentage": res.get_filesize_percentage(f),
"vmaf_score": res.get_vmaf_score_of_encode(filename)
}
for preset in aggregated_metrics.keys():
for crf in aggregated_metrics[preset].keys():
aggregated_metrics[preset][crf] = res.aggregated_metrics(
aggregated_metrics[preset][crf]
)
res.write_dict_to_json_file(output_file, aggregated_metrics)

@@ -0,0 +1,17 @@
import os
import subprocess
from . import res
def start_workflow():
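    # Run the ffmpeg/libvmaf benchmark for every AV1 encode that does not yet
    # have a metrics log.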
res.bootstrap_folder_structure()
for f in res.get_all_encoded_files_av1():
if os.path.exists(res.get_filepath_metric_log(f)):
continue
subprocess.run(
res.get_benchmark_command(f),
shell=True,
check=True
)

@@ -0,0 +1,17 @@
import os
import subprocess
from . import res
def start_workflow():
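    # Run the ffmpeg/libvmaf benchmark for every HEVC encode that does not yet
    # have a metrics log.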
res.bootstrap_folder_structure()
for f in res.get_all_encoded_files_hevc():
if os.path.exists(res.get_filepath_metric_log(f)):
continue
subprocess.run(
res.get_benchmark_command(f),
shell=True,
check=True
)

@@ -0,0 +1,76 @@
import os
import subprocess
from . import res
def start_workflow():
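    # Assemble the aggregated HEVC/AV1 metrics and the HEVC diagram file names
    # into one data structure and render README.md from the Jinja2 template.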
res.bootstrap_folder_structure()
# hevc diagrams
hevc_diagrams = res.get_diagrams_hevc()
    # the complete data structure for rendering the README template
data = {
"aggregated_metrics": {
"hevc": {
"encoding_time": {},
"filesize_percentage": {},
"vmaf_score": {}
}
},
"diagrams": {
"hevc": {
"encoding_time": hevc_diagrams["encoding_time"],
"filesize_percentage": hevc_diagrams["filesize_percentage"],
"vmaf_score": hevc_diagrams["vmaf_score"]
}
}
}
# hevc metrics
aggregated_metrics_hevc = res.read_dict_from_json_file(
os.path.join(
res.get_path_results_aggregations(),
res.get_filename_results_aggregations_hevc()
)
)
for preset in aggregated_metrics_hevc.keys():
for crf in aggregated_metrics_hevc[preset].keys():
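            # HEVC is encoded with one fixed preset/CRF (see encode_hevc.py),
            # so this loop effectively fills the values from that single entry.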
data["aggregated_metrics"]["hevc"]["encoding_time"]["mean"] = round(
aggregated_metrics_hevc[preset][crf]["encoding_time"]["mean"],
2
)
data["aggregated_metrics"]["hevc"]["encoding_time"]["median"] = round(
aggregated_metrics_hevc[preset][crf]["encoding_time"]["median"],
2
)
data["aggregated_metrics"]["hevc"]["filesize_percentage"]["mean"] = round(
aggregated_metrics_hevc[preset][crf]["filesize_percentage"]["mean"],
2
)
data["aggregated_metrics"]["hevc"]["filesize_percentage"]["median"] = round(
aggregated_metrics_hevc[preset][crf]["filesize_percentage"]["median"],
2
)
data["aggregated_metrics"]["hevc"]["vmaf_score"]["mean"] = round(
aggregated_metrics_hevc[preset][crf]["vmaf_score"]["mean"],
2
)
data["aggregated_metrics"]["hevc"]["vmaf_score"]["median"] = round(
aggregated_metrics_hevc[preset][crf]["vmaf_score"]["median"],
2
)
# av1 metrics
data["aggregated_metrics"]["av1"] = res.read_dict_from_json_file(
os.path.join(
res.get_path_results_aggregations(),
res.get_filename_results_aggregations_av1()
)
)
# render the template
res.render_template(
os.path.join(res.get_path_templates(), "README.md.tmpl"),
os.path.join(res.get_path_project(), "README.md"),
data
)

workflows/encode_av1.py

@@ -0,0 +1,79 @@
import os
import subprocess
import time
from . import res
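# Fixed SVT-AV1 params plus a sweep over presets 6..4 and CRFs 30..10 (both
# descending); values are kept as strings because they end up in the ffmpeg
# command line and as JSON keys.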
DEFAULT_PARAMS_AV1 = "tune=0"
PRESETS_AV1 = [str(i) for i in range(6, 3, -1)]
CRFS_AV1 = [str(i) for i in range(30, 9, -1)]
def get_encoding_command_av1(f, preset, crf):
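    # Build the ffmpeg command that encodes one sample to 10-bit AV1 with
    # libsvtav1 at the given preset and CRF, writing the result to the AV1
    # encodes folder.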
filenameSample = os.path.splitext(os.path.basename(f))[0]
filenameEncode = ".".join(
[
filenameSample,
'AV1',
'10Bit',
'Preset',
f'{preset}',
'CRF',
f'{crf}',
'mkv'
]
)
filepathEncode = os.path.join(res.get_path_data_encodes_av1(), filenameEncode)
cmd_parts = [
'ffmpeg',
f'-i "{f}"',
'-an',
'-c:v libsvtav1',
f'-preset {preset}',
f'-crf {crf}',
f'-svtav1-params "{DEFAULT_PARAMS_AV1}"',
        '-pix_fmt yuv420p10le',
'-g 245',
'-y',
f'"{filepathEncode}"'
]
return " ".join(cmd_parts)
def start_workflow():
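    # Encode every sample for every preset/CRF combination, skip encodes whose
    # timing is already recorded, and persist the measured encoding times to
    # JSON.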
res.bootstrap_folder_structure()
path_results_encoding_time_av1_json = os.path.join(
res.get_path_results_encoding_time(),
res.get_filename_results_encoding_time_av1()
)
results_encoding_time_av1 = res.read_dict_from_json_file(
path_results_encoding_time_av1_json
)
for preset in PRESETS_AV1:
if not preset in results_encoding_time_av1.keys():
results_encoding_time_av1[preset] = {}
for crf in CRFS_AV1:
if not crf in results_encoding_time_av1[preset].keys():
results_encoding_time_av1[preset][crf] = {}
for f in res.get_all_sample_files():
filename = os.path.splitext(os.path.basename(f))[0]
if filename in results_encoding_time_av1[preset][crf].keys():
continue
time_start = time.time()
subprocess.run(
get_encoding_command_av1(f, preset, crf),
shell=True,
check=True
)
time_encoding = round(time.time() - time_start)
results_encoding_time_av1[preset][crf][filename] = time_encoding
res.write_dict_to_json_file(
path_results_encoding_time_av1_json,
results_encoding_time_av1
)

workflows/encode_hevc.py

@@ -0,0 +1,73 @@
import os
import subprocess
import time
from . import res
DEFAULT_BITDEPTH_HEVC = 10
DEFAULT_CRF_HEVC = 21
DEFAULT_PARAMS_HEVC = "aq-mode=1:qcomp=0.7:vbv-bufsize=9000:vbv-maxrate=9000:no-sao=1:no-strong-intra-smoothing=1:keyint=240:min-keyint=24"
DEFAULT_PIX_FMT_HEVC = "yuv420p10le"
DEFAULT_PRESET_HEVC = "slow"
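# HEVC is the baseline: a single x265 configuration (10-bit, preset "slow",
# CRF 21) that every AV1 preset/CRF combination is later compared against.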
def get_encoding_command_hevc(f):
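    # Build the ffmpeg command that encodes one sample to 10-bit HEVC with
    # libx265 using the fixed baseline settings above.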
filenameSample = os.path.splitext(os.path.basename(f))[0]
filenameEncode = ".".join(
[
filenameSample,
'HEVC',
'10Bit',
'Preset',
f'{DEFAULT_PRESET_HEVC}',
'CRF',
f'{DEFAULT_CRF_HEVC}',
'mkv'
]
)
filepathEncode = os.path.join(res.get_path_data_encodes_hevc(), filenameEncode)
cmd_parts = [
'ffmpeg',
f'-i "{f}"',
'-an',
'-c:v libx265',
f'-preset {DEFAULT_PRESET_HEVC}',
f'-crf {DEFAULT_CRF_HEVC}',
f'-x265-params "{DEFAULT_PARAMS_HEVC}"',
f'-pix_fmt {DEFAULT_PIX_FMT_HEVC}',
'-y',
f'"{filepathEncode}"'
]
return " ".join(cmd_parts)
def start_workflow():
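    # Encode every sample once with the baseline HEVC settings, skip samples
    # whose timing is already recorded, and persist the measured encoding
    # times to JSON.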
res.bootstrap_folder_structure()
path_results_encoding_time_hevc_json = os.path.join(
res.get_path_results_encoding_time(),
res.get_filename_results_encoding_time_hevc()
)
results_encoding_time_hevc = res.read_dict_from_json_file(
path_results_encoding_time_hevc_json
)
for f in res.get_all_sample_files():
filename = os.path.splitext(os.path.basename(f))[0]
if filename in results_encoding_time_hevc.keys():
continue
time_start = time.time()
subprocess.run(
get_encoding_command_hevc(f),
shell=True,
check=True
)
time_encoding = round(time.time() - time_start)
results_encoding_time_hevc[filename] = time_encoding
res.write_dict_to_json_file(
path_results_encoding_time_hevc_json,
results_encoding_time_hevc
)

@@ -0,0 +1,86 @@
import copy
import os
from . import res
def start_workflow():
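    # Compare every AV1 preset/CRF aggregate against the HEVC baseline and
    # keep the combinations whose file size is not larger and whose VMAF is
    # not worse; a second list allows the file size to exceed the baseline by
    # up to 20 percent.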
res.bootstrap_folder_structure()
aggregated_metrics_av1_from_file = res.read_dict_from_json_file(
os.path.join(
res.get_path_results_aggregations(),
res.get_filename_results_aggregations_av1()
)
)
aggregated_metrics_hevc_from_file = res.read_dict_from_json_file(
os.path.join(
res.get_path_results_aggregations(),
res.get_filename_results_aggregations_hevc()
)
)
aggregated_metrics_hevc = {}
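    # HEVC was encoded with a single fixed preset/CRF, so these loops reduce
    # to copying that one aggregate (minus its per-sample data).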
for preset, data_of_preset in aggregated_metrics_hevc_from_file.items():
for crf, data_of_crf in data_of_preset.items():
aggregated_metrics_hevc = copy.deepcopy(data_of_crf)
del aggregated_metrics_hevc['samples']
tolerance_filesize_in_percent = 20
hevc_filesize_percentage_mean = aggregated_metrics_hevc["filesize_percentage"]["mean"]
hevc_filesize_percentage_median = aggregated_metrics_hevc["filesize_percentage"]["median"]
hevc_filesize_percentage_mean_with_tolerance = aggregated_metrics_hevc["filesize_percentage"]["mean"] * (1 + tolerance_filesize_in_percent / 100)
hevc_filesize_percentage_median_with_tolerance = aggregated_metrics_hevc["filesize_percentage"]["median"] * (1 + tolerance_filesize_in_percent / 100)
hevc_vmaf_score_mean = aggregated_metrics_hevc["vmaf_score"]["mean"]
hevc_vmaf_score_median = aggregated_metrics_hevc["vmaf_score"]["median"]
possible_candidates = {}
possible_candidates_with_tolerance = {}
for preset, data_of_preset in aggregated_metrics_av1_from_file.items():
for crf, data_of_crf in data_of_preset.items():
viable_only_with_tolerance = False
av1_filesize_percentage_mean = data_of_crf["filesize_percentage"]["mean"]
av1_filesize_percentage_median = data_of_crf["filesize_percentage"]["median"]
av1_vmaf_score_mean = data_of_crf["vmaf_score"]["mean"]
av1_vmaf_score_median = data_of_crf["vmaf_score"]["median"]
if av1_filesize_percentage_mean > hevc_filesize_percentage_mean:
if av1_filesize_percentage_mean > hevc_filesize_percentage_mean_with_tolerance:
continue
viable_only_with_tolerance = True
if av1_filesize_percentage_median > hevc_filesize_percentage_median:
if av1_filesize_percentage_median > hevc_filesize_percentage_median_with_tolerance:
continue
viable_only_with_tolerance = True
if av1_vmaf_score_mean < hevc_vmaf_score_mean:
continue
if av1_vmaf_score_median < hevc_vmaf_score_median:
continue
if viable_only_with_tolerance:
if not preset in possible_candidates_with_tolerance.keys():
possible_candidates_with_tolerance[preset] = {}
possible_candidates_with_tolerance[preset][crf] = copy.deepcopy(data_of_crf)
del possible_candidates_with_tolerance[preset][crf]["samples"]
continue
if not preset in possible_candidates.keys():
possible_candidates[preset] = {}
possible_candidates[preset][crf] = copy.deepcopy(data_of_crf)
del possible_candidates[preset][crf]["samples"]
output_file = os.path.join(
res.get_path_results_candidates(),
res.get_filename_results_candidates_viable()
)
output_file_with_tolerance = os.path.join(
res.get_path_results_candidates(),
res.get_filename_results_candidates_viable_with_tolerance()
)
res.write_dict_to_json_file(output_file, possible_candidates)
res.write_dict_to_json_file(output_file_with_tolerance, possible_candidates_with_tolerance)

workflows/res.py

@@ -0,0 +1,502 @@
from jinja2 import Environment, BaseLoader
import glob
import json
import matplotlib.pyplot as plt
import numpy as np
import os
import re
import statistics
import sys
def aggregated_metrics(metrics):
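    # Reduce the per-sample metrics of one preset/CRF bucket to
    # max/mean/median/min summaries; encoding-time percentages are only
    # present for AV1 and are therefore optional.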
all_encoding_times = []
all_encoding_time_percentages = []
all_filesize_percentages = []
all_vmaf_scores = []
for k, v in metrics["samples"].items():
all_encoding_times.append(v["encoding_time"])
all_filesize_percentages.append(v["filesize_percentage"])
all_vmaf_scores.append(v["vmaf_score"])
if "encoding_time_percentage" in v.keys():
all_encoding_time_percentages.append(v["encoding_time_percentage"])
metrics["encoding_time"] = {
"max": max(all_encoding_times),
"mean": int(sum(all_encoding_times) / len(all_encoding_times)),
"median": int(statistics.median(all_encoding_times)),
"min": min(all_encoding_times)
}
metrics["filesize_percentage"] = {
"max": max(all_filesize_percentages),
"mean": round(sum(all_filesize_percentages) / len(all_filesize_percentages), 2),
"median": round(statistics.median(all_filesize_percentages), 2),
"min": min(all_filesize_percentages)
}
metrics["vmaf_score"] = {
"max": max(all_vmaf_scores),
"mean": round(sum(all_vmaf_scores) / len(all_vmaf_scores), 2),
"median": round(statistics.median(all_vmaf_scores), 2),
"min": min(all_vmaf_scores)
}
if len(all_encoding_time_percentages) > 0:
metrics["encoding_time_percentage"] = {
"max": max(all_encoding_time_percentages),
"mean": round(sum(all_encoding_time_percentages) / len(all_encoding_time_percentages), 2),
"median": round(statistics.median(all_encoding_time_percentages), 2),
"min": min(all_encoding_time_percentages)
}
return metrics
def bootstrap_folder_structure():
folders = [
get_path_data(),
get_path_data_encodes(),
get_path_data_encodes_av1(),
get_path_data_encodes_hevc(),
get_path_data_samples(),
get_path_results(),
get_path_results_aggregations(),
get_path_results_candidates(),
get_path_results_diagrams(),
get_path_results_encoding_time(),
get_path_results_metrics(),
get_path_templates()
]
for f in folders:
os.makedirs(f, exist_ok=True)
def generate_diagram_bars(data, title, ylabel):
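    # Render a grouped bar chart with mean and median bars per preset/CRF
    # combination on the dark background used for all diagrams.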
fig, ax = plt.subplots()
fig.patch.set_facecolor(get_background_color_for_diagrams())
ax.set_facecolor(get_background_color_for_diagrams())
presets = list(data.keys())
crfs = list(data[presets[0]].keys())
bar_width = 0.35
index = np.arange(len(presets) * len(crfs))
for i, preset in enumerate(presets):
for j, crf in enumerate(crfs):
mean_value = data[preset][crf]['mean']
median_value = data[preset][crf]['median']
            ax.bar(index[i * len(crfs) + j] + bar_width, mean_value, bar_width, label=f'Mean ({preset} - {crf})', color='#106daa')
            ax.bar(index[i * len(crfs) + j] + 2 * bar_width, median_value, bar_width, label=f'Median ({preset} - {crf})', color='#3B758C')
# styling
ax.set_title(title, color='white')
ax.set_xlabel("Preset - CRF", color='white')
ax.set_ylabel(ylabel, color='white')
ax.spines['bottom'].set_color('white')
ax.spines['top'].set_color('white')
ax.spines['right'].set_color('white')
ax.spines['left'].set_color('white')
ax.xaxis.label.set_color('white')
ax.yaxis.label.set_color('white')
ax.tick_params(axis='x', colors='white')
ax.tick_params(axis='y', colors='white')
ax.set_xticks(index + bar_width * len(presets) / 2)
ax.set_xticklabels([f"{preset} - {crf}" for preset in presets for crf in crfs], rotation=45, ha='right')
# legend
legend = ax.legend(handles=ax.containers[:2], loc='upper right')
frame = legend.get_frame()
frame.set_facecolor(get_background_color_for_diagrams())
frame.set_edgecolor(get_background_color_for_diagrams())
legend_texts = ['Mean', 'Median']
for i, text in enumerate(legend.get_texts()):
text.set_text(legend_texts[i])
text.set_color('white')
return fig
def generate_diagram_normal_distribution(values, title, legend_unit, xlabel):
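    # Plot a histogram of the values with a fitted normal curve and dashed
    # markers for mean, median, min and max.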
fig, ax = plt.subplots()
# add histogram
hist = ax.hist(values, bins=20, density=True, alpha=0.6, color='#106daa', edgecolor='black')
# add normal distribution
mu, sigma = np.mean(values), np.std(values)
xmin, xmax = plt.xlim()
x = np.linspace(xmin, xmax, 100)
p = np.exp(-(x - mu)**2 / (2 * sigma**2)) / (sigma * np.sqrt(2 * np.pi))
ax.plot(x, p, linewidth=2, color='white')
# add mean
ax.axvline(x=mu, linestyle='--', label=f'Mean: {int(round(mu, 0))} {legend_unit}', color='red')
# add median
median_value = np.median(values)
ax.axvline(x=median_value, linestyle='--', label=f'Median: {int(round(median_value, 0))} {legend_unit}', color='blue')
# add min and max value
min_value = min(values)
max_value = max(values)
ax.axvline(x=min_value, linestyle='--', label=f'Min: {int(round(min_value, 0))} {legend_unit}', color='green')
ax.axvline(x=max_value, linestyle='--', label=f'Max: {int(round(max_value, 0))} {legend_unit}', color='green')
# title and labels
ax.set_title(title, color='white')
ax.set_xlabel(xlabel, color='white')
ax.set_ylabel("Frequency", color='white')
# legend
legend = ax.legend()
frame = legend.get_frame()
frame.set_facecolor(get_background_color_for_diagrams())
frame.set_edgecolor(get_background_color_for_diagrams())
for text in legend.get_texts():
text.set_color('white')
# styling
fig.patch.set_facecolor(get_background_color_for_diagrams())
ax.set_facecolor(get_background_color_for_diagrams())
ax.spines['bottom'].set_color('white')
ax.spines['top'].set_color('white')
ax.spines['right'].set_color('white')
ax.spines['left'].set_color('white')
ax.xaxis.label.set_color('white')
ax.yaxis.label.set_color('white')
ax.tick_params(axis='x', colors='white')
ax.tick_params(axis='y', colors='white')
return fig
def get_all_diagrams():
samples = glob.glob(os.path.join(get_path_results_diagrams(), "*.png"))
samples.sort()
return samples
def get_all_encoded_files_av1():
encodes = glob.glob(os.path.join(get_path_data_encodes_av1(), "*.mkv"))
encodes.sort()
return encodes
def get_all_encoded_files_hevc():
encodes = glob.glob(os.path.join(get_path_data_encodes_hevc(), "*.mkv"))
encodes.sort()
return encodes
def get_all_sample_files():
samples = glob.glob(os.path.join(get_path_data_samples(), "*.mkv"))
samples.sort()
return samples
def get_background_color_for_diagrams():
return "#11171f"
def get_benchmark_command(f):
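    # Build the ffmpeg command that runs libvmaf on an encode against its
    # source sample and writes the metrics to a JSON log file.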
filenameEncode = os.path.splitext(os.path.basename(f))[0]
cmd_parts = [
'ffmpeg',
f'-i "{f}"',
f'-i "{get_sample_file_of_encode(filenameEncode)}"',
'-lavfi',
f'libvmaf="n_threads={os.cpu_count()}:log_fmt=json:log_path={get_filepath_metric_log(f)}"',
'-f',
'null',
'-'
]
return " ".join(cmd_parts)
def get_diagrams_hevc():
diagrams = {}
for path_to_diagram in get_all_diagrams():
filename = os.path.splitext(os.path.basename(path_to_diagram))[0]
if not "hevc" in filename:
continue
if "encoding_time" in filename:
diagrams["encoding_time"] = f"{filename}.png"
continue
if "filesize_percentage" in filename:
diagrams["filesize_percentage"] = f"{filename}.png"
continue
if "vmaf_score" in filename:
diagrams["vmaf_score"] = f"{filename}.png"
continue
return diagrams
def get_filepath_metric_log(f):
filenameEncode = os.path.splitext(os.path.basename(f))[0]
return os.path.join(
get_path_results_metrics(),
f"{filenameEncode}.json"
)
def get_filesize_percentage(f):
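    # Size of the encode as a percentage of its source sample's size.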
filesize_encode = os.path.getsize(f)
filesize_sample = os.path.getsize(
get_sample_file_of_encode(
os.path.splitext(os.path.basename(f))[0]
)
)
return round((filesize_encode / filesize_sample) * 100, 2)
def get_encoding_time_av1(filenameEncode):
path_to_file_results_encoding_time_av1 = os.path.join(
get_path_results_encoding_time(),
get_filename_results_encoding_time_av1()
)
encoding_times_av1 = read_dict_from_json_file(
path_to_file_results_encoding_time_av1
)
preset = get_preset_from_encode_filename(filenameEncode)
if not preset in encoding_times_av1.keys():
raise ValueError(
f'Missing preset "{preset}" in file: {path_to_file_results_encoding_time_av1}'
)
crf = get_crf_from_encode_filename(filenameEncode)
if not crf in encoding_times_av1[preset].keys():
raise ValueError(
f'Missing crf "{crf}" for preset "{preset}" in file: {path_to_file_results_encoding_time_av1}'
)
filename_sample = filenameEncode.split('.')[0]
if not filename_sample in encoding_times_av1[preset][crf].keys():
raise ValueError(
f'Missing sample filename "{filename_sample}" for preset "{preset}" and crf "{crf}" in file: {path_to_file_results_encoding_time_av1}'
)
return encoding_times_av1[preset][crf][filename_sample]
def get_encoding_time_hevc(filenameEncode):
path_to_file_results_encoding_time_hevc = os.path.join(
get_path_results_encoding_time(),
get_filename_results_encoding_time_hevc()
)
encoding_times_hevc = read_dict_from_json_file(
path_to_file_results_encoding_time_hevc
)
filename_sample = filenameEncode.split('.')[0]
if not filename_sample in encoding_times_hevc.keys():
raise ValueError(
f'Missing key "{filename_sample}" in file: {path_to_file_results_encoding_time_hevc}'
)
return encoding_times_hevc[filename_sample]
def get_filename_results_aggregations_av1():
return "av1.json"
def get_filename_results_aggregations_hevc():
return "hevc.json"
def get_filename_results_candidates_viable():
return "viable.json"
def get_filename_results_candidates_viable_with_tolerance():
return "viable_with_tolerance.json"
def get_filename_results_encoding_time_av1():
return "av1.json"
def get_filename_results_encoding_time_hevc():
return "hevc.json"
def get_all_values_encoding_time(aggregated_metrics):
values = []
for k, v in aggregated_metrics["samples"].items():
values.append(v["encoding_time"])
return values
def get_all_values_filesize_percentage(aggregated_metrics):
values = []
for k, v in aggregated_metrics["samples"].items():
values.append(v["filesize_percentage"])
return values
def get_all_values_vmaf_score(aggregated_metrics):
values = []
for k, v in aggregated_metrics["samples"].items():
values.append(v["vmaf_score"])
return values
def get_path_data():
return os.path.join(get_path_project(), "data")
def get_path_data_encodes():
return os.path.join(get_path_data(), "encodes")
def get_path_data_encodes_av1():
return os.path.join(get_path_data_encodes(), "av1")
def get_path_data_encodes_hevc():
return os.path.join(get_path_data_encodes(), "hevc")
def get_path_data_samples():
return os.path.join(get_path_data(), "samples")
def get_path_project():
return os.path.dirname(get_path_script())
def get_path_results():
return os.path.join(get_path_project(), "results")
def get_path_results_aggregations():
return os.path.join(get_path_results(), "aggregations")
def get_path_results_candidates():
return os.path.join(get_path_results(), "candidates")
def get_path_results_diagrams():
return os.path.join(get_path_results(), "diagrams")
def get_path_results_encoding_time():
return os.path.join(get_path_results(), "encoding_time")
def get_path_results_metrics():
return os.path.join(get_path_results(), "metrics")
def get_path_script():
return os.path.realpath(sys.argv[0])
def get_path_templates():
return os.path.join(get_path_project(), "templates")
def get_crf_from_encode_filename(filename):
match = re.compile(
r'.+\.Preset\.\w+\.CRF\.(\d+)'
).search(filename)
if match is None:
raise ValueError('Could not determine crf from filename')
return ''.join(match.groups())
def get_preset_from_encode_filename(filename):
match = re.compile(
r'.+\.Preset\.(\w+)\.CRF\..+'
).search(filename)
if match is None:
raise ValueError('Could not determine preset from filename')
return ''.join(match.groups())
def get_sample_file_of_encode(filename):
match = re.compile(
r'(sample\d\d)'
).search(filename)
if match is None:
raise ValueError('Could not determine sample from filename')
return os.path.join(
get_path_data_samples(),
f"{''.join(match.groups())}.mkv"
)
def get_sample_number_from_filename(filename):
match = re.compile(
r'sample(\d+)\.'
).search(filename)
if match is None:
raise ValueError('Could not determine sample number from filename')
return ''.join(match.groups())
def get_vmaf_score_of_encode(filename):
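    # Return the pooled VMAF mean from the metrics log; the bulky per-frame
    # data is stripped from the log file on first read.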
metric_file = os.path.join(
get_path_results_metrics(),
f"{filename}.json"
)
metrics = read_dict_from_json_file(metric_file)
if "frames" in metrics.keys():
del metrics['frames']
write_dict_to_json_file(metric_file, metrics)
if not "pooled_metrics" in metrics.keys():
return 0.0
if not "vmaf" in metrics["pooled_metrics"].keys():
return 0.0
if not "mean" in metrics["pooled_metrics"]["vmaf"].keys():
return 0.0
return round(metrics["pooled_metrics"]["vmaf"]["mean"], 2)
def read_dict_from_json_file(path_to_file):
if not os.path.exists(path_to_file):
write_dict_to_json_file(path_to_file, {})
return {}
    with open(path_to_file, 'r', encoding='UTF-8') as f:
        data_as_dict = json.load(f)
    return data_as_dict
def render_template(template_file_path, output_file_path, data):
with open(template_file_path, "r") as template_file:
template_content = template_file.read()
template_env = Environment(loader=BaseLoader())
template = template_env.from_string(template_content)
rendered_template = template.render(data)
with open(output_file_path, "w") as output_file:
output_file.write(rendered_template)
def save_diagram_bars(diagram, path_to_file):
diagram.set_size_inches(24, 18)
diagram.savefig(
path_to_file,
bbox_inches="tight",
facecolor=get_background_color_for_diagrams(),
dpi=300
)
def save_diagram_normal_distribution(diagram, path_to_file):
diagram.set_size_inches(12, 9)
diagram.savefig(
path_to_file,
bbox_inches="tight",
facecolor=get_background_color_for_diagrams(),
dpi=300
)
def sort_dict(input_dict):
sorted_dict = {}
for key, value in sorted(input_dict.items()):
if isinstance(value, dict):
sorted_dict[key] = sort_dict(value)
else:
sorted_dict[key] = value
return sorted_dict
def write_dict_to_json_file(path_to_file, data_as_dict):
    with open(path_to_file, 'w', encoding='UTF-8') as f:
        f.write(
            json.dumps(
                sort_dict(data_as_dict),
                indent=4
            )
        )

@@ -0,0 +1,99 @@
import os
from . import res
def extract_encoding_time_percentages(am):
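    # Pull the mean/median encoding-time percentages per preset/CRF into the
    # flat structure expected by generate_diagram_bars.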
extract = {}
for preset in am.keys():
if not preset in extract.keys():
extract[preset] = {}
for crf in am[preset].keys():
if not crf in extract[preset].keys():
extract[preset][crf] = {}
extract[preset][crf]["mean"] = am[preset][crf]["encoding_time_percentage"]["mean"]
extract[preset][crf]["median"] = am[preset][crf]["encoding_time_percentage"]["median"]
return extract
def extract_filesize_percentages(am):
extract = {}
for preset in am.keys():
if not preset in extract.keys():
extract[preset] = {}
for crf in am[preset].keys():
if not crf in extract[preset].keys():
extract[preset][crf] = {}
extract[preset][crf]["mean"] = am[preset][crf]["filesize_percentage"]["mean"]
extract[preset][crf]["median"] = am[preset][crf]["filesize_percentage"]["median"]
return extract
def extract_vmaf_scores(am):
extract = {}
for preset in am.keys():
if not preset in extract.keys():
extract[preset] = {}
for crf in am[preset].keys():
if not crf in extract[preset].keys():
extract[preset][crf] = {}
extract[preset][crf]["mean"] = am[preset][crf]["vmaf_score"]["mean"]
extract[preset][crf]["median"] = am[preset][crf]["vmaf_score"]["median"]
return extract
def start_workflow():
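    # Generate the bar-chart diagrams (encoding-time percentage, file-size
    # percentage, VMAF score) for all AV1 preset/CRF combinations.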
res.bootstrap_folder_structure()
aggregated_metrics = res.read_dict_from_json_file(
os.path.join(
res.get_path_results_aggregations(),
res.get_filename_results_aggregations_av1()
)
)
# encoding time percentages
res.save_diagram_bars(
res.generate_diagram_bars(
extract_encoding_time_percentages(aggregated_metrics),
"AV1 encoding time percentages by Preset and CRF",
"Percent of the encoding time of HEVC"
),
os.path.join(
res.get_path_results_diagrams(),
            'av1_encoding_time_percentages.png'
)
)
# filesize percentages
res.save_diagram_bars(
res.generate_diagram_bars(
extract_filesize_percentages(aggregated_metrics),
"AV1 filesize percentages by Preset and CRF",
"Percent of the filesize of HEVC"
),
os.path.join(
res.get_path_results_diagrams(),
            'av1_filesize_percentages.png'
)
)
# vmaf scores
res.save_diagram_bars(
res.generate_diagram_bars(
extract_vmaf_scores(aggregated_metrics),
"AV1 VMAF scores by Preset and CRF",
"VMAF Score"
),
os.path.join(
res.get_path_results_diagrams(),
            'av1_vmaf_scores.png'
)
)

@@ -0,0 +1,62 @@
import os
from . import res
def start_workflow():
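    # Generate the normal-distribution diagrams (encoding time, file-size
    # percentage, VMAF score) for the HEVC encodes.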
res.bootstrap_folder_structure()
aggregated_metrics = res.read_dict_from_json_file(
os.path.join(
res.get_path_results_aggregations(),
res.get_filename_results_aggregations_hevc()
)
)
for preset in aggregated_metrics.keys():
for crf in aggregated_metrics[preset].keys():
# encoding time
res.save_diagram_normal_distribution(
res.generate_diagram_normal_distribution(
res.get_all_values_encoding_time(
aggregated_metrics[preset][crf]
),
f"Encoding times HEVC (Preset:{preset}, CRF: {crf})",
"sec",
"Time (seconds)"
),
os.path.join(
res.get_path_results_diagrams(),
f'hevc_preset_{preset}_crf_{crf}_encoding_time.png'
)
)
# filesize
res.save_diagram_normal_distribution(
res.generate_diagram_normal_distribution(
res.get_all_values_filesize_percentage(
aggregated_metrics[preset][crf]
),
f"Filesize Percentage HEVC (Preset:{preset}, CRF: {crf})",
"pct",
"Percentage file size of the encode from the sample"
),
os.path.join(
res.get_path_results_diagrams(),
f'hevc_preset_{preset}_crf_{crf}_filesize_percentage.png'
)
)
# vmaf score
res.save_diagram_normal_distribution(
res.generate_diagram_normal_distribution(
res.get_all_values_vmaf_score(
aggregated_metrics[preset][crf]
),
f"VMAF Score HEVC (Preset:{preset}, CRF: {crf})",
"pct",
"Score (percent)"
),
os.path.join(
res.get_path_results_diagrams(),
f'hevc_preset_{preset}_crf_{crf}_vmaf_score.png'
)
)