diff --git a/.vscode/launch.json b/.vscode/launch.json index 91ae1bf..7fc9df2 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -7,7 +7,7 @@ "request": "attach", "connect": { "host": "localhost", - "port": "5678" + "port": 5678 }, "pathMappings": [ { diff --git a/Dockerfile.development b/Dockerfile.development index ea2fbf6..32a6153 100644 --- a/Dockerfile.development +++ b/Dockerfile.development @@ -12,24 +12,31 @@ ENV FLASK_APP=/dMLPA-flask/app.py ENV PYTHONPATH=/dMLPA-flask/dMLPA-flask ENV SESSION_FILE_DIR /var/local/dMLPA-flask/flask_sessions ENV SCRIPT_NAME=dMLPA-flask -ENV UPLOAD_FOLDER /var/local/dMLPA-flask/uploads -ENV FLASK_DEBUG=1 +ENV UPLOAD_FOLDER_1 /var/local/dMLPA-flask/probe_file +ENV UPLOAD_FOLDER_2 /var/local/dMLPA-flask/inputs +ENV OUTPUT_FOLDER /var/local/dMLPA-flask/outputs +ENV FLASK_DEBUG=1 # only for development # add and install requirements COPY ./requirements.txt . RUN pip3 install -r requirements.txt -USER 0 +USER 0 RUN mkdir -p /var/local/dMLPA-flask/logs/ -RUN mkdir -p /var/local/dMLPA-flask/uploads/ +RUN mkdir -p /var/local/dMLPA-flask/probe_file +RUN mkdir -p /var/local/dMLPA-flask/inputs +RUN mkdir -p /var/local/dMLPA-flask/outputs RUN mkdir -p /var/local/dMLPA-flask/flask_sessions/ RUN chmod 777 /var/local/dMLPA-flask/logs/ -RUN chmod 777 /var/local/dMLPA-flask/uploads/ +RUN chmod 777 /var/local/dMLPA-flask/probe_file +RUN chmod 777 /var/local/dMLPA-flask/inputs +RUN chmod 777 /var/local/dMLPA-flask/outputs RUN chmod 777 /var/local/dMLPA-flask/flask_sessions/ USER $CONTAINER_USER_ID RUN pip install debugpy +RUN apt-get update && apt-get install -y wkhtmltopdf # add app COPY . . diff --git a/dMLPA-flask/app.py b/dMLPA-flask/app.py index 4c3bce0..ac1ada9 100644 --- a/dMLPA-flask/app.py +++ b/dMLPA-flask/app.py @@ -28,7 +28,8 @@ from werkzeug.utils import secure_filename import zipfile - +import re +import config import logging log_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s" @@ -39,15 +40,18 @@ app = Flask(__name__) -app.config["UPLOAD_FOLDER"] = os.environ["UPLOAD_FOLDER"] +app.config["UPLOAD_FOLDER_1"] = os.environ["UPLOAD_FOLDER_1"] +app.config["UPLOAD_FOLDER_2"] = os.environ["UPLOAD_FOLDER_2"] +app.config["OUTPUT_FOLDER"] = os.environ["OUTPUT_FOLDER"] app.config["SECRET_KEY"] = "catchmeifyoucan" app.config["SESSION_TYPE"] = "filesystem" app.config["SESSION_PERMANENT"] = True app.config["SESSION_FILE_DIR"] = os.environ["SESSION_FILE_DIR"] app.config["UPLOAD_EXTENSIONS"] = [ ".xlsx", - ".bam", -] # TODO: If multiple file types, add to list i.e. [".txt", ".csv", ".xlsm", ".xlsx"] + ".xlsm", + ".xls" +] app.config["MAX_CONTENT_LENGTH"] = 2 * 1024 * 1024 # File has to be less than 2MB app.config["APPLICATION_ROOT"] = "/dMLPA-flask" app.config["WTF_CSRF_ENABLED"] = True @@ -67,18 +71,28 @@ def validate_input_file(form, field): "xlsx", "xlsm", "xls", - ] # TODO: If multiple file types, add to list i.e. ["xlsm", "xlsx"] + ] file = field.data if not file: - raise ValidationError("No file provided.") + raise ValidationError("No probe file provided.") if not file.filename: - raise ValidationError("No file selected.") + raise ValidationError("No probe file selected.") if not ( "." in file.filename and file.filename.rsplit(".", 1)[1].lower() in allowed_extensions ): raise ValidationError( - f"Invalid file type for input file '{file.filename}'. Allowed types are: xlsx" # TODO: Ammend file types as appropriate i.e. "xlsm, xlsx" + f"Invalid file type for probe file '{file.filename}'. 
" + "Allowed types are: xlsx, xlsm, xls" + ) + file_check = re.search(config.PROBEFILE_NAME_CONVENTION, file.filename) + if not file_check: + raise ValidationError( + f"File {file.filename} is not a valid probe file. " + "File name does not match expected naming convention for a probe " + "file which may result in processing errors. Please provide an " + "appropriate probe file which matches the expected naming " + "convention." ) @@ -87,19 +101,20 @@ def validate_input_files_array(form, field): "xlsx", "xlsm", "xls", - ] # TODO: Ammend file types as appropriate i.e. ["xlsm", "xlsx"] + ] files = field.data if not files: - raise ValidationError("No files provided.") + raise ValidationError("No results files provided.") for file in files: if not file.filename: - raise ValidationError("No files selected.") + raise ValidationError("No results files selected.") if not ( "." in file.filename and file.filename.rsplit(".", 1)[1].lower() in allowed_extensions ): raise ValidationError( - f"Invalid file type for input files '{file.filename}'. Allowed types are: bam" # TODO: Ammend file types as appropriate i.e. "xlsm, xlsx" + f"Invalid file type for results files '{file.filename}'." + "Allowed types are: xlsx, xlsm, xls" ) @@ -110,7 +125,7 @@ class ChangeForm(FlaskForm): render_kw={ "accept": ".xlsm, .xlsx, .xls", "id": "single_file", - }, # TODO: If multiple file types, add to list i.e. ".xlsm, .xlsx" + }, ) multiple_array_of_files = MultipleFileField( "Single/Multiple Patient Data Files (Excel):", @@ -119,7 +134,7 @@ class ChangeForm(FlaskForm): "accept": ".xlsm, .xlsx, .xls", "multiple": "True", "id": "multiple_array_of_files", - }, # TODO: If multiple file types, add to list i.e. ".xlsm, .xlsx" + }, ) submit = SubmitField("Run dMLPA-flask") @@ -135,13 +150,15 @@ def form(backend_state="initial"): return render_template( "index.html", form=chgForm, - file_errors=chgForm.errors, + validation_errors=chgForm.errors, ) # Use FileHandler() to log to a file session["timestr"] = datetime.now().strftime("%Y%m%d-%H%M%S") - os.mkdir(os.path.join(app.config["UPLOAD_FOLDER"], session["timestr"])) + os.mkdir(os.path.join(app.config["UPLOAD_FOLDER_1"], session["timestr"])) + os.mkdir(os.path.join(app.config["UPLOAD_FOLDER_2"], session["timestr"])) + os.mkdir(os.path.join(app.config["OUTPUT_FOLDER"], session["timestr"])) file_handler = logging.FileHandler( f"/var/local/dMLPA-flask/logs/dMLPA-flask_error.log" ) @@ -170,48 +187,61 @@ def form(backend_state="initial"): # Create the paths to the uploaded files input_single_file_tmp_path = os.path.join( - app.config["UPLOAD_FOLDER"], session["timestr"], input_single_file_basename + app.config["UPLOAD_FOLDER_1"], session["timestr"], input_single_file_basename ) input_files_tmp_paths = [ - os.path.join(app.config["UPLOAD_FOLDER"], session["timestr"], basename) + os.path.join(app.config["UPLOAD_FOLDER_2"], session["timestr"], basename) for basename in input_files_basenames ] - zipped_output, input_errors, input_ok_flag = run_backend( + # Create the path to the output folder + output_path = os.path.join(app.config["OUTPUT_FOLDER"], session["timestr"]) + + zipped_output, results_ok_flag, probe_ok_flag, probe_errors, unprocessed_files = run_backend( input_single_file_tmp_path, input_files_tmp_paths, - app.config["UPLOAD_FOLDER"], + output_path, ) + + # Establishing count of files + no_files_submitted = len(input_files_basenames) + no_files_processed = no_files_submitted - len(unprocessed_files) + + # TODO add logging of any errors encountered with the probe or results 
flags + # Save the zipped output path to the session session["zipped_output"] = zipped_output - if input_ok_flag == False: + if probe_ok_flag == False: return render_template( "index.html", form=chgForm, - backend_state="initial", - errors=input_errors, # Pass the errors to the template - file_errors=chgForm.errors, + backend_state="failed", + single_file_name=input_file.filename, + validation_errors=chgForm.errors, + probe_errors=probe_errors, # Pass the errors to the template ) else: return render_template( "index.html", form=chgForm, - backend_state=backend_state, + backend_state="finished", + files_submitted = no_files_submitted, + files_processed = no_files_processed, single_file_name=input_file.filename, - multiple_array_of_files_names=", ".join( - [x.filename for x in input_files_array] - ), + multiple_array_of_files_names=input_files_basenames, zipped_folder_name=zipped_output, - file_errors=chgForm.errors, + results_flag=results_ok_flag, + errors=unprocessed_files, + validation_errors=chgForm.errors, ) return render_template( "index.html", form=chgForm, backend_state=backend_state, - file_errors=chgForm.errors, + validation_errors=chgForm.errors, ) @@ -225,7 +255,7 @@ def upload(self, file): secure_file_name = secure_filename(file_name) file.save( os.path.join( - app.config["UPLOAD_FOLDER"], + app.config["UPLOAD_FOLDER_1"], session["timestr"], secure_file_name, ) @@ -233,7 +263,7 @@ def upload(self, file): return str( os.path.join( - app.config["UPLOAD_FOLDER"], + app.config["UPLOAD_FOLDER_1"], session["timestr"], secure_file_name, ) @@ -257,7 +287,7 @@ def upload(self, files): secure_file_name = secure_filename(file_name) file.save( os.path.join( - app.config["UPLOAD_FOLDER"], + app.config["UPLOAD_FOLDER_2"], session["timestr"], secure_file_name, ) @@ -265,7 +295,7 @@ def upload(self, files): file_names.append( str( os.path.join( - app.config["UPLOAD_FOLDER"], + app.config["UPLOAD_FOLDER_2"], session["timestr"], secure_file_name, ) diff --git a/dMLPA-flask/backend.py b/dMLPA-flask/backend.py index dc628f3..b05242c 100644 --- a/dMLPA-flask/backend.py +++ b/dMLPA-flask/backend.py @@ -1,6 +1,13 @@ import os +import re import zipfile from datetime import datetime +from jinja2 import Environment, PackageLoader +import pdfkit +import pandas as pd +import config +from qc_data import get_test_codes, df_to_html, get_qc_status, summary_data +from cna import split_df_column, clinical_df, copy_number_analysis, cnv_plot, pms2_plot img_versioned = os.getenv( "IMG_VERSIONED" @@ -8,6 +15,17 @@ def run_backend(single_file, array_of_files, output_directory): + + # USER WARNINGS: Anything added to the string 'probe_errors' will be passed from the backend to + # the frontend and displayed in a warning to the user to give immediate feedback to the + # user about errors such as incorrect formatting of input files etc. + # 'unprocessed_results_files on the other hand records a list of file names that won't be processed + # and will be returned to the user after other files have been processed. 
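Editor's note on the gate described in the comment above: a results file is only processed when its name matches `config.FILE_NAME_CONVENTION`; anything else is appended to `unprocessed_results_files` and reported back after the run. A minimal sketch of that check — the pattern is the one added to config.py (still marked "to be confirmed" there), and the passing example name is taken from the test paths in notes.py:

```python
import re

# Pattern from config.py (still flagged "TODO to be confirmed" in the source)
FILE_NAME_CONVENTION = (
    "BP01-[0-9]+-dMLPA_R[0-9]+v[0-9]_Pan[0-9]+_[0-9]+"
    "_[a-zA-Z]+_[a-zA-Z]+[0-9]+_filtered_results.xlsx"
)

# Example name taken from the test inputs referenced in notes.py
ok = "BP01-16-dMLPA_R210v1_Pan5025_218513_SURNAME_Firstname16_filtered_results.xlsx"
print(bool(re.search(FILE_NAME_CONVENTION, ok)))              # True  -> processed
print(bool(re.search(FILE_NAME_CONVENTION, "results.xlsx")))  # False -> unprocessed
```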
+    probe_file_ok_flag: bool = True
+    results_file_ok_flag: bool = True
+    probe_errors = ""
+    unprocessed_results_files: list[str] = []
+
    # Ensure the output directory exists, if not, create it
    os.makedirs(output_directory, exist_ok=True)

@@ -15,27 +33,156 @@ def run_backend(single_file, array_of_files, output_directory):
    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    zip_name = os.path.join(output_directory, f"output_files_{timestamp}.zip")

+    # Creating empty list of files to zip
+    files_to_zip = []
+
    # Ensure all paths are absolute
    single_file = os.path.abspath(single_file)
    array_of_files = [os.path.abspath(file) for file in array_of_files]

-    # Prepare the files to be zipped
-    files_to_zip: list[str] = [single_file] + array_of_files
+    # Reading probe data into a dataframe and formatting it
+    try:
+        probe_df = pd.read_excel(single_file, sheet_name="probe_info")
+        probe_df.rename(columns={"Exon number": "Exon"}, inplace=True)
+        probe_df = split_df_column(probe_df, "Mapview (hg38)a", config.NEW_PROBE_DF_HEADERS)
+        probe_df["chr"] = probe_df["chr"].str.replace("chr", "").str.lstrip("0")
+    except Exception:  # TODO Add custom exception when adding logging.
+        error_message = (
+            "Probe File Error: Spreadsheet headers don't match the expected file "
+            "format, so the results files could not be processed. Confirm that an "
+            "appropriate probe file was provided for analysis."
+        )
+        probe_file_ok_flag = False
+        return zip_name, results_file_ok_flag, probe_file_ok_flag, error_message, unprocessed_results_files

-    # USER WARNINGS: Anything added to the list 'input_errors' will be passed from the backend to
-    # the frontend and displayed in a warning to the user. Used to give immediate feedback to the
-    # user about errors such as incorrect formatting of input files etc.
-    input_errors: list[str] = []
-    input_ok_flag: bool = True
-
-    # TODO: Add any relevant data validation here. Currently only checks if the file has "error_test"
-    # in the name, and if so, sends warning to frontend. This is just a placeholder for now.
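To make the probe-file formatting in the `try` block above concrete, here is a minimal sketch of what `split_df_column` (cna.py) plus the `chr` clean-up produce; the coordinate value is hypothetical but follows the `chr<N>:<start>-<end>` shape of the "Mapview (hg38)a" column:

```python
import pandas as pd

probe_df = pd.DataFrame({"Mapview (hg38)a": ["chr07:6012870-6048737"]})  # hypothetical value
headers = ["chr", "start position", "end position"]  # config.NEW_PROBE_DF_HEADERS

# split_df_column() splits on ":" or "-" (regex r"[:\-]") into three new columns
probe_df[headers] = probe_df["Mapview (hg38)a"].str.split(r"[:\-]", expand=True)

# normalise "chr07" -> "7" so it joins cleanly against the results data
probe_df["chr"] = probe_df["chr"].str.replace("chr", "").str.lstrip("0")

print(probe_df[headers].iloc[0].tolist())  # ['7', '6012870', '6048737']
```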
- for file_name in files_to_zip: - if "error_test" in file_name: - input_errors, input_ok_flag = [ - "TEST ERROR: File passed to trigger this warning for testing purposes" - ], False - break + # Iterating over files to generate reports + for file in array_of_files: + + # Getting input file basenames + file_name = os.path.basename(file) + probe_file = os.path.basename(single_file) + + file_check = re.search(config.FILE_NAME_CONVENTION, file_name) + + if not file_check: + unprocessed_results_files.append(file_name) + results_file_ok_flag = False + continue + + else: + # Reading data from results file in dataframes and formatting + metadata_df = pd.read_excel(file, sheet_name="Sample info") + raw_results_df = pd.read_excel(file, sheet_name="Gene filtered results") + raw_results_df.rename(columns={raw_results_df.columns[-1]:"dosage quotient"}, inplace=True) + + # controlling dataypes + probe_df["start position"] = probe_df["start position"].astype(int) + probe_df["end position"] = probe_df["end position"].astype(int) + + # creating results_clinical dataframe + results_clinical_df = clinical_df( + raw_results_df, + probe_df, + config.COLUMNS_RESULTS_DF, + config.COLUMNS_PROBE_DF + ) + + # check certain columns for any NaN entries + # This would indicated that for some probes in the results file + # the matching probe data couldn't be found in the probe file + columns_to_check = config.NEW_PROBE_DF_HEADERS + columns_with_nan = results_clinical_df[columns_to_check].columns[results_clinical_df[columns_to_check].isna().any()] + if not columns_with_nan.empty: + probe_file_ok_flag = False + probe_errors = ( + "Probe File Error: Required probe information for some probes " + "could not be found in the probe information file provided. " + "Confirm that an appropriate probe file was provided for analysis" + ) + return zip_name, results_file_ok_flag, probe_file_ok_flag, probe_errors, unprocessed_results_files + + # get list of genes in panel + gene_list = results_clinical_df["gene"].unique() + + # perform copy number analysis + cnv_summary_df, marked_df = copy_number_analysis(results_clinical_df, gene_list) + summary_html = df_to_html(cnv_summary_df) + + # generating summary dataframe with limited columns + summary_data_df = summary_data(marked_df) + + # creating empty dictionaries + summary_tables = {} + cnv_detail = {} + + for gene in gene_list: + # creating cnv summary data by gene + summary_data_gene_df = summary_data_df[summary_data_df["gene"] == gene] + summary_data_gene_df.columns = summary_data_gene_df.columns.str.replace("_", " ").str.title() + summary_tables[gene] = df_to_html(summary_data_gene_df) + + if not cnv_summary_df.empty: + # generating details of cnv by impacted gene if cnv's are detected + impacted_genes = cnv_summary_df["Gene"].unique() + if gene in impacted_genes: + # plotting gene data + results_gene_df = marked_df[marked_df["gene"] == gene] + if gene == "PMS2": + gene_plot = pms2_plot(results_gene_df, gene) + else: + gene_plot = cnv_plot(results_gene_df, gene) + + # creating table of probes involved in CNV + # impacted_probes_df = summary_data_gene_df.dropna(subset=["Variant"]) + impacted_probes_df = summary_data_gene_df.loc[summary_data_gene_df["Variant"] != ""] + impacted_probes_df = (impacted_probes_df.sort_values(by="Start Position")) + impacted_probes_html = df_to_html(impacted_probes_df) + + # saving to cnv_detail dict. 
+ cnv_detail[gene] = [gene_plot, impacted_probes_html] + + # getting panel number and qc data from metadata + pan_number, r_code = get_test_codes(metadata_df) + qc_status = get_qc_status(metadata_df) + qc_data = df_to_html(metadata_df) + + # Creating ouput file name and path + # Input file name of results file is determined in GitHub repository: + # https://github.com/moka-guys/digital_mlpa + report = file_name.replace("_filtered_results.xlsx", ".pdf") \ + .replace("_filtered_results.xls", ".pdf") + report_path = os.path.join(output_directory, report) + + # Rendering template and saving as pdf + env = Environment(loader=PackageLoader("dMLPA-flask", "templates")) + template = env.get_template("report.html") + data_values = { + "app_version": config.DMLPA_FLASK_VERSION, + "production": config.RELEASED_TO_PRODUCTION, + "file_name": report, + "r_code": r_code, + "pan_number": pan_number, + "genes": gene_list, + "run_id": "", # TODO to be added once file name is confirmed, + "genome_build": config.GENOME_BUILD, + "patient_file": file_name, + "probe_file": probe_file, + "qc_status": qc_status, + "cnv_summary": summary_html, + "cnc_comment": config.CNC_CATEGORIES_DISCLAIMER, + "pms2_comment": config.PMS2_DISCLAIMER, + "cnv_detail": cnv_detail, + "report_notes": config.REPORTING_NOTE, + "qc_table": qc_data, + "summary_tables": summary_tables + } + + html_report = template.render(data_values) + + pdfkit.from_string(html_report, report_path) + + # Add report to list of files to be zipped + files_to_zip.append(report_path) # TODO: Add any relevant data processing here. Currently just copies the input files to the # output directory. This is just a placeholder for now. @@ -52,4 +199,4 @@ def run_backend(single_file, array_of_files, output_directory): f"File {file} does not exist." ) # Consider replacing with proper logging - return zip_name, input_errors, input_ok_flag + return zip_name, results_file_ok_flag, probe_file_ok_flag, probe_errors, unprocessed_results_files diff --git a/dMLPA-flask/cna.py b/dMLPA-flask/cna.py new file mode 100644 index 0000000..8bea129 --- /dev/null +++ b/dMLPA-flask/cna.py @@ -0,0 +1,322 @@ +""" +Module for conducting copy number analysis on pan filtered digital +MLPA output data. + +This module provides functions for merging the probe data with +the results data (which is necessary to return the desired output +data), manipulating the relevant dataframes, creating copy number +summary and detailed copy number reports. + +""" +import io +import base64 +import pandas as pd +import numpy as np +import matplotlib as mpl +import matplotlib.pyplot as plt +import matplotlib.ticker as ticker +import config + +def shared_columns(df1, df2): + """Identify shared dataframe columns""" + + # streamline column formatting + df1.columns = df1.columns.str.replace("_", " ").str.title() + df2.columns = df2.columns.str.replace("_", " ").str.title() + + # identifying shared columns + shared_columns = list(set(df1) & set(df2)) + + return shared_columns + +def split_df_column(df, column, headers): + """ + Split dataframe columns based on set regular expression + + Arguments: + - df (Pandas Dataframe) + - column (str): name of existing column + - headers (list): list of new column headers + + Return: + - df (Pandas Dataframe): updated dataframe + """ + df[headers] = df[column].str.split(r"[:\-]", expand=True) + return df + +def merge_df(df1, df2, df1_columns, df2_columns): + """Left join databases based on shared columns + + Arguments: + - df1 (Pandas Dataframe): 'left' database in the merge. 
+ Data of this database will be maintained for the 'shared + columns' + - df2 (Pandas Dataframe): 'right' database. Only Data in + the unique df2_columns will be maintained. + - df1_columns (list): Specific unique columns from df1 that + must be maintained + - df2_columns (list): Specific unique columns from df2 that + must be maintained + + Return: + - merged_df (Pandas Dataframe) + + """ + + # streamline column formatting + df1.columns = df1.columns.str.replace("_", " ").str.lower() + df2.columns = df2.columns.str.replace("_", " ").str.lower() + + # identifying shared columns + shared_columns = list(set(df1) & set(df2)) + + # left merge + merged_df = df1[shared_columns + df1_columns].merge( + df2[df2_columns], + how = "left", + on = "probe number" # can be changed based on desired column + ) + + return merged_df + +def clinical_df(df1, df2, df1_columns, df2_columns): + """ + Create dataframe of clinical relevant MLPA probes + + Calls merge_df (function) to left-join probe and results + dataframe. + + Arguments: + - df1 (Pandas Dataframe): 'left' database in the merge. + Data of this database will be maintained for the 'shared + columns' + - df2 (Pandas Dataframe): 'right' database. Only Data in + the unique df2_columns will be maintained. + - df1_columns (list): Specific unique columns from df1 that + must be maintained + - df2_columns (list): Specific unique columns from df2 that + must be maintained + + Return: + - clinical_df (Pandas Dataframe) + """ + + merged_df = merge_df(df1, df2, df1_columns, df2_columns) + + # restructuring df based on column order + merged_df = merged_df[config.DESIRED_COLUMN_ORDER] + + # Filtering results by probetype + results_clinical_df = merged_df[merged_df["probe type"] != "CTRL"] + + return results_clinical_df + +def copy_number_analysis(df, gene_list): + """ + TODO function string needed. + Currently set up to analyse for all genes in run. 
+ Might need to be broken down more in future + + """ + # Empty dataframe for all identified copy number variations + cnv_summary = pd.DataFrame() + + # Create 'variant' column with object dtype + df['variant'] = "" # Initialized with NaN values compatible with object dtype + df["variant"] = df["variant"].astype(object) #controls expected type and prevents FutureError observed in manual testing + + # TODO This is mostly from chatGPT - Need to understand what it's actually doing + for gene in gene_list: + + # Get results per gene + results_gene = df[df["gene"] == gene] + + if gene == "PMS2": + cnc_categories = config.PMS2_CATEGORIES + else: + cnc_categories = config.CNC_CATEGORIES + # iterate through CN categories and identify matching probe entries + for direction, parameters in cnc_categories.items(): + if direction != "Normal": + # print(direction, gene) # TODO Replace with logging + cnv_rows = np.where((parameters[0] <= results_gene["dosage quotient"]) & + (results_gene["dosage quotient"] <= parameters[1]))[0] + + # If no probe entries match CN category skipping next steps + if len(cnv_rows) == 0: + continue + + # If probe entries match CN category, identify uniform CN segments + else: + # gather information for appropriate probe entries + cnv_data = results_gene.iloc[cnv_rows] + # Assess based on probe order if probes are contiguous + cnv_groups = np.cumsum(np.concatenate(([1], np.diff(cnv_data["probe order"]) != 1))) + + # Splitting results per contiguous CN segment if multiple + cnv_list = {cnv_index: group for cnv_index, group in cnv_data.groupby(cnv_groups)} + + # Processing data of individual CN segments + for cnv_index, cnv_lines in cnv_list.items(): + cnv_lines = pd.DataFrame(cnv_lines) + + # Update the input DataFrame by adding a new column containing the CN category + probe_numbers = cnv_lines["probe number"] + for probe_number in probe_numbers: + df.loc[df["probe number"] == probe_number, 'variant'] = direction + + + # Obtainins summary data of contiguous CN segments + start_exon = cnv_lines["exon"].min() + end_exon = cnv_lines["exon"].max() + start_pos = cnv_lines["start position"].min() + end_pos = cnv_lines["end position"].max() + + # Calculate average dosage quotient across CN segment and standard deviation + copy_change_mean = round(cnv_lines["dosage quotient"].mean(), 3) + copy_change_sd = round(cnv_lines["dosage quotient"].std(), 3) + + # Calculating Estimated Copy number for CN segment. Autosomal Logic 2*1=2; 2*0.5=1; 2*1.5=3 + # TODO Does datatype need to be considered before calculation? + if "Ref to Scientist" in direction: + copy_number_est = "-" + else: + copy_number_est = round(np.mean(cnv_lines["normal copy number"] * cnv_lines["dosage quotient"])) + + + # summarize CN data into a df entry + cnv_aggregate = pd.DataFrame({ + "Gene": [gene], + "Variant": [direction], + "Exon(s)": f"{start_exon} - {end_exon}" if start_exon != end_exon else str(start_exon), + "Chr": cnv_lines["chr"].unique(), + "Probe Co-ordinates": f"{start_pos}-{end_pos}", + "Normal Copy Number": int(cnv_lines["normal copy number"].unique()), #Problem if there is more than one potential copy number? + "Mean Dosage Quotient": f"{copy_change_mean}+/-{copy_change_sd}", + "Est. Copy Number": [copy_number_est], + "Supporting Probes": [len(cnv_lines)] + }) + + # Adding df entry into existing CNA_summary dataframe + # TODO concat method is not ideal apparently. 
Consider alternative + cnv_summary = pd.concat([cnv_summary,cnv_aggregate], ignore_index=True) + + # organize summary by genomic co-ordinates + cnv_summary = cnv_summary.sort_values(by="Probe Co-ordinates") + + # returning output data to backend.py + return cnv_summary, df + +def cnv_plot(df, gene, extend=1000): + # create copy of dataframe + cnv_copy = df.copy() + + # pull specific data from columns + start = min(cnv_copy["start position"]) - extend + end = max(cnv_copy["end position"]) + extend + + # determining y-axis limit for DQ plot + ylim = (0, max(2.0, 1.25*cnv_copy["dosage quotient"].max())) + + #setting up plot layout + plt.figure(figsize=(10,4)) + + # setting axis + plt.xlim(start,end) + plt.ylim(*ylim) + plt.tick_params(axis='both', labelsize=8) + + # setting plot labels + plt.ylabel("Dosage Quotient", fontdict={'family': 'Sans', 'size': 8}) + plt.xlabel("Co-ordinates", fontdict={'family': 'Sans', 'size': 8}) + plt.title(gene, fontdict={'family': 'Sans', 'size': 10}) + + # formatting x-axis ticks + # but this is from chatGPT and i need to question if this is the best way to do this. + plt.gca().xaxis.set_major_formatter(ticker.FuncFormatter(lambda x, _: '{:d}'.format(int(x)))) + + # adding shading to define ranges for normal, hetdel and hetdup + cnc_categories = config.CNC_CATEGORIES + plt.axhspan(cnc_categories["Normal"][0], cnc_categories["Normal"][1], facecolor = "#00FF0044", edgecolor = None) + plt.axhspan(cnc_categories["HetDel"][0], cnc_categories["HetDel"][1], facecolor = "#FF000044", edgecolor = None) + plt.axhspan(cnc_categories["HetDup"][0], cnc_categories["HetDup"][1], facecolor = "#318CE744", edgecolor = None) + + # plot DQ value + plt.plot(cnv_copy["start position"], cnv_copy["dosage quotient"], marker = "x", linestyle = "", color = "black") + + # plt.show() + + # Save plot as image + img = io.BytesIO() + plt.savefig(img, format="svg") + img.seek(0) + + # Encode SVG image as Base64 + img_base64 = base64.b64encode(img.getvalue()).decode() + + plt.close() + + return img_base64 + +def pms2_plot(df, gene, extend=1000): + # create copy of dataframe + cnv_copy = df.copy() + + # pull specific data from columns + start = min(cnv_copy["start position"]) - extend + end = max(cnv_copy["end position"]) + extend + + # determining y-axis limit for DQ plot + ylim = (0, max(2.0, 1.25*cnv_copy["dosage quotient"].max())) + + #setting up plot layout + plt.figure(figsize=(10,4)) + + # setting axis + plt.xlim(start,end) + plt.ylim(*ylim) + plt.tick_params(axis='both', labelsize=8) + + # setting plot labels + plt.ylabel("Dosage Quotient", fontdict={'family': 'Sans', 'size': 8}) + plt.xlabel("Co-ordinates", fontdict={'family': 'Sans', 'size': 8}) + plt.title(gene, fontdict={'family': 'Sans', 'size': 10}) + + # formatting x-axis ticks + # but this is from chatGPT and i need to question if this is the best way to do this. 
+    plt.gca().xaxis.set_major_formatter(ticker.FuncFormatter(lambda x, _: '{:d}'.format(int(x))))
+
+    # adding shading to define the normal range and the PMS2 del/dup categories
+    cnc_categories = config.PMS2_GRAPH_CATEGORIES
+    plt.axhspan(cnc_categories["Normal"][0], cnc_categories["Normal"][1], facecolor = "#00FF0044", edgecolor = None)
+    plt.axhspan(cnc_categories["Del2"][0], cnc_categories["Del2"][1], facecolor = "#FF000044", edgecolor = None)
+    plt.axhspan(cnc_categories["Del1"][0], cnc_categories["Del1"][1], facecolor = "#FF7F7F44", edgecolor = None)
+    plt.axhspan(cnc_categories["Dup1"][0], cnc_categories["Dup1"][1], facecolor = "#98C5F144", edgecolor = None)
+    plt.axhspan(cnc_categories["Dup2"][0], cnc_categories["Dup2"][1], facecolor = "#318CE744", edgecolor = None)
+
+    # adding boundary marker between PMS2 exon 11 and exon 12
+    plt.axvline(
+        x=config.PMS2_PMS2CL_BOUNDARY,
+        # ymin = cnc_categories["Del2"][0],
+        # ymax = cnc_categories["Dup2"][1],
+        color='black',
+        linestyle = '--',
+        lw = 0.75
+    )
+
+    # plot DQ values
+    plt.plot(cnv_copy["start position"], cnv_copy["dosage quotient"], marker = "x", linestyle = "", color = "black")
+
+    # Save plot as image
+    img = io.BytesIO()
+    plt.savefig(img, format="svg")
+    img.seek(0)
+
+    # Encode SVG image as Base64
+    img_base64 = base64.b64encode(img.getvalue()).decode()
+
+    plt.close()
+
+    return img_base64
\ No newline at end of file
diff --git a/dMLPA-flask/config.py b/dMLPA-flask/config.py
index e69de29..65be8dd 100644
--- a/dMLPA-flask/config.py
+++ b/dMLPA-flask/config.py
@@ -0,0 +1,140 @@
+
+# config.py
+
+# TODO Add config docstring
+
+''' ==================== Production release constants ==================== '''
+
+DMLPA_FLASK_VERSION = "v1.0.0"
+
+RELEASED_TO_PRODUCTION = False
+
+''' ==================== app.py constants ==================== '''
+
+PROBEFILE_NAME_CONVENTION = "PIF_D001-[a-zA-Z][0-9]"  # TODO to be confirmed.
+
+''' ==================== backend.py constants ==================== '''
+
+# TODO to be confirmed
+FILE_NAME_CONVENTION = "BP01-[0-9]+-dMLPA_R[0-9]+v[0-9]_Pan[0-9]+_[0-9]+_[a-zA-Z]+_[a-zA-Z]+[0-9]+_filtered_results.xlsx"
+
+GENOME_BUILD = "GRCh38"
+
+NEW_PROBE_DF_HEADERS = [
+    "chr",
+    "start position",
+    "end position"
+]
+
+COLUMNS_RESULTS_DF = [
+    "dosage quotient"
+]
+
+COLUMNS_PROBE_DF = [
+    "probe number",
+    "chr",
+    "start position",
+    "end position",
+]
+
+REPORTING_NOTE = {
+    # Generic message
+    "Ref to Scientist": (
+        "Ref to Scientist: The dosage quotient for one or "
+        "more probes falls outside the typical ranges used "
+        "to evaluate copy number. Scientist review required"
+    ),
+}
+
+CNC_CATEGORIES_DISCLAIMER = """The following DQ ranges are used to \
+    assess copy number in most genes: Normal Copy Number 0.8-1.2; \
+    Heterozygous Copy Number Loss 0.4-0.6; Heterozygous Copy Number \
+    Gain 1.4-1.6."""
+
+PMS2_DISCLAIMER = """OF NOTE, analysis of copy number variation in \
+    PMS2 is complicated by the pseudogene PMS2CL. The following \
+    alternative DQ ranges have been used to assess copy number as \
+    a result: Normal Copy Number 0.86-1.14; Potential Copy Number Loss \
+    0.4-0.84; Potential Copy Number Gain: 1.16-1.6. Scientist Review \
+    is Recommended for all PMS2 Cases."""
+
+''' ==================== cna.py constants ==================== '''
+
+DESIRED_COLUMN_ORDER = [
+    "probe order",
+    "probe number",
+    "reference probe",
+    "chr",
+    "gene",
+    "exon",
+    "start position",
+    "end position",
+    "normal copy number",
+    "dosage quotient",
+    "probe type",
+    "additional information",
+]
+
+CNC_CATEGORIES = {
+    # TODO How does the code handle an MDQ given to 3 dp in the Excel file?
+    "Ref to Scientist 1*": [0, 0.399],
+    "HetDel": [0.400, 0.600],  # theoretical value 0.5, represents CN 1
+    "Ref to Scientist 2*": [0.601, 0.799],
+    "Normal": [0.800, 1.200],  # theoretical value 1, represents CN 2
+    "Ref to Scientist 3*": [1.201, 1.399],
+    "HetDup": [1.400, 1.600],  # theoretical value 1.5, represents CN 3
+    "Ref to Scientist 4*": [1.601, float("Inf")]
+}
+
+PMS2_CATEGORIES = {
+    # TODO Stakeholders to confirm thresholds for this
+    "Ref to Scientist 1*": [0, 0.399],
+    "Potential PMS2 CN loss": [0.40, 0.840],
+    "Ref to Scientist 2*": [0.841, 0.859],
+    "Normal": [0.860, 1.140],  # theoretical value 1
+    "Ref to Scientist 3*": [1.141, 1.159],
+    "Potential PMS2 CN gain": [1.160, 1.600],
+    "Ref to Scientist 4*": [1.601, float("Inf")]
+}
+
+PMS2_GRAPH_CATEGORIES = {
+    # TODO Stakeholders to confirm thresholds for this
+    "Del2": [0.4, 0.60, "#FF000044"],    # theoretical value 0.5
+    "Del1": [0.65, 0.84, "#FF7F7F44"],   # theoretical value 0.75
+    "Normal": [0.86, 1.14, "#00FF0044"], # theoretical value 1
+    "Dup1": [1.16, 1.35, "#98C5F144"],   # theoretical value 1.25
+    "Dup2": [1.4, 1.6, "#318CE744"]      # theoretical value 1.5
+}
+
+PMS2_CATEGORIES_PREVIOUS = {
+    # TODO Stakeholders to confirm thresholds for this
+    "Ref to Scientist 5*": [0, 0.39],
+    "PMS2 CN loss": [0.4, 0.60],  # theoretical value 0.5
+    "Ref to Scientist 6*": [0.61, 0.64],
+    "Potential PMS2 CN loss": [0.65, 0.84],  # theoretical value 0.75
+    "Ref to Scientist 7*": [0.85, 0.85],
+    "Normal": [0.86, 1.14],  # theoretical value 1
+    "Ref to Scientist 8*": [1.15, 1.15],
+    "Potential PMS2 CN gain": [1.16, 1.35],  # theoretical value 1.25
+    "Ref to Scientist 9*": [1.36, 1.39],
+    "PMS2 CN gain": [1.4, 1.6],
+    "Ref to Scientist 10*": [1.61, float("Inf")]
+}
+
+PMS2_PMS2CL_BOUNDARY = 5983991  # 1000bp into the intron proximal to the
+    # start of exon 12 (i.e. intron 11-12).
Based on transcript + # NM_000535.7 build GRCH38.p14 + +''' ==================== qc_data.py constants ==================== ''' + +SUMMARY_DATA_COLUMNS = [ + "probe number", + "chr", + "gene", + "exon", + "start position", + "end position", + "normal copy number", + "dosage quotient", + "variant" +] diff --git a/dMLPA-flask/notes.py b/dMLPA-flask/notes.py new file mode 100644 index 0000000..81573e6 --- /dev/null +++ b/dMLPA-flask/notes.py @@ -0,0 +1,152 @@ +import pandas as pd +import numpy as np +from cna import split_df_column, merge_df, clinical_df, copy_number_analysis, cnv_plot +from qc_data import summary_data, df_to_html +import config + +from jinja2 import Environment, PackageLoader +import pdfkit + +# Test files to test script +# xls = "/home/epayne1/Documents/GSTT/test_input/BP01-16-dMLPA_R210v1_Pan5025_218513_SURNAME_Unaffected_filtered_results.xlsx"" +xls = "/home/epayne1/Documents/GSTT/test_input/BP01-16-dMLPA_R210v1_Pan5025_218513_SURNAME_Firstname16_filtered_results.xlsx" +probe = "/home/epayne1/Documents/GSTT/probe_file/PIF_D001-C1_Hereditary_Cancer_Panel_1-v02_modB1probe.xlsx" + +# Reading data in dataframes +metadata_df = pd.read_excel(xls, sheet_name="Sample info") +raw_results_df = pd.read_excel(xls, sheet_name="Gene filtered results") +probe_df = pd.read_excel(probe, sheet_name="probe_info") + +# Defining new column names +probe_column_headers = [ + "chr", + "start position", + "end position" +] + +# hard manipulation/formatting of df's +probe_df.rename(columns={"Exon number": "Exon"}, inplace=True) +probe_df = split_df_column(probe_df, "Mapview (hg38)a", probe_column_headers) +probe_df["chr"] = probe_df["chr"].str.replace("chr", "").str.lstrip("0") +raw_results_df.rename(columns={raw_results_df.columns[-1]:"dosage quotient"}, inplace=True) + +# creating results_clinical dataframe +results_clinical_df = clinical_df( + raw_results_df, + probe_df, + config.COLUMNS_RESULTS_DF, + config.COLUMNS_PROBE_DF + ) + +results_clinical_df["start position"] = results_clinical_df["start position"].astype(int) +results_clinical_df["end position"] = results_clinical_df["end position"].astype(int) +# results_clinical_df.to_excel("merged_excel.xlsx") + +# get list of genes +gene_list = results_clinical_df["gene"].unique() + +# perform copy number analysis +cnv_summary_df, marked_df = copy_number_analysis(results_clinical_df, gene_list) +# marked_df.to_excel("marked_df.xlsx") + + +# obtaining summary data per gene for appendix +summary_data_df = summary_data(marked_df) +# print(summary_data_df) + +summary_tables = {} +cnv_detail = {} +for gene in gene_list: + # summary data by gene + summary_data_gene = summary_data_df[summary_data_df["gene"] == gene] + # print(summary_data_gene) + summary_data_gene.columns = summary_data_gene.columns.str.replace("_", " ").str.title() + summary_tables[gene] = df_to_html(summary_data_gene) + # summary_tables[gene] = gene_summary_data(gene, summary_data_df) + + print(cnv_summary_df) + + if not cnv_summary_df.empty: + # cnv_detail by impacted gene + impacted_genes = cnv_summary_df["Gene"].unique() + if gene not in impacted_genes: + continue + else: + results_gene = marked_df[marked_df["gene"] == gene] + gene_plot = cnv_plot(results_gene, gene) + impacted_probes = summary_data_gene.dropna(subset=["Variant"]) + impacted_probes = summary_data_gene.loc[summary_data_gene["Variant"] != ""] + impacted_probes = (impacted_probes.sort_values(by="Start Position")) + print(impacted_probes) + + impacted_probes_html = df_to_html(impacted_probes) + + # saving to cnv_detail 
dict. + cnv_detail[gene] = [gene_plot, impacted_probes_html] + +# env = Environment(loader=PackageLoader("dMLPA-flask", "templates")) +# template = env.get_template("report.html") +# data_values = { +# "file_name": "report.pdf", +# # "r_code": r_code, +# "pan_number": "pan_number", +# "genes": gene_list, +# "patient_file": xls, +# "probe_file": probe, +# "qc_status": "qc_status", +# "cnv_summary": "summary_html", +# "report_notes": "report_notes", +# "qc_table": "passed", +# "summary_tables": "summary_tables", +# "gene_plots": gene_plots +# } + +# html_report = template.render(data_values) + +# pdfkit.from_string(html_report) + +""" NOTES ON PLOT ANNOTATION """ + +def get_text_positions(text, x_data, y_data, txt_width, txt_height): + a = zip(y_data, x_data) + text_positions = list(y_data) + for index, (y, x) in enumerate(a): + local_text_positions = [i for i in a if i[0] > (y - txt_height) + and (abs(i[1] - x) < txt_width * 2) and i != (y,x)] + if local_text_positions: + sorted_ltp = sorted(local_text_positions) + if abs(sorted_ltp[0][0] - y) < txt_height: #True == collision + differ = np.diff(sorted_ltp, axis=0) + a[index] = (sorted_ltp[-1][0] + txt_height, a[index][1]) + text_positions[index] = sorted_ltp[-1][0] + txt_height*1.01 + for k, (j, m) in enumerate(differ): + #j is the vertical distance between words + if j > txt_height * 2: #if True then room to fit a word in + a[index] = (sorted_ltp[k][0] + txt_height, a[index][1]) + text_positions[index] = sorted_ltp[k][0] + txt_height + break + return text_positions + +def text_plotter(text, x_data, y_data, text_positions, txt_width,txt_height): + for z,x,y,t in zip(text, x_data, y_data, text_positions): + plt.annotate(str(z), xy=(x-txt_width/2, t), size=12) + if y != t: + plt.arrow(x, t,0,y-t, color='red',alpha=0.3, width=txt_width*0.1, + head_width=txt_width, head_length=txt_height*0.5, + zorder=0,length_includes_head=True) + +# Pulled from function + # # Annotate probes within het_del_dq_range or het_dup_dq_range with probe numbers + # for index, row in cnv_copy.iterrows(): + # dq_value = row["dosage quotient"] + # position = row["start position"] + # probe_number = row["probe number"] + + # if dq_value < normal_dq_range[-1] or dq_value > normal_dq_range[1]: + # plt.annotate(probe_number, xy=(position, dq_value), + # xytext=(position + 99, dq_value + 0.5), # Adjust xytext position + # rotation=89, + # # textcoords="offset points", # Offset in points + # # ha='center', va='center', # Horizontal and vertical alignment + # # bbox=dict(boxstyle='round,pad=-1.3', fc='yellow', alpha=0.5), # Bounding box style + # arrowprops=dict(arrowstyle='->', connectionstyle='arc,rad=-1', color='black')) # Arrow properties \ No newline at end of file diff --git a/dMLPA-flask/qc_data.py b/dMLPA-flask/qc_data.py new file mode 100644 index 0000000..201a4f1 --- /dev/null +++ b/dMLPA-flask/qc_data.py @@ -0,0 +1,104 @@ +""" +Module for pulling relevant run and raw results data from dMLPA +reporting files. + +This module provides functions for extracting virtual pan number, +obtaining QC status, converting DataFrame tables to specific HTML +format, and generating summary results data. + +Functions: +- get_pan_number(df): Return virtual pan number used for analysis. +- get_qc_status(df): Return QC pass or fail status. +- df_to_html(df): Convert DataFrame table to HTML without index. +- summary_data (df): Create a summary table of key probe data. 
+""" + +import re +import config + +def get_test_codes(df): + """ + Return virtual pan number and Rcode used for analysis + + Arguments: + - df(Pandas Dataframe): assumed to be a dataframe containing the + Pan(el) number within the second column header (samplename) + + Return Value: + - pan_number(Str): GSTT virtual gene pan(el) number used to filter + raw NGS output + - r_code (Str): Appropriate Rcode from the national genomic test + directory + """ + + # TODO: + # - confirm samplenaming convetion prior to release + # - Logging.info[panel number] and logging.error[pan number not found] + # - Print error onto report. + + # creating empty variables + pan_number = "" + r_code = "" + + # Get samplename from df + samplename = df.columns[1] + + # Searching for Pan number by regular expression + panel_number_match = re.search(r'Pan[0-9]+', samplename) + rcode_match = re.search(r'R[0-9]+', samplename) + + if panel_number_match: + pan_number = panel_number_match.group(0) + + if rcode_match: + r_code = rcode_match.group(0) + + return pan_number, r_code + + +def get_qc_status(df): + """ + Return qc pass or fail status + + Arguments: + - df(Pandas Dataframe): assumed to be a dataframe containing the run + qc status at a specific location. + + Return Value: + - qc_status(Str): + """ + qc_status = df.loc[6, df.columns[1]].upper() + + return qc_status + +def df_to_html(df): + """Convert dataframe table to html without index""" + + #Saving dataframe to html format + html_format = df.to_html(classes="table", index=False) + + return html_format + +def summary_data(df): + """Create summary table of key probe data""" + + # creating copy of selected columns with Copy number estimate column + summary_data_df = df[config.SUMMARY_DATA_COLUMNS].copy() + summary_data_df["copy number est."] = "-" + summary_data_df["copy number est."] = summary_data_df["copy number est."].astype(object) + + # defining regular expression + variant_type = re.compile("Ref to Scientist.*", re.IGNORECASE) + + # creating condition for calculating copy number estimate + # TODO two methods below (both ChatGPT) decide which to keep + condition = ~summary_data_df['variant'].str.match(variant_type) + # condition = ~summary_data_df["variant"].apply(lambda x: bool(variant_type.match(x))) + + # calculating copy number estimate only where condition isn't met + summary_data_df.loc[condition, "copy number est."] = ( + summary_data_df["normal copy number"] * + summary_data_df["dosage quotient"] + ).round() + + return summary_data_df diff --git a/dMLPA-flask/templates/index.html b/dMLPA-flask/templates/index.html index 9fa76b3..805f128 100644 --- a/dMLPA-flask/templates/index.html +++ b/dMLPA-flask/templates/index.html @@ -24,98 +24,114 @@
-

dMLPA-flask Web App

+

dMLPA Results Generator

- {% if errors %} - + + {% if results_flag == False %} + {% endif %} - {% if file_errors %} - + + {% if validation_errors %} + {% endif %} {% if backend_state == "initial" %} - -
-

dMLPA-flask File Upload

-
- {{ form.hidden_tag() }} -
-
- {{ form.single_file.label }}
- {{ form.single_file(class="form-control-file") }} + + +
+

File Upload

+ + {{ form.hidden_tag() }} +
+
+ {{ form.single_file.label }}
+ {{ form.single_file(class="form-control-file") }} +
+
+ {{ form.multiple_array_of_files.label }}
+ {{ form.multiple_array_of_files(class="form-control-file", multiple=true) }} +
-
- {{ form.multiple_array_of_files.label }}
- {{ form.multiple_array_of_files(class="form-control-file", multiple=true) }} -
-
- {{ form.submit(class="btn btn-primary btn-lg btn-block") }} - -
- - {%elif backend_state == "started" %} - -
- - {%elif backend_state == "finished" %} - - - - {%elif backend_state == "failed" %} - - - {%endif %} + {{ form.submit(class="btn btn-primary btn-lg btn-block") }} + +
+ + {%elif backend_state == "finished" %} + + + + {%elif backend_state == "failed" %} + + + + {%endif %}
{% block scripts %} @@ -140,15 +156,13 @@

WARNING!

} document.getElementById("single_file").addEventListener("change", function () { - validateFileType(this, ["xlsm", "xlsx", "xls"]); // TODO Add more file extensions here i.e. ["xlsm", "xlsx"] + validateFileType(this, ["xlsm", "xlsx", "xls"]); }); document.getElementById("multiple_array_of_files").addEventListener("change", function () { - validateFileType(this, ["xlsm", "xlsx", "xls"]); // TODO Add more file extensions here i.e. ["xlsm", "xlsx"] + validateFileType(this, ["xlsm", "xlsx", "xls"]); }); {% endblock %} - - \ No newline at end of file diff --git a/dMLPA-flask/templates/report.html b/dMLPA-flask/templates/report.html new file mode 100644 index 0000000..a4ddad2 --- /dev/null +++ b/dMLPA-flask/templates/report.html @@ -0,0 +1,165 @@ + + + + + + + dMLPA Reporting Template + + + + + +
+
dMLPA report generator {{ app_version }}
+
{{ file_name }}
+
+

+ +{% if not production %} +

WARNING: This report was generated in a development environment. Do not use for clinical reporting

+{% endif %} + +

dMLPA Copy Number Analysis

+

South East Genomics Laboratory Hub

+
+ +
+
+

Test Request

+

R Code: {{ r_code }}

+

Pan Number: {{ pan_number }}

+

Genes: + {% for gene in genes %} + {{ gene }}{% if not loop.last %}, {% endif %} + {% endfor %} +

+
+

Run ID: {{ run_id }}

+

Genome Build: {{ genome_build }}

+

File Name: {{ patient_file }}

+

Probe File: {{ probe_file }}

+
+
+ {% if qc_status == "PASSED" %} +

QC {{ qc_status }}

+ {% else %} +

QC {{ qc_status }}

+ {% endif %} +
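A note on `qc_status`: `get_qc_status` in qc_data.py reads a fixed cell (`df.loc[6, df.columns[1]]`) from the "Sample info" sheet, so any change to the sheet layout silently shifts what this banner shows. A hedged sketch of a label-based lookup instead — the "QC status" row label here is hypothetical:

```python
import pandas as pd

def get_qc_status_by_label(df: pd.DataFrame, label: str = "QC status") -> str:
    """Label-based alternative to the fixed df.loc[6, ...] lookup in qc_data.py."""
    # match the row whose first-column label equals `label`, case-insensitively
    hits = df[df.iloc[:, 0].astype(str).str.strip().str.casefold() == label.casefold()]
    if hits.empty:
        return "UNKNOWN"  # surfaced on the report rather than raising a KeyError
    return str(hits.iloc[0, 1]).upper()
```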
+
+
+ +
+

Called CNVs

+ {% if '<td>' is in cnv_summary %} +

{{ cnv_summary }}

+ {% for key, value in report_notes.items() %} + {% if key is in cnv_summary %} +

{{ value }}

+ {% endif %} +

{{ cnc_comment }}

+ {% if 'PMS2' in genes %} +

{{ pms2_comment }}

+ {% endif %} + + {% endfor %} + {% else %} +

NO COPY NUMBER VARIANTS DETECTED

+

{{ cnc_comment }}

+ {% if 'PMS2' is in genes %} +

{{ pms2_comment }}

+ {% endif %} + {% endif %} +
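The conditionals in this section key off substring checks against the rendered `cnv_summary` HTML (Jinja's `is in` test, available since Jinja 2.10). In plain Python the same decision logic reads roughly as follows — assuming, as in the edit above, that a `<td>` cell is what distinguishes a populated summary table from an empty one:

```python
def notes_for_summary(cnv_summary: str, report_notes: dict[str, str]) -> list[str]:
    # df_to_html() always emits headers; a <td> cell only appears once the
    # summary table has at least one data row (i.e. at least one called CNV)
    if "<td>" not in cnv_summary:
        return ["NO COPY NUMBER VARIANTS DETECTED"]
    # emit a reporting note whenever its key (e.g. "Ref to Scientist")
    # occurs anywhere in the rendered table
    return [note for key, note in report_notes.items() if key in cnv_summary]
```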
+ + +
+ +
+

CNV Details

+ {% if cnv_detail %} + {% for gene, data in cnv_detail.items() %} +

{{ gene }}_plot

+ {{ gene }} Plot +
+

{{ data[1] }}

+
+ {% endfor %} + {% else %} +

NO COPY NUMBER VARIANTS DETECTED

+ {% endif %} +
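For context on `data[0]` above: `cnv_plot`/`pms2_plot` in cna.py return the plot as a Base64-encoded SVG string, which the template embeds inline rather than writing to disk. The surrounding `<img>` markup is not visible in this rendering of the diff, so the tag in the closing comment below is illustrative. A self-contained sketch of the round trip:

```python
import base64
import io

import matplotlib
matplotlib.use("Agg")  # headless backend, as needed inside the Flask container
import matplotlib.pyplot as plt

plt.figure(figsize=(10, 4))
plt.plot([100, 200, 300], [1.0, 0.5, 1.0], marker="x", linestyle="", color="black")

img = io.BytesIO()
plt.savefig(img, format="svg")  # same steps as cnv_plot()/pms2_plot()
img.seek(0)
img_base64 = base64.b64encode(img.getvalue()).decode()
plt.close()

# Embedded in report.html as a data URI, e.g.:
#   <img src="data:image/svg+xml;base64,{{ data[0] }}" alt="{{ gene }} plot">
```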
+ +
+ +
+

Appendix

+
+

QC Data

+

{{ qc_table }}

+
+
+

Raw Data

+ {% for gene, summary_table in summary_tables.items() %} +

{{ gene }}

+

{{ summary_table }}

+
+ {% endfor %} +
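The per-gene tables in this appendix are built by `summary_data` in qc_data.py, which derives "copy number est." as round(normal copy number × dosage quotient) and leaves "-" for probes flagged "Ref to Scientist". A compact sketch with hypothetical rows:

```python
import re
import pandas as pd

df = pd.DataFrame({
    "normal copy number": [2, 2, 2],
    "dosage quotient": [1.02, 0.51, 0.70],  # hypothetical values
    "variant": ["", "HetDel", "Ref to Scientist 2*"],
})

df["copy number est."] = "-"
variant_type = re.compile("Ref to Scientist.*", re.IGNORECASE)
condition = ~df["variant"].str.match(variant_type)  # exclude "Ref to Scientist" rows

df.loc[condition, "copy number est."] = (
    df["normal copy number"] * df["dosage quotient"]
).round()

print(df["copy number est."].tolist())  # [2.0, 1.0, '-']
```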
+ + + diff --git a/docker-compose.yml b/docker-compose.yml index 98699e7..717d384 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -9,7 +9,9 @@ services: volumes: - .:/dMLPA-flask/ - /var/local/dMLPA-flask/logs/:/var/local/dMLPA-flask/logs/ - - /var/local/dMLPA-flask/uploads/:/var/local/dMLPA-flask/uploads/ + - /var/local/dMLPA-flask/probe_file/:/var/local/dMLPA-flask/probe_file/ + - /var/local/dMLPA-flask/inputs/:/var/local/dMLPA-flask/inputs/ + - /var/local/dMLPA-flask/outputs/:/var/local/dMLPA-flask/outputs/ ports: - 5000:5000 diff --git a/requirements.txt b/requirements.txt index f4318f5..ad0c8d4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,16 +2,17 @@ Flask==2.2.2 flask-cors==3.0.10 Flask-Session==0.4.0 Flask-WTF==1.1.1 +gunicorn==20.1.0 Jinja2==3.1.2 kaleido==0.2.1 +matplotlib==3.8.3 +myst-parser==0.15.2 numpy==1.22.4 openpyxl==3.0.10 -gunicorn==20.1.0 pandas==1.4.2 pdfkit==1.0.0 plotly==5.7.0 pytest==7.2.0 sphinx==4.5.0 sphinx_rtd_theme==1.1.1 -myst-parser==0.15.2 Werkzeug==2.2.2
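One closing note for reviewers: the contiguity grouping in `copy_number_analysis` (cna.py), which the diff flags with a "mostly from chatGPT" TODO, is the heart of the CNV segmentation and deserves a worked example. With hypothetical probe orders for rows that fell into a single copy-number category:

```python
import numpy as np
import pandas as pd

# Rows whose dosage quotient landed in one CN category (hypothetical orders)
cnv_data = pd.DataFrame({"probe order": [4, 5, 6, 9, 10]})

# np.diff(...) != 1 is True wherever consecutive probe order breaks; prepending 1
# and taking the cumulative sum turns those break points into segment labels.
cnv_groups = np.cumsum(np.concatenate(([1], np.diff(cnv_data["probe order"]) != 1)))
print(cnv_groups)  # [1 1 1 2 2] -> probes 4-6 form one segment, 9-10 another

# groupby(cnv_groups) then yields one DataFrame per contiguous segment, exactly
# as consumed by the cnv_list comprehension in copy_number_analysis().
for segment_id, segment in cnv_data.groupby(cnv_groups):
    print(segment_id, segment["probe order"].tolist())
```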