From be657f517f9409ac495380279bd9dbfdfedaa5f7 Mon Sep 17 00:00:00 2001 From: Lev Levitsky Date: Wed, 9 Oct 2024 10:11:09 +0200 Subject: [PATCH 1/4] Fix column order validation --- sdrf_pipelines/sdrf/sdrf_schema.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sdrf_pipelines/sdrf/sdrf_schema.py b/sdrf_pipelines/sdrf/sdrf_schema.py index c0a4c2a..f407428 100644 --- a/sdrf_pipelines/sdrf/sdrf_schema.py +++ b/sdrf_pipelines/sdrf/sdrf_schema.py @@ -280,19 +280,19 @@ def validate_columns_order(panda_sdrf): error_columns_order = [] if "assay name" in list(panda_sdrf): cnames = list(panda_sdrf) - index = cnames.index("assay name") + assay_index = cnames.index("assay name") factor_tag = False - for column in cnames: - if ("comment" in column or "technology type" in column) and cnames.index(column) < index: - error_message = "The column " + column + "cannot be before the assay name" + for idx, column in enumerate(cnames): + if "comment" in column and idx < assay_index: + error_message = "The column " + column + " cannot be before the assay name" error_columns_order.append(LogicError(error_message, error_type=logging.ERROR)) if ( "characteristics" in column or ("material type" in column and "factor value" not in column) - ) and cnames.index(column) > index: - error_message = "The column " + column + "cannot be after the assay name" + or "technology type" in column) and idx > assay_index: + error_message = "The column " + column + " cannot be after the assay name" error_columns_order.append(LogicError(error_message, error_type=logging.ERROR)) if "factor value" in column and not factor_tag: - factor_index = cnames.index(column) + factor_index = idx factor_tag = True if factor_tag: temp = [] From f6f361788a09ff2054e0ae6d3dca5c3b1510ace8 Mon Sep 17 00:00:00 2001 From: Lev Levitsky Date: Wed, 9 Oct 2024 10:34:14 +0200 Subject: [PATCH 2/4] Black reformat --- sdrf_pipelines/sdrf/sdrf_schema.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sdrf_pipelines/sdrf/sdrf_schema.py b/sdrf_pipelines/sdrf/sdrf_schema.py index f407428..d703084 100644 --- a/sdrf_pipelines/sdrf/sdrf_schema.py +++ b/sdrf_pipelines/sdrf/sdrf_schema.py @@ -287,8 +287,10 @@ def validate_columns_order(panda_sdrf): error_message = "The column " + column + " cannot be before the assay name" error_columns_order.append(LogicError(error_message, error_type=logging.ERROR)) if ( - "characteristics" in column or ("material type" in column and "factor value" not in column) - or "technology type" in column) and idx > assay_index: + "characteristics" in column + or ("material type" in column and "factor value" not in column) + or "technology type" in column + ) and idx > assay_index: error_message = "The column " + column + " cannot be after the assay name" error_columns_order.append(LogicError(error_message, error_type=logging.ERROR)) if "factor value" in column and not factor_tag: From ee678adc2c3c7a376a4836a4c12179408828a17b Mon Sep 17 00:00:00 2001 From: Lev Levitsky Date: Fri, 11 Oct 2024 12:46:57 +0200 Subject: [PATCH 3/4] Implement new rules for technology type --- sdrf_pipelines/sdrf/sdrf_schema.py | 33 +++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/sdrf_pipelines/sdrf/sdrf_schema.py b/sdrf_pipelines/sdrf/sdrf_schema.py index d703084..5b73b0f 100644 --- a/sdrf_pipelines/sdrf/sdrf_schema.py +++ b/sdrf_pipelines/sdrf/sdrf_schema.py @@ -283,16 +283,29 @@ def validate_columns_order(panda_sdrf): assay_index = cnames.index("assay name") factor_tag = False for idx, column in enumerate(cnames): - if "comment" in column and idx < assay_index: - error_message = "The column " + column + " cannot be before the assay name" - error_columns_order.append(LogicError(error_message, error_type=logging.ERROR)) - if ( - "characteristics" in column - or ("material type" in column and "factor value" not in column) - or "technology type" in column - ) and idx > assay_index: - error_message = "The column " + column + " cannot be after the assay name" - error_columns_order.append(LogicError(error_message, error_type=logging.ERROR)) + error_message, error_type = '', None + if idx < assay_index: + if "comment" in column: + error_message = "The column " + column + " cannot be before the assay name" + error_type = logging.ERROR + if "technology type" in column: + error_message = "The column " + column + " must be immediately after the assay name" + if assay_index - idx > 1: + error_type = logging.ERROR + else: + error_type = logging.WARNING + else: + if ( + "characteristics" in column + or ("material type" in column and "factor value" not in column) + ): + error_message = "The column " + column + " cannot be after the assay name" + error_type = logging.ERROR + if "technology type" in column and idx > assay_index + 1: + error_message = "The column " + column + " must be immediately after the assay name" + error_type = logging.ERROR + if error_type is not None: + error_columns_order.append(LogicError(error_message, error_type=error_type)) if "factor value" in column and not factor_tag: factor_index = idx factor_tag = True From 0e132fc8014a3654e95235286941bb3f7e2a49ea Mon Sep 17 00:00:00 2001 From: Lev Levitsky Date: Fri, 11 Oct 2024 13:27:01 +0200 Subject: [PATCH 4/4] black reformat --- sdrf_pipelines/sdrf/sdrf_schema.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/sdrf_pipelines/sdrf/sdrf_schema.py b/sdrf_pipelines/sdrf/sdrf_schema.py index 5b73b0f..3100234 100644 --- a/sdrf_pipelines/sdrf/sdrf_schema.py +++ b/sdrf_pipelines/sdrf/sdrf_schema.py @@ -283,7 +283,7 @@ def validate_columns_order(panda_sdrf): assay_index = cnames.index("assay name") factor_tag = False for idx, column in enumerate(cnames): - error_message, error_type = '', None + error_message, error_type = "", None if idx < assay_index: if "comment" in column: error_message = "The column " + column + " cannot be before the assay name" @@ -295,10 +295,7 @@ def validate_columns_order(panda_sdrf): else: error_type = logging.WARNING else: - if ( - "characteristics" in column - or ("material type" in column and "factor value" not in column) - ): + if "characteristics" in column or ("material type" in column and "factor value" not in column): error_message = "The column " + column + " cannot be after the assay name" error_type = logging.ERROR if "technology type" in column and idx > assay_index + 1: