Skip to content

Commit

Permalink
feat: make circle filtering configurable
Browse files Browse the repository at this point in the history
  • Loading branch information
dlaehnemann committed Apr 17, 2024
1 parent a4d0ae3 commit d809f3b
Show file tree
Hide file tree
Showing 5 changed files with 59 additions and 5 deletions.
14 changes: 13 additions & 1 deletion config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,16 @@ params:
cutadapt: ""
gatk:
BaseRecalibrator: ""
applyBQSR: ""
applyBQSR: ""

# These filters mostly correspond to the output columns of Circle-Map:
# https://github.com/iprada/Circle-Map/wiki/Circle-Map-Realign-output-files
# In addition, you can filter on the length of the circle.
# The key names must match exactly what the rule
# `clean_circle_map_realign_output` looks up under config["circle_filtering"].
circle_filtering:
  # lower bound on the `circle_score` column
  min_circle_score: 100
  # lower bound on the `split_reads` column
  min_split_reads: 1
  # lower bound on the `discordant_reads` column
  min_discordant_read_pairs: 1
  # upper bound on the `uncovered_fraction` column
  max_uncovered_fraction: 0.9
  # lower bound on the `mean_coverage` column
  min_mean_coverage: 2.0
  # lower and upper bound on the circle `length`
  min_circle_length: 500
  max_circle_length: 80000000
2 changes: 2 additions & 0 deletions workflow/resources/circles.datavzrd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,14 @@ views:
heatmap:
scale: linear
range:
- "#e7d4e8"
- "#e7d4e8"
- "white"
- "#d9f0d3"
- "#7fbf7b"
- "#7fbf7b"
domain:
- 0
- 50
- 200
- 300
Expand Down
8 changes: 8 additions & 0 deletions workflow/rules/circle_map.smk
Original file line number Diff line number Diff line change
Expand Up @@ -64,5 +64,13 @@ rule clean_circle_map_realign_output:
"logs/circle-map/{sample}.circles.cleaned.log",
conda:
"../envs/pandas.yaml"
params:
min_circle_score=config["circle_filtering"]["min_circle_score"],
min_split_reads=config["circle_filtering"]["min_split_reads"],
min_discordant_read_pairs=config["circle_filtering"]["min_discordant_read_pairs"],
max_uncovered_fraction=config["circle_filtering"]["max_uncovered_fraction"],
min_mean_coverage=config["circle_filtering"]["min_mean_coverage"],
min_circle_length=config["circle_filtering"]["min_circle_length"],
max_circle_length=config["circle_filtering"]["max_circle_length"],
script:
"../scripts/clean_circle_map_realign_output.py"
26 changes: 26 additions & 0 deletions workflow/schemas/config.schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,34 @@ properties:
- release
- build
- n_chromosomes
circle_filtering:
  # Thresholds used by the rule `clean_circle_map_realign_output` to filter
  # circles. Property names must match the keys that rule reads from
  # config["circle_filtering"].
  type: object
  properties:
    min_circle_score:
      type: number
    min_split_reads:
      type: integer
    min_discordant_read_pairs:
      type: integer
    max_uncovered_fraction:
      type: number
    min_mean_coverage:
      type: number
    min_circle_length:
      type: integer
    max_circle_length:
      type: integer
  required:
    - min_circle_score
    - min_split_reads
    - min_discordant_read_pairs
    - max_uncovered_fraction
    - min_mean_coverage
    - min_circle_length
    - max_circle_length

required:
- samples
- units
- ref
- circle_filtering
14 changes: 10 additions & 4 deletions workflow/scripts/clean_circle_map_realign_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,11 @@
# Filter out low-quality circles. The thresholds come from the rule's `params`
# and are compared against Circle-Map Realign output columns, see:
# https://github.com/iprada/Circle-Map/wiki/Circle-Map-Realign-output-files
# Every `min_*` threshold is an inclusive lower bound and every `max_*`
# threshold an inclusive upper bound, so `min_split_reads: 1` keeps circles
# supported by exactly one split read (equivalent to the former `> 0` check).
circles = circles.loc[
    (circles["circle_score"] >= snakemake.params["min_circle_score"])
    & (circles["discordant_reads"] >= snakemake.params["min_discordant_read_pairs"])
    & (circles["split_reads"] >= snakemake.params["min_split_reads"])
    & (circles["uncovered_fraction"] <= snakemake.params["max_uncovered_fraction"])
    & (circles["mean_coverage"] >= snakemake.params["min_mean_coverage"])
]


Expand All @@ -57,6 +58,11 @@
axis='columns',
)

# Keep only circles whose length lies within the configured range; both the
# minimum and the maximum length are inclusive bounds.
circles = circles.loc[
    circles["length"].between(
        snakemake.params["min_circle_length"],
        snakemake.params["max_circle_length"],
    )
]

circles.sort_values(
by=['chromosome', 'start', 'end'],
inplace=True
Expand Down

0 comments on commit d809f3b

Please sign in to comment.