Skip to content

Commit

Permalink
resolve missing genes
Browse files Browse the repository at this point in the history
  • Loading branch information
astrdhr committed Oct 6, 2023
1 parent 14c2522 commit fb8626c
Show file tree
Hide file tree
Showing 16 changed files with 182,959 additions and 271,839 deletions.
55 changes: 0 additions & 55 deletions bin/get-salmon-mapping-qc.py

This file was deleted.

97 changes: 97 additions & 0 deletions bin/modify-syn-gff.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
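# Script used to prefix the ID, Name and Parent attribute values in the
# synthetic Chr11 GFF3 file with "x." (inserted after any "gene:" or
# "transcript:" prefix) so that synthetic features can be distinguished from
# the wild-type Chr11 genes.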

# awk -F '\t' 'BEGIN { OFS = "\t" }

# # skip first 3 lines
# NR <= 3 { print; next }

# {
# # split attributes field into individual attributes
# split($9, attributes, ";")
# new_attributes = ""
# for (i = 1; i <= length(attributes); i++) {
# attr = attributes[i]

# # check if attribute starts with "ID=" or "Name="
# if (match(attr, /^ID=/) || match(attr, /^Name=/)) {

# # extract attribute name and value
# attr_name = substr(attr, 1, RSTART + RLENGTH - 2)
# attr_value = substr(attr, RLENGTH + 1)

# # check if attribute name is "ID" or "Name" and if it has a value
# if ((attr_name == "ID" || attr_name == "Name") && length(attr_value) > 0) {

# # check if "gene:" or "transcript:" prefix is already present
# if (!match(attr_value, /^(gene|transcript):/)) {
# attr_value = "x." attr_value
# } else {
# attr_value = substr(attr_value, 1, RLENGTH) "x." substr(attr_value, RLENGTH + 1)
# }
# }

# # reconstruct modified attribute
# modified_attr = attr_name "=" attr_value
# } else {
# modified_attr = attr
# }
# new_attributes = new_attributes ";" modified_attr
# }

# # replace attributes field with modified attributes
# $9 = substr(new_attributes, 2)
# print
# }
# ' tmp-syn.gff > modified_syn_chr11.gff


#######################################
# Tag feature identifiers in a GFF3 file with an "x." prefix.
#
# For every feature line, the values of the ID, Name and Parent attributes
# in column 9 are rewritten: "x." is inserted after a leading "gene:" or
# "transcript:" prefix when one is present, otherwise at the start of the
# value. All other attributes and columns are copied unchanged.
#
# Arguments:
#   $1 - path of the GFF3 file to read
#   $2 - path of the GFF3 file to write
# Outputs:
#   Writes the rewritten GFF3 to $2; diagnostics go to stderr.
# Returns:
#   0 on success, 1 if the input cannot be read, otherwise the awk status.
#######################################
modify_syn_gff() {
  # Check first so that a missing input does not truncate an existing output.
  if [ ! -r "$1" ]; then
    printf 'modify-syn-gff: cannot read input GFF: %s\n' "$1" >&2
    return 1
  fi

  awk -F '\t' '
    BEGIN { OFS = "\t" }

    # Insert "x." after a leading "gene:" / "transcript:" prefix, or at the
    # start of the value when there is no such prefix. Empty values are
    # returned unchanged so that "Key=" never turns into "Key=x.".
    function add_prefix(value) {
      if (value == "")
        return value
      if (match(value, /^(gene|transcript):/))
        return substr(value, 1, RLENGTH) "x." substr(value, RLENGTH + 1)
      return "x." value
    }

    # Copy through unchanged: the first 3 lines (as before), any other
    # comment/directive line, and any line without 9 columns (for example
    # "###" or sequence lines). Assigning $9 on such a line would pad it
    # with empty tab-separated fields.
    NR <= 3 || /^#/ || NF < 9 { print; next }

    {
      # Use the count returned by split(); length() on an array is not
      # available in every awk implementation.
      n_attrs = split($9, attrs, ";")
      rebuilt = ""
      for (i = 1; i <= n_attrs; i++) {
        attr = attrs[i]
        if (match(attr, /^(ID|Name|Parent)=/)) {
          key = substr(attr, 1, RLENGTH - 1)
          value = substr(attr, RLENGTH + 1)
          if (key == "Parent") {
            # Parent may hold several comma-separated IDs; tag each one so
            # that every parent reference still matches its renamed ID.
            n_parents = split(value, parents, ",")
            value = ""
            for (j = 1; j <= n_parents; j++)
              value = value (j > 1 ? "," : "") add_prefix(parents[j])
          } else {
            value = add_prefix(value)
          }
          attr = key "=" value
        }
        rebuilt = rebuilt (i > 1 ? ";" : "") attr
      }
      $9 = rebuilt
      print
    }
  ' < "$1" > "$2"
}

# With no arguments the script behaves as before: it reads the synthetic
# Chr11 GFF and writes chr18.gff in the current directory.
modify_syn_gff \
  "${1:-data/genome/modified-syn-chr11/tmp-syn.gff}" \
  "${2:-chr18.gff}"
47 changes: 0 additions & 47 deletions bin/modify-syn-gff3.sh

This file was deleted.

6 changes: 3 additions & 3 deletions conf/base.config
Original file line number Diff line number Diff line change
Expand Up @@ -86,13 +86,13 @@ profiles {

// source genome files
reference_wt = "data/genome/BY4742-genome/BY4742.fasta"
annotation_wt = "data/genome/BY4742-genome/BY4742.gff"
annotation_wt = "data/genome/BY4742/annotation/*.gff"
reference_syn = "data/genome/syn-yeast-genome/contigs/yeast_chr11_3_39.ref.fasta"
annotation_syn = "data/genome/syn-yeast-genome/annotation/yeast_chr11_3_39.ref.gff"

// combined genome files
genome.reference = "data/genome/new-concat-genome/concat-wt-syn-chr11-BY4742.fasta"
genome.annotation = "data/genome/new-concat-genome/concat-wt-syn-chr11-BY4742.gff"
genome.reference = "data/genome/wt-syn-chr11-genome/wt-syn-chr11-BY4742.fasta"
genome.annotation = "data/genome/wt-syn-chr11-genome/wt-syn-chr11-BY4742.gff"

// fastp parameters
fastp.args = ""
Expand Down
Loading

0 comments on commit fb8626c

Please sign in to comment.