diff --git a/CHANGELOG.md b/CHANGELOG.md index c2fe0c05..10682477 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Alternative start codons can now be used in the `synthesis/codon` DNA -> protein translation package (#305) - Added a parser and writer for the `pileup` sequence alignment format (#329) +- Added statistics to the `synthesis/codon` package (keeping track of the observed start codon occurrences in a translation table) (#350) ### Fixed - `fastq` parser no longer becomes de-aligned when reading (#325) diff --git a/synthesis/codon/codon.go b/synthesis/codon/codon.go index 9a44b69f..5fd51de1 100644 --- a/synthesis/codon/codon.go +++ b/synthesis/codon/codon.go @@ -26,37 +26,35 @@ import ( "strings" "time" + "github.com/TimothyStiles/poly/io/genbank" weightedRand "github.com/mroth/weightedrand" ) /****************************************************************************** -Oct, 15, 2020 - File is structured as so: - Interfaces: - Table - specifies the functions that all table types must implement + Interfaces: + Table - An interface encompassing what a potentially codon optimized Translation table can do + + Structs: + TranslationTable - contains a weighted codon table, which is used when translating and optimizing sequences. The weights can be updated through the codon frequencies we observe in given DNA sequences. - Structs: - codonTable - holds all information mapping codons <-> amino acids during transformations. AminoAcid - holds amino acid related info for codonTable struct - Codon - holds codon related info for AminoAcid struct - Big functions that everything else is related to: + Codon - holds codon related info for AminoAcid struct - Translate - given a nucleic sequence string and codon table it translates sequences - to UPPERCASE amino acid sequences. 
+ Key functions: + TranslationTable.Translate - given a nucleic sequence string, it translates the sequence to an UPPERCASE amino acid sequence. - Optimize - given an amino acid sequence string and codon table it translates - sequences to UPPERCASE nucleic acid sequences. + TranslationTable.OptimizeSequence - will return a set of codons which can be used to encode the given amino acid sequence. The codons picked are weighted according to the computed translation table's weights. -Anywho, most of this file and codonTable's struct methods are meant to help overcome -this codon bias. There's a default codonTable generator near the bottom of this file -with a whole section on how it works and why it's gotta be that way. + TranslationTable.UpdateWeightsWithSequence - will look at the coding regions in the given genbank data, and use those to generate new weights for the codons in the translation table. The next time a sequence is optimised, it will use those updated weights. + + TranslationTable.Stats - a set of statistics we maintain throughout the translation table's lifetime. 
For example we track the start codons observed when we update the codon table's weights with other DNA sequences ******************************************************************************/ var ( - errEmptyCodonTable = errors.New("empty codon table") + errNoCodingRegions = errors.New("no coding regions found") errEmptyAminoAcidString = errors.New("empty amino acid string") errEmptySequenceString = errors.New("empty sequence string") newChooserFn = weightedRand.NewChooser @@ -83,74 +81,69 @@ type AminoAcid struct { Codons []Codon `json:"codons"` } -// Table is an interface that specifies the functions that all table types must implement +// Table is an interface encompassing what a potentially codon optimized Translation table can do type Table interface { - Chooser() (map[string]weightedRand.Chooser, error) - GenerateTranslationTable() map[string]string - GenerateStartCodonTable() map[string]string - GetAminoAcids() []AminoAcid - GetStartCodons() []string - GetStopCodons() []string - IsEmpty() bool - OptimizeTable(string) Table + GetWeightedAminoAcids() []AminoAcid + OptimizeSequence(aminoAcids string, randomState ...int) (string, error) + Translate(dnaSeq string) (string, error) +} + +// Stats denotes a set of statistics we maintain throughout the translation table's lifetime. For example we track +// the start codons observed when we update the codon table's weights with other DNA sequences: StartCodonCount is keyed by the first three letters of each coding region, and GeneCount holds the number of coding regions found by the latest update. +type Stats struct { + StartCodonCount map[string]int + GeneCount int } -// codonTable holds information for a codon table. -type codonTable struct { +// NewStats returns a new instance of codon statistics (a set of statistics we maintain throughout a translation table's lifetime), with StartCodonCount initialised to an empty map so it can be incremented right away. +func NewStats() *Stats { + return &Stats{ + StartCodonCount: map[string]int{}, + } +} + +// TranslationTable contains a weighted codon table, which is used when translating and optimizing sequences. The +// weights can be updated through the codon frequencies we observe in given DNA sequences. 
+type TranslationTable struct { StartCodons []string `json:"start_codons"` StopCodons []string `json:"stop_codons"` AminoAcids []AminoAcid `json:"amino_acids"` -} -// Translate translates a codon sequence to an amino acid sequence -func Translate(sequence string, codonTable Table) (string, error) { - if codonTable.IsEmpty() { - return "", errEmptyCodonTable - } - if len(sequence) == 0 { - return "", errEmptySequenceString - } + TranslationMap map[string]string + StartCodonTable map[string]string + Choosers map[string]weightedRand.Chooser - var aminoAcids strings.Builder - var currentCodon strings.Builder - translationTable := codonTable.GenerateTranslationTable() - startCodonTable := codonTable.GenerateStartCodonTable() + Stats *Stats +} - startCodonReached := false - for _, letter := range sequence { - // add current nucleotide to currentCodon - currentCodon.WriteRune(letter) +// Copy returns a new TranslationTable whose fields are assigned directly from the receiver. NOTE(review): this is a shallow copy, not a deep one: +// the slices, the maps and the Stats pointer stay shared with the receiver, so it does not yet prevent unintended updates of the package-level tables. +func (table *TranslationTable) Copy() *TranslationTable { + return &TranslationTable{ + StartCodons: table.StartCodons, + StopCodons: table.StopCodons, + AminoAcids: table.AminoAcids, - // if current nucleotide is the third in a codon translate to aminoAcid write to aminoAcids and reset currentCodon. 
- // use start codon table for the first codon only, erroring out if an invalid start codon is provided - if currentCodon.Len() == 3 { - if startCodonReached { - aminoAcids.WriteString(translationTable[strings.ToUpper(currentCodon.String())]) - } else { - aminoAcid, ok := startCodonTable[strings.ToUpper(currentCodon.String())] - if !ok { - return "", fmt.Errorf("start codon %q is not in start codon table %v", currentCodon.String(), startCodonTable) - } - aminoAcids.WriteString(aminoAcid) - startCodonReached = true - } + StartCodonTable: table.StartCodonTable, + TranslationMap: table.TranslationMap, + Choosers: table.Choosers, - // reset codon string builder for next codon. - currentCodon.Reset() - } + Stats: table.Stats, } - return aminoAcids.String(), nil } -// Optimize takes an amino acid sequence and codonTable and returns an optimized codon sequence. Takes an optional random seed as last argument. -func Optimize(aminoAcids string, codonTable Table, randomState ...int) (string, error) { +// GetWeightedAminoAcids returns the amino acids along with their associated codon weights (the table's own AminoAcids slice, not a copy) +func (table *TranslationTable) GetWeightedAminoAcids() []AminoAcid { + return table.AminoAcids +} + +// OptimizeSequence will return a set of codons which can be used to encode the given amino acid sequence. The codons +// picked are weighted according to the computed translation table's weights. An empty input yields errEmptyAminoAcidString; randomState presumably carries an optional random seed (its handling is outside this hunk, confirm). +func (table *TranslationTable) OptimizeSequence(aminoAcids string, randomState ...int) (string, error) { // Finding any given aminoAcid is dependent upon it being capitalized, so // we do that here. 
aminoAcids = strings.ToUpper(aminoAcids) - if codonTable.IsEmpty() { - return "", errEmptyCodonTable - } if len(aminoAcids) == 0 { return "", errEmptyAminoAcidString } @@ -163,45 +156,149 @@ func Optimize(aminoAcids string, codonTable Table, randomState ...int) (string, } var codons strings.Builder - codonChooser, err := codonTable.Chooser() - if err != nil { - return "", err - } + codonChooser := table.Choosers for _, aminoAcid := range aminoAcids { chooser, ok := codonChooser[string(aminoAcid)] if !ok { return "", invalidAminoAcidError{aminoAcid} } + codons.WriteString(chooser.Pick().(string)) } + return codons.String(), nil } -// OptimizeTable weights each codon in a codon table according to input string codon frequency. -// This function actually mutates the codonTable struct itself. -func (table codonTable) OptimizeTable(sequence string) Table { +// UpdateWeights stores the given amino acid codon weights on the table and rebuilds its translation map and codon pickers. NOTE(review): the map and the choosers are built from table.AminoAcids before that field is reassigned, i.e. from the previous weights rather than from the aminoAcids argument; confirm this ordering is intended. +func (table *TranslationTable) UpdateWeights(aminoAcids []AminoAcid) error { + // regenerate a map of codons -> amino acid + + var updatedTranslationMap = make(map[string]string) + for _, aminoAcid := range table.AminoAcids { + for _, codon := range aminoAcid.Codons { + updatedTranslationMap[codon.Triplet] = aminoAcid.Letter + } + } + + table.TranslationMap = updatedTranslationMap + + // Update Chooser + updatedChoosers, err := newAminoAcidChoosers(table.AminoAcids) + if err != nil { + return err + } + + table.Choosers = updatedChoosers + table.AminoAcids = aminoAcids + + return nil +} + +// UpdateWeightsWithSequence will look at the coding regions in the given genbank data, and use those to generate new +// weights for the codons in the translation table. The next time a sequence is optimised, it will use those updated +// weights. 
+// +// This can be used to, for example, figure out which DNA sequence is needed to give the best yield of protein when +// trying to express a protein across different species. NOTE(review): Stats are updated before the empty-result check, GeneCount is overwritten on every call, and sequence[:3] would panic if a CDS sequence were empty (length 0 passes the modulo 3 filter); confirm. +func (table *TranslationTable) UpdateWeightsWithSequence(data genbank.Genbank) error { + codingRegions, err := extractCodingRegion(data) + if err != nil { + return err + } + + table.Stats.GeneCount = len(codingRegions) + for _, sequence := range codingRegions { + table.Stats.StartCodonCount[sequence[:3]]++ + } + + if len(codingRegions) == 0 { + return errNoCodingRegions + } + + // weight our codon optimization table using the regions we collected from the genbank file above + newWeights := weightAminoAcids(strings.Join(codingRegions, ""), table.AminoAcids) + + return table.UpdateWeights(newWeights) +} + +// Translate will return the amino acid sequence which the given DNA will yield. The first codon is looked up in the start codon table (an error is returned if it is missing); trailing letters that do not complete a codon are ignored. +func (table *TranslationTable) Translate(dnaSeq string) (string, error) { + if dnaSeq == "" { + return "", errEmptySequenceString + } + + var aminoAcids strings.Builder + var currentCodon strings.Builder + translationTable := table.TranslationMap + startCodonTable := table.StartCodonTable + + startCodonReached := false + for _, letter := range dnaSeq { + // add current nucleotide to currentCodon + currentCodon.WriteRune(letter) + + // if current nucleotide is the third in a codon translate to aminoAcid write to aminoAcids and reset currentCodon. + // use start codon table for the first codon only, erroring out if an invalid start codon is provided + if currentCodon.Len() == 3 { + if startCodonReached { + aminoAcids.WriteString(translationTable[strings.ToUpper(currentCodon.String())]) + } else { + aminoAcid, ok := startCodonTable[strings.ToUpper(currentCodon.String())] + if !ok { + return "", fmt.Errorf("start codon %q is not in start codon table %v", currentCodon.String(), startCodonTable) + } + aminoAcids.WriteString(aminoAcid) + startCodonReached = true + } + + // reset codon string builder for next codon. 
+ currentCodon.Reset() + } + } + return aminoAcids.String(), nil +} + +// weightAminoAcids sets the weight of each codon in the given amino acids to that codon's frequency in the input sequence. +// The given slice is modified in place and returned; previous weights are overwritten, not added to +func weightAminoAcids(sequence string, aminoAcids []AminoAcid) []AminoAcid { sequence = strings.ToUpper(sequence) codonFrequencyMap := getCodonFrequency(sequence) - for aminoAcidIndex, aminoAcid := range table.AminoAcids { + for aminoAcidIndex, aminoAcid := range aminoAcids { // apply weights to codonTable for codonIndex, codon := range aminoAcid.Codons { - table.AminoAcids[aminoAcidIndex].Codons[codonIndex].Weight = codonFrequencyMap[codon.Triplet] + aminoAcids[aminoAcidIndex].Codons[codonIndex].Weight = codonFrequencyMap[codon.Triplet] } } - return table + + return aminoAcids } -// GenerateStartCodonTable returns a mapping from the start codons of a Table to their associated amino acids. -// For our codonTable implementation, assumes that we always map to Met. -func (table codonTable) GenerateStartCodonTable() map[string]string { - result := make(map[string]string) - for _, codon := range table.StartCodons { - result[codon] = "M" +// extractCodingRegion loops through genbank data to find all CDS (coding sequences) +func extractCodingRegion(data genbank.Genbank) ([]string, error) { + codingRegions := []string{} + + // iterate through the features of the genbank file and if the feature is a coding region, append its sequence to codingRegions + for _, feature := range data.Features { + if feature.Type == "CDS" { + sequence, err := feature.GetSequence() + if err != nil { + return nil, err + } + + // Note: sometimes, genbank files will have annotated CDSs that are pseudo genes (not having triplet codons). + // This will shift the entire codon table, messing up the end results. To fix this, make sure to do a modulo + // check. 
+ if len(sequence)%3 != 0 { + continue + } + + codingRegions = append(codingRegions, sequence) + } } - return result + return codingRegions, nil } // getCodonFrequency takes a DNA sequence and returns a hashmap of its codons and their frequencies. @@ -231,17 +328,13 @@ func getCodonFrequency(sequence string) map[string]int { return codonFrequencyHashMap } -func (table codonTable) IsEmpty() bool { - return len(table.StartCodons) == 0 && len(table.StopCodons) == 0 && len(table.AminoAcids) == 0 -} - -// Chooser is a codonTable method to convert a codon table to a chooser -func (table codonTable) Chooser() (map[string]weightedRand.Chooser, error) { +// newAminoAcidChoosers builds one weighted codon chooser per given amino acid and returns them keyed by the amino acid's letter +func newAminoAcidChoosers(aminoAcids []AminoAcid) (map[string]weightedRand.Chooser, error) { // This maps codon tables structure to weightRand.NewChooser structure codonChooser := make(map[string]weightedRand.Chooser) // iterate over every amino acid in the codonTable - for _, aminoAcid := range table.AminoAcids { + for _, aminoAcid := range aminoAcids { // create a list of codon choices for this specific amino acid codonChoices := make([]weightedRand.Choice, len(aminoAcid.Codons)) @@ -264,7 +357,7 @@ func (table codonTable) Chooser() (map[string]weightedRand.Chooser, error) { // add this chooser set to the codonChooser map under the name of the aminoAcid it represents. chooser, err := newChooserFn(codonChoices...) 
if err != nil { - return nil, fmt.Errorf("weightedRand.NewChooser() error: %s", err) + return nil, fmt.Errorf("weightedRand.NewChooser() error: %w", err) } codonChooser[aminoAcid.Letter] = *chooser @@ -272,29 +365,6 @@ func (table codonTable) Chooser() (map[string]weightedRand.Chooser, error) { return codonChooser, nil } -// GenerateTranslationTable generates a map of codons -> amino acid -func (table codonTable) GenerateTranslationTable() map[string]string { - var translationMap = make(map[string]string) - for _, aminoAcid := range table.AminoAcids { - for _, codon := range aminoAcid.Codons { - translationMap[codon.Triplet] = aminoAcid.Letter - } - } - return translationMap -} - -func (table codonTable) GetStartCodons() []string { - return table.StartCodons -} - -func (table codonTable) GetStopCodons() []string { - return table.StopCodons -} - -func (table codonTable) GetAminoAcids() []AminoAcid { - return table.AminoAcids -} - /****************************************************************************** Oct, 15, 2020 @@ -323,7 +393,7 @@ Tim ******************************************************************************/ // Function to generate default codon tables from NCBI https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi -func generateCodonTable(aminoAcids, starts string) codonTable { +func generateCodonTable(aminoAcids, starts string) *TranslationTable { base1 := "TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG" base2 := "TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG" base3 := "TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG" @@ -349,16 +419,48 @@ func generateCodonTable(aminoAcids, starts string) codonTable { for k, v := range aminoAcidMap { aminoAcidSlice = append(aminoAcidSlice, AminoAcid{string(k), v}) } - return codonTable{startCodons, stopCodons, aminoAcidSlice} + + // generate a map of codons -> amino acid + + var translationMap = make(map[string]string) + for _, aminoAcid := range 
aminoAcidSlice { + for _, codon := range aminoAcid.Codons { + translationMap[codon.Triplet] = aminoAcid.Letter + } + } + + // Build the mapping from the start codons to their associated amino acid. + // This implementation assumes that every start codon maps to Met. + + startCodonsMap := make(map[string]string) + for _, codon := range startCodons { + startCodonsMap[codon] = "M" + } + + // This function runs while the package-level translationTablesByNumber map is initialised, so a failure here means we have an invalid codon table. + chooser, err := newAminoAcidChoosers(aminoAcidSlice) + if err != nil { + panic(fmt.Errorf("tried to generate an invalid codon table %w", err)) + } + + return &TranslationTable{ + StartCodons: startCodons, + StopCodons: stopCodons, + AminoAcids: aminoAcidSlice, + TranslationMap: translationMap, + StartCodonTable: startCodonsMap, + Choosers: chooser, + Stats: NewStats(), + } } -// GetCodonTable takes the index of desired NCBI codon table and returns it. -func GetCodonTable(index int) Table { - return defaultCodonTablesByNumber[index] +// NewTranslationTable takes the index of desired NCBI codon table and returns the result of calling Copy on it. NOTE(review): for an index missing from translationTablesByNumber the lookup yields a nil pointer which Copy would dereference; confirm callers only pass known indices. +func NewTranslationTable(index int) *TranslationTable { + return translationTablesByNumber[index].Copy() } -// defaultCodonTablesByNumber stores all codon tables published by NCBI https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi using numbered indices. -var defaultCodonTablesByNumber = map[int]codonTable{ +// translationTablesByNumber stores all codon tables published by NCBI https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi using numbered indices. 
+var translationTablesByNumber = map[int]*TranslationTable{ 1: generateCodonTable("FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "---M------**--*----M---------------M----------------------------"), 2: generateCodonTable("FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG", "----------**--------------------MMMM----------**---M------------"), 3: generateCodonTable("FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "----------**----------------------MM---------------M------------"), @@ -442,21 +544,21 @@ Keoni ******************************************************************************/ // ParseCodonJSON parses a codonTable JSON file. -func ParseCodonJSON(file []byte) Table { - var codonTable codonTable +func ParseCodonJSON(file []byte) *TranslationTable { + var codonTable TranslationTable _ = json.Unmarshal(file, &codonTable) - return codonTable + return &codonTable } // ReadCodonJSON reads a codonTable JSON file. -func ReadCodonJSON(path string) Table { +func ReadCodonJSON(path string) *TranslationTable { file, _ := os.ReadFile(path) codonTable := ParseCodonJSON(file) return codonTable } // WriteCodonJSON writes a codonTable struct out to JSON. -func WriteCodonJSON(codonTable Table, path string) { +func WriteCodonJSON(codonTable *TranslationTable, path string) { file, _ := json.MarshalIndent(codonTable, "", " ") _ = os.WriteFile(path, file, 0644) } @@ -492,27 +594,26 @@ Keoni // CompromiseCodonTable takes 2 CodonTables and makes a new codonTable // that is an equal compromise between the two tables. 
-func CompromiseCodonTable(firstCodonTable, secondCodonTable Table, cutOff float64) (Table, error) { - // Initialize output codonTable, c - var c codonTable +func CompromiseCodonTable(firstCodonTable, secondCodonTable *TranslationTable, cutOff float64) (*TranslationTable, error) { + // Copy the first table to base our merge on. + // + // This takes the start and stop codons from the first table + // and uses them as the start and stop codons of the final codonTable. + mergedTable := firstCodonTable.Copy() + // Check if cutOff is too high or low (this is converted to a percent) if cutOff < 0 { - return c, errors.New("cut off too low, cannot be less than 0") + return mergedTable, errors.New("cut off too low, cannot be less than 0") } if cutOff > 1 { - return c, errors.New("cut off too high, cannot be greater than 1") + return mergedTable, errors.New("cut off too high, cannot be greater than 1") } - // Take start and stop strings from first table - // and use them as start + stops in final codonTable - c.StartCodons = firstCodonTable.GetStartCodons() - c.StopCodons = firstCodonTable.GetStopCodons() - // Initialize the finalAminoAcid list for the output codonTable var finalAminoAcids []AminoAcid // Loop over all AminoAcids represented in the first codonTable - for _, firstAa := range firstCodonTable.GetAminoAcids() { + for _, firstAa := range firstCodonTable.AminoAcids { var firstTriplets []string var firstWeights []int var firstTotal int @@ -525,7 +626,7 @@ func CompromiseCodonTable(firstCodonTable, secondCodonTable Table, cutOff float6 firstTriplets = append(firstTriplets, firstCodon.Triplet) firstWeights = append(firstWeights, firstCodon.Weight) firstTotal = firstTotal + firstCodon.Weight - for _, secondAa := range secondCodonTable.GetAminoAcids() { + for _, secondAa := range secondCodonTable.AminoAcids { if secondAa.Letter == firstAa.Letter { for _, secondCodon := range secondAa.Codons { // For each codon from firstCodonTable, get the @@ -568,19 +669,24 @@ func 
CompromiseCodonTable(firstCodonTable, secondCodonTable Table, cutOff float6 // Append list of Codons to finalAminoAcids finalAminoAcids = append(finalAminoAcids, AminoAcid{firstAa.Letter, finalCodons}) } - c.AminoAcids = finalAminoAcids - return c, nil + + err := mergedTable.UpdateWeights(finalAminoAcids) + if err != nil { + return nil, err + } + + return mergedTable, nil } // AddCodonTable takes 2 CodonTables and adds them together to create // a new codonTable. -func AddCodonTable(firstCodonTable, secondCodonTable Table) Table { +func AddCodonTable(firstCodonTable, secondCodonTable *TranslationTable) (*TranslationTable, error) { // Add up codons var finalAminoAcids []AminoAcid - for _, firstAa := range firstCodonTable.GetAminoAcids() { + for _, firstAa := range firstCodonTable.AminoAcids { var finalCodons []Codon for _, firstCodon := range firstAa.Codons { - for _, secondAa := range secondCodonTable.GetAminoAcids() { + for _, secondAa := range secondCodonTable.AminoAcids { for _, secondCodon := range secondAa.Codons { if firstCodon.Triplet == secondCodon.Triplet { finalCodons = append(finalCodons, Codon{firstCodon.Triplet, firstCodon.Weight + secondCodon.Weight}) @@ -591,9 +697,12 @@ func AddCodonTable(firstCodonTable, secondCodonTable Table) Table { finalAminoAcids = append(finalAminoAcids, AminoAcid{firstAa.Letter, finalCodons}) } - return codonTable{ - StartCodons: firstCodonTable.GetStartCodons(), - StopCodons: firstCodonTable.GetStopCodons(), - AminoAcids: finalAminoAcids, + mergedTable := firstCodonTable.Copy() + + err := mergedTable.UpdateWeights(finalAminoAcids) + if err != nil { + return nil, err } + + return mergedTable, nil } diff --git a/synthesis/codon/codon_test.go b/synthesis/codon/codon_test.go index c5b84625..c9607cd0 100644 --- a/synthesis/codon/codon_test.go +++ b/synthesis/codon/codon_test.go @@ -16,7 +16,7 @@ func TestTranslation(t *testing.T) { gfpTranslation := 
"MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*" gfpDnaSequence := "ATGGCTAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGAATTAGATGGTGATGTTAATGGGCACAAATTTTCTGTCAGTGGAGAGGGTGAAGGTGATGCTACATACGGAAAGCTTACCCTTAAATTTATTTGCACTACTGGAAAACTACCTGTTCCATGGCCAACACTTGTCACTACTTTCTCTTATGGTGTTCAATGCTTTTCCCGTTATCCGGATCATATGAAACGGCATGACTTTTTCAAGAGTGCCATGCCCGAAGGTTATGTACAGGAACGCACTATATCTTTCAAAGATGACGGGAACTACAAGACGCGTGCTGAAGTCAAGTTTGAAGGTGATACCCTTGTTAATCGTATCGAGTTAAAAGGTATTGATTTTAAAGAAGATGGAAACATTCTCGGACACAAACTCGAGTACAACTATAACTCACACAATGTATACATCACGGCAGACAAACAAAAGAATGGAATCAAAGCTAACTTCAAAATTCGCCACAACATTGAAGATGGATCCGTTCAACTAGCAGACCATTATCAACAAAATACTCCAATTGGCGATGGCCCTGTCCTTTTACCAGACAACCATTACCTGTCGACACAATCTGCCCTTTCGAAAGATCCCAACGAAAAGCGTGACCACATGGTCCTTCTTGAGTTTGTAACTGCTGCTGGGATTACACATGGCATGGATGAGCTCTACAAATAA" - if got, _ := Translate(gfpDnaSequence, GetCodonTable(11)); got != gfpTranslation { + if got, _ := NewTranslationTable(11).Translate(gfpDnaSequence); got != gfpTranslation { t.Errorf("TestTranslation has failed. 
Translate has returned %q, want %q", got, gfpTranslation) } } @@ -27,7 +27,7 @@ func TestTranslationAlwaysMapsStartCodonToMet(t *testing.T) { gfpTranslation := "MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*" gfpDnaSequence := "TTGGCTAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGAATTAGATGGTGATGTTAATGGGCACAAATTTTCTGTCAGTGGAGAGGGTGAAGGTGATGCTACATACGGAAAGCTTACCCTTAAATTTATTTGCACTACTGGAAAACTACCTGTTCCATGGCCAACACTTGTCACTACTTTCTCTTATGGTGTTCAATGCTTTTCCCGTTATCCGGATCATATGAAACGGCATGACTTTTTCAAGAGTGCCATGCCCGAAGGTTATGTACAGGAACGCACTATATCTTTCAAAGATGACGGGAACTACAAGACGCGTGCTGAAGTCAAGTTTGAAGGTGATACCCTTGTTAATCGTATCGAGTTAAAAGGTATTGATTTTAAAGAAGATGGAAACATTCTCGGACACAAACTCGAGTACAACTATAACTCACACAATGTATACATCACGGCAGACAAACAAAAGAATGGAATCAAAGCTAACTTCAAAATTCGCCACAACATTGAAGATGGATCCGTTCAACTAGCAGACCATTATCAACAAAATACTCCAATTGGCGATGGCCCTGTCCTTTTACCAGACAACCATTACCTGTCGACACAATCTGCCCTTTCGAAAGATCCCAACGAAAAGCGTGACCACATGGTCCTTCTTGAGTTTGTAACTGCTGCTGGGATTACACATGGCATGGATGAGCTCTACAAATAA" - if got, _ := Translate(gfpDnaSequence, GetCodonTable(11)); got != gfpTranslation { + if got, _ := NewTranslationTable(11).Translate(gfpDnaSequence); got != gfpTranslation { t.Errorf("TestTranslation has failed. 
Translate has returned %q, want %q", got, gfpTranslation) } } @@ -35,23 +35,14 @@ func TestTranslationAlwaysMapsStartCodonToMet(t *testing.T) { func TestTranslationErrorsOnIncorrectStartCodon(t *testing.T) { badSequence := "GGG" - if _, gotErr := Translate(badSequence, GetCodonTable(11)); gotErr == nil { + if _, gotErr := NewTranslationTable(11).Translate(badSequence); gotErr == nil { t.Errorf("Translation should return an error if given an incorrect start codon") } } -func TestTranslationErrorsOnEmptyCodonTable(t *testing.T) { - emtpyCodonTable := codonTable{} - _, err := Translate("A", emtpyCodonTable) - - if err != errEmptyCodonTable { - t.Error("Translation should return an error if given an empty codon table") - } -} - func TestTranslationErrorsOnEmptyAminoAcidString(t *testing.T) { - nonEmptyCodonTable := GetCodonTable(1) - _, err := Translate("", nonEmptyCodonTable) + nonEmptyCodonTable := NewTranslationTable(1) + _, err := nonEmptyCodonTable.Translate("") if err != errEmptySequenceString { t.Error("Translation should return an error if given an empty sequence string") @@ -61,7 +52,7 @@ func TestTranslationErrorsOnEmptyAminoAcidString(t *testing.T) { func TestTranslationMixedCase(t *testing.T) { gfpTranslation := "MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*" gfpDnaSequence := 
"atggctagcaaaggagaagaacttttcactggagttgtcccaaTTCTTGTTGAATTAGATGGTGATGTTAATGGGCACAAATTTTCTGTCAGTGGAGAGGGTGAAGGTGATGCTACATACGGAAAGCTTACCCTTAAATTTATTTGCACTACTGGAAAACTACCTGTTCCATGGCCAACACTTGTCACTACTTTCTCTTATGGTGTTCAATGCTTTTCCCGTTATCCGGATCATATGAAACGGCATGACTTTTTCAAGAGTGCCATGCCCGAAGGTTATGTACAGGAACGCACTATATCTTTCAAAGATGACGGGAACTACAAGACGCGTGCTGAAGTCAAGTTTGAAGGTGATACCCTTGTTAATCGTATCGAGTTAAAAGGTATTGATTTTAAAGAAGATGGAAACATTCTCGGACACAAACTCGAGTACAACTATAACTCACACAATGTATACATCACGGCAGACAAACAAAAGAATGGAATCAAAGCTAACTTCAAAATTCGCCACAACATTGAAGATGGATCCGTTCAACTAGCAGACCATTATCAACAAAATACTCCAATTGGCGATGGCCCTGTCCTTTTACCAGACAACCATTACCTGTCGACACAATCTGCCCTTTCGAAAGATCCCAACGAAAAGCGTGACCACATGGTCCTTCTTGAGTTTGTAACTGCTGCTGGGATTACACATGGCATGGATGAGCTCTACAAATAA" - if got, _ := Translate(gfpDnaSequence, GetCodonTable(11)); got != gfpTranslation { + if got, _ := NewTranslationTable(11).Translate(gfpDnaSequence); got != gfpTranslation { t.Errorf("TestTranslationMixedCase has failed. Translate has returned %q, want %q", got, gfpTranslation) } } @@ -69,7 +60,7 @@ func TestTranslationMixedCase(t *testing.T) { func TestTranslationLowerCase(t *testing.T) { gfpTranslation := "MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*" gfpDnaSequence := 
"atggctagcaaaggagaagaacttttcactggagttgtcccaattcttgttgaattagatggtgatgttaatgggcacaaattttctgtcagtggagagggtgaaggtgatgctacatacggaaagcttacccttaaatttatttgcactactggaaaactacctgttccatggccaacacttgtcactactttctcttatggtgttcaatgcttttcccgttatccggatcatatgaaacggcatgactttttcaagagtgccatgcccgaaggttatgtacaggaacgcactatatctttcaaagatgacgggaactacaagacgcgtgctgaagtcaagtttgaaggtgatacccttgttaatcgtatcgagttaaaaggtattgattttaaagaagatggaaacattctcggacacaaactcgagtacaactataactcacacaatgtatacatcacggcagacaaacaaaagaatggaatcaaagctaacttcaaaattcgccacaacattgaagatggatccgttcaactagcagaccattatcaacaaaatactccaattggcgatggccctgtccttttaccagacaaccattacctgtcgacacaatctgccctttcgaaagatcccaacgaaaagcgtgaccacatggtccttcttgagtttgtaactgctgctgggattacacatggcatggatgagctctacaaataa" - if got, _ := Translate(gfpDnaSequence, GetCodonTable(11)); got != gfpTranslation { + if got, _ := NewTranslationTable(11).Translate(gfpDnaSequence); got != gfpTranslation { t.Errorf("TestTranslationLowerCase has failed. Translate has returned %q, want %q", got, gfpTranslation) } } @@ -78,27 +69,17 @@ func TestOptimize(t *testing.T) { gfpTranslation := "MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*" sequence, _ := genbank.Read("../../data/puc19.gbk") - codonTable := GetCodonTable(11) - // a string builder to build a single concatenated string of all coding regions - var codingRegionsBuilder strings.Builder - - // iterate through the features of the genbank file and if the feature is a coding region, append the sequence to the string builder - for _, feature := range sequence.Features { - if feature.Type == "CDS" { - sequence, _ := feature.GetSequence() - codingRegionsBuilder.WriteString(sequence) - } + table := NewTranslationTable(11) + err := table.UpdateWeightsWithSequence(sequence) + if err != nil { + t.Error(err) } - // get the concatenated sequence string of 
the coding regions - codingRegions := codingRegionsBuilder.String() - - // weight our codon optimization table using the regions we collected from the genbank file above - optimizationTable := codonTable.OptimizeTable(codingRegions) + codonTable := NewTranslationTable(11) - optimizedSequence, _ := Optimize(gfpTranslation, optimizationTable) - optimizedSequenceTranslation, _ := Translate(optimizedSequence, optimizationTable) + optimizedSequence, _ := table.OptimizeSequence(gfpTranslation) + optimizedSequenceTranslation, _ := codonTable.Translate(optimizedSequence) if optimizedSequenceTranslation != gfpTranslation { t.Errorf("TestOptimize has failed. Translate has returned %q, want %q", optimizedSequenceTranslation, gfpTranslation) @@ -108,27 +89,19 @@ func TestOptimize(t *testing.T) { func TestOptimizeSameSeed(t *testing.T) { var gfpTranslation = "MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*" var sequence, _ = genbank.Read("../../data/puc19.gbk") - var codonTable = GetCodonTable(11) - - // a string builder to build a single concatenated string of all coding regions - var codingRegionsBuilder strings.Builder - - // iterate through the features of the genbank file and if the feature is a coding region, append the sequence to the string builder - for _, feature := range sequence.Features { - if feature.Type == "CDS" { - sequence, _ := feature.GetSequence() - codingRegionsBuilder.WriteString(sequence) - } + optimizationTable := NewTranslationTable(11) + err := optimizationTable.UpdateWeightsWithSequence(sequence) + if err != nil { + t.Error(err) + } + if err != nil { + t.Error(err) } - // get the concatenated sequence string of the coding regions - codingRegions := codingRegionsBuilder.String() - - var optimizationTable = codonTable.OptimizeTable(codingRegions) randomSeed := 10 
- optimizedSequence, _ := Optimize(gfpTranslation, optimizationTable, randomSeed) - otherOptimizedSequence, _ := Optimize(gfpTranslation, optimizationTable, randomSeed) + optimizedSequence, _ := optimizationTable.OptimizeSequence(gfpTranslation, randomSeed) + otherOptimizedSequence, _ := optimizationTable.OptimizeSequence(gfpTranslation, randomSeed) if optimizedSequence != otherOptimizedSequence { t.Error("Optimized sequence with the same random seed are not the same") @@ -138,44 +111,23 @@ func TestOptimizeSameSeed(t *testing.T) { func TestOptimizeDifferentSeed(t *testing.T) { var gfpTranslation = "MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*" var sequence, _ = genbank.Read("../../data/puc19.gbk") - var codonTable = GetCodonTable(11) - - // a string builder to build a single concatenated string of all coding regions - var codingRegionsBuilder strings.Builder - - // iterate through the features of the genbank file and if the feature is a coding region, append the sequence to the string builder - for _, feature := range sequence.Features { - if feature.Type == "CDS" { - sequence, _ := feature.GetSequence() - codingRegionsBuilder.WriteString(sequence) - } + optimizationTable := NewTranslationTable(11) + err := optimizationTable.UpdateWeightsWithSequence(sequence) + if err != nil { + t.Error(err) } - // get the concatenated sequence string of the coding regions - codingRegions := codingRegionsBuilder.String() - - var optimizationTable = codonTable.OptimizeTable(codingRegions) - - optimizedSequence, _ := Optimize(gfpTranslation, optimizationTable) - otherOptimizedSequence, _ := Optimize(gfpTranslation, optimizationTable) + optimizedSequence, _ := optimizationTable.OptimizeSequence(gfpTranslation) + otherOptimizedSequence, _ := optimizationTable.OptimizeSequence(gfpTranslation) 
if optimizedSequence == otherOptimizedSequence { t.Error("Optimized sequence with different random seed have the same result") } } -func TestOptimizeErrorsOnEmptyCodonTable(t *testing.T) { - emtpyCodonTable := codonTable{} - _, err := Optimize("A", emtpyCodonTable) - - if err != errEmptyCodonTable { - t.Error("Optimize should return an error if given an empty codon table") - } -} - func TestOptimizeErrorsOnEmptyAminoAcidString(t *testing.T) { - nonEmptyCodonTable := GetCodonTable(1) - _, err := Optimize("", nonEmptyCodonTable) + nonEmptyCodonTable := NewTranslationTable(1) + _, err := nonEmptyCodonTable.OptimizeSequence("") if err != errEmptyAminoAcidString { t.Error("Optimize should return an error if given an empty amino acid string") @@ -183,32 +135,14 @@ func TestOptimizeErrorsOnEmptyAminoAcidString(t *testing.T) { } func TestOptimizeErrorsOnInvalidAminoAcid(t *testing.T) { aminoAcids := "TOP" - table := GetCodonTable(1) // does not contain 'O' + table := NewTranslationTable(1) // does not contain 'O' - _, optimizeErr := Optimize(aminoAcids, table) + _, optimizeErr := table.OptimizeSequence(aminoAcids) assert.EqualError(t, optimizeErr, invalidAminoAcidError{'O'}.Error()) } -func TestOptimizeErrorsOnBrokenChooser(t *testing.T) { - gfpTranslation := "MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*" - - chooserErr := errors.New("chooser rigged to fail") - - codonTable := &mockTable{ - ChooserFn: func() (map[string]weightedRand.Chooser, error) { - return nil, chooserErr - }, - IsEmptyFn: func() bool { - return false - }, - } - - _, err := Optimize(gfpTranslation, codonTable) - assert.EqualError(t, err, chooserErr.Error()) -} - func TestGetCodonFrequency(t *testing.T) { - translationTable := GetCodonTable(11).GenerateTranslationTable() + translationTable := 
NewTranslationTable(11).TranslationMap var codons strings.Builder @@ -251,21 +185,6 @@ func TestGetCodonFrequency(t *testing.T) { } } -func TestChooserError(t *testing.T) { - codonTable := GetCodonTable(11) - - oldChooserFn := newChooserFn - newChooserFn = func(choices ...weightedRand.Choice) (*weightedRand.Chooser, error) { - return nil, errors.New("new chooser rigged to fail") - } - defer func() { - newChooserFn = oldChooserFn - }() - - _, err := codonTable.Chooser() - assert.EqualError(t, err, "weightedRand.NewChooser() error: new chooser rigged to fail") -} - /****************************************************************************** JSON related tests begin here. @@ -294,46 +213,23 @@ Codon Compromise + Add related tests begin here. */ func TestCompromiseCodonTable(t *testing.T) { sequence, _ := genbank.Read("../../data/puc19.gbk") - codonTable := GetCodonTable(11) - // a string builder to build a single concatenated string of all coding regions - var codingRegionsBuilder strings.Builder + // weight our codon optimization table using the regions we collected from the genbank file above - // iterate through the features of the genbank file and if the feature is a coding region, append the sequence to the string builder - for _, feature := range sequence.Features { - if feature.Type == "CDS" { - sequence, _ := feature.GetSequence() - codingRegionsBuilder.WriteString(sequence) - } + optimizationTable := NewTranslationTable(11) + err := optimizationTable.UpdateWeightsWithSequence(sequence) + if err != nil { + t.Error(err) } - // get the concatenated sequence string of the coding regions - codingRegions := codingRegionsBuilder.String() - - // weight our codon optimization table using the regions we collected from the genbank file above - optimizationTable := codonTable.OptimizeTable(codingRegions) - sequence2, _ := genbank.Read("../../data/phix174.gb") - codonTable2 := GetCodonTable(11) - - // a string builder to build a single concatenated string of all coding 
regions - var codingRegionsBuilder2 strings.Builder - - // iterate through the features of the genbank file and if the feature is a coding region, append the sequence to the string builder - for _, feature := range sequence2.Features { - if feature.Type == "CDS" { - sequence, _ := feature.GetSequence() - codingRegionsBuilder2.WriteString(sequence) - } + optimizationTable2 := NewTranslationTable(11) + err = optimizationTable2.UpdateWeightsWithSequence(sequence2) + if err != nil { + t.Error(err) } - // get the concatenated sequence string of the coding regions - codingRegions2 := codingRegionsBuilder2.String() - - // weight our codon optimization table using the regions we collected from the genbank file above - optimizationTable2 := codonTable2.OptimizeTable(codingRegions2) - - _, err := CompromiseCodonTable(optimizationTable, optimizationTable2, -1.0) // Fails too low + _, err = CompromiseCodonTable(optimizationTable, optimizationTable2, -1.0) // Fails too low if err == nil { t.Errorf("Compromise table should fail on -1.0") } @@ -341,20 +237,53 @@ func TestCompromiseCodonTable(t *testing.T) { if err == nil { t.Errorf("Compromise table should fail on 10.0") } -} -type mockTable struct { - codonTable - ChooserFn func() (map[string]weightedRand.Chooser, error) - IsEmptyFn func() bool -} + // replace chooser fn with test one + newChooserFn = func(choices ...weightedRand.Choice) (*weightedRand.Chooser, error) { + return nil, errors.New("new chooser rigged to fail") + } -func (t *mockTable) Chooser() (map[string]weightedRand.Chooser, error) { - return t.ChooserFn() + defer func() { + newChooserFn = weightedRand.NewChooser + }() + + _, err = CompromiseCodonTable(optimizationTable, optimizationTable2, 0.1) + if err == nil { + t.Errorf("Compromise table should fail when new chooser func rigged") + } } -func (t *mockTable) IsEmpty() bool { - return t.IsEmptyFn() +func TestAddCodonTable(t *testing.T) { + sequence, _ := genbank.Read("../../data/puc19.gbk") + + // weight our 
codon optimization table using the regions we collected from the genbank file above + + optimizationTable := NewTranslationTable(11) + err := optimizationTable.UpdateWeightsWithSequence(sequence) + if err != nil { + t.Error(err) + } + + sequence2, _ := genbank.Read("../../data/phix174.gb") + optimizationTable2 := NewTranslationTable(11) + err = optimizationTable2.UpdateWeightsWithSequence(sequence2) + if err != nil { + t.Error(err) + } + + // replace chooser fn with test one + newChooserFn = func(choices ...weightedRand.Choice) (*weightedRand.Chooser, error) { + return nil, errors.New("new chooser rigged to fail") + } + + defer func() { + newChooserFn = weightedRand.NewChooser + }() + + _, err = AddCodonTable(optimizationTable, optimizationTable2) + if err == nil { + t.Errorf("Compromise table should fail when new chooser func rigged") + } } func TestCapitalizationRegression(t *testing.T) { @@ -362,29 +291,246 @@ func TestCapitalizationRegression(t *testing.T) { gfpTranslation := "MaSKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*" sequence, _ := genbank.Read("../../data/puc19.gbk") - codonTable := GetCodonTable(11) - // a string builder to build a single concatenated string of all coding regions - var codingRegionsBuilder strings.Builder + optimizationTable := NewTranslationTable(11) + err := optimizationTable.UpdateWeightsWithSequence(sequence) + if err != nil { + t.Error(err) + } + + optimizedSequence, _ := optimizationTable.OptimizeSequence(gfpTranslation, 1) + optimizedSequenceTranslation, _ := optimizationTable.Translate(optimizedSequence) - // iterate through the features of the genbank file and if the feature is a coding region, append the sequence to the string builder - for _, feature := range sequence.Features { - if feature.Type == "CDS" { - sequence, _ := 
feature.GetSequence() - codingRegionsBuilder.WriteString(sequence) - } + if optimizedSequenceTranslation != strings.ToUpper(gfpTranslation) { + t.Errorf("TestOptimize has failed. Translate has returned %q, want %q", optimizedSequenceTranslation, gfpTranslation) } +} + +func TestOptimizeSequence(t *testing.T) { + t.Parallel() + + var ( + gfpTranslation = "MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*" + optimisedGFP = "ATGGCAAGTAAGGGAGAAGAGCTTTTTACCGGCGTAGTACCAATTCTGGTAGAACTGGATGGTGATGTAAACGGTCACAAATTTAGTGTAAGCGGAGAAGGTGAGGGTGATGCTACCTATGGCAAACTGACCCTAAAGTTTATATGCACGACTGGAAAACTTCCGGTACCGTGGCCAACGTTAGTTACAACGTTTTCTTATGGAGTACAGTGCTTCAGCCGCTACCCAGATCATATGAAACGCCATGATTTCTTTAAGAGCGCCATGCCAGAGGGTTATGTTCAGGAGCGCACGATCTCGTTTAAGGATGATGGTAACTATAAGACTCGTGCTGAGGTGAAGTTCGAAGGCGATACCCTTGTAAATCGTATTGAATTGAAGGGTATAGACTTCAAGGAGGATGGAAATATTCTTGGACATAAGCTGGAATACAATTACAATTCACATAACGTTTATATAACTGCCGACAAGCAAAAAAACGGGATAAAAGCTAATTTTAAAATACGCCACAACATAGAGGACGGGTCGGTGCAACTAGCCGATCATTATCAACAAAACACACCAATCGGCGACGGACCAGTTCTGTTGCCCGATAATCATTACTTATCAACCCAAAGTGCCTTAAGTAAGGATCCGAACGAAAAGCGCGATCATATGGTACTTCTTGAGTTTGTTACCGCTGCAGGCATAACGCATGGCATGGACGAGCTATACAAATAA" + puc19 = func() genbank.Genbank { + seq, err := genbank.Read("../../data/puc19.gbk") + if err != nil { + t.Fatal(err) + } + + return seq + }() + ) + + tests := []struct { + name string + + sequenceToOptimise string + updateWeightsWith genbank.Genbank + wantOptimised string + + wantUpdateWeightsErr error + wantOptimiseErr error + }{ + { + name: "ok", + + sequenceToOptimise: gfpTranslation, + updateWeightsWith: puc19, + wantOptimised: optimisedGFP, + + wantUpdateWeightsErr: nil, + wantOptimiseErr: nil, + }, + { + name: "giving no sequence to optimise", - // get the concatenated sequence string of the coding regions - codingRegions := 
codingRegionsBuilder.String() + sequenceToOptimise: "", + updateWeightsWith: puc19, + wantOptimised: "", - // weight our codon optimization table using the regions we collected from the genbank file above - optimizationTable := codonTable.OptimizeTable(codingRegions) + wantUpdateWeightsErr: nil, + wantOptimiseErr: errEmptyAminoAcidString, + }, + { + name: "updating weights with a sequence with no CDS", - optimizedSequence, _ := Optimize(gfpTranslation, optimizationTable) - optimizedSequenceTranslation, _ := Translate(optimizedSequence, optimizationTable) + sequenceToOptimise: "", + updateWeightsWith: genbank.Genbank{}, + wantOptimised: "", - if optimizedSequenceTranslation != strings.ToUpper(gfpTranslation) { - t.Errorf("TestOptimize has failed. Translate has returned %q, want %q", optimizedSequenceTranslation, gfpTranslation) + wantUpdateWeightsErr: errNoCodingRegions, + wantOptimiseErr: errEmptyAminoAcidString, + }, + } + + for _, tt := range tests { + var tt = tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + optimizationTable := NewTranslationTable(11) + err := optimizationTable.UpdateWeightsWithSequence(tt.updateWeightsWith) + if !errors.Is(err, tt.wantUpdateWeightsErr) { + t.Errorf("got %v, want %v", err, tt.wantUpdateWeightsErr) + } + + got, err := optimizationTable.OptimizeSequence(tt.sequenceToOptimise, 1) + if !errors.Is(err, tt.wantOptimiseErr) { + t.Errorf("got %v, want %v", err, tt.wantOptimiseErr) + } + + if !cmp.Equal(got, tt.wantOptimised) { + t.Errorf("got and tt.wantOptimised didn't match %s", cmp.Diff(got, tt.wantOptimised)) + } + }) + } +} + +func TestNewAminoAcidChooser(t *testing.T) { + var ( + mockError = errors.New("new chooser rigged to fail") + ) + + tests := []struct { + name string + + aminoAcids []AminoAcid + + chooserFn func(choices ...weightedRand.Choice) (*weightedRand.Chooser, error) + + wantErr error + }{ + { + name: "ok", + + aminoAcids: []AminoAcid{ + { + Letter: "R", + Codons: []Codon{ + { + Triplet: "CGU", + Weight: 
1, + }, + }, + }, + }, + + chooserFn: weightedRand.NewChooser, + + wantErr: nil, + }, + { + name: "chooser fn constructor error", + + aminoAcids: []AminoAcid{ + { + Letter: "R", + Codons: []Codon{ + { + Triplet: "CGU", + Weight: 1, + }, + }, + }, + }, + + chooserFn: func(choices ...weightedRand.Choice) (*weightedRand.Chooser, error) { + return nil, mockError + }, + + wantErr: mockError, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // replace chooser fn with test one + newChooserFn = tt.chooserFn + + defer func() { + newChooserFn = weightedRand.NewChooser + }() + + _, err := newAminoAcidChoosers(tt.aminoAcids) + if !errors.Is(err, tt.wantErr) { + t.Errorf("got %v, want %v", err, tt.wantErr) + } + }) + } +} + +func TestUpdateWeights(t *testing.T) { + var ( + mockError = errors.New("new chooser rigged to fail") + ) + + tests := []struct { + name string + + aminoAcids []AminoAcid + + chooserFn func(choices ...weightedRand.Choice) (*weightedRand.Chooser, error) + + wantErr error + }{ + { + name: "ok", + + aminoAcids: []AminoAcid{ + { + Letter: "R", + Codons: []Codon{ + { + Triplet: "CGU", + Weight: 1, + }, + }, + }, + }, + + chooserFn: weightedRand.NewChooser, + + wantErr: nil, + }, + { + name: "chooser fn constructor error", + + aminoAcids: []AminoAcid{ + { + Letter: "R", + Codons: []Codon{ + { + Triplet: "CGU", + Weight: 1, + }, + }, + }, + }, + + chooserFn: func(choices ...weightedRand.Choice) (*weightedRand.Chooser, error) { + return nil, mockError + }, + + wantErr: mockError, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // replace chooser fn with test one + newChooserFn = tt.chooserFn + + defer func() { + newChooserFn = weightedRand.NewChooser + }() + + optimizationTable := NewTranslationTable(11) + + err := optimizationTable.UpdateWeights(tt.aminoAcids) + if !errors.Is(err, tt.wantErr) { + t.Errorf("got %v, want %v", err, tt.wantErr) + } + }) } } diff --git a/synthesis/codon/example_test.go 
b/synthesis/codon/example_test.go index ce5f9061..0374a8f2 100644 --- a/synthesis/codon/example_test.go +++ b/synthesis/codon/example_test.go @@ -3,69 +3,89 @@ package codon_test import ( "fmt" "os" - "strings" "github.com/TimothyStiles/poly/io/genbank" "github.com/TimothyStiles/poly/synthesis/codon" ) -func ExampleTranslate() { +func ExampleTranslationTable_Translate() { gfpTranslation := "MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*" gfpDnaSequence := "ATGGCTAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGAATTAGATGGTGATGTTAATGGGCACAAATTTTCTGTCAGTGGAGAGGGTGAAGGTGATGCTACATACGGAAAGCTTACCCTTAAATTTATTTGCACTACTGGAAAACTACCTGTTCCATGGCCAACACTTGTCACTACTTTCTCTTATGGTGTTCAATGCTTTTCCCGTTATCCGGATCATATGAAACGGCATGACTTTTTCAAGAGTGCCATGCCCGAAGGTTATGTACAGGAACGCACTATATCTTTCAAAGATGACGGGAACTACAAGACGCGTGCTGAAGTCAAGTTTGAAGGTGATACCCTTGTTAATCGTATCGAGTTAAAAGGTATTGATTTTAAAGAAGATGGAAACATTCTCGGACACAAACTCGAGTACAACTATAACTCACACAATGTATACATCACGGCAGACAAACAAAAGAATGGAATCAAAGCTAACTTCAAAATTCGCCACAACATTGAAGATGGATCCGTTCAACTAGCAGACCATTATCAACAAAATACTCCAATTGGCGATGGCCCTGTCCTTTTACCAGACAACCATTACCTGTCGACACAATCTGCCCTTTCGAAAGATCCCAACGAAAAGCGTGACCACATGGTCCTTCTTGAGTTTGTAACTGCTGCTGGGATTACACATGGCATGGATGAGCTCTACAAATAA" - testTranslation, _ := codon.Translate(gfpDnaSequence, codon.GetCodonTable(11)) // need to specify which codons map to which amino acids per NCBI table + testTranslation, _ := codon.NewTranslationTable(11).Translate(gfpDnaSequence) // need to specify which codons map to which amino acids per NCBI table fmt.Println(gfpTranslation == testTranslation) // output: true } -func ExampleOptimize() { +func ExampleTranslationTable_UpdateWeights() { gfpTranslation := 
"MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*" - - sequence, _ := genbank.Read("../../data/puc19.gbk") - codonTable := codon.GetCodonTable(11) - - // a string builder to build a single concatenated string of all coding regions - var codingRegionsBuilder strings.Builder - - // initiate genes - genes := 0 - // iterate through the features of the genbank file and if the feature is a coding region, append the sequence to the string builder - for _, feature := range sequence.Features { - if feature.Type == "CDS" { - sequence, _ := feature.GetSequence() - // Note: sometimes, genbank files will have annotated CDSs that are pseudo genes (not having triplet codons). - // This will shift the entire codon table, messing up the end results. To fix this, make sure to do a modulo - // check. - if len(sequence)%3 == 0 { - codingRegionsBuilder.WriteString(sequence) - - // Another good double check is to count genes, then count stop codons. 
- genes++ - } - } + sequenceWithCustomWeights := "ATGGCAAGTAAGGGAGAAGAGCTTTTTACCGGCGTAGTACCAATTCTGGTAGAACTGGATGGTGATGTAAACGGTCACAAATTTAGTGTAAGCGGAGAAGGTGAGGGTGATGCTACCTATGGCAAACTGACCCTAAAGTTTATATGCACGACTGGAAAACTTCCGGTACCGTGGCCAACGTTAGTTACAACGTTTTCTTATGGAGTACAGTGCTTCAGCCGCTACCCAGATCATATGAAACGCCATGATTTCTTTAAGAGCGCCATGCCAGAGGGTTATGTTCAGGAGCGCACGATCTCGTTTAAGGATGATGGTAACTATAAGACTCGTGCTGAGGTGAAGTTCGAAGGCGATACCCTTGTAAATCGTATTGAATTGAAGGGTATAGACTTCAAGGAGGATGGAAATATTCTTGGACATAAGCTGGAATACAATTACAATTCACATAACGTTTATATAACTGCCGACAAGCAAAAAAACGGGATAAAAGCTAATTTTAAAATACGCCACAACATAGAGGACGGGTCGGTGCAACTAGCCGATCATTATCAACAAAACACACCAATCGGCGACGGACCAGTTCTGTTGCCCGATAATCATTACTTATCAACCCAAAGTGCCTTAAGTAAGGATCCGAACGAAAAGCGCGATCATATGGTACTTCTTGAGTTTGTTACCGCTGCAGGCATAACGCATGGCATGGACGAGCTATACAAATAA" + + table := codon.NewTranslationTable(11) + + // this example is using custom weights for different codons for Arginine. Use this if you would rather use your own + // codon weights, they can also be computed for you with `UpdateWeightsWithSequence`. 
+ + err := table.UpdateWeights([]codon.AminoAcid{ + { + Letter: "R", + Codons: []codon.Codon{ + { + Triplet: "CGU", + Weight: 1, + }, + { + Triplet: "CGA", + Weight: 2, + }, + { + Triplet: "CGG", + Weight: 4, + }, + { + Triplet: "AGA", + Weight: 6, + }, + { + Triplet: "AGG", + Weight: 2, + }, + }, + }, + }) + if err != nil { + fmt.Println("Could not update weights in example") } - // get the concatenated sequence string of the coding regions - codingRegions := codingRegionsBuilder.String() + optimizedSequence, _ := table.OptimizeSequence(gfpTranslation, 1) - // weight our codon optimization table using the regions we collected from the genbank file above - optimizationTable := codonTable.OptimizeTable(codingRegions) + fmt.Println(optimizedSequence == sequenceWithCustomWeights) + // output: true +} + +func ExampleTranslationTable_OptimizeSequence() { + gfpTranslation := "MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*" + + sequence, _ := genbank.Read("../../data/puc19.gbk") + codonTable := codon.NewTranslationTable(11) + _ = codonTable.UpdateWeightsWithSequence(sequence) // Here, we double check if the number of genes is equal to the number of stop codons stopCodonCount := 0 - for _, aa := range optimizationTable.GetAminoAcids() { + for _, aa := range codonTable.AminoAcids { if aa.Letter == "*" { for _, codon := range aa.Codons { stopCodonCount = stopCodonCount + codon.Weight } } } - if stopCodonCount != genes { + + if stopCodonCount != codonTable.Stats.GeneCount { fmt.Println("Stop codons don't equal number of genes!") } - optimizedSequence, _ := codon.Optimize(gfpTranslation, optimizationTable) - optimizedSequenceTranslation, _ := codon.Translate(optimizedSequence, optimizationTable) + optimizedSequence, _ := codonTable.OptimizeSequence(gfpTranslation) + 
optimizedSequenceTranslation, _ := codonTable.Translate(optimizedSequence) fmt.Println(optimizedSequenceTranslation == gfpTranslation) // output: true @@ -74,7 +94,7 @@ func ExampleOptimize() { func ExampleReadCodonJSON() { codontable := codon.ReadCodonJSON("../../data/bsub_codon_test.json") - fmt.Println(codontable.GetAminoAcids()[0].Codons[0].Weight) + fmt.Println(codontable.GetWeightedAminoAcids()[0].Codons[0].Weight) //output: 28327 } @@ -82,7 +102,7 @@ func ExampleParseCodonJSON() { file, _ := os.ReadFile("../../data/bsub_codon_test.json") codontable := codon.ParseCodonJSON(file) - fmt.Println(codontable.GetAminoAcids()[0].Codons[0].Weight) + fmt.Println(codontable.GetWeightedAminoAcids()[0].Codons[0].Weight) //output: 28327 } @@ -94,52 +114,29 @@ func ExampleWriteCodonJSON() { // cleaning up test data os.Remove("../../data/codon_test.json") - fmt.Println(testCodonTable.GetAminoAcids()[0].Codons[0].Weight) + fmt.Println(testCodonTable.GetWeightedAminoAcids()[0].Codons[0].Weight) //output: 28327 } func ExampleCompromiseCodonTable() { sequence, _ := genbank.Read("../../data/puc19.gbk") - codonTable := codon.GetCodonTable(11) - - // a string builder to build a single concatenated string of all coding regions - var codingRegionsBuilder strings.Builder - - // iterate through the features of the genbank file and if the feature is a coding region, append the sequence to the string builder - for _, feature := range sequence.Features { - if feature.Type == "CDS" { - sequence, _ := feature.GetSequence() - codingRegionsBuilder.WriteString(sequence) - } - } - - // get the concatenated sequence string of the coding regions - codingRegions := codingRegionsBuilder.String() // weight our codon optimization table using the regions we collected from the genbank file above - optimizationTable := codonTable.OptimizeTable(codingRegions) + optimizationTable := codon.NewTranslationTable(11) + err := optimizationTable.UpdateWeightsWithSequence(sequence) + if err != nil { + 
panic(fmt.Errorf("got unexpected error in an example: %w", err)) + } sequence2, _ := genbank.Read("../../data/phix174.gb") - codonTable2 := codon.GetCodonTable(11) - // a string builder to build a single concatenated string of all coding regions - var codingRegionsBuilder2 strings.Builder - - // iterate through the features of the genbank file and if the feature is a coding region, append the sequence to the string builder - for _, feature := range sequence2.Features { - if feature.Type == "CDS" { - sequence, _ := feature.GetSequence() - codingRegionsBuilder2.WriteString(sequence) - } + optimizationTable2 := codon.NewTranslationTable(11) + err = optimizationTable2.UpdateWeightsWithSequence(sequence2) + if err != nil { + panic(fmt.Errorf("got unexpected error in an example: %w", err)) } - // get the concatenated sequence string of the coding regions - codingRegions2 := codingRegionsBuilder2.String() - - // weight our codon optimization table using the regions we collected from the genbank file above - optimizationTable2 := codonTable2.OptimizeTable(codingRegions2) - finalTable, _ := codon.CompromiseCodonTable(optimizationTable, optimizationTable2, 0.1) - for _, aa := range finalTable.GetAminoAcids() { + for _, aa := range finalTable.GetWeightedAminoAcids() { for _, codon := range aa.Codons { if codon.Triplet == "TAA" { fmt.Println(codon.Weight) @@ -151,47 +148,27 @@ func ExampleCompromiseCodonTable() { func ExampleAddCodonTable() { sequence, _ := genbank.Read("../../data/puc19.gbk") - codonTable := codon.GetCodonTable(11) - - // a string builder to build a single concatenated string of all coding regions - var codingRegionsBuilder strings.Builder - - // iterate through the features of the genbank file and if the feature is a coding region, append the sequence to the string builder - for _, feature := range sequence.Features { - if feature.Type == "CDS" { - sequence, _ := feature.GetSequence() - codingRegionsBuilder.WriteString(sequence) - } - } - - // get the 
concatenated sequence string of the coding regions - codingRegions := codingRegionsBuilder.String() // weight our codon optimization table using the regions we collected from the genbank file above - optimizationTable := codonTable.OptimizeTable(codingRegions) + optimizationTable := codon.NewTranslationTable(11) + err := optimizationTable.UpdateWeightsWithSequence(sequence) + if err != nil { + panic(fmt.Errorf("got unexpected error in an example: %w", err)) + } sequence2, _ := genbank.Read("../../data/phix174.gb") - codonTable2 := codon.GetCodonTable(11) - - // a string builder to build a single concatenated string of all coding regions - var codingRegionsBuilder2 strings.Builder - - // iterate through the features of the genbank file and if the feature is a coding region, append the sequence to the string builder - for _, feature := range sequence2.Features { - if feature.Type == "CDS" { - sequence, _ := feature.GetSequence() - codingRegionsBuilder2.WriteString(sequence) - } + optimizationTable2 := codon.NewTranslationTable(11) + err = optimizationTable2.UpdateWeightsWithSequence(sequence2) + if err != nil { + panic(fmt.Errorf("got unexpected error in an example: %w", err)) } - // get the concatenated sequence string of the coding regions - codingRegions2 := codingRegionsBuilder2.String() - - // weight our codon optimization table using the regions we collected from the genbank file above - optimizationTable2 := codonTable2.OptimizeTable(codingRegions2) + finalTable, err := codon.AddCodonTable(optimizationTable, optimizationTable2) + if err != nil { + panic(fmt.Errorf("got error in adding codon table example: %w", err)) + } - finalTable := codon.AddCodonTable(optimizationTable, optimizationTable2) - for _, aa := range finalTable.GetAminoAcids() { + for _, aa := range finalTable.AminoAcids { for _, codon := range aa.Codons { if codon.Triplet == "GGC" { fmt.Println(codon.Weight) diff --git a/synthesis/fix/synthesis.go b/synthesis/fix/synthesis.go index 
4aa50767..ad4e1663 100644 --- a/synthesis/fix/synthesis.go +++ b/synthesis/fix/synthesis.go @@ -238,7 +238,7 @@ func Cds(sequence string, codontable codon.Table, problematicSequenceFuncs []fun // Build historical maps and full amino acid weights aminoAcidWeightTable := make(map[string]int) - for _, aminoAcid := range codontable.GetAminoAcids() { + for _, aminoAcid := range codontable.GetWeightedAminoAcids() { var aminoAcidTotal int for _, codon := range aminoAcid.Codons { // Get the total weights of all the codons for a given amino acid. @@ -271,7 +271,7 @@ func Cds(sequence string, codontable codon.Table, problematicSequenceFuncs []fun // Build weight map. The weight map gives the relative normalized weight of // any given codon triplet. - for _, aminoAcid := range codontable.GetAminoAcids() { + for _, aminoAcid := range codontable.GetWeightedAminoAcids() { for _, codon := range aminoAcid.Codons { codonWeightRatio := float64(codon.Weight) / float64(aminoAcidWeightTable[aminoAcid.Letter]) normalizedCodonWeight := 100 * codonWeightRatio diff --git a/synthesis/fix/synthesis_test.go b/synthesis/fix/synthesis_test.go index 9e274d95..7d6ceb99 100644 --- a/synthesis/fix/synthesis_test.go +++ b/synthesis/fix/synthesis_test.go @@ -40,7 +40,7 @@ func BenchmarkCds(b *testing.B) { var functions []func(string, chan DnaSuggestion, *sync.WaitGroup) functions = append(functions, RemoveSequence([]string{"GAAGAC", "GGTCTC", "GCGATG", "CGTCTC", "GCTCTTC", "CACCTGC"}, "TypeIIS restriction enzyme site.")) for i := 0; i < b.N; i++ { - seq, _ := codon.Optimize(phusion, codonTable) + seq, _ := codonTable.OptimizeSequence(phusion) optimizedSeq, changes, err := Cds(seq, codonTable, functions) if err != nil { b.Errorf("Failed to fix phusion with error: %s", err) @@ -76,7 +76,7 @@ func TestCds(t *testing.T) { phusion := 
"MGHHHHHHHHHHSSGILDVDYITEEGKPVIRLFKKENGKFKIEHDRTFRPYIYALLRDDSKIEEVKKITGERHGKIVRIVDVEKVEKKFLGKPITVWKLYLEHPQDVPTIREKVREHPAVVDIFEYDIPFAKRYLIDKGLIPMEGEEELKILAFDIETLYHEGEEFGKGPIIMISYADENEAKVITWKNIDLPYVEVVSSEREMIKRFLRIIREKDPDIIVTYNGDSFDFPYLAKRAEKLGIKLTIGRDGSEPKMQRIGDMTAVEVKGRIHFDLYHVITRTINLPTYTLEAVYEAIFGKPKEKVYADEIAKAWESGENLERVAKYSMEDAKATYELGKEFLPMEIQLSRLVGQPLWDVSRSSTGNLVEWFLLRKAYERNEVAPNKPSEEEYQRRLRESYTGGFVKEPEKGLWENIVYLDFRALYPSIIITHNVSPDTLNLEGCKNYDIAPQVGHKFCKDIPGFIPSLLGHLLEERQKIKTKMKETQDPIEKILLDYRQKAIKLLANSFYGYYGYAKARWYCKECAESVTAWGRKYIELVWKELEEKFGFKVLYIDTDGLYATIPGGESEEIKKKALEFVKYINSKLPGLLELEYEGFYKRGFFVTKKRYAVIDEEGKVITRGLEIVRRDWSEIAKETQARVLETILKHGDVEEAVRIVKEVIQKLANYEIPPEKLAIYEQITRPLHEYKAIGPHVAVAKKLAAKGVKIKPGMVIGYIVLRGDGPISNRAILAEEYDPKKHKYDAEYYIENQVLPAVLRILEGFGYRKEDLRYQKTRQVGLTSWLNIKKSGTGGGGATVKFKYKGEEKEVDISKIKKVWRVGKMISFTYDEGGGKTGRGAVSEKDAPKELLQMLEKQKK*" var functions []func(string, chan DnaSuggestion, *sync.WaitGroup) functions = append(functions, RemoveSequence([]string{"GAAGAC", "GGTCTC", "GCGATG", "CGTCTC", "GCTCTTC", "CACCTGC"}, "TypeIIS restriction enzyme site.")) - seq, _ := codon.Optimize(phusion, codonTable) + seq, _ := codonTable.OptimizeSequence(phusion) optimizedSeq, _, err := Cds(seq, codonTable, functions) if err != nil { t.Errorf("Failed with error: %s", err)