Skip to content

Commit

Permalink
Add FragmentWithOverhangs (#387)
Browse files Browse the repository at this point in the history
* Add FragmentWithOverhangs

* Fragment naming updated

---------

Co-authored-by: Tim <[email protected]>
  • Loading branch information
Koeng101 and TimothyStiles authored Nov 15, 2023
1 parent 96b5cff commit 489cf97
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 10 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Alternative start codons can now be used in the `synthesis/codon` DNA -> protein translation package (#305)
- Added a parser and writer for the `pileup` sequence alignment format (#329)
- Added statistics to the `synthesis/codon` package (keeping track of the observed start codon occurrences in a translation table) (#350)
- Added `FragmentWithOverhangs` to the `synthesis/fragment` package, which fragments a sequence using only a given set of overhangs (#387)




### Fixed
- `fastq` parser no longer becomes de-aligned when reading (#325)
Expand Down
40 changes: 30 additions & 10 deletions synthesis/fragment/fragment.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,11 +98,11 @@ func NextOverhang(currentOverhangs []string) string {
}

// optimizeOverhangIteration takes in a sequence and optimally fragments it.
func optimizeOverhangIteration(sequence string, minFragmentSize int, maxFragmentSize int, existingFragments []string, existingOverhangs []string) ([]string, float64, error) {
func optimizeOverhangIteration(sequence string, minFragmentSize int, maxFragmentSize int, existingFragments []string, excludeOverhangs []string, includeOverhangs []string) ([]string, float64, error) {
// If the sequence is smaller than maxFragment size, stop iteration.
if len(sequence) < maxFragmentSize {
existingFragments = append(existingFragments, sequence)
return existingFragments, SetEfficiency(existingOverhangs), nil
return existingFragments, SetEfficiency(excludeOverhangs), nil
}

// Make sure minFragmentSize > maxFragmentSize
Expand Down Expand Up @@ -136,23 +136,35 @@ func optimizeOverhangIteration(sequence string, minFragmentSize int, maxFragment
var bestOverhangEfficiency float64
var bestOverhangPosition int
var alreadyExists bool
var buildAvailable bool
for overhangOffset := 0; overhangOffset <= maxFragmentSize-minFragmentSize; overhangOffset++ {
// We go from max -> min, so we can maximize the size of our fragments
overhangPosition := maxFragmentSize - overhangOffset
overhangToTest := sequence[overhangPosition-4 : overhangPosition]

// Make sure overhang isn't already in set
alreadyExists = false
for _, existingOverhang := range existingOverhangs {
if existingOverhang == overhangToTest || transform.ReverseComplement(existingOverhang) == overhangToTest {
for _, excludeOverhang := range excludeOverhangs {
if excludeOverhang == overhangToTest || transform.ReverseComplement(excludeOverhang) == overhangToTest {
alreadyExists = true
}
}
if !alreadyExists {
// Make sure overhang is in set of includeOverhangs. If includeOverhangs is
// blank, skip this check.
buildAvailable = false
if len(includeOverhangs) == 0 {
buildAvailable = true
}
for _, includeOverhang := range includeOverhangs {
if includeOverhang == overhangToTest || transform.ReverseComplement(includeOverhang) == overhangToTest {
buildAvailable = true
}
}
if !alreadyExists && buildAvailable {
// See if this overhang is a palindrome
if !checks.IsPalindromic(overhangToTest) {
// Get this overhang set's efficiency
setEfficiency := SetEfficiency(append(existingOverhangs, overhangToTest))
setEfficiency := SetEfficiency(append(excludeOverhangs, overhangToTest))

// If this overhang is more efficient than any other found so far, set it as the best!
if setEfficiency > bestOverhangEfficiency {
Expand All @@ -167,16 +179,24 @@ func optimizeOverhangIteration(sequence string, minFragmentSize int, maxFragment
return []string{}, float64(0), fmt.Errorf("bestOverhangPosition failed by equaling zero")
}
existingFragments = append(existingFragments, sequence[:bestOverhangPosition])
existingOverhangs = append(existingOverhangs, sequence[bestOverhangPosition-4:bestOverhangPosition])
excludeOverhangs = append(excludeOverhangs, sequence[bestOverhangPosition-4:bestOverhangPosition])
sequence = sequence[bestOverhangPosition-4:]
return optimizeOverhangIteration(sequence, minFragmentSize, maxFragmentSize, existingFragments, existingOverhangs)
return optimizeOverhangIteration(sequence, minFragmentSize, maxFragmentSize, existingFragments, excludeOverhangs, includeOverhangs)
}

// Fragment fragments a sequence into fragments between the min and max size,
// choosing fragment ends for optimal assembly efficiency. Since fragments will
// be inserted into either a vector or primer binding sites, the first 4 and
// last 4 base pairs are the initial overhang set.
func Fragment(sequence string, minFragmentSize int, maxFragmentSize int, existingOverhangs []string) ([]string, float64, error) {
func Fragment(sequence string, minFragmentSize int, maxFragmentSize int, excludeOverhangs []string) ([]string, float64, error) {
sequence = strings.ToUpper(sequence)
return optimizeOverhangIteration(sequence, minFragmentSize, maxFragmentSize, []string{}, append([]string{sequence[:4], sequence[len(sequence)-4:]}, excludeOverhangs...), []string{})
}

// FragmentWithOverhangs fragments a sequence with only a certain overhang set.
// This is useful if you are constraining the set of possible overhangs when
// doing more advanced forms of cloning.
//
// The sequence is upper-cased before fragmenting, and its first 4 and last 4
// base pairs seed the set of overhangs that are already in use.
// excludeOverhangs lists additional overhangs (checked together with their
// reverse complements) that must not be chosen as fragment junctions.
// includeOverhangs restricts junctions to the listed overhangs or their
// reverse complements; when it is empty, no such restriction is applied.
// Neither of the caller's slices is modified.
//
// It returns the fragments, the efficiency of the resulting overhang set, and
// an error if the sequence is shorter than a single 4 base pair overhang or if
// fragmentation fails.
func FragmentWithOverhangs(sequence string, minFragmentSize int, maxFragmentSize int, excludeOverhangs []string, includeOverhangs []string) ([]string, float64, error) {
	sequence = strings.ToUpper(sequence)
	// The first and last 4 bases seed the overhang set, so a shorter sequence
	// cannot be sliced without panicking.
	if len(sequence) < 4 {
		return []string{}, float64(0), fmt.Errorf("sequence must be at least 4 base pairs long, got %d", len(sequence))
	}
	// Build a fresh slice so appends made while fragmenting never write into
	// the caller's backing array.
	initialOverhangs := append([]string{sequence[:4], sequence[len(sequence)-4:]}, excludeOverhangs...)
	return optimizeOverhangIteration(sequence, minFragmentSize, maxFragmentSize, []string{}, initialOverhangs, includeOverhangs)
}
10 changes: 10 additions & 0 deletions synthesis/fragment/fragment_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,3 +85,13 @@ func TestRegressionTestMatching12(t *testing.T) {
t.Errorf("Expected efficiency of .99 - approximately matches NEB ligase fidelity viewer of .97. Got: %g", efficiency)
}
}

// TestFragmentWithOverhangs checks that a gene can be fragmented into pieces
// of 90-110 base pairs when junctions are restricted to a fixed overhang set.
func TestFragmentWithOverhangs(t *testing.T) {
	defaultOverhangs := []string{"CGAG", "GTCT", "GGGG", "AAAA", "AACT", "AATG", "ATCC", "CGCT", "TTCT", "AAGC", "ATAG", "ATTA", "ATGT", "ACTC", "ACGA", "TATC", "TAGG", "TACA", "TTAC", "TTGA", "TGGA", "GAAG", "GACC", "GCCG", "TCTG", "GTTG", "GTGC", "TGCC", "CTGG", "TAAA", "TGAG", "AAGA", "AGGT", "TTCG", "ACTA", "TTAG", "TCTC", "TCGG", "ATAA", "ATCA", "TTGC", "CACG", "AATA", "ACAA", "ATGG", "TATG", "AAAT", "TCAC"}
	gene := "atgaaaaaatttaactggaagaaaatagtcgcgccaattgcaatgctaattattggcttactaggtggtttacttggtgcctttatcctactaacagcagccggggtatcttttaccaatacaacagatactggagtaaaaacggctaagaccgtctacaccaatataacagatacaactaaggctgttaagaaagtacaaaatgccgttgtttctgtcatcaattatcaagaaggttcatcttcagattctctaaatgacctttatggccgtatctttggcggaggggacagttctgattctagccaagaaaattcaaaagattcagatggtctacaggtcgctggtgaaggttctggagtcatctataaaaaagatggcaaagaagcctacatcgtaaccaataaccatgttgtcgatggggctaaaaaacttgaaatcatgctttcggatggttcgaaaattactggtgaacttgttggtaaagacacttactctgacctagcagttgtcaaagtatcttcagataaaataacaactgttgcagaatttgcagactcaaactcccttactgttggtgaaaaagcaattgctatcggtagcccacttggtaccgaatacgccaactcagtaacagaaggaatcgtttctagccttagccgtactataacgatgcaaaacgataatggtgaaactgtatcaacaaacgctatccaaacagatgcagccattaaccctggtaactctggtggtgccctagtcaatattgaaggacaagttatcggtattaattcaagtaaaatttcatcaacgtctgcagtcgctggtagtgctgttgaaggtatggggtttgccattccatcaaacgatgttgttgaaatcatcaatcaattagaaaaagatggtaaagttacacgaccagcactaggaatctcaatagcagatcttaatagcctttctagcagcgcaacttctaaattagatttaccagatgaggtcaaatccggtgttgttgtcggtagtgttcagaaaggtatgccagctgacggtaaacttcaagaatatgatgttatcactgagattgatggtaagaaaatcagctcaaaaactgatattcaaaccaatctttacagccatagtatcggagatactatcaaggtaaccttctatcgtggtaaagataagaaaactgtagatcttaaattaacaaaatctacagaagacatatctgattaa"

	// No overhangs are excluded; only members of defaultOverhangs (or their
	// reverse complements) may be used as junctions.
	_, _, err := FragmentWithOverhangs(gene, 90, 110, []string{}, defaultOverhangs)
	if err != nil {
		// Use an explicit format so a '%' in the error text is never
		// interpreted as a formatting verb.
		t.Errorf("FragmentWithOverhangs returned an unexpected error: %v", err)
	}
}

0 comments on commit 489cf97

Please sign in to comment.