Skip to content

Commit

Permalink
Alphabets and Substitution Matrices (#294)
Browse files Browse the repository at this point in the history
* Add alphabet package

* Add matrix package

* improve tests

* use default matrix

* making score part of Scoring

* Delete golangci-lint.yml

---------

Co-authored-by: Timothy Stiles <[email protected]>
Co-authored-by: Tim <[email protected]>
  • Loading branch information
3 people authored Apr 8, 2023
1 parent 3851988 commit 7674eea
Show file tree
Hide file tree
Showing 9 changed files with 5,457 additions and 87 deletions.
38 changes: 0 additions & 38 deletions .github/workflows/golangci-lint.yml

This file was deleted.

70 changes: 45 additions & 25 deletions align/align.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,26 +59,40 @@ Tim
*/
package align

import (
"github.com/TimothyStiles/poly/align/matrix"
)

// Scoring is a struct that holds the scoring matrix for match, mismatch, and gap penalties.
type Scoring struct {
Match int
Mismatch int
GapPenalty int
SubstitutionMatrix *matrix.SubstitutionMatrix
GapPenalty int
}

// NewScoring returns a new Scoring struct with default values.
func NewScoring() Scoring {
// NewScoring returns a new Scoring struct with default values for DNA.
func NewScoring(substitutionMatrix *matrix.SubstitutionMatrix, gapPenalty int) (Scoring, error) {

if substitutionMatrix == nil {
substitutionMatrix = matrix.Default
}
return Scoring{
Match: 1,
Mismatch: -1,
GapPenalty: -1,
SubstitutionMatrix: substitutionMatrix,
GapPenalty: gapPenalty,
}, nil
}

func (s Scoring) Score(a, b byte) (int, error) {
matchScore, err := s.SubstitutionMatrix.Score(string(a), string(b))
if err != nil {
return 0, err
}
return matchScore, nil
}

// NeedlemanWunsch performs global alignment between two strings using the Needleman-Wunsch algorithm.
// It returns the final score and the optimal alignments of the two strings in O(nm) time and O(nm) space.
// https://en.wikipedia.org/wiki/Needleman-Wunsch_algorithm
func NeedlemanWunsch(stringA string, stringB string, scoring Scoring) (int, string, string) {
func NeedlemanWunsch(stringA string, stringB string, scoring Scoring) (int, string, string, error) {

// Get the M and N dimensions of the matrix. The M x N matrix is standard linear algebra notation.
// But I added columns and rows to the variable name to make it more clear what the dimensions are.
Expand Down Expand Up @@ -106,8 +120,10 @@ func NeedlemanWunsch(stringA string, stringB string, scoring Scoring) (int, stri
for columnM := 1; columnM <= columnLengthM; columnM++ {
for rowN := 1; rowN <= rowLengthN; rowN++ {
// Calculate the scores for scoring.Match/mismatch and gap.
var score int = score(stringA[columnM-1], stringB[rowN-1], scoring)

var score, err = scoring.Score(stringA[columnM-1], stringB[rowN-1])
if err != nil {
return 0, "", "", err
}
matrix[columnM][rowN] = max(
matrix[columnM-1][rowN-1]+score,
max(matrix[columnM-1][rowN]+scoring.GapPenalty, matrix[columnM][rowN-1]+scoring.GapPenalty),
Expand All @@ -119,7 +135,11 @@ func NeedlemanWunsch(stringA string, stringB string, scoring Scoring) (int, stri
var alignA, alignB []rune
columnM, rowN := columnLengthM, rowLengthN
for columnM > 0 && rowN > 0 {
if matrix[columnM][rowN] == matrix[columnM-1][rowN-1]+score(stringA[columnM-1], stringB[rowN-1], scoring) {
var matchScore, err = scoring.Score(stringA[columnM-1], stringB[rowN-1])
if err != nil {
return 0, "", "", err
}
if matrix[columnM][rowN] == matrix[columnM-1][rowN-1]+matchScore {
alignA = append(alignA, rune(stringA[columnM-1]))
alignB = append(alignB, rune(stringB[rowN-1]))
columnM--
Expand All @@ -138,13 +158,13 @@ func NeedlemanWunsch(stringA string, stringB string, scoring Scoring) (int, stri
// Reverse the alignments to get the optimal alignment.
alignA = reverseRuneArray(alignA)
alignB = reverseRuneArray(alignB)
return matrix[columnLengthM][rowLengthN], string(alignA), string(alignB)
return matrix[columnLengthM][rowLengthN], string(alignA), string(alignB), nil
}

// SmithWaterman performs local alignment between two strings using the Smith-Waterman algorithm.
// It returns the max score and optimal local alignments between two strings alignments of the two strings in O(nm) time and O(nm) space.
// https://en.wikipedia.org/wiki/Smith-Waterman_algorithm
func SmithWaterman(stringA string, stringB string, scoring Scoring) (int, string, string) {
func SmithWaterman(stringA string, stringB string, scoring Scoring) (int, string, string, error) {

columnLengthM, rowLengthN := len(stringA), len(stringB)

Expand All @@ -162,7 +182,11 @@ func SmithWaterman(stringA string, stringB string, scoring Scoring) (int, string
// Fill the alignment matrix
for columnM := 1; columnM <= columnLengthM; columnM++ {
for rowN := 1; rowN <= rowLengthN; rowN++ {
diagScore := matrix[columnM-1][rowN-1] + score(stringA[columnM-1], stringB[rowN-1], scoring)
var matchScore, err = scoring.Score(stringA[columnM-1], stringB[rowN-1])
if err != nil {
return 0, "", "", err
}
diagScore := matrix[columnM-1][rowN-1] + matchScore
upScore := matrix[columnM-1][rowN] + scoring.GapPenalty
leftScore := matrix[columnM][rowN-1] + scoring.GapPenalty
matrix[columnM][rowN] = max(0, max(diagScore, max(upScore, leftScore)))
Expand All @@ -181,7 +205,11 @@ func SmithWaterman(stringA string, stringB string, scoring Scoring) (int, string
columnM := maxScoreRow
rowN := maxScoreCol
for matrix[columnM][rowN] > 0 {
if matrix[columnM][rowN] == matrix[columnM-1][rowN-1]+score(stringA[columnM-1], stringB[rowN-1], scoring) {
var matchScore, err = scoring.Score(stringA[columnM-1], stringB[rowN-1])
if err != nil {
return 0, "", "", err
}
if matrix[columnM][rowN] == matrix[columnM-1][rowN-1]+matchScore {
alignA = string(stringA[columnM-1]) + alignA
alignB = string(stringB[rowN-1]) + alignB
columnM--
Expand All @@ -197,15 +225,7 @@ func SmithWaterman(stringA string, stringB string, scoring Scoring) (int, string
}
}

return maxScore, alignA, alignB
}

func score(a, b byte, scoring Scoring) int {
if a == b {
return scoring.Match
} else {
return scoring.Mismatch
}
return maxScore, alignA, alignB, nil
}

func reverseRuneArray(runes []rune) []rune { // wasn't able to find a built-in reverse function for runes
Expand Down
Loading

0 comments on commit 7674eea

Please sign in to comment.