Last active
April 6, 2021 15:18
-
-
Save k3zi/883bbbf2ea171465c360bb35056a1829 to your computer and use it in GitHub Desktop.
Hirschberg Sequence Alignment
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Foundation | |
enum Match: Equatable { | |
case indexAndValue(Int, Character) | |
case missing | |
} | |
enum Hirschberg { | |
static func align<T>(input1 seq1: [T], input2 seq2: [T], match: Int = 5, substitution: Int = -3, gap: Int = -2, offset1: Int = 0, offset2: Int = 0) -> (output1: [Match<T>], output2: [Match<T>]) where T: Equatable { | |
let substitution = -substitution + match | |
let gap = -gap + match | |
var output1 = [Match<T>]() | |
var output2 = [Match<T>]() | |
let m = max(seq1.count, seq2.count) | |
output1.reserveCapacity(m) | |
output2.reserveCapacity(m) | |
var fwd: [[Int]] = [Array(repeating: 0, count: m + 1), Array(repeating: 0, count: m + 1)] | |
var rev: [[Int]] = [Array(repeating: 0, count: m + 1), Array(repeating: 0, count: m + 1)] | |
func forwardAlg(p1: Int, p2: Int, q1: Int, q2: Int) { | |
fwd[p1 % 2][q1] = 0 | |
for j in (q1 + 1)...q2 { | |
fwd[p1 % 2][j] = fwd[p1 % 2][j - 1] + gap | |
} | |
for i in (p1 + 1)...p2 { | |
fwd[i % 2][q1] = fwd[(i - 1) % 2][q1] + gap | |
for j in (q1 + 1)...q2 { | |
var diag = fwd[(i - 1) % 2][j - 1] | |
if seq1[i - 1] != seq2[j - 1] { | |
diag += substitution | |
} | |
fwd[i % 2][j] = min(diag, min(fwd[(i - 1) % 2][j] + gap, fwd[i % 2][j - 1] + gap)) | |
} | |
} | |
} | |
func reverseAlg(p1: Int, p2: Int, q1: Int, q2: Int) { | |
rev[p2 % 2][q2] = 0 | |
for j in stride(from: q2 - 1, through: q1, by: -1) { | |
rev[p2 % 2][j] = rev[p2 % 2][j + 1] + gap | |
} | |
for i in stride(from: p2 - 1, through: p1, by: -1) { | |
rev[i % 2][q2] = rev[(i + 1) % 2][q2] + gap | |
for j in stride(from: q2 - 1, through: q1, by: -1) { | |
var diag = rev[(i + 1) % 2][j + 1] | |
if seq1[i] != seq2[j] { | |
diag += substitution | |
} | |
rev[i % 2][j] = min(diag, min(rev[(i + 1) % 2][j] + gap, rev[i % 2][j + 1] + gap)) | |
} | |
} | |
} | |
func align(p1: Int, p2: Int, q1: Int, q2: Int) { | |
if p2 <= p1 { | |
for i in q1..<q2 { | |
output1.append(.missing) | |
output2.append(.indexAndValue(i + offset2, seq2[i])) | |
} | |
} else if q2 <= q1 { | |
for i in p1..<p2 { | |
output1.append(.indexAndValue(i + offset1, seq1[i])) | |
output2.append(.missing) | |
} | |
} else if p2 - 1 == p1 { | |
let ch = seq1[p1] | |
var memo = q1 | |
for i in (q1 + 1)..<q2 { | |
if seq2[i] == ch { | |
memo = i | |
} | |
} | |
for i in q1..<q2 { | |
if i == memo { | |
output1.append(.indexAndValue(p1 + offset1, ch)) | |
} else { | |
output1.append(.missing) | |
} | |
output2.append(.indexAndValue(i + offset2, seq2[i])) | |
} | |
} else { | |
let mid = (p1 + p2) / 2 | |
let queue1 = DispatchQueue(label: UUID().uuidString) | |
let queue2 = DispatchQueue(label: UUID().uuidString) | |
let group = DispatchGroup() | |
queue1.async(group: group) { | |
forwardAlg(p1: p1, p2: mid, q1: q1, q2: q2) | |
} | |
queue2.async(group: group) { | |
reverseAlg(p1: mid, p2: p2, q1: q1, q2: q2) | |
} | |
group.wait() | |
var s2mid = q1 | |
var best = Int.max | |
for i in q1...q2 { | |
let sum = fwd[mid % 2][i] + rev[mid % 2][i] | |
if sum < best { | |
best = sum | |
s2mid = i | |
} | |
} | |
align(p1: p1, p2: mid, q1: q1, q2: s2mid) | |
align(p1: mid, p2: p2, q1: s2mid, q2: q2) | |
} | |
} | |
align(p1: 0, p2: seq1.count, q1: 0, q2: seq2.count) | |
return (output1, output2) | |
} | |
} | |
let a = "ACGTACGTACGT".flatMap { $0.unicodeScalars } | |
let b = "AGTACCTACCGT".flatMap { $0.unicodeScalars } | |
let result = Hirschberg.partialAlign(input1: a, input2: b, match: 1, substitution: -1, gap: -1) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment