use serde_derive::{Deserialize, Serialize};
use std::hash::Hash;
use std::fmt;
pub mod kmer;
pub mod dna_string;
pub mod graph;
pub mod vmer;
pub mod msp;
pub mod filter;
pub mod compression;
pub mod clean_graph;
pub mod test;
#[inline]
pub fn bits_to_ascii(c: u8) -> u8 {
match c {
0u8 => 'A' as u8,
1u8 => 'C' as u8,
2u8 => 'G' as u8,
3u8 => 'T' as u8,
_ => 'X' as u8,
}
}
#[inline]
pub fn base_to_bits(c: u8) -> u8 {
match c {
b'A' | b'a' => 0u8,
b'C' | b'c' => 1u8,
b'G' | b'g' => 2u8,
b'T' | b't' => 3u8,
_ => 0u8,
}
}
#[inline]
pub fn dna_only_base_to_bits(c: u8) -> Option<u8> {
match c {
b'A' | b'a' => Some(0u8),
b'C' | b'c' => Some(1u8),
b'G' | b'g' => Some(2u8),
b'T' | b't' => Some(3u8),
_ => None,
}
}
#[inline]
pub fn is_valid_base(c: u8) -> bool {
match c {
b'A' | b'C' | b'G' | b'T' => true,
b'a' | b'c' | b'g' | b't' => true,
_ => false,
}
}
#[inline]
pub fn bits_to_base(c: u8) -> char {
match c {
0u8 => 'A',
1u8 => 'C',
2u8 => 'G',
3u8 => 'T',
_ => 'X',
}
}
#[inline(always)]
pub fn complement(base: u8) -> u8 {
(!base) & 0x3u8
}
pub trait Mer: Sized + fmt::Debug {
fn len(&self) -> usize;
fn get(&self, pos: usize) -> u8;
fn set_mut(&mut self, pos: usize, val: u8);
fn set_slice_mut(&mut self, pos: usize, nbases: usize, value: u64);
fn rc(&self) -> Self;
fn iter<'a>(&'a self) -> MerIter<'a, Self> {
MerIter {
sequence: self,
i: 0,
}
}
}
pub struct MerIter<'a, M: 'a + Mer> {
sequence: &'a M,
i: usize,
}
impl<'a, M: 'a + Mer> Iterator for MerIter<'a, M> {
type Item = u8;
fn next(&mut self) -> Option<u8> {
if self.i < self.sequence.len() {
let value = self.sequence.get(self.i);
self.i += 1;
Some(value)
} else {
None
}
}
}
pub trait Kmer: Mer + Sized + Copy + PartialEq + PartialOrd + Eq + Ord + Hash {
fn empty() -> Self;
fn k() -> usize;
fn to_u64(&self) -> u64;
fn from_u64(value: u64) -> Self;
fn extend_left(&self, v: u8) -> Self;
fn extend_right(&self, v: u8) -> Self;
fn extend(&self, v: u8, dir: Dir) -> Self {
match dir {
Dir::Left => self.extend_left(v),
Dir::Right => self.extend_right(v),
}
}
fn get_extensions(&self, exts: Exts, dir: Dir) -> Vec<Self> {
let ext_bases = exts.get(dir);
ext_bases
.iter()
.map(|b| self.extend(b.clone(), dir))
.collect()
}
fn min_rc_flip(&self) -> (Self, bool) {
let rc = self.rc();
if *self < rc {
(self.clone(), false)
} else {
(rc, true)
}
}
fn min_rc(&self) -> Self {
let rc = self.rc();
if *self < rc { self.clone() } else { rc }
}
fn is_palindrome(&self) -> bool {
self.len() % 2 == 0 && *self == self.rc()
}
fn from_bytes(bytes: &[u8]) -> Self {
if bytes.len() < Self::k() {
panic!("bytes not long enough to form kmer")
}
let mut k0 = Self::empty();
for i in 0..Self::k() {
k0.set_mut(i, bytes[i])
}
k0
}
fn from_ascii(bytes: &[u8]) -> Self {
if bytes.len() < Self::k() {
panic!("bytes not long enough to form kmer")
}
let mut k0 = Self::empty();
for i in 0..Self::k() {
k0.set_mut(i, base_to_bits(bytes[i]))
}
k0
}
fn to_string(&self) -> String {
let mut s = String::new();
for pos in 0..self.len() {
s.push(bits_to_base(self.get(pos)))
}
s
}
fn kmers_from_bytes(str: &[u8]) -> Vec<Self> {
let mut r = Vec::new();
if str.len() < Self::k() {
return r;
}
let mut k0 = Self::empty();
for i in 0..Self::k() {
k0.set_mut(i, str[i]);
}
r.push(k0.clone());
for i in Self::k()..str.len() {
k0 = k0.extend_right(str[i]);
r.push(k0.clone());
}
r
}
fn kmers_from_ascii(str: &[u8]) -> Vec<Self> {
let mut r = Vec::new();
if str.len() < Self::k() {
return r;
}
let mut k0 = Self::empty();
for i in 0..Self::k() {
k0.set_mut(i, base_to_bits(str[i]));
}
r.push(k0.clone());
for i in Self::k()..str.len() {
k0 = k0.extend_right(base_to_bits(str[i]));
r.push(k0.clone());
}
r
}
}
pub trait MerImmut: Mer + Clone {
fn set(&self, pos: usize, val: u8) -> Self {
let mut new = self.clone();
new.set_mut(pos, val);
new
}
fn set_slice(&self, pos: usize, nbases: usize, bits: u64) -> Self {
let mut new = self.clone();
new.set_slice_mut(pos, nbases, bits);
new
}
}
impl<T> MerImmut for T
where
T: Mer + Clone,
{
}
pub trait Vmer: Mer + PartialEq + Eq {
fn new(len: usize) -> Self;
fn max_len() -> usize;
fn from_slice(seq: &[u8]) -> Self {
let mut vmer = Self::new(seq.len());
for i in 0..seq.len() {
vmer.set_mut(i, seq[i]);
}
vmer
}
fn get_kmer<K: Kmer>(&self, pos: usize) -> K;
fn first_kmer<K: Kmer>(&self) -> K {
self.get_kmer(0)
}
fn last_kmer<K: Kmer>(&self) -> K {
self.get_kmer(self.len() - K::k())
}
fn both_term_kmer<K: Kmer>(&self) -> (K, K) {
(self.first_kmer(), self.last_kmer())
}
fn term_kmer<K: Kmer>(&self, dir: Dir) -> K {
match dir {
Dir::Left => self.first_kmer(),
Dir::Right => self.last_kmer(),
}
}
fn iter_kmers<K: Kmer>(&self) -> KmerIter<'_, K, Self> {
let kmer = if self.len() >= K::k() {
self.first_kmer()
} else {
K::empty()
};
KmerIter {
bases: self,
kmer: kmer,
pos: K::k(),
}
}
fn iter_kmer_exts<K: Kmer>(&self, seq_exts: Exts) -> KmerExtsIter<'_, K, Self> {
let kmer = if self.len() >= K::k() {
self.first_kmer()
} else {
K::empty()
};
KmerExtsIter {
bases: self,
exts: seq_exts,
kmer: kmer,
pos: K::k(),
}
}
}
#[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd)]
pub struct DnaBytes(pub Vec<u8>);
impl Mer for DnaBytes {
fn len(&self) -> usize {
self.0.len()
}
fn get(&self, pos: usize) -> u8 {
self.0[pos]
}
fn set_mut(&mut self, pos: usize, val: u8) {
self.0[pos] = val
}
fn set_slice_mut(&mut self, _pos: usize, _nbases: usize, _value: u64) {
unimplemented!();
}
fn rc(&self) -> Self {
unimplemented!();
}
}
impl Vmer for DnaBytes {
fn new(len: usize) -> Self {
DnaBytes(vec![0u8; len])
}
fn max_len() -> usize {
1<<48
}
fn get_kmer<K: Kmer>(&self, pos: usize) -> K {
K::from_bytes(&self.0[pos..pos + K::k()])
}
}
#[derive(Debug, Eq, PartialEq, Ord, PartialOrd)]
pub struct DnaSlice<'a>(pub &'a [u8]);
impl<'a> Mer for DnaSlice<'a> {
fn len(&self) -> usize {
self.0.len()
}
fn get(&self, pos: usize) -> u8 {
self.0[pos]
}
fn set_mut(&mut self, _pos: usize, _val: u8) {
unimplemented!()
}
fn set_slice_mut(&mut self, _pos: usize, _nbases: usize, _value: u64) {
unimplemented!();
}
fn rc(&self) -> Self {
unimplemented!();
}
}
impl<'a> Vmer for DnaSlice<'a> {
fn new(_len: usize) -> Self {
unimplemented!();
}
fn max_len() -> usize {
1<<48
}
fn get_kmer<K: Kmer>(&self, pos: usize) -> K {
K::from_bytes(&self.0[pos..pos + K::k()])
}
}
#[derive(Copy, Clone, Debug, Serialize, Deserialize)]
pub enum Dir {
Left,
Right,
}
impl Dir {
pub fn flip(&self) -> Dir {
match *self {
Dir::Left => Dir::Right,
Dir::Right => Dir::Left,
}
}
pub fn cond_flip(&self, do_flip: bool) -> Dir {
if do_flip { self.flip() } else { *self }
}
pub fn pick<T>(&self, if_left: T, if_right: T) -> T {
match self {
&Dir::Left => if_left,
&Dir::Right => if_right,
}
}
}
#[derive(Eq, PartialEq, Copy, Clone, Ord, PartialOrd, Hash, Serialize, Deserialize)]
pub struct Exts {
pub val: u8,
}
impl Exts {
pub fn new(val: u8) -> Self {
Exts { val: val }
}
pub fn empty() -> Exts {
Exts { val: 0u8 }
}
pub fn from_single_dirs(left: Exts, right: Exts) -> Exts {
Exts { val: (right.val << 4) | (left.val & 0xf) }
}
pub fn merge(left: Exts, right: Exts) -> Exts {
Exts { val: left.val & 0x0f | right.val & 0xf0 }
}
pub fn add(&self, v: Exts) -> Exts {
Exts { val: self.val | v.val }
}
pub fn set(&self, dir: Dir, pos: u8) -> Exts {
let shift = pos +
match dir {
Dir::Right => 4,
Dir::Left => 0,
};
let new_val = self.val | (1u8 << shift);
Exts { val: new_val }
}
#[inline]
fn dir_bits(&self, dir: Dir) -> u8 {
match dir {
Dir::Right => self.val >> 4,
Dir::Left => self.val & 0xf,
}
}
pub fn get(&self, dir: Dir) -> Vec<u8> {
let bits = self.dir_bits(dir);
let mut v = Vec::new();
for i in 0..4 {
if bits & (1 << i) > 0 {
v.push(i);
}
}
v
}
pub fn has_ext(&self, dir: Dir, base: u8) -> bool {
let bits = self.dir_bits(dir);
(bits & (1 << base)) > 0
}
pub fn from_slice_bounds(src: &[u8], start: usize, length: usize) -> Exts {
let l_extend = if start > 0 {
1u8 << (src[start - 1])
} else {
0u8
};
let r_extend = if start + length < src.len() {
1u8 << src[start + length]
} else {
0u8
};
Exts { val: (r_extend << 4) | l_extend }
}
pub fn from_dna_string(src: &dna_string::DnaString,
start: usize, length: usize) -> Exts {
let l_extend = if start > 0 {
1u8 << (src.get(start - 1))
} else {
0u8
};
let r_extend = if start + length < src.len() {
1u8 << src.get(start + length)
} else {
0u8
};
Exts { val: (r_extend << 4) | l_extend }
}
pub fn num_exts_l(&self) -> u8 {
self.num_ext_dir(Dir::Left)
}
pub fn num_exts_r(&self) -> u8 {
self.num_ext_dir(Dir::Right)
}
pub fn num_ext_dir(&self, dir: Dir) -> u8 {
let e = self.dir_bits(dir);
((e & 1u8) >> 0) + ((e & 2u8) >> 1) + ((e & 4u8) >> 2) + ((e & 8u8) >> 3)
}
pub fn mk_left(base: u8) -> Exts {
Exts::empty().set(Dir::Left, base)
}
pub fn mk_right(base: u8) -> Exts {
Exts::empty().set(Dir::Right, base)
}
pub fn mk(left_base: u8, right_base: u8) -> Exts {
Exts::merge(Exts::mk_left(left_base), Exts::mk_right(right_base))
}
pub fn get_unique_extension(&self, dir: Dir) -> Option<u8> {
if self.num_ext_dir(dir) != 1 {
None
} else {
let e = self.dir_bits(dir);
for i in 0..4 {
if (e & (1 << i)) > 0 {
return Some(i);
}
}
None
}
}
pub fn single_dir(&self, dir: Dir) -> Exts {
match dir {
Dir::Right => Exts { val: self.val >> 4 },
Dir::Left => Exts { val: self.val & 0xfu8 },
}
}
pub fn complement(&self) -> Exts {
let v = self.val;
let mut r = (v & 0x55u8) << 1 | ((v >> 1) & 0x55u8);
r = (r & 0x33u8) << 2 | ((r >> 2) & 0x33u8);
Exts { val: r }
}
pub fn reverse(&self) -> Exts {
let v = self.val;
let r = (v & 0xf) << 4 | (v >> 4);
Exts { val: r }
}
pub fn rc(&self) -> Exts {
self.reverse().complement()
}
}
impl fmt::Debug for Exts {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let mut s = String::new();
for b in self.get(Dir::Left) {
s.push(bits_to_base(b));
}
s.push('|');
for b in self.get(Dir::Right) {
s.push(bits_to_base(b));
}
write!(f, "{}", s)
}
}
pub struct KmerIter<'a, K: Kmer, D>
where
D: 'a,
{
bases: &'a D,
kmer: K,
pos: usize,
}
impl<'a, K: Kmer, D: Mer> Iterator for KmerIter<'a, K, D> {
type Item = K;
fn next(&mut self) -> Option<K> {
if self.pos <= self.bases.len() {
let retval = self.kmer;
if self.pos < self.bases.len() {
self.kmer = self.kmer.extend_right(self.bases.get(self.pos));
}
self.pos = self.pos + 1;
Some(retval)
} else {
None
}
}
}
pub struct KmerExtsIter<'a, K: Kmer, D>
where
D: 'a,
{
bases: &'a D,
exts: Exts,
kmer: K,
pos: usize,
}
impl<'a, K: Kmer, D: Mer> Iterator for KmerExtsIter<'a, K, D> {
type Item = (K, Exts);
fn next(&mut self) -> Option<(K, Exts)> {
if self.pos <= self.bases.len() {
let next_base =
if self.pos < self.bases.len() {
self.bases.get(self.pos)
} else {
0u8
};
let cur_left =
if self.pos == K::k() {
self.exts
} else {
Exts::mk_left(self.bases.get(self.pos - K::k() - 1))
};
let cur_right =
if self.pos < self.bases.len() {
Exts::mk_right(next_base)
} else {
self.exts
};
let cur_exts = Exts::merge(cur_left, cur_right);
let retval = self.kmer;
self.kmer = self.kmer.extend_right(next_base);
self.pos = self.pos + 1;
Some((retval, cur_exts))
} else {
None
}
}
}