kswx.h 49 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779
  1. /*
  2. *
  3. * Copyright (c) 2011, Jue Ruan <ruanjue@gmail.com>
  4. *
  5. *
  6. * This program is free software: you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License as published by
  8. * the Free Software Foundation, either version 3 of the License, or
  9. * (at your option) any later version.
  10. *
  11. * This program is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU General Public License
  17. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  18. */
  19. #ifndef __KSW_EXT_RJ_H
  20. #define __KSW_EXT_RJ_H
  21. #include "ksw.h"
  22. #include "dna.h"
  23. #include "chararray.h"
  24. #include "list.h"
  25. #include <math.h>
  26. #include <float.h>
  27. typedef struct {
  28. int score;
  29. int tb, te, qb, qe;
  30. int aln, mat, mis, ins, del;
  31. } kswx_t;
  32. static const kswr_t KSWR_NULL = (kswr_t){0, 0, 0, 0, 0, 0, 0};
  33. static const kswx_t KSWX_NULL = (kswx_t){0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
  34. static inline kswr_t kswx2kswr(kswx_t x){
  35. kswr_t r;
  36. r.score = x.score;
  37. r.tb = x.tb;
  38. r.te = x.te;
  39. r.qb = x.qb;
  40. r.qe = x.qe;
  41. r.score2 = 0;
  42. r.te2 = 0;
  43. return r;
  44. }
  45. static inline void kswx_push_cigar(u32list *cigars, uint32_t op, uint32_t len){
  46. if(len == 0) return;
  47. if(cigars->size && (cigars->buffer[cigars->size-1] & 0xF) == op){
  48. cigars->buffer[cigars->size-1] += len << 4;
  49. } else push_u32list(cigars, (len << 4) | op);
  50. }
  51. static inline void kswx_push_cigars(u32list *cigars, uint32_t *cigar, size_t size){
  52. if(size == 0) return;
  53. if(cigars->size && (cigars->buffer[cigars->size-1] & 0x0F) == (cigar[0] & 0x0FU)){
  54. cigars->buffer[cigars->size-1] += cigar[0] & 0xFFFFFFF0U;
  55. append_array_u32list(cigars, cigar + 1, size - 1);
  56. } else append_array_u32list(cigars, cigar, size);
  57. }
  58. static inline void kswx_print_cigars(uint32_t *cigar, size_t size, FILE *out){
  59. size_t i;
  60. uint32_t op, len;
  61. for(i=0;i<size;i++){
  62. op = cigar[i] & 0x0F;
  63. len = cigar[i] >> 4;
  64. fprintf(out, "%d%c", len, "MIDX"[op]);
  65. }
  66. fflush(out);
  67. }
  68. static inline kswx_t kswx_stat_cigars(uint32_t *cigar, size_t size){
  69. kswx_t x;
  70. size_t i;
  71. uint32_t op, len;
  72. x = KSWX_NULL;
  73. for(i=0;i<size;i++){
  74. op = cigar[i] & 0x0F;
  75. len = cigar[i] >> 4;
  76. x.aln += len;
  77. switch(op){
  78. case 0: x.mat += len; x.te += len; x.qe += len; break;
  79. case 1: x.ins += len; x.qe += len; break;
  80. case 2: x.del += len; x.te += len; break;
  81. }
  82. }
  83. return x;
  84. }
  85. static inline void revseq_bytes(uint8_t *ary, int size){
  86. int i, v;
  87. for(i=0;i<size>>1;i++){
  88. v = ary[i]; ary[i] = ary[size - 1 - i]; ary[size - 1 - i] = v;
  89. }
  90. }
  91. static inline void revseq_4bytes(uint32_t *ary, int size){
  92. int i;
  93. uint32_t v;
  94. for(i=0;i<size>>1;i++){
  95. v = ary[i]; ary[i] = ary[size - 1 - i]; ary[size - 1 - i] = v;
  96. }
  97. }
  98. #define kswx_roundup8x(n) (((n) + 0x7LLU) & 0xFFFFFFFFFFFFFFF8LLU)
  99. static inline void kswx_overlap_align_core(kswx_t *xs[2], u32list *cigars[2], int qlen, uint8_t *query, int tlen, uint8_t *target, int strand, int M, int X, int I, int D, int E, u8list *mem_cache){
  100. kswx_t x;
  101. int *rh, *re;
  102. uint8_t *z, *zi, d;
  103. int ql, tl, n_col;
  104. int i, j, k, h1, h, m, e, f, t;
  105. int imax, mj2, gmax, gi;
  106. if(xs[0]) *xs[0] = KSWX_NULL;
  107. if(xs[1]) *xs[1] = KSWX_NULL;
  108. if(qlen <= 0 || tlen <= 0){
  109. if(xs[0]) clear_u32list(cigars[0]);
  110. if(xs[1]) clear_u32list(cigars[1]);
  111. return;
  112. }
  113. ql = qlen; tl = tlen;
  114. n_col = tl;
  115. encap_u8list(mem_cache, kswx_roundup8x((tl + 2) * sizeof(int)) + kswx_roundup8x((tl + 2) * sizeof(int)) + kswx_roundup8x(((long long)ql) * n_col));
  116. rh = (int*)(mem_cache->buffer + mem_cache->size);
  117. re = (int*)(mem_cache->buffer + mem_cache->size + kswx_roundup8x((tl + 2) * sizeof(int)));
  118. z = (mem_cache->buffer + mem_cache->size + kswx_roundup8x((tl + 2) * sizeof(int)) + kswx_roundup8x((tl + 2) * sizeof(int)));
  119. for(j=0;j<=tl;j++) rh[j] = 0;
  120. for(j=0;j<=tl;j++) re[j] = -10000;
  121. gmax = 0; gi = -1;
  122. imax = 0; mj2 = -1;
  123. for(i=0;i<ql;i++){
  124. h1 = 0; f = -10000;
  125. zi = &z[i * n_col];
  126. for(j=0;j<tl;j++){
  127. m = rh[j] + ((query[i * strand] == target[j * strand])? M : X);
  128. rh[j] = h1;
  129. e = re[j];
  130. d = m >= e? 0 : 1;
  131. h = m >= e? m : e;
  132. d = h >= f? d : 2;
  133. h = h >= f? h : f;
  134. h1 = h;
  135. if(i + 1 == ql){
  136. imax = imax > h? imax : h;
  137. mj2 = imax > h? mj2 : j;
  138. }
  139. t = m + I + E;
  140. e = e + E;
  141. d |= e > t? 1<<2 : 0;
  142. e = e > t? e : t;
  143. re[j] = e;
  144. t = m + D + E;
  145. f = f + E;
  146. d |= f > t? 2<<4 : 0;
  147. f = f > t? f : t;
  148. zi[j] = d;
  149. }
  150. rh[j] = h1; re[j] = -10000;
  151. if(gmax < h1){ gmax = h1; gi = i; }
  152. }
  153. for(k=0;k<2;k++){
  154. if(xs[k] == NULL) continue;
  155. x = KSWX_NULL;
  156. if(k){
  157. x.score = gmax; x.qe = gi; x.te = tl - 1;
  158. } else {
  159. x.score = imax; x.qe = ql - 1; x.te = mj2;
  160. }
  161. i = x.qe; j = x.te;
  162. d = 0;
  163. if(cigars[k]) clear_u32list(cigars[k]);
  164. while(i >= 0 && j >= 0){
  165. d = (z[i * n_col + j] >> (d << 1)) & 0x03;
  166. if(d == 0){
  167. if(query[i * strand] == target[j * strand]){ x.mat ++; } else { x.mis ++; } i --; j --;
  168. } else if(d == 1){
  169. i --; x.ins ++;
  170. } else {
  171. j --; x.del ++;
  172. }
  173. if(cigars[k]) kswx_push_cigar(cigars[k], d, 1);
  174. }
  175. switch(d){
  176. case 1: x.qb = i + 1; x.tb = j; break;
  177. case 2: x.tb = i; x.tb = j + 1; break;
  178. default: x.qb = i + 1; x.tb = j + 1; break;
  179. }
  180. if(cigars[k]) reverse_u32list(cigars[k]);
  181. x.aln = x.mat + x.mis + x.ins + x.del;
  182. x.qe ++; x.te ++;
  183. *xs[k] = x;
  184. }
  185. }
  186. // diagonal will shift 1 bp to max score of each row
  187. static inline kswx_t kswx_extend_align_shift_core(int qlen, uint8_t *query, int tlen, uint8_t *target, int strand, int init_score, int W, int M, int X, int I, int D, int E, int T, u8list *mem_cache, u32list *cigars){
  188. kswx_t x;
  189. int *rh, *re, *zb;
  190. uint8_t *z, *zi;
  191. int8_t *qp, *qpp;
  192. register uint8_t d;
  193. int ql, tl, n_col;
  194. int i, j, jb, je, h1, c;
  195. register int h, m, e, f, t;
  196. int max_gap, max, mi, mj, imax, mj2, gmax, gi, gj;
  197. clear_u32list(cigars);
  198. x = KSWX_NULL;
  199. if(init_score < 0) init_score = 0;
  200. if(qlen <= 0 || tlen <= 0){ x.score = init_score; return x; }
  201. if(W > 0){
  202. max = ((qlen < tlen)? qlen : tlen) * M + init_score + (- T);
  203. max_gap = (max + (((I > D)? I : D))) / (- E) + 1;
  204. if(max_gap < 1) max_gap = 1;
  205. if(W > max_gap) W = max_gap;
  206. } else W = -W;
  207. W = num_min(W, num_max(qlen, tlen));
  208. if(qlen < tlen){
  209. if(qlen + W < tlen){ ql = qlen; tl = qlen + W; }
  210. else { ql = qlen; tl = tlen; }
  211. } else {
  212. if(tlen + W < qlen){ tl = tlen; ql = tlen + W; }
  213. else { tl = tlen; ql = qlen; }
  214. }
  215. n_col = tl < 2 * W + 1? tl : 2 * W + 1;
  216. encap_u8list(mem_cache, kswx_roundup8x(mem_cache->size) - mem_cache->size
  217. + kswx_roundup8x((tl + 2) * sizeof(int)) * 2
  218. + kswx_roundup8x((ql + 2) * sizeof(int))
  219. + kswx_roundup8x(((long long)ql) * n_col)
  220. + kswx_roundup8x(tl + 2) * sizeof(int8_t) * 4); // assume only A,C,G,T
  221. rh = (int*)(mem_cache->buffer + kswx_roundup8x(mem_cache->size));
  222. re = (int*)(mem_cache->buffer + kswx_roundup8x(mem_cache->size) + kswx_roundup8x((tl + 2) * sizeof(int)));
  223. zb = (int*)(mem_cache->buffer + kswx_roundup8x(mem_cache->size) + kswx_roundup8x((tl + 2) * sizeof(int)) * 2);
  224. z = (mem_cache->buffer + kswx_roundup8x(mem_cache->size) + kswx_roundup8x((tl + 2) * sizeof(int)) * 2 + kswx_roundup8x((ql + 2) * sizeof(int)));
  225. qp = (int8_t*)(mem_cache->buffer + kswx_roundup8x(mem_cache->size) + kswx_roundup8x((tl + 2) * sizeof(int)) * 2 + kswx_roundup8x((ql + 2) * sizeof(int)) + kswx_roundup8x(((long long)ql) * n_col));
  226. for(c=i=0;c<4;c++){
  227. for(j=0;j<tl;j++) qp[i++] = (c == target[j * strand])? M : X;
  228. }
  229. rh[0] = init_score; rh[1] = init_score + D + E;
  230. for(j=2;j<=tl;j++) rh[j] = rh[j - 1] + E;
  231. for(j=0;j<=tl;j++) re[j] = -10000;
  232. max = init_score; mi = -1; mj = -1; gmax = 0; gi = -1; gj = -1;
  233. jb = 0; je = tl;
  234. for(i=c=0;i<ql;i++){
  235. if(jb < c - W) jb = c - W;
  236. if(je > c + W + 1) je = c + W + 1;
  237. if(je > tl) je = tl;
  238. if(jb == 0) h1 = init_score + I + E * (i + 1);
  239. else h1 = -10000;
  240. zi = z + (i * n_col);
  241. zb[i] = jb;
  242. imax = 0; mj2 = -1; f = -10000;
  243. qpp = qp + query[i * strand] * tl;
  244. for(j=jb;j<je;j++){
  245. //m = rh[j] + ((query[i * strand] == target[j * strand])? M : X);
  246. m = rh[j] + qpp[j];
  247. rh[j] = h1;
  248. e = re[j];
  249. if(m >= e){ d = 0; h = m; }
  250. else { d = 1; h = e; }
  251. if(h < f){ d = 2; h = f; }
  252. h1 = h;
  253. if(h > imax){ imax = h; mj2 = j; }
  254. t = m + I + E;
  255. e = e + E;
  256. if(e > t){ d |= 1 << 2; }
  257. else { e = t; }
  258. re[j] = e;
  259. t = m + D + E;
  260. f = f + E;
  261. if(f > t){ d |= 2 << 4; }
  262. else { f = t; }
  263. zi[j-jb] = d;
  264. }
  265. rh[j] = h1; re[j] = -10000;
  266. if(j == tlen && gmax < h1){ gmax = h1; gi = i; gj = j - 1; }
  267. if(i + 1 == qlen && gmax < imax){ gmax = imax; gi = i; gj = mj2; }
  268. if(imax > max){
  269. max = imax; mi = i; mj = mj2;
  270. } else if(imax <= 0) break;
  271. c ++;
  272. if(c < mj2){
  273. c ++;
  274. if(je < tl){
  275. rh[je + 1] = -10000;
  276. re[je + 1] = -10000;
  277. }
  278. } else if(c > mj2) {
  279. c --;
  280. if(jb){
  281. rh[jb-1] = -10000;
  282. re[jb-1] = -10000;
  283. }
  284. }
  285. jb = 0; je = tl;
  286. }
  287. if(gmax > 0 && gmax >= max + T){
  288. x.score = gmax; x.qe = gi; x.te = gj;
  289. } else {
  290. x.score = max; x.qe = mi; x.te = mj;
  291. }
  292. i = x.qe; j = x.te;
  293. d = 0;
  294. while(i >= 0 && j >= 0){
  295. d = (z[i * n_col + (j - zb[i])] >> (d << 1)) & 0x03;
  296. if(d == 0){
  297. if(query[i * strand] == target[j * strand]){ x.mat ++; } else { x.mis ++; } i --; j --;
  298. } else if(d == 1){
  299. i --; x.ins ++;
  300. } else {
  301. j --; x.del ++;
  302. }
  303. if(cigars) kswx_push_cigar(cigars, d, 1);
  304. }
  305. if(i >= 0){
  306. x.ins += i + 1;
  307. if(cigars) kswx_push_cigar(cigars, 1, i + 1);
  308. }
  309. if(j >= 0){
  310. x.del += j + 1;
  311. if(cigars) kswx_push_cigar(cigars, 2, j + 1);
  312. }
  313. if(cigars) reverse_u32list(cigars);
  314. x.aln = x.mat + x.mis + x.ins + x.del;
  315. x.qe ++; x.te ++;
  316. return x;
  317. }
  318. static inline kswx_t kswx_extend_align_core(int qlen, uint8_t *query, int tlen, uint8_t *target, int strand, int init_score, int W, int M, int X, int I, int D, int E, int T, u8list *mem_cache, u32list *cigars){
  319. kswx_t x;
  320. int *rh, *re;
  321. uint8_t *z, *zi, d;
  322. int ql, tl, n_col;
  323. int i, j, jb, je, h1, h, m, e, f, t;
  324. int max_gap, max, mi, mj, imax, mj2, gmax, gi, gj;
  325. x = KSWX_NULL;
  326. if(init_score < 0) init_score = 0;
  327. if(qlen <= 0 || tlen <= 0){ x.score = init_score; return x; }
  328. if(W > 0){
  329. max = ((qlen < tlen)? qlen : tlen) * M + init_score + (- T);
  330. max_gap = (max + (((I > D)? I : D))) / (- E) + 1;
  331. if(max_gap < 1) max_gap = 1;
  332. if(W > max_gap) W = max_gap;
  333. } else W = -W;
  334. W = num_min(W, num_max(qlen, tlen));
  335. if(qlen < tlen){
  336. if(qlen + W < tlen){ ql = qlen; tl = qlen + W; }
  337. else { ql = qlen; tl = tlen; }
  338. } else {
  339. if(tlen + W < qlen){ tl = tlen; ql = tlen + W; }
  340. else { tl = tlen; ql = qlen; }
  341. }
  342. n_col = tl < 2 * W + 1? tl : 2 * W + 1;
  343. encap_u8list(mem_cache, kswx_roundup8x((tl + 2) * sizeof(int)) + kswx_roundup8x((tl + 2) * sizeof(int)) + kswx_roundup8x(((long long)ql) * n_col));
  344. rh = (int*)(mem_cache->buffer + mem_cache->size);
  345. re = (int*)(mem_cache->buffer + mem_cache->size + kswx_roundup8x((tl + 2) * sizeof(int)));
  346. z = (mem_cache->buffer + mem_cache->size + kswx_roundup8x((tl + 2) * sizeof(int)) + kswx_roundup8x((tl + 2) * sizeof(int)));
  347. rh[0] = init_score; rh[1] = init_score + D + E;
  348. for(j=2;j<=tl;j++) rh[j] = rh[j - 1] + E;
  349. for(j=0;j<=tl;j++) re[j] = -10000;
  350. max = init_score; mi = -1; mj = -1; gmax = 0; gi = -1; gj = -1;
  351. jb = 0; je = tl;
  352. for(i=0;i<ql;i++){
  353. jb = i - W; if(jb < 0) jb = 0;
  354. je = i + W + 1; if(je > tl) je = tl;
  355. if(jb == 0) h1 = init_score + I + E * (i + 1);
  356. else h1 = -10000;
  357. zi = &z[i * n_col];
  358. imax = 0; mj2 = -1; f = -10000;
  359. for(j=jb;j<je;j++){
  360. m = rh[j] + ((query[i * strand] == target[j * strand])? M : X);
  361. rh[j] = h1;
  362. e = re[j];
  363. d = m >= e? 0 : 1;
  364. h = m >= e? m : e;
  365. d = h >= f? d : 2;
  366. h = h >= f? h : f;
  367. h1 = h;
  368. imax = imax > h? imax : h;
  369. mj2 = imax > h? mj2 : j;
  370. t = m + I + E;
  371. e = e + E;
  372. d |= e > t? 1<<2 : 0;
  373. e = e > t? e : t;
  374. re[j] = e;
  375. t = m + D + E;
  376. f = f + E;
  377. d |= f > t? 2<<4 : 0;
  378. f = f > t? f : t;
  379. zi[j-jb] = d;
  380. }
  381. rh[j] = h1; re[j] = -10000;
  382. if(j == tlen && gmax < h1){ gmax = h1; gi = i; gj = j - 1; }
  383. if(i + 1 == qlen && gmax < imax){ gmax = imax; gi = i; gj = mj2; }
  384. if(imax > max){
  385. max = imax; mi = i; mj = mj2;
  386. } else if(imax <= 0) break;
  387. }
  388. if(gmax > 0 && gmax >= max + T){
  389. x.score = gmax; x.qe = gi; x.te = gj;
  390. } else {
  391. x.score = max; x.qe = mi; x.te = mj;
  392. }
  393. i = x.qe; j = x.te;
  394. d = 0;
  395. clear_u32list(cigars);
  396. while(i >= 0 && j >= 0){
  397. d = (z[i * n_col + j - (i > W? i - W : 0)] >> (d << 1)) & 0x03;
  398. if(d == 0){
  399. if(query[i * strand] == target[j * strand]){ x.mat ++; } else { x.mis ++; } i --; j --;
  400. } else if(d == 1){
  401. i --; x.ins ++;
  402. } else {
  403. j --; x.del ++;
  404. }
  405. if(cigars) kswx_push_cigar(cigars, d, 1);
  406. }
  407. if(i >= 0){
  408. x.ins += i + 1;
  409. if(cigars) kswx_push_cigar(cigars, 1, i + 1);
  410. }
  411. if(j >= 0){
  412. x.del += j + 1;
  413. if(cigars) kswx_push_cigar(cigars, 2, j + 1);
  414. }
  415. if(cigars) reverse_u32list(cigars);
  416. x.aln = x.mat + x.mis + x.ins + x.del;
  417. x.qe ++; x.te ++;
  418. return x;
  419. }
  420. static inline kswx_t kswx_mismatch_free_extend_align_core(int qlen, uint8_t *query, int tlen, uint8_t *target, int strand, int init_score, int W, int M, int I, int D, int E, int T, u8list *mem_cache, u32list *cigars){
  421. kswx_t x;
  422. int *rh, *re;
  423. uint8_t *z, *zi, d;
  424. int ql, tl, n_col;
  425. int i, j, jb, je, h1, h, m, e, f;
  426. int max_gap, max, mi, mj, imax, mj2, gmax, gi, gj;
  427. x = KSWX_NULL;
  428. if(init_score < 0) init_score = 0;
  429. if(qlen <= 0 || tlen <= 0){ x.score = init_score; return x; }
  430. if(W > 0){
  431. max = ((qlen < tlen)? qlen : tlen) * M + init_score + (- T);
  432. max_gap = (max + (((I > D)? I : D))) / (- E) + 1;
  433. if(max_gap < 1) max_gap = 1;
  434. if(W > max_gap) W = max_gap;
  435. } else W = -W;
  436. W = num_min(W, num_max(qlen, tlen));
  437. if(qlen < tlen){
  438. if(qlen + W < tlen){ ql = qlen; tl = qlen + W; }
  439. else { ql = qlen; tl = tlen; }
  440. } else {
  441. if(tlen + W < qlen){ tl = tlen; ql = tlen + W; }
  442. else { tl = tlen; ql = qlen; }
  443. }
  444. n_col = tl < 2 * W + 1? tl : 2 * W + 1;
  445. encap_u8list(mem_cache, kswx_roundup8x((tl + 2) * sizeof(int)) + kswx_roundup8x((tl + 2) * sizeof(int)) + kswx_roundup8x(((long long)ql) * n_col));
  446. rh = (int*)(mem_cache->buffer + mem_cache->size);
  447. re = (int*)(mem_cache->buffer + mem_cache->size + kswx_roundup8x((tl + 2) * sizeof(int)));
  448. z = (mem_cache->buffer + mem_cache->size + kswx_roundup8x((tl + 2) * sizeof(int)) + kswx_roundup8x((tl + 2) * sizeof(int)));
  449. rh[0] = init_score; rh[1] = init_score + D + E;
  450. for(j=2;j<=tl;j++) rh[j] = rh[j - 1] + E;
  451. for(j=0;j<=tl;j++) re[j] = -10000;
  452. max = init_score; mi = -1; mj = -1; gmax = 0; gi = -1; gj = -1;
  453. jb = 0; je = tl;
  454. for(i=0;i<ql;i++){
  455. jb = i - W; if(jb < 0) jb = 0;
  456. je = i + W + 1; if(je > tl) je = tl;
  457. if(jb == 0) h1 = init_score + I + E * (i + 1);
  458. else h1 = -10000;
  459. zi = &z[i * n_col];
  460. imax = 0; mj2 = -1; f = h1 + D + E;
  461. for(j=jb;j<je;j++){
  462. e = re[j];
  463. if(query[i * strand] == target[j * strand]){
  464. m = rh[j] + M;
  465. rh[j] = h1;
  466. if(m >= e){
  467. if(m >= f){
  468. d = 0;
  469. h = m;
  470. f = m + D + E;
  471. e = m + I + E;
  472. } else {
  473. d = 2;
  474. h = f;
  475. f = f + E;
  476. e = f + I + E;
  477. }
  478. } else {
  479. if(e >= f){
  480. d = 1;
  481. h = e;
  482. f = e + D + E;
  483. e = e + E;
  484. } else {
  485. d = 2;
  486. h = f;
  487. f = f + E;
  488. e = f + I + E;
  489. }
  490. }
  491. } else {
  492. rh[j] = h1;
  493. {
  494. if(e >= f){
  495. d = 1;
  496. h = e;
  497. f = e + D + E;
  498. e = e + E;
  499. } else {
  500. d = 2;
  501. h = f;
  502. f = f + E;
  503. e = f + I + E;
  504. }
  505. }
  506. }
  507. h1 = h;
  508. if(imax < h){ imax = h; mj2 = j; }
  509. re[j] = e;
  510. zi[j-jb] = d;
  511. }
  512. rh[j] = h1; re[j] = -10000;
  513. if(j == tlen && gmax < h1){ gmax = h1; gi = i; gj = j - 1; }
  514. if(i + 1 == qlen && gmax < imax){ gmax = imax; gi = i; gj = mj2; }
  515. if(imax > max){
  516. max = imax; mi = i; mj = mj2;
  517. } else if(imax <= 0) break;
  518. }
  519. if(gmax > 0 && gmax >= max + T){
  520. x.score = gmax; x.qe = gi; x.te = gj;
  521. } else {
  522. x.score = max; x.qe = mi; x.te = mj;
  523. }
  524. i = x.qe; j = x.te;
  525. d = 0;
  526. clear_u32list(cigars);
  527. while(i >= 0 && j >= 0){
  528. d = (z[i * n_col + j - (i > W? i - W : 0)]) & 0x03;
  529. if(d == 0){
  530. x.mat ++; i --; j --;
  531. } else if(d == 1){
  532. i --; x.ins ++;
  533. } else {
  534. j --; x.del ++;
  535. }
  536. if(cigars) kswx_push_cigar(cigars, d, 1);
  537. }
  538. if(i >= 0){
  539. x.ins += i + 1;
  540. if(cigars) kswx_push_cigar(cigars, 1, i + 1);
  541. }
  542. if(j >= 0){
  543. x.del += j + 1;
  544. if(cigars) kswx_push_cigar(cigars, 2, j + 1);
  545. }
  546. if(cigars) reverse_u32list(cigars);
  547. x.aln = x.mat + x.mis + x.ins + x.del;
  548. x.qe ++; x.te ++;
  549. return x;
  550. }
  551. static inline kswx_t kswx_extend_align(int qlen, uint8_t *query, int tlen, uint8_t *target, int strand, int init_score, int W, int M, int X, int I, int D, int E, int T, int *n_cigar, uint32_t **cigar){
  552. u8list *caches;
  553. u32list *cigars;
  554. kswx_t x;
  555. caches = init_u8list(256);
  556. cigars = init_u32list(64);
  557. x = kswx_extend_align_shift_core(qlen, query, tlen, target, strand, init_score, W, M, X, I, D, E, T, caches, cigars);
  558. free_u8list(caches);
  559. *n_cigar = cigars->size;
  560. *cigar = cigars->buffer;
  561. free(cigars);
  562. return x;
  563. }
  564. static inline kswx_t kswx_flexible_banded_affine_alignment(uint8_t *query, int ql, uint8_t *target, int tl, int M, int X, int I, int D, int E, int T, int is_local, int *bands[2], u8list *mem, u32list *cigars){
  565. kswx_t x;
  566. int *rh, *re;
  567. uint8_t *z, *zi;
  568. long long tot_z;
  569. int8_t *qp, *qpp;
  570. register uint8_t d;
  571. register int h, h1, m, e, f, t, imax, mj2;
  572. int i, j, c, max, mi, mj, gmax, gi, gj;
  573. long long ii;
  574. clear_u32list(cigars);
  575. tot_z = 0;
  576. for(i=0;i<ql;i++){ tot_z += bands[1][i] - bands[0][i]; }
  577. clear_and_encap_u8list(mem, kswx_roundup8x((tl + 2) * sizeof(int)) * 2 // rh, re
  578. + kswx_roundup8x(tot_z) // z
  579. + kswx_roundup8x(tl + 2) * sizeof(int8_t) * 4); // qp
  580. rh = (int*)(mem->buffer);
  581. re = (int*)(((void*)rh) + kswx_roundup8x((tl + 2) * sizeof(int)));
  582. z = (uint8_t*)(((void*)re) + kswx_roundup8x((tl + 2) * sizeof(int)));
  583. qp = (int8_t*)(((void*)z) + kswx_roundup8x(tot_z));
  584. for(c=ii=0;c<4;c++){
  585. for(j=0;j<tl;j++) qp[ii++] = (c == target[j])? M : X;
  586. }
  587. rh[0] = 0; rh[1] = D + E;
  588. for(i=2;i<=tl;i++) rh[i] = rh[i-1] + E;
  589. for(i=0;i<=tl;i++) re[i] = -10000;
  590. max = 0; mi = 0; mj = 0;
  591. gmax = 0; gi = 0; gj = 0;
  592. zi = z;
  593. for(i=0;i<ql;i++){
  594. qpp = qp + query[i] * tl;
  595. h1 = (bands[0][i] == 0)? I + E * (i + 1) : -10000;
  596. f = -10000;
  597. imax = -10000; mj2 = -1;
  598. for(j=bands[0][i];j<bands[1][i];j++){
  599. m = rh[j] + qpp[j];
  600. rh[j] = h1;
  601. e = re[j];
  602. if(m >= e){ d = 0; h = m; }
  603. else { d = 1; h = e; }
  604. if(h < f){ d = 2; h = f; }
  605. if(is_local && h < 0){ d = 3; h = 0; }
  606. h1 = h;
  607. if(imax < h){ imax = h; mj2 = j; }
  608. t = m + I + E;
  609. e = e + E;
  610. if(e > t){ d |= 1 << 2; }
  611. else { e = t; }
  612. re[j] = e;
  613. t = m + D + E;
  614. f = f + E;
  615. if(f > t){ d |= 2 << 4; }
  616. else { f = t; }
  617. *zi = d;
  618. zi ++;
  619. }
  620. rh[j] = h1; re[j] = -10000;
  621. if(j == tl && gmax < h1){ gmax = h1; gi = i; gj = j - 1; }
  622. if(i + 1 == ql && gmax < imax){ gmax = imax; gi = i; gj = mj2; }
  623. if(imax > max){ max = imax; mi = i; mj = mj2; }
  624. }
  625. if(gmax >= max + T){
  626. x.score = gmax; x.qe = gi; x.te = gj;
  627. } else {
  628. x.score = max; x.qe = mi; x.te = mj;
  629. }
  630. i = x.qe; j = x.te;
  631. d = 0;
  632. for(i=ql-1;i>=x.qe;i--){ zi -= bands[1][i] - bands[0][i]; }
  633. clear_u32list(cigars);
  634. while(i >= 0 && j >= 0){
  635. d = (zi[j - bands[0][i]] >> (d << 1)) & 0x03;
  636. if(d == 3) break;
  637. if(d == 0){
  638. x.mat ++; i --; zi -= bands[1][i] - bands[0][i]; j --;
  639. } else if(d == 1){
  640. i --; zi -= bands[1][i] - bands[0][i]; x.ins ++;
  641. } else {
  642. j --; x.del ++;
  643. }
  644. if(cigars) kswx_push_cigar(cigars, d, 1);
  645. }
  646. if(d != 3){
  647. if(i >= 0){
  648. x.ins += i + 1;
  649. if(cigars) kswx_push_cigar(cigars, 1, i + 1);
  650. }
  651. if(j >= 0){
  652. x.del += j + 1;
  653. if(cigars) kswx_push_cigar(cigars, 2, j + 1);
  654. }
  655. }
  656. x.qb = i; x.tb = j;
  657. if(cigars) reverse_u32list(cigars);
  658. x.aln = x.mat + x.mis + x.ins + x.del;
  659. x.qe ++; x.te ++;
  660. return x;
  661. }
  662. static inline kswx_t kswx_refine_alignment(uint8_t *query, int qb, uint8_t *target, int tb, int W, int M, int X, int I, int D, int E, u32list *cigars, u8list *mem_cache, u32list *cigars2){
  663. kswx_t y;
  664. int *rh, *re;
  665. int *zb, *ze, *zw;
  666. int ql, tl, _zb, _ze, tx, qx;
  667. int qe, te;
  668. uint8_t *z, *zi;
  669. int8_t *qp, *qpp;
  670. register uint8_t d;
  671. register int h, m, e, f, t;
  672. long long i, j;
  673. int op, len, h1, c;
  674. clear_u32list(cigars2);
  675. qe = qb; te = tb;
  676. for(i=0;i<(int)cigars->size;i++){
  677. op = cigars->buffer[i] & 0xFU;
  678. len = cigars->buffer[i] >> 4;
  679. switch(op){
  680. case 0: qe += len; te += len; break;
  681. case 1: qe += len; break;
  682. default: te += len;
  683. }
  684. }
  685. ql = qe - qb;
  686. tl = te - tb;
  687. if(ql == 0 || tl == 0) return KSWX_NULL;
  688. clear_and_encap_u8list(mem_cache,
  689. kswx_roundup8x((tl + 2) * sizeof(int)) * 2 // rh, re
  690. + kswx_roundup8x((ql + 2) * sizeof(int)) * 3 // zb, ze, zw
  691. + kswx_roundup8x(((long long)ql) * tl) // z
  692. + kswx_roundup8x(tl + 2) * sizeof(int8_t) * 4); // qp
  693. rh = (int*)(mem_cache->buffer);
  694. re = (int*)(((void*)rh) + kswx_roundup8x((tl + 2) * sizeof(int)));
  695. zb = (int*)(((void*)re) + kswx_roundup8x((tl + 2) * sizeof(int)));
  696. ze = (int*)(((void*)zb) + kswx_roundup8x((ql + 2) * sizeof(int)));
  697. zw = (int*)(((void*)ze) + kswx_roundup8x((ql + 2) * sizeof(int)));
  698. z = (uint8_t*)(((void*)zw) + kswx_roundup8x((ql + 2) * sizeof(int)));
  699. qp = (int8_t*)(((void*)z) + kswx_roundup8x(((long long)ql) * tl));
  700. for(c=i=0;c<4;c++){
  701. for(j=0;j<tl;j++) qp[i++] = (c == target[j + tb])? M : X;
  702. }
  703. // calculate alignment band
  704. // basic band
  705. tx = qx = 0;
  706. for(i=0;i<(int)cigars->size;i++){
  707. op = cigars->buffer[i] & 0xFU;
  708. len = cigars->buffer[i] >> 4;
  709. switch(op){
  710. case 0:
  711. for(j=0;j<len;j++) zw[qx++] = W;
  712. break;
  713. case 1:
  714. for(j=0;j<len;j++) zw[qx++] = W + len;
  715. break;
  716. default:
  717. zw[qx] += len;
  718. }
  719. }
  720. // extending bandwidth around indels
  721. tx = qx = 0;
  722. for(i=0;i<(int)cigars->size;i++){
  723. op = cigars->buffer[i] & 0xFU;
  724. len = cigars->buffer[i] >> 4;
  725. switch(op){
  726. case 0:
  727. qx += len;
  728. break;
  729. case 1:
  730. for(j=1;j<len&&j<qx;j++) zw[qx-j] += len - j;
  731. qx += len - 1;
  732. for(j=1;j<len&&j+qx<ql;j++) zw[qx+j] += len - j;
  733. qx ++;
  734. break;
  735. default:
  736. for(j=1;j<len&&j<qx;j++) zw[qx-j] += len - j;
  737. for(j=1;j<len&&j+qx<ql;j++) zw[qx+j] += len - j;
  738. }
  739. }
  740. // generate band region
  741. tx = qx = 0;
  742. for(i=0;i<(int)cigars->size;i++){
  743. op = cigars->buffer[i] & 0xFU;
  744. len = cigars->buffer[i] >> 4;
  745. switch(op){
  746. case 0:
  747. for(j=0;j<len;j++){
  748. _zb = tx - zw[qx]; if(_zb < 0) _zb = 0;
  749. _ze = tx + 1 + zw[qx]; if(_ze > tl) _ze = tl;
  750. zb[qx] = _zb;
  751. ze[qx] = _ze;
  752. tx ++;
  753. qx ++;
  754. }
  755. break;
  756. case 1:
  757. for(j=0;j<len;j++){
  758. _zb = tx - zw[qx]; if(_zb < 0) _zb = 0;
  759. _ze = tx + 1 + zw[qx]; if(_ze > tl) _ze = tl;
  760. zb[qx] = _zb;
  761. ze[qx] = _ze;
  762. qx ++;
  763. }
  764. break;
  765. default:
  766. tx += len;
  767. }
  768. }
  769. // trim band beg
  770. _zb = 0;
  771. for(i=0;i<ql;i++){
  772. if(zb[i] < _zb) zb[i] = _zb;
  773. else if(zb[i] > _zb) _zb = zb[i];
  774. }
  775. // trim band end
  776. _ze = tl;
  777. for(i=ql-1;i>=0;i--){
  778. if(ze[i] > _ze) ze[i] = _ze;
  779. else if(ze[i] < _ze) _ze = ze[i];
  780. }
  781. // perform alignment
  782. rh[0] = 0;
  783. for(i=1;i<=tl;i++) rh[i] = -10000;
  784. for(i=0;i<=tl;i++) re[i] = -10000;
  785. for(i=0;i<ql;i++){
  786. qpp = qp + query[i + qb] * tl;
  787. h1 = f = -10000;
  788. zi = z + (((long long)i) * tl);
  789. //fprintf(stdout, "ZW\t%d\t%d\t%d\t%d\n", (int)i, zb[i], ze[i], ze[i] - zb[i]);
  790. for(j=zb[i];j<ze[i];j++){
  791. m = rh[j] + qpp[j];
  792. rh[j] = h1;
  793. e = re[j];
  794. if(m >= e){ d = 0; h = m; }
  795. else { d = 1; h = e; }
  796. if(h < f){ d = 2; h = f; }
  797. h1 = h;
  798. t = m + I + E;
  799. e = e + E;
  800. if(e > t){ d |= 1 << 2; }
  801. else { e = t; }
  802. re[j] = e;
  803. t = m + D + E;
  804. f = f + E;
  805. if(f > t){ d |= 2 << 4; }
  806. else { f = t; }
  807. zi[j-zb[i]] = d;
  808. }
  809. rh[j] = h1; re[j] = -10000;
  810. }
  811. y.qb = qb; y.qe = qe; y.tb = tb; y.te = te;
  812. y.score = rh[tl];
  813. y.mat = y.mis = y.ins = y.del = y.aln = 0;
  814. d = 0;
  815. i = ql - 1; j = tl - 1;
  816. while(i >= 0 && j >= 0){
  817. d = (z[i * tl + (j - zb[i])] >> (d << 1)) & 0x03;
  818. if(d == 0){
  819. if(query[i + y.qb] == target[j + y.tb]){ y.mat ++; } else { y.mis ++; } i --; j --;
  820. } else if(d == 1){
  821. i --; y.ins ++;
  822. } else {
  823. j --; y.del ++;
  824. }
  825. kswx_push_cigar(cigars2, d, 1);
  826. }
  827. if(i >= 0){
  828. y.ins += i + 1;
  829. kswx_push_cigar(cigars2, 1, i + 1);
  830. }
  831. if(j >= 0){
  832. y.del += j + 1;
  833. kswx_push_cigar(cigars2, 2, j + 1);
  834. }
  835. reverse_u32list(cigars2);
  836. y.aln = y.mat + y.mis + y.ins + y.del;
  837. return y;
  838. }
  839. // qvs: 0:errQV, 1:misQV, 2:insQV, 3:delQV, 4:mrgQV, 5, misTag, 6, delTag
  840. static inline kswx_t kswx_refine_alignment_5q(uint8_t *query, int qb, uint8_t *target, int tb, int W, uint8_t QCLP, uint8_t QMIS, uint8_t QDEL, uint8_t *qvs[7], u32list *cigars, u8list *mem_cache, u32list *cigars2){
  841. kswx_t y;
  842. uint32_t *rh, SCORE_INF;
  843. int *zb, *ze, *zw;
  844. int ql, tl, rl, _zb, _ze, tx, qx;
  845. int qe, te;
  846. uint8_t *z, *zi;
  847. uint8_t d, qmat[4], qins, qdel[4];
  848. register uint32_t h1, h, m, e, f;
  849. long long i, j;
  850. int op, len;
  851. SCORE_INF = 0xFFFFFFFFU >> 1;
  852. clear_u32list(cigars2);
  853. qe = qb; te = tb;
  854. for(i=0;i<(int)cigars->size;i++){
  855. op = cigars->buffer[i] & 0xFU;
  856. len = cigars->buffer[i] >> 4;
  857. switch(op){
  858. case 0: qe += len; te += len; break;
  859. case 1: qe += len; break;
  860. default: te += len;
  861. }
  862. }
  863. ql = qe - qb;
  864. tl = te - tb;
  865. if(ql == 0 || tl == 0) return KSWX_NULL;
  866. rl = tl;
  867. clear_and_encap_u8list(mem_cache,
  868. kswx_roundup8x((tl + 2) * sizeof(uint32_t)) // rh
  869. + kswx_roundup8x((ql + 2) * sizeof(int)) * 3 // zb, ze, zw
  870. + kswx_roundup8x(((long long)ql) * rl)); // z
  871. rh = (uint32_t*)(mem_cache->buffer);
  872. zb = (int*)(((void*)rh) + kswx_roundup8x((tl + 2) * sizeof(uint32_t)));
  873. ze = (int*)(((void*)zb) + kswx_roundup8x((ql + 2) * sizeof(int)));
  874. zw = (int*)(((void*)ze) + kswx_roundup8x((ql + 2) * sizeof(int)));
  875. z = (uint8_t*)(((void*)zw) + kswx_roundup8x((ql + 2) * sizeof(int)));
  876. // calculate alignment band
  877. // basic band
  878. tx = qx = 0;
  879. for(i=0;i<(int)cigars->size;i++){
  880. op = cigars->buffer[i] & 0xFU;
  881. len = cigars->buffer[i] >> 4;
  882. switch(op){
  883. case 0:
  884. for(j=0;j<len;j++) zw[qx++] = W;
  885. break;
  886. case 1:
  887. for(j=0;j<len;j++) zw[qx++] = W + len;
  888. break;
  889. default:
  890. zw[qx] += len;
  891. }
  892. }
  893. // extending bandwidth around indels
  894. tx = qx = 0;
  895. for(i=0;i<(int)cigars->size;i++){
  896. op = cigars->buffer[i] & 0xFU;
  897. len = cigars->buffer[i] >> 4;
  898. switch(op){
  899. case 0:
  900. qx += len;
  901. break;
  902. case 1:
  903. for(j=1;j<len&&j<qx;j++) zw[qx-j] += len - j;
  904. qx += len - 1;
  905. for(j=1;j<len&&j+qx<ql;j++) zw[qx+j] += len - j;
  906. qx ++;
  907. break;
  908. default:
  909. for(j=1;j<len&&j<qx;j++) zw[qx-j] += len - j;
  910. for(j=1;j<len&&j+qx<ql;j++) zw[qx+j] += len - j;
  911. }
  912. }
  913. // generate band region
  914. tx = qx = 0;
  915. for(i=0;i<(int)cigars->size;i++){
  916. op = cigars->buffer[i] & 0xFU;
  917. len = cigars->buffer[i] >> 4;
  918. switch(op){
  919. case 0:
  920. for(j=0;j<len;j++){
  921. _zb = tx - zw[qx]; if(_zb < 0) _zb = 0;
  922. _ze = tx + 1 + zw[qx]; if(_ze > tl) _ze = tl;
  923. zb[qx] = _zb;
  924. ze[qx] = _ze;
  925. tx ++;
  926. qx ++;
  927. }
  928. break;
  929. case 1:
  930. for(j=0;j<len;j++){
  931. _zb = tx - zw[qx]; if(_zb < 0) _zb = 0;
  932. _ze = tx + 1 + zw[qx]; if(_ze > tl) _ze = tl;
  933. zb[qx] = _zb;
  934. ze[qx] = _ze;
  935. qx ++;
  936. }
  937. break;
  938. default:
  939. tx += len;
  940. }
  941. }
  942. // trim band beg
  943. _zb = 0;
  944. for(i=0;i<ql;i++){
  945. if(zb[i] < _zb) zb[i] = _zb;
  946. else if(zb[i] > _zb) _zb = zb[i];
  947. }
  948. // trim band end
  949. _ze = tl;
  950. for(i=ql-1;i>=0;i--){
  951. if(ze[i] > _ze) ze[i] = _ze;
  952. else if(ze[i] < _ze) _ze = ze[i];
  953. }
  954. // perform alignment
  955. for(i=0;i<=tl;i++) rh[i] = SCORE_INF;
  956. for(i=0;i<ql;i++){
  957. if(i == 0){
  958. h1 = i * QCLP;
  959. f = (i + 1) * QCLP;
  960. } else {
  961. h1 = f = SCORE_INF;
  962. }
  963. zi = z + (i * rl);
  964. for(j=0;j<4;j++){
  965. if(j == query[i + qb]) qmat[j] = 0;
  966. else if(j == qvs[5][i + qb]) qmat[j] = qvs[1][i + qb];
  967. else qmat[j] = QMIS;
  968. }
  969. qins = qvs[2][i + qb];
  970. if(i + 1 == ql) qdel[0] = qdel[1] = qdel[2] = qdel[3] = QCLP;
  971. else {
  972. for(j=0;j<4;j++){
  973. if(j == qvs[6][i + 1 + qb]){
  974. qdel[j] = qvs[3][i + 1 + qb];
  975. } else qdel[j] = QDEL;
  976. }
  977. }
  978. for(j=zb[i];j<ze[i];j++){
  979. m = h1 + qmat[target[j + tb]];
  980. f = f + qdel[target[j + tb]];
  981. if(m <= f){ d = 0; h = m; }
  982. else { d = 2; h = f; }
  983. e = rh[j] + qins;
  984. if(e < h){ d = 1; h = e; }
  985. h1 = rh[j];
  986. rh[j] = f = h;
  987. zi[j-zb[i]] = d;
  988. }
  989. }
  990. y.qb = qb; y.qe = qe; y.tb = tb; y.te = te;
  991. y.score = - (int)rh[tl-1];
  992. y.mat = y.mis = y.ins = y.del = y.aln = 0;
  993. d = 0;
  994. i = ql - 1; j = tl - 1;
  995. //String *qs = init_string(1024);
  996. //String *ts = init_string(1024);
  997. //String *ms = init_string(1024);
  998. while(i >= 0 && j >= 0){
  999. d = z[i * rl + (j - zb[i])] & 0x03;
  1000. if(d == 0){
  1001. //add_char_string(qs, bit_base_table[query[i + y.qb]]);
  1002. //add_char_string(ts, bit_base_table[target[j + y.tb]]);
  1003. if(query[i + y.qb] == target[j + y.tb]){
  1004. //add_char_string(ms, '|');
  1005. y.mat ++;
  1006. } else {
  1007. //add_char_string(ms, '*');
  1008. y.mis ++;
  1009. }
  1010. i --; j --;
  1011. } else if(d == 1){
  1012. //add_char_string(qs, bit_base_table[query[i + y.qb]]);
  1013. //add_char_string(ts, '-');
  1014. //add_char_string(ms, '-');
  1015. i --; y.ins ++;
  1016. } else {
  1017. //add_char_string(qs, '-');
  1018. //add_char_string(ts, bit_base_table[target[j + y.tb]]);
  1019. //add_char_string(ms, '-');
  1020. j --; y.del ++;
  1021. }
  1022. kswx_push_cigar(cigars2, d, 1);
  1023. }
  1024. //reverse_string(qs);
  1025. //reverse_string(ts);
  1026. //reverse_string(ms);
  1027. //printf("%s\n%s\n%s\n", qs->string, ms->string, ts->string);
  1028. //free_string(qs);
  1029. //free_string(ts);
  1030. //free_string(ms);
  1031. if(1){
  1032. if(i >= 0){
  1033. y.ins += i + 1;
  1034. kswx_push_cigar(cigars2, 1, i + 1);
  1035. }
  1036. if(j >= 0){
  1037. y.del += j + 1;
  1038. kswx_push_cigar(cigars2, 2, j + 1);
  1039. }
  1040. } else {
  1041. y.qb += i + 1;
  1042. y.tb += j + 1;
  1043. }
  1044. reverse_u32list(cigars2);
  1045. y.aln = y.mat + y.mis + y.ins + y.del;
  1046. return y;
  1047. }
  1048. static inline kswx_t kswx_refine_affine_alignment_5q(uint8_t *query, int qb, uint8_t *target, int tb, int W, uint8_t QCLP, uint8_t QMIS, uint8_t QDEL, uint8_t QEXT, uint8_t *qvs[7], u32list *cigars, u8list *mem_cache, u32list *cigars2){
  1049. kswx_t y;
  1050. uint32_t *rh, *re, SCORE_INF;
  1051. int *zb, *ze, *zw;
  1052. int ql, tl, rl, _zb, _ze, tx, qx;
  1053. int qe, te;
  1054. uint8_t *z, *zi;
  1055. uint8_t d, qmat[4], qins, qdel[4];
  1056. register uint32_t h1, h, m, e, f, t;
  1057. long long i, j;
  1058. int op, len;
  1059. SCORE_INF = 0xFFFFFFFFU >> 1;
  1060. clear_u32list(cigars2);
  1061. qe = qb; te = tb;
  1062. for(i=0;i<(int)cigars->size;i++){
  1063. op = cigars->buffer[i] & 0xFU;
  1064. len = cigars->buffer[i] >> 4;
  1065. switch(op){
  1066. case 0: qe += len; te += len; break;
  1067. case 1: qe += len; break;
  1068. default: te += len;
  1069. }
  1070. }
  1071. ql = qe - qb;
  1072. tl = te - tb;
  1073. if(ql == 0 || tl == 0) return KSWX_NULL;
  1074. rl = tl;
  1075. clear_and_encap_u8list(mem_cache,
  1076. kswx_roundup8x((tl + 2) * sizeof(uint32_t)) * 2 // rh, re
  1077. + kswx_roundup8x((ql + 2) * sizeof(int)) * 3 // zb, ze, zw
  1078. + kswx_roundup8x(((long long)ql) * rl)); // z
  1079. rh = (uint32_t*)(mem_cache->buffer);
  1080. re = (uint32_t*)(((void*)rh) + kswx_roundup8x((tl + 2) * sizeof(uint32_t)));
  1081. zb = (int*)(((void*)re) + kswx_roundup8x((tl + 2) * sizeof(uint32_t)));
  1082. ze = (int*)(((void*)zb) + kswx_roundup8x((ql + 2) * sizeof(int)));
  1083. zw = (int*)(((void*)ze) + kswx_roundup8x((ql + 2) * sizeof(int)));
  1084. z = (uint8_t*)(((void*)zw) + kswx_roundup8x((ql + 2) * sizeof(int)));
  1085. // calculate alignment band
  1086. // basic band
  1087. tx = qx = 0;
  1088. for(i=0;i<(int)cigars->size;i++){
  1089. op = cigars->buffer[i] & 0xFU;
  1090. len = cigars->buffer[i] >> 4;
  1091. switch(op){
  1092. case 0:
  1093. for(j=0;j<len;j++) zw[qx++] = W;
  1094. break;
  1095. case 1:
  1096. for(j=0;j<len;j++) zw[qx++] = W + len;
  1097. break;
  1098. default:
  1099. zw[qx] += len;
  1100. }
  1101. }
  1102. // extending bandwidth around indels
  1103. tx = qx = 0;
  1104. for(i=0;i<(int)cigars->size;i++){
  1105. op = cigars->buffer[i] & 0xFU;
  1106. len = cigars->buffer[i] >> 4;
  1107. switch(op){
  1108. case 0:
  1109. qx += len;
  1110. break;
  1111. case 1:
  1112. for(j=1;j<len&&j<qx;j++) zw[qx-j] += len - j;
  1113. qx += len - 1;
  1114. for(j=1;j<len&&j+qx<ql;j++) zw[qx+j] += len - j;
  1115. qx ++;
  1116. break;
  1117. default:
  1118. for(j=1;j<len&&j<qx;j++) zw[qx-j] += len - j;
  1119. for(j=1;j<len&&j+qx<ql;j++) zw[qx+j] += len - j;
  1120. }
  1121. }
  1122. // generate band region
  1123. tx = qx = 0;
  1124. for(i=0;i<(int)cigars->size;i++){
  1125. op = cigars->buffer[i] & 0xFU;
  1126. len = cigars->buffer[i] >> 4;
  1127. switch(op){
  1128. case 0:
  1129. for(j=0;j<len;j++){
  1130. _zb = tx - zw[qx]; if(_zb < 0) _zb = 0;
  1131. _ze = tx + 1 + zw[qx]; if(_ze > tl) _ze = tl;
  1132. zb[qx] = _zb;
  1133. ze[qx] = _ze;
  1134. tx ++;
  1135. qx ++;
  1136. }
  1137. break;
  1138. case 1:
  1139. for(j=0;j<len;j++){
  1140. _zb = tx - zw[qx]; if(_zb < 0) _zb = 0;
  1141. _ze = tx + 1 + zw[qx]; if(_ze > tl) _ze = tl;
  1142. zb[qx] = _zb;
  1143. ze[qx] = _ze;
  1144. qx ++;
  1145. }
  1146. break;
  1147. default:
  1148. tx += len;
  1149. }
  1150. }
  1151. // trim band beg
  1152. _zb = 0;
  1153. for(i=0;i<ql;i++){
  1154. if(zb[i] < _zb) zb[i] = _zb;
  1155. else if(zb[i] > _zb) _zb = zb[i];
  1156. }
  1157. // trim band end
  1158. _ze = tl;
  1159. for(i=ql-1;i>=0;i--){
  1160. if(ze[i] > _ze) ze[i] = _ze;
  1161. else if(ze[i] < _ze) _ze = ze[i];
  1162. }
  1163. // perform alignment
  1164. rh[0] = 0;
  1165. for(i=1;i<=tl;i++) rh[i] = rh[i - 1] + QCLP;
  1166. for(i=0;i<=tl;i++) re[i] = SCORE_INF;
  1167. for(i=0;i<ql;i++){
  1168. if(zb[i] == 0){
  1169. h1 = i * QCLP;
  1170. f = h1 + QCLP;
  1171. } else {
  1172. h1 = f = SCORE_INF;
  1173. }
  1174. zi = z + (i * rl);
  1175. for(j=0;j<4;j++){
  1176. if(j == query[i + qb]) qmat[j] = 0;
  1177. else if(j == qvs[5][i + qb]) qmat[j] = qvs[1][i + qb];
  1178. else qmat[j] = QMIS;
  1179. }
  1180. qins = i + 1 == ql? QCLP : qvs[2][i + 1 + qb];
  1181. if(i + 1 == ql) qdel[0] = qdel[1] = qdel[2] = qdel[3] = QCLP;
  1182. else {
  1183. for(j=0;j<4;j++){
  1184. if(j == qvs[6][i + 1 + qb]){
  1185. qdel[j] = qvs[3][i + 1 + qb];
  1186. } else qdel[j] = QDEL;
  1187. }
  1188. }
  1189. for(j=zb[i];j<ze[i];j++){
  1190. m = rh[j] + qmat[target[j + tb]];
  1191. rh[j] = h1;
  1192. e = re[j];
  1193. if(m <= e){ d = 0; h = m; }
  1194. else { d = 1; h = e; }
  1195. if(h > f){ d = 2; h = f; }
  1196. h1 = h;
  1197. t = m + qins;
  1198. //e = e + QEXT;
  1199. e = e + qins;
  1200. if(e < t){ d |= 1 << 2; }
  1201. else { e = t; }
  1202. re[j] = e;
  1203. t = m + qdel[target[j + tb]];
  1204. f = f + QEXT;
  1205. if(f < t){ d |= 2 << 4; }
  1206. else { f = t; }
  1207. zi[j-zb[i]] = d;
  1208. }
  1209. rh[j] = h1; re[j] = SCORE_INF;
  1210. }
  1211. y.qb = qb; y.qe = qe; y.tb = tb; y.te = te;
  1212. y.score = - (int)rh[tl];
  1213. y.mat = y.mis = y.ins = y.del = y.aln = 0;
  1214. d = 0;
  1215. i = ql - 1; j = tl - 1;
  1216. //String *qs = init_string(1024);
  1217. //String *ts = init_string(1024);
  1218. //String *ms = init_string(1024);
  1219. while(i >= 0 && j >= 0){
  1220. d = (z[i * rl + (j - zb[i])] >> (d << 1)) & 0x03;
  1221. if(d == 0){
  1222. //add_char_string(qs, bit_base_table[query[i + y.qb]]);
  1223. //add_char_string(ts, bit_base_table[target[j + y.tb]]);
  1224. if(query[i + y.qb] == target[j + y.tb]){
  1225. //add_char_string(ms, '|');
  1226. y.mat ++;
  1227. } else {
  1228. //add_char_string(ms, '*');
  1229. y.mis ++;
  1230. }
  1231. i --; j --;
  1232. } else if(d == 1){
  1233. //add_char_string(qs, bit_base_table[query[i + y.qb]]);
  1234. //add_char_string(ts, '-');
  1235. //add_char_string(ms, '-');
  1236. i --; y.ins ++;
  1237. } else {
  1238. //add_char_string(qs, '-');
  1239. //add_char_string(ts, bit_base_table[target[j + y.tb]]);
  1240. //add_char_string(ms, '-');
  1241. j --; y.del ++;
  1242. }
  1243. kswx_push_cigar(cigars2, d, 1);
  1244. }
  1245. //reverse_string(qs);
  1246. //reverse_string(ts);
  1247. //reverse_string(ms);
  1248. //printf("%s\n%s\n%s\n", qs->string, ms->string, ts->string);
  1249. //free_string(qs);
  1250. //free_string(ts);
  1251. //free_string(ms);
  1252. if(1){
  1253. if(i >= 0){
  1254. y.ins += i + 1;
  1255. kswx_push_cigar(cigars2, 1, i + 1);
  1256. }
  1257. if(j >= 0){
  1258. y.del += j + 1;
  1259. kswx_push_cigar(cigars2, 2, j + 1);
  1260. }
  1261. } else {
  1262. y.qb += i + 1;
  1263. y.tb += j + 1;
  1264. }
  1265. reverse_u32list(cigars2);
  1266. y.aln = y.mat + y.mis + y.ins + y.del;
  1267. return y;
  1268. }
  1269. static inline void kswx_cigar2string(String *str, int n, uint32_t *cigar){
  1270. int i, j, c, op, len;
  1271. char ch;
  1272. for(i=0;i<n;i++){
  1273. op = cigar[i] & 0xFU;
  1274. len = cigar[i] >> 4;
  1275. if(len == 0) continue;
  1276. if(op > 2){
  1277. fprintf(stderr, " -- CIGAR only support M(0),I(1),D(2) cigar, but met ?(%d) in %s -- %s:%d --\n", op, __FUNCTION__, __FILE__, __LINE__);
  1278. exit(1);
  1279. }
  1280. c = 0;
  1281. encap_string(str, 20);
  1282. while(len){
  1283. str->string[str->size + c] = '0' + (len % 10);
  1284. len = len / 10;
  1285. c ++;
  1286. }
  1287. for(j=0;j<c>>1;j++){
  1288. ch = str->string[str->size + j];
  1289. str->string[str->size + j] = str->string[str->size + c - 1 - j];
  1290. str->string[str->size + c - 1 - j] = ch;
  1291. }
  1292. str->size += c;
  1293. str->string[str->size++] = "MIDX"[op];
  1294. }
  1295. str->string[str->size] = '\0';
  1296. }
  1297. static inline uint32_t kswx_string2cigar(u32list *cigar, char *str){
  1298. uint32_t c, len, op, n;
  1299. char *p;
  1300. p = str;
  1301. len = 0;
  1302. n = 0;
  1303. while(*p){
  1304. if(*p >= '0' && *p <= '9'){
  1305. len = len * 10 + (*p) - '0';
  1306. } else {
  1307. switch(*p){
  1308. case 'M': op = 0; break;
  1309. case 'I': op = 1; break;
  1310. case 'D': op = 2; break;
  1311. default: op = 3;
  1312. }
  1313. c = (len << 4) | op;
  1314. if(cigar->size && (cigar->buffer[cigar->size - 1] & 0xFU) == op){
  1315. cigar->buffer[cigar->size - 1] += len << 4;
  1316. } else push_u32list(cigar, c);
  1317. n ++;
  1318. len = 0;
  1319. }
  1320. p ++;
  1321. }
  1322. return n;
  1323. }
  1324. static inline void kswx_cigar2pairwise(String *alns[2], uint8_t *seqs[2], int n, uint32_t *cigar){
  1325. char *p1, *p2;
  1326. int i, j, op, len, off[2];
  1327. off[0] = off[1] = 0;
  1328. for(i=0;i<n;i++){
  1329. op = cigar[i] & 0xFU;
  1330. len = cigar[i] >> 4;
  1331. if(len == 0) continue;
  1332. encap_string(alns[0], len);
  1333. encap_string(alns[1], len);
  1334. p1 = alns[0]->string + alns[0]->size;
  1335. p2 = alns[1]->string + alns[1]->size;
  1336. switch(op){
  1337. case 0:
  1338. for(j=0;j<len;j++){
  1339. p1[j] = bit_base_table[(int)seqs[0][off[0] + j]];
  1340. p2[j] = bit_base_table[(int)seqs[1][off[1] + j]];
  1341. }
  1342. off[0] += len;
  1343. off[1] += len;
  1344. break;
  1345. case 1:
  1346. for(j=0;j<len;j++){
  1347. p1[j] = bit_base_table[(int)seqs[0][off[0] + j]];
  1348. p2[j] = '-';
  1349. }
  1350. off[0] += len;
  1351. break;
  1352. case 2:
  1353. for(j=0;j<len;j++){
  1354. p1[j] = '-';
  1355. p2[j] = bit_base_table[(int)seqs[1][off[1] + j]];
  1356. }
  1357. off[1] += len;
  1358. break;
  1359. default:
  1360. fprintf(stderr, " -- CIGAR only support M(0),I(1),D(2) cigar, but met ?(%d) in %s -- %s:%d --\n", op, __FUNCTION__, __FILE__, __LINE__);
  1361. exit(1);
  1362. }
  1363. alns[0]->size += len;
  1364. alns[1]->size += len;
  1365. }
  1366. alns[0]->string[alns[0]->size] = 0;
  1367. alns[1]->string[alns[1]->size] = 0;
  1368. }
  1369. static inline void kswx_polish_pairwise(char *alns[2], int len){
  1370. int i, j, m, c, gaps[2];
  1371. while(1){
  1372. c = 0;
  1373. gaps[0] = gaps[1] = 0;
  1374. for(i=0;i<len;i++){
  1375. for(j=0;j<2;j++){
  1376. if(alns[j][i] == '-'){ gaps[j] ++; continue; }
  1377. if(gaps[j] == 0) continue;
  1378. for(m=i-gaps[j];m<i;m++){
  1379. if(alns[!j][m] == alns[j][i]){
  1380. alns[j][m] = alns[j][i];
  1381. alns[j][i] = '-';
  1382. c ++;
  1383. break;
  1384. }
  1385. }
  1386. gaps[j] = i - m;
  1387. }
  1388. }
  1389. if(c == 0) break;
  1390. }
  1391. }
  1392. static inline uint32_t kswx_pairwise2cigar(u32list *cigar, char *alns[2], uint32_t size){
  1393. uint32_t i, op, len, n, s;
  1394. op = len = n = 0;
  1395. for(i=0;i<=size;i++){
  1396. if(i == n) s = 10;
  1397. if(alns[0][i] == '-') s = 2;
  1398. else if(alns[1][i] == '-') s = 1;
  1399. else s = 0;
  1400. if(op == s) len ++;
  1401. else {
  1402. push_u32list(cigar, (len << 4) | op);
  1403. n ++;
  1404. op = s;
  1405. len = 1;
  1406. }
  1407. }
  1408. return n;
  1409. }
  1410. #define KSWX_ZIGAR_ZERO 32
  1411. #define KSWX_ZIGAR_MAX_LEN 30
  1412. static inline void kswx_cigar2zigar(String *str, int n, uint32_t *cigar){
  1413. int i, op, len, t, s, ss;
  1414. t = s = 0;
  1415. for(i=0;i<=n;i++){
  1416. if(i < n){
  1417. op = cigar[i] & 0xFU;
  1418. if(op > 2){
  1419. fprintf(stderr, " -- ZIGAR only support M(0),I(1),D(2) cigar, but met ?(%d) in %s -- %s:%d --\n", op, __FUNCTION__, __FILE__, __LINE__);
  1420. exit(1);
  1421. }
  1422. len = cigar[i] >> 4;
  1423. if(len == 0) continue;
  1424. } else { op = 0x1FU; len = 0; }
  1425. if(op == t) s += len;
  1426. else {
  1427. while(s){
  1428. ss = (s <= KSWX_ZIGAR_MAX_LEN)? s : KSWX_ZIGAR_MAX_LEN;
  1429. s -= ss;
  1430. ss = ss + t * KSWX_ZIGAR_MAX_LEN + KSWX_ZIGAR_ZERO;
  1431. add_char_string(str, ss);
  1432. }
  1433. t = op;
  1434. s = len;
  1435. }
  1436. }
  1437. }
  1438. static inline uint32_t kswx_zigar2cigar(u32list *cigar, char *str){
  1439. uint32_t c, len, op, s, t;
  1440. char *p;
  1441. p = str;
  1442. op = 0x1FU;
  1443. len = 0;
  1444. c = 0;
  1445. while(1){
  1446. if(*p){
  1447. s = (*p) - KSWX_ZIGAR_ZERO;
  1448. t = s / KSWX_ZIGAR_MAX_LEN;
  1449. s = s % KSWX_ZIGAR_MAX_LEN;
  1450. } else {
  1451. t = 0x1FU;
  1452. s = 0;
  1453. }
  1454. if(op == t) len += s;
  1455. else {
  1456. if(len){
  1457. push_u32list(cigar, (len << 4) | op);
  1458. c ++;
  1459. }
  1460. op = t;
  1461. len = s;
  1462. }
  1463. if(*p == 0) break;
  1464. p ++;
  1465. }
  1466. return c;
  1467. }
  1468. static inline void kswx_zigar2string(String *str, char *zigar){
  1469. int j, c, op, len, s, t;
  1470. char ch, *p;
  1471. p = zigar;
  1472. op = 0x1FU;
  1473. len = 0;
  1474. while(1){
  1475. if(*p){
  1476. s = (*p) - KSWX_ZIGAR_ZERO;
  1477. t = s / KSWX_ZIGAR_MAX_LEN;
  1478. s = s % KSWX_ZIGAR_MAX_LEN;
  1479. } else {
  1480. t = 0x1FU;
  1481. s = 0;
  1482. }
  1483. if(op == t) len += s;
  1484. else {
  1485. if(len){
  1486. c = 0;
  1487. encap_string(str, 20);
  1488. while(len){
  1489. str->string[str->size + c] = '0' + (len % 10);
  1490. len = len / 10;
  1491. c ++;
  1492. }
  1493. for(j=0;j<c>>1;j++){
  1494. ch = str->string[str->size + j];
  1495. str->string[str->size + j] = str->string[str->size + c - 1 - j];
  1496. str->string[str->size + c - 1 - j] = ch;
  1497. }
  1498. str->size += c;
  1499. str->string[str->size++] = "MIDX"[op];
  1500. }
  1501. op = t;
  1502. len = s;
  1503. }
  1504. if(*p == '\0') break;
  1505. p ++;
  1506. }
  1507. str->string[str->size] = '\0';
  1508. }
  1509. static inline int kswx_string2zigar(String *zigar, char *str){
  1510. uint32_t len, op, n, t, s, ss;
  1511. char *p;
  1512. p = str;
  1513. op = 0;
  1514. len = 0;
  1515. n = 0;
  1516. t = s = 0;
  1517. while(1){
  1518. if(*p >= '0' && *p <= '9'){
  1519. len = len * 10 + (*p) - '0';
  1520. p ++;
  1521. continue;
  1522. }
  1523. if(*p){
  1524. switch(*p){
  1525. case 'M': op = 0; break;
  1526. case 'I': op = 1; break;
  1527. case 'D': op = 2; break;
  1528. default: op = 3;
  1529. fprintf(stderr, " -- ZIGAR only support M(0),I(1),D(2) cigar, but met %c(?) in %s -- %s:%d --\n", *p, __FUNCTION__, __FILE__, __LINE__);
  1530. fprintf(stderr, "%s\n", str); fflush(stderr);
  1531. exit(1);
  1532. }
  1533. } else { op = 0x1FU; len = 0; }
  1534. if(op == t){ s += len; len = 0; }
  1535. else {
  1536. while(s){
  1537. ss = (s <= KSWX_ZIGAR_MAX_LEN)? s : KSWX_ZIGAR_MAX_LEN;
  1538. s -= ss;
  1539. ss = ss + t * KSWX_ZIGAR_MAX_LEN + KSWX_ZIGAR_ZERO;
  1540. add_char_string(zigar, ss);
  1541. n ++;
  1542. }
  1543. t = op;
  1544. s = len;
  1545. len = 0;
  1546. }
  1547. if(*p == '\0') break;
  1548. p ++;
  1549. };
  1550. return n;
  1551. }
  1552. static inline kswr_t kswx_extend_core(int qlen, uint8_t *query, int tlen, uint8_t *target, kswr_t r, int m, const int8_t *matrix, int w, int I, int D, int E, int T){
  1553. int y1, y2, x1, x2, x3, score, gscore, max_off;
  1554. if(T < 0){
  1555. // try extend left
  1556. do {
  1557. if(r.qb == 0 || r.tb == 0) break;
  1558. if(r.tb >= r.qb){
  1559. y1 = (r.qb + w > r.tb)? r.tb : r.qb + w;
  1560. y2 = r.qb;
  1561. revseq_bytes(target + r.tb - y1, y1); revseq_bytes(query + r.qb - y2, y2);
  1562. score = ksw_extend2(y2, query + r.qb - y2, y1, target + r.tb - y1, m, matrix, - D, - E, - I, - E, w, - T, -1, r.score, &x2, &x1, &x3, &gscore, &max_off);
  1563. revseq_bytes(target + r.tb - y1, y1); revseq_bytes(query + r.qb - y2, y2);
  1564. if(gscore <= 0 || gscore <= score + T){
  1565. r.tb = r.tb - x1; r.qb = r.qb - x2; r.score = score;
  1566. } else {
  1567. r.tb = r.tb - x3; r.qb = 0; r.score = gscore;
  1568. }
  1569. } else {
  1570. y1 = r.tb;
  1571. y2 = (r.tb + w > r.qb)? r.qb : r.tb + w;
  1572. revseq_bytes(target + r.tb - y1, y1); revseq_bytes(query + r.qb - y2, y2);
  1573. score = ksw_extend2(y1, target + r.tb - y1, y2, query + r.qb - y2, m, matrix, - I, - E, - D, - E, w, - T, -1, r.score, &x1, &x2, &x3, &gscore, &max_off);
  1574. revseq_bytes(target + r.tb - y1, y1); revseq_bytes(query + r.qb - y2, y2);
  1575. if(gscore <= 0 || gscore <= score + T){
  1576. r.tb = r.tb - x1; r.qb = r.qb - x2; r.score = score;
  1577. } else {
  1578. r.qb = r.qb - x3; r.tb = 0; r.score = gscore;
  1579. }
  1580. }
  1581. } while(0);
  1582. // try extend right
  1583. do {
  1584. if(r.qe == qlen || r.te == tlen) break;
  1585. if(tlen - r.te >= qlen - r.qe){
  1586. y1 = (qlen - r.qe + w > tlen - r.te)? tlen - r.te : qlen - r.qe + w;
  1587. y2 = qlen - r.qe;
  1588. score = ksw_extend2(y2, query + r.qe, y1, target + r.te, m, matrix, - D, - E, - I, - E, w, - T, -1, r.score, &x2, &x1, &x3, &gscore, &max_off);
  1589. if(gscore <= 0 || gscore <= score + T){
  1590. r.te += x1; r.qe += x2; r.score = score;
  1591. } else {
  1592. r.te += x3; r.qe = qlen; r.score = gscore;
  1593. }
  1594. } else {
  1595. y1 = tlen - r.te;
  1596. y2 = (tlen - r.te + w > qlen - r.qe)? qlen - r.qe : tlen - r.te + w;
  1597. score = ksw_extend2(y1, target + r.te, y2, query + r.qe, m, matrix, - I, - E, - D, - E, w, - T, -1, r.score, &x1, &x2, &x3, &gscore, &max_off);
  1598. if(gscore <= 0 || gscore <= score + T){
  1599. r.te += x1; r.qe += x2; r.score = score;
  1600. } else {
  1601. r.te = tlen; r.qe += x3; r.score = gscore;
  1602. }
  1603. }
  1604. } while(0);
  1605. }
  1606. return r;
  1607. }
  1608. static inline kswx_t kswx_gen_cigar_core2(int qlen, uint8_t *query, int tlen, uint8_t *target, kswr_t r, int m, const int8_t *matrix, int w, int I, int D, int E, int *_n_cigar, uint32_t **_cigar){
  1609. kswx_t x;
  1610. uint32_t *cigar;
  1611. int i, j, n_cigar, w2, x1, x2, op, len;
  1612. UNUSED(qlen);
  1613. UNUSED(tlen);
  1614. n_cigar = *_n_cigar;
  1615. cigar = *_cigar;
  1616. if(w >= 0){
  1617. x1 = r.te - r.tb;
  1618. x2 = r.qe - r.qb;
  1619. x1 -= x2;
  1620. if(x1 < 0) x1 = -x1;
  1621. if(w < x1 * 1.5) w = x1 * 1.5;
  1622. w2 = (((r.te - r.tb < r.qe - r.qb)? r.te - r.tb : r.qe - r.qb) * matrix[0] - r.score) / (-E) + 1;
  1623. if(w2 < (w / 4) + 1) w2 = (w / 4) + 1;
  1624. if(w > w2) w = w2;
  1625. } else w = - w;
  1626. x.score = ksw_global2(r.qe - r.qb, query + r.qb, r.te - r.tb, target + r.tb, m, matrix, - D, - E, - I, - E, w, &n_cigar, &cigar);
  1627. x.tb = r.tb;
  1628. x.te = r.te;
  1629. x.qb = r.qb;
  1630. x.qe = r.qe;
  1631. x.aln = x.mat = x.mis = x.ins = x.del = 0;
  1632. for(i=x1=x2=0;i<n_cigar;i++){
  1633. op = cigar[i] & 0xF;
  1634. len = cigar[i] >> 4;
  1635. x.aln += len;
  1636. switch(op){
  1637. case 0: for(j=0;j<len;j++){ if(query[r.qb + x1 + j] == target[r.tb + x2 + j]) x.mat ++; else x.mis ++; } x1 += len; x2 += len; break;
  1638. case 1: x1 += len; x.del += len; break;
  1639. case 2: x2 += len; x.ins += len; break;
  1640. }
  1641. }
  1642. *_n_cigar = n_cigar;
  1643. *_cigar = cigar;
  1644. return x;
  1645. }
  1646. static inline kswx_t kswx_gen_cigar_core(int qlen, uint8_t *query, int tlen, uint8_t *target, kswr_t r, int m, const int8_t *matrix, int w, int I, int D, int E){
  1647. kswx_t x;
  1648. uint32_t *cigar;
  1649. int n_cigar;
  1650. n_cigar = 0;
  1651. cigar = NULL;
  1652. x = kswx_gen_cigar_core2(qlen, query, tlen, target, r, m, matrix, w, I, D, E, &n_cigar, &cigar);
  1653. if(cigar) free(cigar);
  1654. return x;
  1655. }
  1656. static inline kswx_t kswx_align(int qlen, uint8_t *query, int tlen, uint8_t *target, int m, const int8_t *matrix, int w, int I, int D, int E, int T){
  1657. kswr_t r;
  1658. r = ksw_align2(qlen, query, tlen, target, m, matrix, - D, - E, - I, - E, KSW_XSTART, NULL);
  1659. if(r.qb <= -1 || r.tb <= -1 || r.qe <= -1 || r.te <= -1) return KSWX_NULL;
  1660. r.qe ++; r.te ++;
  1661. r = kswx_extend_core(qlen, query, tlen, target, r, m, matrix, w, I, D, E, T);
  1662. return kswx_gen_cigar_core(qlen, query, tlen, target, r, m, matrix, w, I, D, E);
  1663. }
  1664. static inline kswr_t kswx_align_no_stat(int qlen, uint8_t *query, int tlen, uint8_t *target, int m, const int8_t *matrix, int w, int I, int D, int E, int T){
  1665. kswr_t r;
  1666. r = ksw_align2(qlen, query, tlen, target, m, matrix, - D, - E, - I, - E, KSW_XSTART, NULL);
  1667. if(r.qb <= -1 || r.tb <= -1 || r.qe <= -1 || r.te <= -1) return KSWR_NULL;
  1668. r.qe ++; r.te ++;
  1669. r = kswx_extend_core(qlen, query, tlen, target, r, m, matrix, w, I, D, E, T);
  1670. return r;
  1671. }
  1672. static inline kswx_t kswx_align_with_cigar(int qlen, uint8_t *query, int tlen, uint8_t *target, int m, const int8_t *matrix, int w, int I, int D, int E, int T, int *n_cigar, uint32_t **cigar){
  1673. kswr_t r;
  1674. r = ksw_align2(qlen, query, tlen, target, m, matrix, - D, - E, - I, - E, KSW_XSTART, NULL);
  1675. if(r.qb <= -1 || r.tb <= -1 || r.qe <= -1 || r.te <= -1) return KSWX_NULL;
  1676. r.qe ++; r.te ++;
  1677. r = kswx_extend_core(qlen, query, tlen, target, r, m, matrix, w, I, D, E, T);
  1678. return kswx_gen_cigar_core2(qlen, query, tlen, target, r, m, matrix, w, I, D, E, n_cigar, cigar);
  1679. }
  1680. // qb, qe, tb, te are the positions of seed match region, will be extended to whole reads
  1681. static inline kswx_t kswx_fast_align(int qlen, uint8_t *query, int qb, int qe, int tlen, uint8_t *target, int tb, int te, int W, int M, int X, int I, int D, int E, int T, float min_sm, String *cigar_str){
  1682. kswr_t r;
  1683. kswx_t x, x1;
  1684. int8_t i, m, matrix[16];
  1685. int ol, n_cigar[3], w;
  1686. uint32_t *cigar[3];
  1687. m = 4;
  1688. for(i=0;i<16;i++) matrix[i] = ((i / 4) == (i % 4))? M : X;
  1689. r = ksw_align2(qe - qb, query + qb, te - tb, target + tb, m, matrix, - D, - E, - I, - E, KSW_XSTART, NULL);
  1690. if(r.qb <= -1 || r.tb <= -1 || r.qe <= -1 || r.te <= -1) return KSWX_NULL;
  1691. r.qe ++; r.te ++;
  1692. // check whether the seed region was well aligned
  1693. ol = (r.qe - r.qb < r.te - r.tb)? r.qe - r.qb : r.te - r.tb;
  1694. if(ol < 30) return KSWX_NULL;
  1695. n_cigar[0] = n_cigar[1] = n_cigar[2] = 0;
  1696. cigar[0] = cigar[1] = cigar[2] = NULL;
  1697. w = (r.qe - r.qb < r.te - r.tb)? ((r.te - r.tb) - (r.qe - r.qb)) : ((r.qe - r.qb) - (r.te - r.tb));
  1698. if(w < W) w = W;
  1699. if(cigar_str) x = kswx_gen_cigar_core2(qe - qb, query + qb, te - tb, target + tb, r, m, matrix, w, I, D, E, n_cigar + 1, cigar + 1);
  1700. else x = kswx_gen_cigar_core2(qe - qb, query + qb, te - tb, target + tb, r, m, matrix, w, I, D, E, NULL, NULL);
  1701. if(x.mat < x.aln * min_sm){
  1702. if(cigar[1]) free(cigar[1]);
  1703. return KSWX_NULL;
  1704. }
  1705. x.qb += qb; x.qe += qb;
  1706. x.tb += tb; x.te += tb;
  1707. if(cigar_str) x1 = kswx_extend_align(qlen - x.qe, query + x.qe, tlen - x.te, target + x.te, 1, x.score, W, M, X, I, D, E, T, n_cigar + 2, cigar + 2);
  1708. else x1 = kswx_extend_align(qlen - x.qe, query + x.qe, tlen - x.te, target + x.te, 1, x.score, W, M, X, I, D, E, T, NULL, NULL);
  1709. x.score = x1.score;
  1710. x.aln += x1.aln;
  1711. x.mat += x1.mat;
  1712. x.mis += x1.mis;
  1713. x.ins += x1.ins;
  1714. x.del += x1.del;
  1715. x.qe += x1.qe;
  1716. x.te += x1.te;
  1717. if(cigar_str) x1 = kswx_extend_align(x.qb, query + x.qb - 1, x.tb, target + x.tb - 1, -1, x.score, W, M, X, I, D, E, T, n_cigar + 0, cigar + 0);
  1718. else x1 = kswx_extend_align(x.qb, query + x.qb - 1, x.tb, target + x.tb - 1, -1, x.score, W, M, X, I, D, E, T, NULL, NULL);
  1719. x.score = x1.score;
  1720. x.aln += x1.aln;
  1721. x.mat += x1.mat;
  1722. x.mis += x1.mis;
  1723. x.ins += x1.ins;
  1724. x.del += x1.del;
  1725. x.qb -= x1.qe;
  1726. x.tb -= x1.te;
  1727. if(cigar_str){
  1728. clear_string(cigar_str);
  1729. revseq_4bytes(cigar[0], n_cigar[0]);
  1730. kswx_cigar2string(cigar_str, n_cigar[0], cigar[0]);
  1731. kswx_cigar2string(cigar_str, n_cigar[1], cigar[1]);
  1732. kswx_cigar2string(cigar_str, n_cigar[2], cigar[2]);
  1733. if(cigar[0]) free(cigar[0]);
  1734. if(cigar[1]) free(cigar[1]);
  1735. if(cigar[2]) free(cigar[2]);
  1736. }
  1737. return x;
  1738. }
  1739. #endif