wtpoa.h 33 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164
  1. /*
  2. *
  3. * Copyright (c) 2011, Jue Ruan <ruanjue@gmail.com>
  4. *
  5. *
  6. * This program is free software: you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License as published by
  8. * the Free Software Foundation, either version 3 of the License, or
  9. * (at your option) any later version.
  10. *
  11. * This program is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU General Public License
  17. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  18. */
  19. #ifndef __WTDBG_POA_CNS_RJ_H
  20. #define __WTDBG_POA_CNS_RJ_H
  21. #include "tripoa.h"
  22. #include "kswx.h"
  23. #include "filereader.h"
  24. typedef struct {
  25. char *reftag;
  26. u4i reflen, refoff;
  27. u4i node1, node2;
  28. } lay_blk_t;
  29. typedef struct {
  30. u4i chridx, bidx; // block idx
  31. u4i rdidx, rdoff:31, rddir:1;
  32. char *rdtag;
  33. BaseBank *seq;
  34. u4i rbeg, rend;
  35. } lay_seq_t;
  36. static inline void _init_lay_seq_t(lay_seq_t *sq){
  37. ZEROS(sq);
  38. sq->seq = init_basebank();
  39. }
  40. static inline void _free_lay_seq_t(lay_seq_t *sq){
  41. free_basebank(sq->seq);
  42. }
  43. define_recycle_list(layseqr, lay_seq_t, u4i, _init_lay_seq_t(a), _free_lay_seq_t(a));
  44. typedef lay_seq_t* (*iter_cns_block)(void *obj);
  45. typedef void (*info_cns_block)(void *obj, lay_seq_t *sq, lay_blk_t *bk);
  46. typedef struct {
  47. u4i cidx, eidx;
  48. u4i node1, node2, soff, slen;
  49. int beg, end;
  50. } edge_cns_t;
  51. define_list(edgecnsv, edge_cns_t);
  52. typedef struct {
  53. u4i cidx;
  54. edgecnsv *rs;
  55. String *tag;
  56. BaseBank *seq, *cns;
  57. u4i cnt;
  58. } ctg_cns_t;
  59. define_list(ctgcnsv, ctg_cns_t*);
  60. static inline ctg_cns_t* init_ctg(u4i cidx){
  61. ctg_cns_t *ctg;
  62. ctg = malloc(sizeof(ctg_cns_t));
  63. ctg->cidx = cidx;
  64. ctg->rs = init_edgecnsv(2);
  65. ctg->tag = init_string(32);
  66. ctg->seq = init_basebank(2048);
  67. ctg->cns = init_basebank(2048);
  68. ctg->cnt = 0;
  69. return ctg;
  70. }
  71. static inline void clear_ctg(ctg_cns_t *ctg){
  72. ctg->cidx = MAX_U4;
  73. ctg->cnt = 0;
  74. clear_edgecnsv(ctg->rs);
  75. clear_string(ctg->tag);
  76. clear_basebank(ctg->seq);
  77. clear_basebank(ctg->cns);
  78. }
  79. static inline void free_ctg(ctg_cns_t *ctg){
  80. free_edgecnsv(ctg->rs);
  81. free_string(ctg->tag);
  82. free_basebank(ctg->seq);
  83. free_basebank(ctg->cns);
  84. free(ctg);
  85. }
  86. typedef struct {
  87. u4i cidx, eidx;
  88. int type; // 0, none; 1, edge cns; 2, edges aligning; 3, merging into cns
  89. } cns_evt_t;
  90. define_list(cnsevtv, cns_evt_t);
  91. struct CTGCNS;
  92. thread_beg_def(mcns);
  93. struct CTGCNS *cc;
  94. TriPOG *g;
  95. cns_evt_t task;
  96. edge_cns_t edges[2];
  97. u1v *seq1, *seq2;
  98. thread_end_def(mcns);
  99. typedef struct CTGCNS {
  100. void *obj;
  101. iter_cns_block itercns;
  102. info_cns_block infocns;
  103. lay_seq_t *sq;
  104. lay_blk_t BK;
  105. u4i ncpu;
  106. ctgcnsv *ctgs, *cycs;
  107. u4i nctg;
  108. cnsevtv *evts;
  109. u8i ridx, bases;
  110. u4i chridx, bidx;
  111. int state;
  112. u4i seqmax, inpmax;
  113. int winlen, winmin, fail_skip;
  114. int M, X, I, D, E, reglen;
  115. u8i tri_rets[2];
  116. u4i print_progress;
  117. thread_def_shared_vars(mcns);
  118. } CTGCNS;
  119. static inline int revise_joint_point(u32list *cigars, int *qe, int *te){
  120. u4i i, op, ln, max;
  121. int qq, q, tt, t;
  122. q = t = 0;
  123. qq = tt = 0;
  124. max = 0;
  125. for(i=1;i<=cigars->size;i++){
  126. op = cigars->buffer[cigars->size - i] & 0xF;
  127. ln = cigars->buffer[cigars->size - i] >> 4;
  128. if(op == 0){
  129. if(ln > max){
  130. qq = q; tt = t;
  131. max = ln;
  132. }
  133. q += ln;
  134. t += ln;
  135. } else if(op == 1){
  136. q += ln;
  137. } else {
  138. t += ln;
  139. }
  140. }
  141. *qe -= qq;
  142. *te -= tt;
  143. return 1;
  144. }
  145. thread_beg_func(mcns);
  146. struct CTGCNS *cc;
  147. TriPOG *g;
  148. ctg_cns_t *ctg;
  149. edge_cns_t *edge;
  150. u1v *seq1, *seq2;
  151. kswx_t XX, *xs[2];
  152. u8list *mem_cache;
  153. u32list *cigars[2];
  154. u4i i;
  155. int qb, qe, tb, te, b, e, ol;
  156. cc = mcns->cc;
  157. g = mcns->g;
  158. seq1 = mcns->seq1;
  159. seq2 = mcns->seq2;
  160. mem_cache = init_u8list(1024);
  161. cigars[0] = init_u32list(64);
  162. cigars[1] = NULL;
  163. xs[0] = &XX;
  164. xs[1] = NULL;
  165. thread_beg_loop(mcns);
  166. if(mcns->task.type == 1){
  167. if(g->seqs->nseq){
  168. end_tripog(g);
  169. }
  170. } else if(mcns->task.type == 2){
  171. qb = 0; qe = seq1->size;
  172. tb = 0; te = seq2->size;
  173. if(qe > cc->reglen) qb = qe - cc->reglen;
  174. if(te > cc->reglen) te = cc->reglen;
  175. ol = num_min(qe -qb, te - tb);
  176. kswx_overlap_align_core(xs, cigars, qe - qb, seq1->buffer + qb, te - tb, seq2->buffer + tb, 1, cc->M, cc->X, (cc->I + cc->D) / 2, (cc->I + cc->D) / 2, cc->E, mem_cache);
  177. if(XX.aln < Int(0.5 * cc->reglen) || XX.mat < Int(XX.aln * 0.9)){
  178. // full length alignment
  179. int maxl;
  180. maxl = num_min(seq1->size, seq2->size);
  181. maxl = num_min(maxl, cc->reglen * 4);
  182. qb = 0; qe = seq1->size;
  183. tb = 0; te = seq2->size;
  184. if(qe > maxl) qb = qe - maxl;
  185. if(te > maxl) te = maxl;
  186. ol = num_min(qe -qb, te - tb);
  187. kswx_overlap_align_core(xs, cigars, qe - qb, seq1->buffer + qb, te - tb, seq2->buffer + tb, 1, cc->M, cc->X, (cc->I + cc->D) / 2, (cc->I + cc->D) / 2, cc->E, mem_cache);
  188. }
  189. XX.qb += qb;
  190. XX.qe += qb;
  191. XX.tb += tb;
  192. XX.te += tb;
  193. b = XX.qe;
  194. e = XX.te;
  195. revise_joint_point(cigars[0], &b, &e);
  196. mcns->edges[0].end = b;
  197. mcns->edges[1].beg = e;
  198. if(cns_debug){
  199. thread_beg_syn(mcns);
  200. fprintf(stderr, "JOINT\tC%u\tE%u\tqe = %d -> %d\tte = %d -> %d\t[%5d,%5d]\t[%5d,%5d,%d,%d,%d]\n", mcns->edges[0].cidx, mcns->edges[0].eidx, XX.qe, b, XX.te, e, (int)seq1->size, (int)seq2->size, XX.aln, XX.mat, XX.mis, XX.ins, XX.del);
  201. fflush(stderr);
  202. thread_end_syn(mcns);
  203. }
  204. } else if(mcns->task.type == 3){
  205. ctg = get_ctgcnsv(cc->ctgs, mcns->task.cidx);
  206. clear_basebank(ctg->cns);
  207. for(i=0;i<ctg->rs->size;i++){
  208. edge = ref_edgecnsv(ctg->rs, i);
  209. if(edge->end > edge->beg){
  210. fast_fwdbits2basebank(ctg->cns, ctg->seq->bits, edge->soff + edge->beg, edge->end - edge->beg);
  211. } else if(Int(ctg->cns->size) + edge->end > edge->beg){
  212. ctg->cns->size = ctg->cns->size + edge->end - edge->beg;
  213. normalize_basebank(ctg->cns);
  214. } else {
  215. clear_basebank(ctg->cns);
  216. }
  217. }
  218. }
  219. thread_end_loop(mcns);
  220. free_u8list(mem_cache);
  221. free_u32list(cigars[0]);
  222. thread_end_func(mcns);
  223. static inline CTGCNS* init_ctgcns(void *obj, iter_cns_block itercns, info_cns_block infocns, u4i ncpu, int shuffle_rds, u4i seqmax, int winlen, int winmin, int fail_skip, int reglen, POGPar *par){
  224. CTGCNS *cc;
  225. thread_prepare(mcns);
  226. cc = malloc(sizeof(CTGCNS));
  227. cc->obj = obj;
  228. cc->itercns = itercns;
  229. cc->infocns = infocns;
  230. cc->sq = NULL;
  231. ZEROS(&cc->BK);
  232. cc->ncpu = ncpu;
  233. cc->ctgs = init_ctgcnsv(8);
  234. cc->cycs = init_ctgcnsv(8);
  235. cc->evts = init_cnsevtv(64);
  236. cc->nctg = 0;
  237. cc->ridx = 0;
  238. cc->bases = 0;
  239. cc->state = 1;
  240. cc->seqmax = seqmax;
  241. cc->inpmax = seqmax * 5;
  242. cc->winlen = winlen;
  243. cc->winmin = winmin;
  244. cc->fail_skip = fail_skip;
  245. cc->M = par->M;
  246. cc->X = par->X;
  247. cc->I = par->I;
  248. cc->D = par->D;
  249. cc->E = par->E;
  250. cc->reglen = reglen;
  251. thread_beg_init(mcns, ncpu);
  252. mcns->cc = cc;
  253. mcns->g = init_tripog(seqmax, shuffle_rds, winlen, winmin, fail_skip, par);
  254. ZEROS(&mcns->task);
  255. mcns->seq1 = init_u1v(1024);
  256. mcns->seq2 = init_u1v(1024);
  257. ZEROS(&(mcns->edges[0]));
  258. ZEROS(&(mcns->edges[1]));
  259. mcns->edges[0].eidx = MAX_U4;
  260. mcns->edges[1].eidx = MAX_U4;
  261. thread_end_init(mcns);
  262. cc->tri_rets[0] = 0;
  263. cc->tri_rets[1] = 0;
  264. cc->print_progress = 0;
  265. cc->chridx = MAX_U4;
  266. cc->bidx = MAX_U4;
  267. thread_export(mcns, cc);
  268. return cc;
  269. }
  270. static inline void reset_ctgcns(CTGCNS *cc, void *obj, iter_cns_block itercns, info_cns_block infocns){
  271. ctg_cns_t *ctg;
  272. u4i i;
  273. cc->obj = obj;
  274. cc->itercns = itercns;
  275. cc->infocns = infocns;
  276. cc->sq = NULL;
  277. ZEROS(&cc->BK);
  278. cc->ridx = 0;
  279. cc->state = 1;
  280. cc->chridx = MAX_U4;
  281. cc->bidx = MAX_U4;
  282. for(i=0;i<cc->ctgs->size;i++){
  283. ctg = get_ctgcnsv(cc->ctgs, i);
  284. if(ctg == NULL) continue;
  285. free_ctg(ctg);
  286. }
  287. clear_ctgcnsv(cc->ctgs);
  288. cc->nctg = 0;
  289. clear_cnsevtv(cc->evts);
  290. cc->bases = 0;
  291. }
  292. static inline void free_ctgcns(CTGCNS *cc){
  293. ctg_cns_t *ctg;
  294. u4i i;
  295. thread_prepare(mcns);
  296. thread_import(mcns, cc);
  297. thread_beg_close(mcns);
  298. free_tripog(mcns->g);
  299. free_u1v(mcns->seq1);
  300. free_u1v(mcns->seq2);
  301. thread_end_close(mcns);
  302. for(i=0;i<cc->ctgs->size;i++){
  303. ctg = get_ctgcnsv(cc->ctgs, i);
  304. if(ctg == NULL) continue;
  305. free_ctg(ctg);
  306. }
  307. free_ctgcnsv(cc->ctgs);
  308. for(i=0;i<cc->cycs->size;i++){
  309. ctg = get_ctgcnsv(cc->cycs, i);
  310. if(ctg == NULL) continue;
  311. free_ctg(ctg);
  312. }
  313. free_ctgcnsv(cc->cycs);
  314. free_cnsevtv(cc->evts);
  315. free(cc);
  316. }
  317. static inline void print_lays_ctgcns(CTGCNS *cc, FILE *out){
  318. lay_blk_t *bk;
  319. bk = &cc->BK;
  320. while((cc->sq = cc->itercns(cc->obj))){
  321. if(cc->sq->chridx != cc->chridx){
  322. cc->chridx = cc->sq->chridx;
  323. cc->bidx = MAX_U4;
  324. cc->infocns(cc->obj, cc->sq, bk);
  325. fflush(out);
  326. fprintf(out, ">%s len=%u\n", bk->reftag, bk->reflen);
  327. }
  328. if(cc->sq->bidx != cc->bidx){
  329. cc->bidx = cc->sq->bidx;
  330. cc->infocns(cc->obj, cc->sq, bk);
  331. fprintf(out, "E\t%u\tN%u\t+\tN%u\t+\n", bk->refoff, bk->node1, bk->node2);
  332. }
  333. {
  334. if(cc->sq->rdtag){
  335. fprintf(out, "S\t%s\t%c\t%u\t%u\t", cc->sq->rdtag, "+-"[cc->sq->rddir], cc->sq->rdoff, (u4i)cc->sq->seq->size);
  336. } else {
  337. fprintf(out, "S\tR%llu\t%c\t%u\t%u\t", (u8i)cc->sq->rdidx, "+-"[cc->sq->rddir], cc->sq->rdoff, (u4i)cc->sq->seq->size);
  338. }
  339. print_seq_basebank(cc->sq->seq, 0, cc->sq->seq->size, out);
  340. fprintf(out, "\t%u\t%u\n", cc->sq->rbeg, cc->sq->rend);
  341. }
  342. }
  343. }
  344. static inline void repay_ctgcns(CTGCNS *cc, ctg_cns_t *ctg){
  345. clear_ctg(ctg);
  346. push_ctgcnsv(cc->cycs, ctg);
  347. }
  348. static inline ctg_cns_t* iter_ctgcns(CTGCNS *cc){
  349. ctg_cns_t *ctg, *ret;
  350. edge_cns_t *edge;
  351. cns_evt_t EVT;
  352. lay_blk_t *bk;
  353. u8i eidx;
  354. thread_prepare(mcns);
  355. thread_import(mcns, cc);
  356. bk = &cc->BK;
  357. ret = NULL;
  358. while(1){
  359. cc->sq = cc->itercns(cc->obj);
  360. if(cc->sq){
  361. if(cc->sq->chridx != cc->chridx){
  362. if(cc->cycs->size){
  363. pop_ctgcnsv(cc->cycs, &ctg);
  364. ctg->cidx = cc->ctgs->size;
  365. } else {
  366. ctg = init_ctg(cc->ctgs->size);
  367. }
  368. push_ctgcnsv(cc->ctgs, ctg);
  369. cc->chridx = cc->sq->chridx;
  370. cc->bidx = MAX_U4;
  371. cc->infocns(cc->obj, cc->sq, bk);
  372. if(bk->reftag){
  373. append_string(ctg->tag, bk->reftag, strlen(bk->reftag));
  374. } else {
  375. append_string(ctg->tag, "anonymous", 10);
  376. }
  377. }
  378. if(cc->sq->bidx != cc->bidx){
  379. cc->ridx ++;
  380. if(!cns_debug && cc->print_progress && (cc->ridx % cc->print_progress) == 0){
  381. fprintf(stderr, "\r%u contigs %llu edges %llu bases", (u4i)cc->ctgs->size, cc->ridx, cc->bases); fflush(stderr);
  382. }
  383. } else {
  384. if(mcns->g->seqs->nseq < cc->inpmax){
  385. fwdbitpush_tripog(mcns->g, cc->sq->seq->bits, 0, cc->sq->seq->size, cc->sq->rbeg, cc->sq->rend);
  386. }
  387. continue;
  388. }
  389. }
  390. if(mcns->task.type != 0){
  391. thread_wake(mcns);
  392. }
  393. while(1){
  394. thread_wait_one(mcns);
  395. if(mcns->task.type == 2){
  396. ctg = get_ctgcnsv(cc->ctgs, mcns->task.cidx);
  397. ctg->rs->buffer[mcns->edges[0].eidx].end = mcns->edges[0].end;
  398. ctg->rs->buffer[mcns->edges[1].eidx].beg = mcns->edges[1].beg;
  399. mcns->edges[0].eidx = MAX_U4;
  400. mcns->edges[1].eidx = MAX_U4;
  401. mcns->task.type = 0;
  402. ctg->cnt ++;
  403. if(ctg->cnt + 1 == ctg->rs->size && (ctg->cidx + 1 < cc->ctgs->size || cc->sq == NULL)){
  404. EVT.cidx = ctg->cidx;
  405. EVT.eidx = MAX_U4;
  406. EVT.type = 3;
  407. array_heap_push(cc->evts->buffer, cc->evts->size, cc->evts->cap, cns_evt_t, EVT, num_cmpxx(b.type, a.type, a.cidx, b.cidx, a.eidx, b.eidx));
  408. }
  409. } else if(mcns->task.type == 1){
  410. ctg = get_ctgcnsv(cc->ctgs, mcns->task.cidx);
  411. cc->tri_rets[mcns->g->is_tripog] ++;
  412. if(cns_debug){
  413. thread_beg_syn(mcns);
  414. fprintf(stderr, "%u_%s_N%u_N%u\t%d\t%d\t", mcns->task.eidx, ctg->tag->string, mcns->edges[0].node1, mcns->edges[0].node2, mcns->g->seqs->nseq, (u4i)mcns->g->cns->size);
  415. println_seq_basebank(mcns->g->cns, 0, mcns->g->cns->size, stderr);
  416. thread_end_syn(mcns);
  417. }
  418. mcns->edges[0].soff = ctg->seq->size;
  419. mcns->edges[0].slen = mcns->g->cns->size;
  420. mcns->edges[0].beg = 0;
  421. mcns->edges[0].end = mcns->g->cns->size;
  422. fast_fwdbits2basebank(ctg->seq, mcns->g->cns->bits, 0, mcns->g->cns->size);
  423. eidx = mcns->task.eidx;
  424. ctg->rs->buffer[eidx] = mcns->edges[0];
  425. mcns->edges[0].eidx = MAX_U4;
  426. mcns->task.type = 0;
  427. if(eidx && ctg->rs->buffer[eidx - 1].eidx != MAX_U4){
  428. EVT.cidx = ctg->cidx;
  429. EVT.eidx = eidx - 1;
  430. EVT.type = 2;
  431. array_heap_push(cc->evts->buffer, cc->evts->size, cc->evts->cap, cns_evt_t, EVT, num_cmpxx(b.type, a.type, a.cidx, b.cidx, a.eidx, b.eidx));
  432. }
  433. if(eidx + 1 < ctg->rs->size && ctg->rs->buffer[eidx + 1].eidx != MAX_U4){
  434. EVT.cidx = ctg->cidx;
  435. EVT.eidx = eidx;
  436. EVT.type = 2;
  437. array_heap_push(cc->evts->buffer, cc->evts->size, cc->evts->cap, cns_evt_t, EVT, num_cmpxx(b.type, a.type, a.cidx, b.cidx, a.eidx, b.eidx));
  438. }
  439. if(ctg->rs->size == 1 && (ctg->cidx + 1 < cc->ctgs->size || cc->sq == NULL)){
  440. EVT.cidx = ctg->cidx;
  441. EVT.eidx = MAX_U4;
  442. EVT.type = 3;
  443. array_heap_push(cc->evts->buffer, cc->evts->size, cc->evts->cap, cns_evt_t, EVT, num_cmpxx(b.type, a.type, a.cidx, b.cidx, a.eidx, b.eidx));
  444. }
  445. } else if(mcns->task.type == 3){
  446. ctg = get_ctgcnsv(cc->ctgs, mcns->task.cidx);
  447. cc->bases += ctg->cns->size;
  448. cc->nctg ++;
  449. ret = ctg;
  450. set_ctgcnsv(cc->ctgs, mcns->task.cidx, NULL);
  451. mcns->task.type = 0;
  452. break;
  453. }
  454. if(cc->evts->size){
  455. EVT = cc->evts->buffer[0];
  456. array_heap_remove(cc->evts->buffer, cc->evts->size, cc->evts->cap, cns_evt_t, 0, num_cmpxx(b.type, a.type, a.cidx, b.cidx, a.eidx, b.eidx));
  457. mcns->task = EVT;
  458. if(EVT.type == 2){
  459. ctg = get_ctgcnsv(cc->ctgs, EVT.cidx);
  460. mcns->edges[0] = ctg->rs->buffer[EVT.eidx];
  461. mcns->edges[0].eidx = EVT.eidx;
  462. clear_and_inc_u1v(mcns->seq1, mcns->edges[0].slen);
  463. bitseq_basebank(ctg->seq, mcns->edges[0].soff, mcns->edges[0].slen, mcns->seq1->buffer);
  464. mcns->edges[1] = ctg->rs->buffer[EVT.eidx + 1];
  465. mcns->edges[1].eidx = EVT.eidx + 1;
  466. clear_and_inc_u1v(mcns->seq2, mcns->edges[1].slen);
  467. bitseq_basebank(ctg->seq, mcns->edges[1].soff, mcns->edges[1].slen, mcns->seq2->buffer);
  468. }
  469. thread_wake(mcns);
  470. } else {
  471. break;
  472. }
  473. }
  474. if(cc->sq){
  475. ctg = get_ctgcnsv(cc->ctgs, cc->ctgs->size - 1);
  476. cc->bidx = cc->sq->bidx;
  477. cc->infocns(cc->obj, cc->sq, bk);
  478. mcns->task.cidx = ctg->cidx;
  479. mcns->task.eidx = ctg->rs->size;
  480. mcns->task.type = 1;
  481. mcns->edges[1].eidx = MAX_U4;
  482. edge = next_ref_edgecnsv(ctg->rs);
  483. edge->cidx = ctg->cidx;
  484. edge->eidx = MAX_U4;
  485. edge->node1 = bk->node1;
  486. edge->node2 = bk->node2;
  487. edge->soff = 0;
  488. edge->slen = 0;
  489. edge->beg = 0;
  490. edge->end = 0;
  491. mcns->edges[0] = *edge;
  492. mcns->edges[0].eidx = offset_edgecnsv(ctg->rs, edge);
  493. beg_tripog(mcns->g);
  494. fwdbitpush_tripog(mcns->g, cc->sq->seq->bits, 0, cc->sq->seq->size, cc->sq->rbeg, cc->sq->rend);
  495. }
  496. if(ret){
  497. break;
  498. } else if(cc->sq == NULL){
  499. if(cc->nctg == cc->ctgs->size){ // all finished
  500. if(!cns_debug && cc->print_progress){
  501. fprintf(stderr, "\r%u contigs %llu edges %llu bases\n", (u4i)cc->ctgs->size, cc->ridx, cc->bases); fflush(stderr);
  502. }
  503. break;
  504. }
  505. }
  506. }
  507. thread_export(mcns, cc);
  508. return ret;
  509. }
  510. /*
  511. static inline int iter_ctgcns2(CTGCNS *cc){
  512. edge_cns_t *edge;
  513. lay_blk_t *bk;
  514. u8i i, eidx;
  515. int next, waitall;
  516. thread_prepare(mcns);
  517. thread_import(mcns, cc);
  518. next = 0;
  519. waitall = 0;
  520. bk = &cc->BK;
  521. while(cc->state){
  522. if(cc->state == 1){
  523. if(cc->sq == NULL){
  524. cc->sq = cc->itercns(cc->obj);
  525. }
  526. if(cc->sq == NULL || cc->sq->chridx != cc->chridx){
  527. if(cc->chridx != MAX_U4){
  528. if(mcns->edges[0].idx != MAX_U8){
  529. thread_wake(mcns);
  530. cc->erun ++;
  531. }
  532. cc->state = 2;
  533. next = 4;
  534. waitall = 1;
  535. } else if(cc->sq){
  536. cc->state = 5;
  537. } else {
  538. thread_export(mcns, cc);
  539. return 0;
  540. }
  541. } else if(cc->sq->bidx != cc->bidx){
  542. cc->ridx ++;
  543. if(mcns->edges[0].idx != MAX_U8){
  544. thread_wake(mcns);
  545. cc->erun ++;
  546. }
  547. cc->state = 2;
  548. next = 3;
  549. waitall = 0;
  550. if(!cns_debug && cc->print_progress && (cc->ridx % cc->print_progress) == 0){
  551. fprintf(stderr, "\r%u contigs %llu edges %llu bases", (u4i)cc->ctgs->size, cc->ridx, cc->bases); fflush(stderr);
  552. }
  553. } else {
  554. if(0){
  555. if(cc->sq->rdtag){
  556. fprintf(stdout, "S\t%s\t%c\t%u\t%u\t", cc->sq->rdtag, "+-"[cc->sq->rddir], cc->sq->rdoff, (u4i)cc->sq->seq->size);
  557. } else {
  558. fprintf(stdout, "S\tR%llu\t%c\t%u\t%u\t", (u8i)cc->sq->rdidx, "+-"[cc->sq->rddir], cc->sq->rdoff, (u4i)cc->sq->seq->size);
  559. }
  560. print_seq_basebank(cc->sq->seq, 0, cc->sq->seq->size, stdout);
  561. fprintf(stdout, "\t%u\t%u\n", cc->sq->rbeg, cc->sq->rend);
  562. }
  563. if(mcns->g->seqs->nseq < cc->inpmax){
  564. fwdbitpush_tripog(mcns->g, cc->sq->seq->bits, 0, cc->sq->seq->size, cc->sq->rbeg, cc->sq->rend);
  565. }
  566. cc->sq = NULL;
  567. }
  568. } else if(cc->state == 2){
  569. while(1){
  570. if(waitall){
  571. if(cc->tasks->size == 0){
  572. thread_wait_done(mcns);
  573. } else {
  574. thread_wait_one(mcns);
  575. }
  576. } else {
  577. thread_wait_one(mcns);
  578. }
  579. if(mcns->edges[1].idx != MAX_U8){
  580. cc->erun --;
  581. cc->rs->buffer[mcns->edges[0].idx].end = mcns->edges[0].end;
  582. cc->rs->buffer[mcns->edges[1].idx].beg = mcns->edges[1].beg;
  583. mcns->edges[0].idx = MAX_U8;
  584. mcns->edges[1].idx = MAX_U8;
  585. } else if(mcns->edges[0].idx != MAX_U8){
  586. cc->erun --;
  587. cc->tri_rets[mcns->g->is_tripog] ++;
  588. if(cns_debug){
  589. thread_beg_syn(mcns);
  590. fprintf(stderr, "%llu_%s_N%u_N%u\t%d\t%d\t", mcns->edges[0].idx, cc->tag->string, mcns->edges[0].node1, mcns->edges[0].node2, mcns->g->seqs->nseq, (u4i)mcns->g->cns->size);
  591. println_seq_basebank(mcns->g->cns, 0, mcns->g->cns->size, stderr);
  592. thread_end_syn(mcns);
  593. }
  594. mcns->edges[0].soff = cc->seq->size;
  595. mcns->edges[0].slen = mcns->g->cns->size;
  596. mcns->edges[0].beg = 0;
  597. mcns->edges[0].end = mcns->g->cns->size;
  598. fast_fwdbits2basebank(cc->seq, mcns->g->cns->bits, 0, mcns->g->cns->size);
  599. eidx = mcns->edges[0].idx;
  600. cc->rs->buffer[eidx] = mcns->edges[0];
  601. mcns->edges[0].idx = MAX_U8;
  602. if(eidx && cc->rs->buffer[eidx - 1].idx != MAX_U8){
  603. push_u8v(cc->tasks, eidx - 1);
  604. }
  605. if(eidx + 1 < cc->rs->size && cc->rs->buffer[eidx + 1].idx != MAX_U8){
  606. push_u8v(cc->tasks, eidx);
  607. }
  608. }
  609. if(pop_u8v(cc->tasks, &eidx)){
  610. mcns->edges[0] = cc->rs->buffer[eidx];
  611. mcns->edges[0].idx = eidx;
  612. clear_and_inc_u1v(mcns->seq1, mcns->edges[0].slen);
  613. bitseq_basebank(cc->seq, mcns->edges[0].soff, mcns->edges[0].slen, mcns->seq1->buffer);
  614. mcns->edges[1] = cc->rs->buffer[eidx + 1];
  615. mcns->edges[1].idx = eidx + 1;
  616. clear_and_inc_u1v(mcns->seq2, mcns->edges[1].slen);
  617. bitseq_basebank(cc->seq, mcns->edges[1].soff, mcns->edges[1].slen, mcns->seq2->buffer);
  618. thread_wake(mcns);
  619. cc->erun ++;
  620. } else {
  621. if(waitall){
  622. if(cc->erun == 0){
  623. break;
  624. }
  625. } else {
  626. break;
  627. }
  628. }
  629. }
  630. cc->state = next;
  631. } else if(cc->state == 3){
  632. cc->bidx = cc->sq->bidx;
  633. cc->infocns(cc->obj, cc->sq, bk);
  634. mcns->edges[1].idx = MAX_U8;
  635. edge = next_ref_edgecnsv(cc->rs);
  636. edge->idx = MAX_U8;
  637. edge->node1 = bk->node1;
  638. edge->node2 = bk->node2;
  639. edge->soff = 0;
  640. edge->slen = 0;
  641. edge->beg = 0;
  642. edge->end = 0;
  643. mcns->edges[0] = *edge;
  644. mcns->edges[0].idx = offset_edgecnsv(cc->rs, edge);
  645. beg_tripog(mcns->g);
  646. cc->state = 1;
  647. if(0){
  648. fprintf(stdout, "E\t%u\tN%u\t+\tN%u\t+\n", bk->refoff, bk->node1, bk->node2);
  649. }
  650. } else if(cc->state == 4){
  651. clear_basebank(cc->cns);
  652. for(i=0;i<cc->rs->size;i++){
  653. edge = ref_edgecnsv(cc->rs, i);
  654. if(edge->end > edge->beg){
  655. fast_fwdbits2basebank(cc->cns, cc->seq->bits, edge->soff + edge->beg, edge->end - edge->beg);
  656. } else if(Int(cc->cns->size) + edge->end > edge->beg){
  657. cc->cns->size = cc->cns->size + edge->end - edge->beg;
  658. normalize_basebank(cc->cns);
  659. } else {
  660. clear_basebank(cc->cns);
  661. }
  662. }
  663. cc->bases += cc->cns->size;
  664. if(cc->sq == NULL){
  665. if(!cns_debug && cc->print_progress){
  666. fprintf(stderr, "\r%u contigs %llu edges %llu bases\n", (u4i)cc->ctgs->size, cc->ridx, cc->bases); fflush(stderr);
  667. }
  668. cc->state = 0;
  669. } else {
  670. cc->state = 5;
  671. }
  672. thread_export(mcns, cc);
  673. return 1;
  674. } else if(cc->state == 5){
  675. cc->chridx = cc->sq->chridx;
  676. //cc->cidx ++;
  677. cc->bidx = MAX_U4;
  678. cc->infocns(cc->obj, cc->sq, bk);
  679. clear_string(cc->tag);
  680. if(bk->reftag){
  681. append_string(cc->tag, bk->reftag, strlen(bk->reftag));
  682. } else {
  683. append_string(cc->tag, "anonymous", 10);
  684. }
  685. clear_basebank(cc->seq);
  686. clear_u8v(cc->tasks);
  687. clear_edgecnsv(cc->rs);
  688. cc->state = 1;
  689. if(0){
  690. fflush(stdout);
  691. fprintf(stdout, ">%s len=%u\n", bk->reftag, bk->reflen);
  692. }
  693. }
  694. }
  695. thread_export(mcns, cc);
  696. return 0;
  697. }
  698. */
  699. typedef struct {
  700. u4i chridx;
  701. u4v *chrs;
  702. SeqBank *refs;
  703. BitVec *vsts;
  704. FileReader *fr;
  705. layseqr *seqs;
  706. u4v *heap;
  707. u8i rdidx;
  708. u4i cidx, bidx, sidx, lidx;
  709. u2i bsize, bstep; // block size, and slide steps
  710. int flags; // if flags & 0x1, only polish reference presented in SAM lines. 0x2: Don't filter secondary/supplementary alignments
  711. u4i bidx2;
  712. } SAMBlock;
  713. static inline SAMBlock* init_samblock(SeqBank *refs, FileReader *fr, u2i bsize, u2i bovlp, int flags){
  714. SAMBlock *sb;
  715. lay_seq_t *stop;
  716. u2i bstep;
  717. bstep = bsize - bovlp;
  718. assert(bstep <= bsize && 2 * bstep >= bsize);
  719. sb = malloc(sizeof(SAMBlock));
  720. sb->chridx = 0;
  721. sb->chrs = init_u4v(32);
  722. push_u4v(sb->chrs, MAX_U4);
  723. sb->refs = refs;
  724. sb->vsts = init_bitvec(sb->refs->nseq);
  725. sb->fr = fr;
  726. sb->seqs = init_layseqr(1024);
  727. sb->heap = init_u4v(1024);
  728. sb->rdidx = 0;
  729. sb->cidx = 1;
  730. sb->bidx = 0;
  731. sb->bidx2 = MAX_U4;
  732. sb->bsize = bsize;
  733. sb->bstep = bstep;
  734. sb->sidx = MAX_U4; // last output lay_seq
  735. sb->lidx = MAX_U4; // last input lay_seq
  736. sb->flags = flags;
  737. stop = pop_layseqr(sb->seqs);
  738. stop->chridx = refs->nseq + 1;
  739. stop->bidx = 0;
  740. return sb;
  741. }
  742. static inline void free_samblock(SAMBlock *sb){
  743. free_u4v(sb->chrs);
  744. free_bitvec(sb->vsts);
  745. free_layseqr(sb->seqs);
  746. free_u4v(sb->heap);
  747. free(sb);
  748. }
  749. static inline lay_seq_t* _push_padding_ref_samblock(SAMBlock *sb, lay_seq_t *sq){
  750. lay_seq_t *st;
  751. u8i off;
  752. u4i sqidx, len, i;
  753. if(sb->cidx > sb->refs->nseq) return sq;
  754. sqidx = offset_layseqr(sb->seqs, sq);
  755. if((sb->flags & 0x1) && sb->bidx2 == MAX_U4){
  756. sb->bidx = sq->bidx;
  757. }
  758. sb->bidx2 = sb->bidx;
  759. while(sb->cidx < sq->chridx || (sb->cidx == sq->chridx && sb->bidx <= sq->bidx)){
  760. st = pop_layseqr(sb->seqs);
  761. st->chridx = sb->cidx;
  762. st->bidx = sb->bidx;
  763. st->rdidx = 0;
  764. st->rddir = 0;
  765. st->rdoff = st->bidx * sb->bstep;
  766. st->rbeg = st->rdoff;
  767. if(sb->chrs->size <= sb->cidx){
  768. for(i=0;i<sb->refs->nseq;i++){
  769. if(get_bitvec(sb->vsts, i) == 0) break;
  770. }
  771. if(i == sb->refs->nseq) break;
  772. push_u4v(sb->chrs, i);
  773. one_bitvec(sb->vsts, i);
  774. }
  775. off = sb->refs->rdoffs->buffer[sb->chrs->buffer[sb->cidx]];
  776. len = sb->refs->rdlens->buffer[sb->chrs->buffer[sb->cidx]];
  777. st->rend = num_min(st->rbeg + sb->bsize, len);
  778. clear_basebank(st->seq);
  779. fast_fwdbits2basebank(st->seq, sb->refs->rdseqs->bits, off + st->rdoff, st->rend - st->rbeg);
  780. array_heap_push(sb->heap->buffer, sb->heap->size, sb->heap->cap, u4i, offset_layseqr(sb->seqs, st),
  781. num_cmpxx(ref_layseqr(sb->seqs, a)->chridx, ref_layseqr(sb->seqs, b)->chridx, ref_layseqr(sb->seqs, a)->bidx,
  782. ref_layseqr(sb->seqs, b)->bidx, ref_layseqr(sb->seqs, a)->rdidx, ref_layseqr(sb->seqs, b)->rdidx));
  783. if(st->rend >= len){
  784. sb->cidx ++;
  785. sb->bidx = 0;
  786. sb->bidx2 = sb->bidx;
  787. if(sb->cidx >= sb->refs->nseq) break;
  788. } else {
  789. sb->bidx ++;
  790. }
  791. sq = ref_layseqr(sb->seqs, sqidx);
  792. }
  793. sq = ref_layseqr(sb->seqs, sqidx);
  794. return sq;
  795. }
  796. static inline lay_seq_t* iter_samblock(void *obj){
  797. SAMBlock *sb;
  798. lay_seq_t *sc, *sl;
  799. u4i chr, chridx, scidx, off, minlen, rddir, rdoff, nxt, val, len, op;
  800. char *ptr, *str;
  801. int c, samflag;
  802. sb = (SAMBlock*)obj;
  803. if(sb->sidx != MAX_U4){
  804. recyc_layseqr(sb->seqs, sb->sidx);
  805. sb->sidx = MAX_U4;
  806. }
  807. do {
  808. if(sb->heap->size == 0){
  809. sb->lidx = MAX_U4;
  810. }
  811. while(sb->lidx == MAX_U4){
  812. c = readtable_filereader(sb->fr);
  813. if(c == -1){
  814. if(sb->flags & 0x1){
  815. } else {
  816. sc = ref_layseqr(sb->seqs, 0);
  817. sc = _push_padding_ref_samblock(sb, sc);
  818. }
  819. break;
  820. }
  821. if(sb->fr->line->string[0] == '@') continue;
  822. if(c < 11) continue;
  823. samflag = atoi(get_col_str(sb->fr, 1));
  824. if(samflag & 0x004) continue;
  825. //if(get_col_str(sb->fr, 9)[0] == '*') continue;
  826. if(!(sb->flags & 0x2)){
  827. if(samflag & (0x100 | 0x800)) continue; // filter secondary/supplementary alignments
  828. }
  829. // chr
  830. if((chr = getval_cuhash(sb->refs->rdhash, get_col_str(sb->fr, 2))) == MAX_U4){
  831. continue;
  832. }
  833. if(chr != sb->chrs->buffer[sb->chridx]){
  834. chridx = ++ sb->chridx;
  835. push_u4v(sb->chrs, chr);
  836. one_bitvec(sb->vsts, chr);
  837. } else {
  838. chridx = sb->chridx;
  839. }
  840. off = atol(get_col_str(sb->fr, 3)) - 1;
  841. ptr = get_col_str(sb->fr, 5);
  842. if((*ptr) == '*') continue;
  843. sb->rdidx ++;
  844. rdoff = 0;
  845. minlen = num_min(get_col_len(sb->fr, 9), sb->bsize) / 2;
  846. rddir = (atol(get_col_str(sb->fr, 1)) & 0x10) >> 4;
  847. str = get_col_str(sb->fr, 9);
  848. {
  849. encap_layseqr(sb->seqs, 2);
  850. sc = pop_layseqr(sb->seqs);
  851. sc->chridx = chridx;
  852. sc->bidx = (off / sb->bstep);
  853. sc->rdidx = sb->rdidx;
  854. sc->rddir = rddir;
  855. sc->rdoff = rdoff;
  856. sc->rbeg = off;
  857. sc->rend = 0;
  858. clear_basebank(sc->seq);
  859. }
  860. sl = NULL;
  861. // parse SAM cigar
  862. {
  863. nxt = (off / sb->bstep) * sb->bstep;
  864. if(nxt && off - nxt < UInt(sb->bsize - sb->bstep)){
  865. if(sc->bidx){
  866. scidx = offset_layseqr(sb->seqs, sc);
  867. sl = pop_layseqr(sb->seqs);
  868. sc = ref_layseqr(sb->seqs, scidx);
  869. sl->chridx = chridx;
  870. sl->bidx = sc->bidx - 1;
  871. sl->rdidx = sb->rdidx;
  872. sl->rddir = rddir;
  873. sl->rdoff = rdoff;
  874. sl->rbeg = off;
  875. sl->rend = 0;
  876. clear_basebank(sl->seq);
  877. }
  878. nxt += sb->bsize - sb->bstep;
  879. } else {
  880. nxt += sb->bstep;
  881. }
  882. }
  883. val = 0;
  884. while(*ptr){
  885. op = MAX_U4;
  886. switch(*ptr){
  887. case '0' ... '9': val = val * 10 + (*ptr) - '0'; break;
  888. case 'X':
  889. case '=':
  890. case 'M': op = 0b111; break;
  891. case 'I': op = 0b110; break;
  892. case 'D': op = 0b101; break;
  893. case 'N': op = 0b001; break;
  894. case 'S': op = 0b010; break;
  895. case 'H':
  896. case 'P': op = 0b000; break;
  897. default:
  898. fprintf(stderr, " -- %s\n", get_col_str(sb->fr, 5));
  899. fprintf(stderr, " -- Bad cigar %d '%c' in %s -- %s:%d --\n", (int)(ptr - get_col_str(sb->fr, 5)), *ptr, __FUNCTION__, __FILE__, __LINE__); fflush(stderr);
  900. abort();
  901. }
  902. ptr ++;
  903. if(op == MAX_U4) continue;
  904. if(val == 0) val = 1;
  905. while(val){
  906. if(op & 0b001){
  907. len = num_min(val, nxt - off);
  908. off += len;
  909. } else {
  910. len = val;
  911. }
  912. val -= len;
  913. if(op & 0b010){
  914. rdoff += len;
  915. if(op & 0b100){
  916. if(sc){
  917. if(sc->seq->size == 0){
  918. sc->rbeg = (op & 0b001)? off - len : off;
  919. }
  920. seq2basebank(sc->seq, str, len);
  921. sc->rend = off;
  922. }
  923. if(sl){
  924. if(sl->seq->size == 0){
  925. sl->rbeg = (op & 0b001)? off - len : off;
  926. }
  927. seq2basebank(sl->seq, str, len);
  928. sl->rend = off;
  929. }
  930. }
  931. str += len;
  932. }
  933. if(off == nxt){
  934. if(sl){
  935. if(sl->rend >= sl->rbeg + minlen){
  936. scidx = offset_layseqr(sb->seqs, sc);
  937. sl = _push_padding_ref_samblock(sb, sl);
  938. sc = ref_layseqr(sb->seqs, scidx);
  939. if(sb->lidx == MAX_U4){
  940. sb->lidx = offset_layseqr(sb->seqs, sl);
  941. }
  942. array_heap_push(sb->heap->buffer, sb->heap->size, sb->heap->cap, u4i, offset_layseqr(sb->seqs, sl),
  943. num_cmpxx(ref_layseqr(sb->seqs, a)->chridx, ref_layseqr(sb->seqs, b)->chridx, ref_layseqr(sb->seqs, a)->bidx,
  944. ref_layseqr(sb->seqs, b)->bidx, ref_layseqr(sb->seqs, a)->rdidx, ref_layseqr(sb->seqs, b)->rdidx));
  945. } else {
  946. push_layseqr(sb->seqs, sl);
  947. }
  948. sl = NULL;
  949. nxt += 2 * sb->bstep - sb->bsize;
  950. } else {
  951. scidx = offset_layseqr(sb->seqs, sc);
  952. encap_layseqr(sb->seqs, 1);
  953. sl = ref_layseqr(sb->seqs, scidx);
  954. sc = pop_layseqr(sb->seqs);
  955. sc->chridx = chridx;
  956. sc->bidx = sl->bidx + 1;
  957. sc->rdidx = sb->rdidx;
  958. sc->rddir = rddir;
  959. sc->rdoff = rdoff;
  960. sc->rbeg = off;
  961. sc->rend = 0;
  962. clear_basebank(sc->seq);
  963. nxt += sb->bsize - sb->bstep;
  964. }
  965. }
  966. }
  967. }
  968. if(sl){
  969. if(sl->rend >= sl->rbeg + minlen){
  970. u4i scidx;
  971. scidx = sc? offset_layseqr(sb->seqs, sc) : MAX_U4;
  972. sl = _push_padding_ref_samblock(sb, sl);
  973. sc = scidx == MAX_U4? NULL : ref_layseqr(sb->seqs, scidx);
  974. if(sb->lidx == MAX_U4){
  975. sb->lidx = offset_layseqr(sb->seqs, sl);
  976. }
  977. array_heap_push(sb->heap->buffer, sb->heap->size, sb->heap->cap, u4i, offset_layseqr(sb->seqs, sl),
  978. num_cmpxx(ref_layseqr(sb->seqs, a)->chridx, ref_layseqr(sb->seqs, b)->chridx, ref_layseqr(sb->seqs, a)->bidx,
  979. ref_layseqr(sb->seqs, b)->bidx, ref_layseqr(sb->seqs, a)->rdidx, ref_layseqr(sb->seqs, b)->rdidx));
  980. } else {
  981. push_layseqr(sb->seqs, sl);
  982. }
  983. }
  984. if(sc){
  985. if(sc->rend >= sc->rbeg + minlen){
  986. scidx = sl? offset_layseqr(sb->seqs, sl) : MAX_U4;
  987. sc = _push_padding_ref_samblock(sb, sc);
  988. sl = scidx == MAX_U4? NULL : ref_layseqr(sb->seqs, scidx);
  989. if(sb->lidx == MAX_U4){
  990. sb->lidx = offset_layseqr(sb->seqs, sc);
  991. }
  992. array_heap_push(sb->heap->buffer, sb->heap->size, sb->heap->cap, u4i, offset_layseqr(sb->seqs, sc),
  993. num_cmpxx(ref_layseqr(sb->seqs, a)->chridx, ref_layseqr(sb->seqs, b)->chridx, ref_layseqr(sb->seqs, a)->bidx,
  994. ref_layseqr(sb->seqs, b)->bidx, ref_layseqr(sb->seqs, a)->rdidx, ref_layseqr(sb->seqs, b)->rdidx));
  995. } else {
  996. push_layseqr(sb->seqs, sc);
  997. }
  998. }
  999. }
  1000. if(sb->heap->size == 0) break;
  1001. if(sb->lidx != MAX_U4){
  1002. sl = ref_layseqr(sb->seqs, sb->lidx);
  1003. sb->sidx = sb->heap->buffer[0];
  1004. sc = ref_layseqr(sb->seqs, sb->sidx);
  1005. if(sc->chridx == sl->chridx && sc->bidx + 1 >= sl->bidx){
  1006. sb->lidx = MAX_U4;
  1007. continue;
  1008. }
  1009. array_heap_remove(sb->heap->buffer, sb->heap->size, sb->heap->cap, u4i, 0,
  1010. num_cmpxx(ref_layseqr(sb->seqs, a)->chridx, ref_layseqr(sb->seqs, b)->chridx, ref_layseqr(sb->seqs, a)->bidx,
  1011. ref_layseqr(sb->seqs, b)->bidx, ref_layseqr(sb->seqs, a)->rdidx, ref_layseqr(sb->seqs, b)->rdidx));
  1012. sc->rbeg -= sc->bidx * sb->bstep;
  1013. sc->rend -= sc->bidx * sb->bstep;
  1014. } else {
  1015. sb->sidx = sb->heap->buffer[0];
  1016. array_heap_remove(sb->heap->buffer, sb->heap->size, sb->heap->cap, u4i, 0,
  1017. num_cmpxx(ref_layseqr(sb->seqs, a)->chridx, ref_layseqr(sb->seqs, b)->chridx, ref_layseqr(sb->seqs, a)->bidx,
  1018. ref_layseqr(sb->seqs, b)->bidx, ref_layseqr(sb->seqs, a)->rdidx, ref_layseqr(sb->seqs, b)->rdidx));
  1019. sc = ref_layseqr(sb->seqs, sb->sidx);
  1020. sc->rbeg -= sc->bidx * sb->bstep;
  1021. sc->rend -= sc->bidx * sb->bstep;
  1022. }
  1023. return sc;
  1024. } while(1);
  1025. sb->sidx = MAX_U4;
  1026. return NULL;
  1027. }
  1028. static inline void info_samblock(void *obj, lay_seq_t *sq, lay_blk_t *bk){
  1029. SAMBlock *sb;
  1030. sb = (SAMBlock*)obj;
  1031. if(sq == NULL) return;
  1032. bk->node1 = sq->bidx;
  1033. bk->node2 = sq->bidx + 1;
  1034. bk->reftag = sb->refs->rdtags->buffer[sb->chrs->buffer[sq->chridx]];
  1035. bk->reflen = sb->refs->rdlens->buffer[sb->chrs->buffer[sq->chridx]];
  1036. bk->refoff = sq->bidx * sb->bstep;
  1037. }
  1038. typedef struct {
  1039. String *tag;
  1040. FileReader *fr;
  1041. u4i chridx, bidx;
  1042. u8i rdidx;
  1043. lay_seq_t *key;
  1044. lay_blk_t *blk;
  1045. } WTLAYBlock;
  1046. static inline WTLAYBlock* init_wtlayblock(FileReader *fr){
  1047. WTLAYBlock *wb;
  1048. wb = malloc(sizeof(WTLAYBlock));
  1049. wb->tag = init_string(32);
  1050. append_string(wb->tag, "annonymous", 10);
  1051. wb->fr = fr;
  1052. wb->chridx = 0;
  1053. wb->bidx = 0;
  1054. wb->rdidx = 0;
  1055. wb->key = calloc(1, sizeof(lay_seq_t));
  1056. wb->key->seq = init_basebank();
  1057. wb->blk = calloc(1, sizeof(lay_blk_t));
  1058. return wb;
  1059. }
  1060. static inline void free_wtlayblock(WTLAYBlock *wb){
  1061. free_string(wb->tag);
  1062. free_basebank(wb->key->seq);
  1063. free(wb->key);
  1064. free(wb->blk);
  1065. free(wb);
  1066. }
  1067. static inline lay_seq_t* iter_wtlayblock(void *obj){
  1068. WTLAYBlock *wb;
  1069. char *ss;
  1070. int c, sl;
  1071. wb = (WTLAYBlock*)obj;
  1072. clear_basebank(wb->key->seq);
  1073. while((c = readtable_filereader(wb->fr)) != -1){
  1074. switch(wb->fr->line->string[0]){
  1075. case '>':
  1076. wb->chridx ++;
  1077. wb->bidx = 0;
  1078. clear_string(wb->tag);
  1079. ss = get_col_str(wb->fr, 0) + 1;
  1080. for(sl=0;ss[sl] && ss[sl] != ' ' && ss[sl] != '\t';sl++){
  1081. }
  1082. append_string(wb->tag, ss, sl);
  1083. wb->blk->node1 = 0;
  1084. wb->blk->node2 = 0;
  1085. ss = strstr(ss + sl, "len=");
  1086. if(ss){
  1087. wb->blk->reflen = atol(ss);
  1088. } else {
  1089. wb->blk->reflen = 0;
  1090. }
  1091. wb->blk->reftag = wb->tag->string;
  1092. break;
  1093. case 'E':
  1094. wb->bidx ++;
  1095. wb->blk->refoff = atoll(get_col_str(wb->fr, 1));
  1096. wb->blk->node1 = atoll(get_col_str(wb->fr, 2) + 1);
  1097. wb->blk->node2 = atoll(get_col_str(wb->fr, 4) + 1);
  1098. wb->blk->reftag = wb->tag->string;
  1099. break;
  1100. case 'S':
  1101. case 's':
  1102. ss = get_col_str(wb->fr, 5);
  1103. sl = get_col_len(wb->fr, 5);
  1104. wb->key->chridx = wb->chridx;
  1105. wb->key->bidx = wb->bidx;
  1106. wb->key->rdidx = wb->rdidx ++;
  1107. wb->key->rddir = (get_col_str(wb->fr, 2)[0] == '-');
  1108. wb->key->rdoff = atoi(get_col_str(wb->fr, 3));
  1109. if(c >= 8){
  1110. wb->key->rbeg = atoi(get_col_str(wb->fr, 6));
  1111. wb->key->rend = atoi(get_col_str(wb->fr, 7));
  1112. } else {
  1113. wb->key->rbeg = 0;
  1114. wb->key->rend = 0;
  1115. }
  1116. seq2basebank(wb->key->seq, ss, sl);
  1117. return wb->key;
  1118. }
  1119. }
  1120. return NULL;
  1121. }
  1122. static inline void info_wtlayblock(void *obj, lay_seq_t *sq, lay_blk_t *bk){
  1123. if(sq == NULL) return;
  1124. memcpy(bk, ((WTLAYBlock*)obj)->blk, sizeof(lay_blk_t));
  1125. }
  1126. #endif