123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899 |
- #include "dna.h"
- #include "chararray.h"
- #include "list.h"
- #include "queue.h"
- #include "hashset.h"
- #include "general_graph.h"
- #include <math.h>
- #include <float.h>
- #ifndef PACBIO_PROBS_DAGCNS_RJ_H
- #define PACBIO_PROBS_DAGCNS_RJ_H
- #define DAGCNS_MAX_LEN 0x3FFF
- static int dagcns_debug = 0;
- typedef struct {
- int x, y;
- u4i dnidx;
- f8i probs[2];
- } bdp_node_t;
- define_list(bdpnodev, bdp_node_t);
- typedef struct {
- f8i prob;
- u1i cigar, base;
- } bdp_edge_t;
- define_list(bdpedgev, bdp_edge_t);
- define_simple_geg_callback(bdp, bdpnodev, bdp_node_t, bdpedgev, bdp_edge_t);
- typedef struct {
- u4i gnidx, dnidx;
- } bdp_link_t;
- define_list(bdplinkv, bdp_link_t);
- typedef struct {
- uint32_t nodes[2];
- uint32_t links[2];
- uint32_t cov:28, visit:1, closed:1, cns:1, alt:1;
- double score;
- } dagedge_t;
- define_list(dagedgev, dagedge_t);
- #define NODE_MAX_FW_EDGE 0xFFFFFFFFU
- typedef struct dagnode_t {
- uint32_t pos:28, base:2, cns:1, visit:1;
- uint32_t fw_edge;
- uint32_t edges[2];
- f8i aux;
- } dagnode_t;
- define_list(dagnodev, dagnode_t);
- typedef struct {
- uint32_t pos;
- uint32_t bases[4];
- } dagsnp_t;
- define_list(dagsnpv, dagsnp_t);
- typedef struct {
- u8list *cns;
- u32list *deps;
- dagnodev *nodes;
- dagedgev *edges;
- u32list *trash;
- String *alns[2];
- int W, M, X, I, D, E;
- f4i pM, pX, pI, pD;
- double ref_penalty, alt_penalty;
- double cns_score;
- uint32_t cns_head, backbone_size;
- } DAGCNS;
- static inline DAGCNS* init_dagcns(int W, int M, int X, int I, int D, int E, f4i pM, f4i pX, f4i pI, f4i pD){
- DAGCNS *g;
- g = malloc(sizeof(DAGCNS));
- g->cns = init_u8list(1024);
- g->deps = init_u32list(1024);
- g->nodes = init_dagnodev(1024);
- g->edges = init_dagedgev(1024);
- g->trash = init_u32list(1024);
- g->alns[0] = init_string(1024);
- g->alns[1] = init_string(1024);
- g->cns_score = 0;
- g->cns_head = 0xFFFFFFFFU;
- g->backbone_size = 0;
- g->W = W;
- g->M = M;
- g->X = X;
- g->I = I;
- g->D = D;
- g->E = E;
- g->pM = pM;
- g->pX = pX;
- g->pI = pI;
- g->pD = pD;
- g->ref_penalty = 0.5;
- g->alt_penalty = 0.2;
- return g;
- }
- static inline void free_dagcns(DAGCNS *g){
- free_dagnodev(g->nodes);
- free_dagedgev(g->edges);
- free_u32list(g->trash);
- free_u8list(g->cns);
- free_u32list(g->deps);
- free_string(g->alns[0]);
- free_string(g->alns[1]);
- free(g);
- }
- static inline void reset_dagcns(DAGCNS *g){
- clear_dagnodev(g->nodes);
- clear_dagedgev(g->edges);
- clear_u32list(g->trash);
- clear_u8list(g->cns);
- clear_u32list(g->deps);
- g->cns_score = 0;
- g->cns_head = 0xFFFFFFFFU;
- g->backbone_size = 0;
- }
- static uint32_t prepare_node_dagcns(DAGCNS *g, uint32_t pos, uint8_t base){
- dagnode_t *n;
- n = next_ref_dagnodev(g->nodes);
- n->pos = pos;
- n->base = base;
- n->cns = 0;
- n->aux = 0;
- n->visit = 0;
- n->edges[0] = 0xFFFFFFFFU;
- n->edges[1] = 0xFFFFFFFFU;
- n->fw_edge = NODE_MAX_FW_EDGE;
- return g->nodes->size - 1;
- }
- static inline dagedge_t* find_edge_by_node_dagcns(DAGCNS *g, uint32_t nid1, uint32_t nid2, int dir){
- dagnode_t *n;
- dagedge_t *e;
- n = ref_dagnodev(g->nodes, nid1);
- if(n->edges[dir] != 0xFFFFFFFFU){
- e = ref_dagedgev(g->edges, n->edges[dir]);
- while(1){
- if(e->nodes[dir] == nid2) return e;
- if(e->links[dir] == 0xFFFFFFFFU) break;
- e = ref_dagedgev(g->edges, e->links[dir]);
- }
- }
- return NULL;
- }
- static inline dagedge_t* add_edge_dagcns(DAGCNS *g, uint32_t nid1, uint32_t nid2, int dir){
- dagnode_t *n;
- dagedge_t *e;
- uint32_t eid;
- n = ref_dagnodev(g->nodes, nid1);
- if(pop_u32list(g->trash, &eid)){
- e = ref_dagedgev(g->edges, eid);
- } else {
- eid = g->edges->size;
- e = next_ref_dagedgev(g->edges);
- }
- e->nodes[!dir] = nid1;
- e->nodes[dir] = nid2;
- e->cov = 1;
- e->score = 0;
- e->visit = 0;
- e->closed = 0;
- e->cns = 0;
- e->alt = 0;
- e->links[dir] = n->edges[dir];
- n->edges[dir] = eid;
- n = ref_dagnodev(g->nodes, nid2);
- e->links[!dir] = n->edges[!dir];
- n->edges[!dir] = eid;
- return e;
- }
- static inline dagedge_t* prepare_edge_dagcns(DAGCNS *g, uint32_t nid1, uint32_t nid2, int dir){
- dagedge_t *e;
- e = find_edge_by_node_dagcns(g, nid1, nid2, dir);
- if(e){ e->cov ++; return e; }
- return add_edge_dagcns(g, nid1, nid2, dir);
- }
- static inline void gen_pregraph_dagcns(DAGCNS *g){
- dagedge_t *e;
- uint32_t i;
- clear_dagnodev(g->nodes);
- clear_dagedgev(g->edges);
- clear_u32list(g->trash);
- clear_u32list(g->deps);
- g->backbone_size = g->cns->size;
- for(i=0;i<g->cns->size;i++){
- push_u32list(g->deps, 0);
- prepare_node_dagcns(g, i, g->cns->buffer[i]);
- if(i){
- e = add_edge_dagcns(g, i - 1, i, 0);
- e->cov = 0;
- }
- }
- }
- static inline int remove_edge_dagcns(DAGCNS *g, uint32_t eid){
- dagnode_t *n;
- dagedge_t *e;
- uint32_t i, lst;
- for(i=0;i<2;i++){
- e = ref_dagedgev(g->edges, eid);
- lst = e->links[i];
- n = ref_dagnodev(g->nodes, e->nodes[!i]);
- if(n->edges[i] == eid){
- n->edges[i] = lst;
- } else if(n->edges[i] == 0xFFFFFFFFU){
-
- return 0;
- } else {
- e = ref_dagedgev(g->edges, n->edges[i]);
- while(1){
- if(e->links[i] == eid){
- e->links[i] = lst; break;
- } else if(e->links[i] == 0xFFFFFFFFU){
-
- return 0;
- } else {
- e = ref_dagedgev(g->edges, e->links[i]);
- }
- }
- }
- }
- push_u32list(g->trash, eid);
- return 1;
- }
- #define MIN_SCORE -0x0FFFFFFF
- static inline f8i log_sum(f8i a, f8i b){
- f8i c;
- c = num_max(a, b);
- if(c - a >= 10 || c - b >= 10) return c;
- return logl(expl(a - c) + expl(b - c)) + c;
- }
- typedef struct {
- int x, y, d;
- } bdp_beg_t;
- define_list(bdpbegv, bdp_beg_t);
- static inline void fprint_dot_bdpgraph(GEGraph *g, bdpnodev *bnodes, bdpedgev *bedges, char *prefix, char *suffix){
- static const char *colors[2][2] = {{"blue", "green"}, {"red", "gray"}};
- FILE *out;
- ge_node_t *n;
- bdp_node_t *bn;
- ge_edge_t *e;
- bdp_edge_t *be;
- u8i i;
- out = open_file_for_write(prefix, suffix, 1);
- fprintf(out, "digraph {\nnode [shape=record]\nrankdir=LR\n");
- for(i=0;i<g->nodes->size;i++){
- n = ref_genodev(g->nodes, i);
- if(n->closed) continue;
- bn = ref_bdpnodev(bnodes, offset_genodev(g->nodes, n));
- fprintf(out, " N%llu [label=\"{N%llu|%d|%d}|{%.4Lf|%.4Lf}\"]\n", i, i, bn->x, bn->y, bn->probs[0], bn->probs[1]);
- }
- for(i=1;i<g->edges->size;i++){
- e = ref_geedgev(g->edges, i);
- if(e->closed) continue;
- be = ref_bdpedgev(bedges, offset_geedgev(g->edges, e));
- fprintf(out, " N%llu -> N%llu [label=\"%c%c:%d:%c:%.4Lf\" color=%s]\n", (u8i)e->node1, (u8i)e->node2, "+-"[e->dir1], "+-"[e->dir2], e->cov, "MIDX"[be->cigar], be->prob, colors[e->dir1][e->dir2]);
- fprintf(out, " N%llu -> N%llu [label=\"%c%c:%d:%c:%.4Lf\" color=%s]\n", (u8i)e->node2, (u8i)e->node1, "-+"[e->dir2], "-+"[e->dir1], e->cov, "MIDX"[be->cigar], be->prob, colors[!e->dir2][!e->dir1]);
- }
- fprintf(out, "}\n");
- fclose(out);
- }
- static inline u4i dp_matrix2alignment_graph(DAGCNS *dag, u1i *query, u1i *z, int x, int y, GEGraph *g, bdpnodev *bnodes, bdpedgev *bedges){
- ge_node_t *gn, *gn2;
- ge_edge_ref_t *gf;
- ge_edge_t *ge;
- bdp_node_t *bn, *bn2;
- bdp_edge_t *be;
- bdp_beg_t BEG;
- UUhash *bnhash;
- UUhash_t *u;
- bdpbegv *stack;
- f8i cigar_probs[4], curator;
- u8v *idxs;
- u1i *target;
- u4i i, k, nidx, nlst, nbegs[2], visit, ret;
- int d, f, n_col, ops[2], base, exists;
- cigar_probs[0] = dag->pM;
- cigar_probs[1] = dag->pI;
- cigar_probs[2] = dag->pD;
- cigar_probs[3] = dag->pX;
- n_col = dag->cns->size;
- target = dag->cns->buffer;
- bdp_set_callbacks_gegraph(g, bnodes, bedges);
- reset_gegraph(g);
- stack = init_bdpbegv(4);
- push_bdpbegv(stack, (bdp_beg_t){x, y, 0});
- idxs = init_u8v(4);
- nbegs[0] = nbegs[1] = 0;
- bnhash = init_UUhash(1023);
- ret = 0;
- while(stack->size){
- BEG = stack->buffer[--stack->size];
- u = prepare_UUhash(bnhash, (((b8i)BEG.x) << 32) | BEG.y, &exists);
- if(exists){
- nidx = u->val;
- } else {
- gn = add_node_gegraph(g);
- nidx = offset_genodev(g->nodes, gn);
- u->key = (((b8i)BEG.x) << 32) | BEG.y;
- u->val = nidx;
- bn = ref_bdpnodev(bnodes, nidx);
- bn->x = BEG.x; bn->y = BEG.y;
- bn->dnidx = MAX_VALUE_U4;
- }
- nlst = nidx;
- x = BEG.x; y = BEG.y;
- d = BEG.d;
- f = 0;
- clear_u8v(idxs);
- while(x >= 0 && y >= 0){
- d = (z[x * n_col + y] >> (d << 1)) & 0x03;
- if(d == 0){
- if(query[x] == target[y]){
- if(z[x * n_col + y] & (1 << 6)){
- f = 1;
- break;
- } else if(BEG.d == 0){
- z[x * n_col + y] |= 1 << 6;
- push_bdpbegv(stack, (bdp_beg_t){x, y, 1});
- push_bdpbegv(stack, (bdp_beg_t){x, y, 2});
- }
- ops[0] = 0; ops[1] = 3;
- } else {
- ops[0] = 1; ops[1] = 2;
- }
-
- } else if(d == 1){
- ops[0] = 1; ops[1] = 3;
-
- } else {
- ops[0] = 2; ops[1] = 3;
-
- }
- for(i=0;i<2;i++){
- if(ops[i] == 3) break;
- base = query[x];
- if(ops[i] == 0){ x --; y --; }
- else if(ops[i] == 1){ x --; }
- else { y --; }
- u = prepare_UUhash(bnhash, (((b8i)x) << 32) | y, &exists);
- if(exists){
- nidx = u->val;
- } else {
- gn = add_node_gegraph(g);
- nidx = offset_genodev(g->nodes, gn);
- u->key = (((b8i)x) << 32) | y;
- u->val = nidx;
- bn = ref_bdpnodev(bnodes, nidx);
- bn->x = x; bn->y = y;
- bn->dnidx = MAX_VALUE_U4;
- }
- ge = prepare_edge_gegraph(g, nlst, 1, nidx, 1, &exists);
- if(exists){
- ge->cov ++;
- } else {
- ge->cov = 1;
- be = ref_bdpedgev(bedges, offset_geedgev(g->edges, ge));
- be->cigar = ops[i];
- be->base = base;
- }
- nlst = nidx;
- if(BEG.d) push_u8v(idxs, offset_geedgev(g->edges, ge));
- }
- }
- if(BEG.d){
- if(f == 0){
-
- for(i=0;i<idxs->size;i++){
- ge = ref_geedgev(g->edges, idxs->buffer[i]);
- ge->cov --;
- if(ge->cov == 0) cut_edge_gegraph(g, ge);
- }
- } else ret ++;
- } else {
- ret ++;
- nbegs[0] = g->nodes->size > 1? g->nodes->size - 2 : 0;
- nbegs[1] = 0;
- }
- }
- free_bdpbegv(stack);
- free_UUhash(bnhash);
- u4i cnt = 0;
- for(i=0;i<g->nodes->size;i++){
- gn = ref_genodev(g->nodes, i);
- if(gn->edges[0].cnt || gn->edges[1].cnt){ cnt++; continue; }
- gn->closed = 1;
- }
-
- if(nbegs[0] == nbegs[1]){
- free_u8v(idxs);
- return ret;
- }
- for(k=0;k<2;k++){
- bn = ref_bdpnodev(bnodes, nbegs[k]);
- bn->probs[k] = 0;
- clear_u8v(idxs);
- push_u8v(idxs, nbegs[k]);
- visit = k + 1;
- while(idxs->size){
- gn = ref_genodev(g->nodes, idxs->buffer[idxs->size - 1]);
- bn = ref_bdpnodev(bnodes, idxs->buffer[idxs->size - 1]);
- idxs->size --;
- geg_beg_iter_edges(g, gn, k, gf, ge);
- if(ge->closed) continue;
- be = ref_bdpedgev(bedges, offset_geedgev(g->edges, ge));
- gn2 = ref_genodev(g->nodes, gf->flg? ge->node1 : ge->node2);
- bn2 = ref_bdpnodev(bnodes, gf->flg? ge->node1 : ge->node2);
- if(gn2->bt_visit == visit){
- bn2->probs[k] = log_sum(bn2->probs[k], bn->probs[k] + cigar_probs[be->cigar]);
- } else {
- gn2->bt_visit = visit;
- gn2->unvisit = gn2->edges[!k].cnt;
- bn2->probs[k] = bn->probs[k] + cigar_probs[be->cigar];
- }
- if(gn2->unvisit) gn2->unvisit --;
- if(gn2->unvisit == 0){
- push_u8v(idxs, offset_genodev(g->nodes, gn2));
- }
- geg_end_iter_edges();
- }
- }
- free_u8v(idxs);
-
- curator = -FLT_MAX;
- for(i=1;i<g->edges->size;i++){
- ge = ref_geedgev(g->edges, i);
- if(ge->closed) continue;
- be = ref_bdpedgev(bedges, i);
- be->prob = cigar_probs[be->cigar] + ref_bdpnodev(bnodes, ge->node1)->probs[ge->dir1] + ref_bdpnodev(bnodes, ge->node2)->probs[!ge->dir2];
- if(be->prob > curator) curator = be->prob;
- }
- for(i=1;i<g->edges->size;i++){
- ge = ref_geedgev(g->edges, i);
- if(ge->closed) continue;
- be = ref_bdpedgev(bedges, i);
- be->prob = expl(be->prob - curator);
- }
-
- return nbegs[0];
- }
- static inline u4i branched_dynamic_programming_alignment(DAGCNS *g, u1i *query, int ql, GEGraph *geg, bdpnodev *bnodes, bdpedgev *bedges, u1v *mem_buffer){
- u4i nbeg;
- int *rh, *re;
- u1i *z, *zi, d, *target;
- int tl, n_col;
- int i, j, jc, jb, je, h1, h, m, e, f, t;
- int mi, mj, max;
- int W, M, X, I, D, E;
- tl = g->cns->size;
- target = g->cns->buffer;
- if(ql <= 0 || tl <= 0){ return 0; }
- n_col = tl;
- encap_u1v(mem_buffer, kswx_roundup8x((tl + 2) * sizeof(int)) + kswx_roundup8x((tl + 2) * sizeof(int)) + kswx_roundup8x(((long long)ql) * n_col));
- rh = (int*)(mem_buffer->buffer + mem_buffer->size);
- re = (int*)(mem_buffer->buffer + mem_buffer->size + kswx_roundup8x((tl + 2) * sizeof(int)));
- z = (mem_buffer->buffer + mem_buffer->size + kswx_roundup8x((tl + 2) * sizeof(int)) + kswx_roundup8x((tl + 2) * sizeof(int)));
- W = g->W;
- M = g->M;
- X = g->X;
- I = g->I;
- D = g->D;
- E = g->E;
-
- rh[0] = 0;
- re[1] = 0 + D + E;
- for(j=2;j<=tl&&j<=W;j++) rh[j] = rh[j-1] + E;
- for(;j<tl;j++) rh[j] = MIN_SCORE;
- for(j=0;j<=tl;j++) re[j] = MIN_SCORE;
- mi = mj = 0;
- max = MIN_SCORE;
- for(i=0;i<ql;i++){
- f = MIN_SCORE;
- jc = (i * tl / ql);
- jb = jc > W? jc - W : 0;
- je = jc + W + 1 < tl? jc + W + 1 : tl;
- h1 = jb == 0? (I + E * (i + 1)) : MIN_SCORE;
- zi = &z[i * n_col];
- for(j=jb;j<je;j++){
- m = rh[j] + ((query[i] == target[j])? M : X);
- rh[j] = h1;
- e = re[j];
- d = m >= e? 0 : 1;
- h = m >= e? m : e;
- d = h >= f? d : 2;
- h = h >= f? h : f;
- h1 = h;
- t = m + I + E;
- e = e + E;
- d |= e > t? 1<<2 : 0;
- e = e > t? e : t;
- re[j] = e;
- t = m + D + E;
- f = f + E;
- d |= f > t? 2<<4 : 0;
- f = f > t? f : t;
- zi[j] = d;
- }
- rh[j] = h1; re[j] = MIN_SCORE;
- if(i + 1 == ql){
- for(j=jb;j<je;j++){
- if(rh[j + 1] > max){
- max = rh[j + 1];
- mi = i; mj = j;
- }
- }
- } else if(je == tl){
- if(h1 > max){
- max = h1;
- mi = i; mj = tl - 1;
- }
- }
- }
- if(max == MIN_SCORE) return 0;
- nbeg = dp_matrix2alignment_graph(g, query, z, mi, mj, geg, bnodes, bedges);
- return nbeg;
- }
- static inline void bdpgraph2dagcns(DAGCNS *dg, GEGraph *gg, bdpnodev *bnodes, bdpedgev *bedges, u4i nbeg, bdplinkv *stack){
- dagnode_t *dn, *dn2;
- dagedge_t *de;
- ge_node_t *gn, *gn2;
- ge_edge_ref_t *gf;
- ge_edge_t *ge;
- bdp_node_t *bn, *bn2;
- bdp_edge_t *be;
- bdp_link_t T;
- u4i i, j, beg, end;
- int open;
- for(i=0;i<gg->nodes->size;i++) gg->nodes->buffer[i].bt_visit = 0;
- clear_bdplinkv(stack);
- push_bdplinkv(stack, (bdp_link_t){nbeg, 0xFFFFFFFFU});
- beg = bnodes->buffer[nbeg].y;
- end = beg;
- open = 0;
- while(stack->size){
- T = stack->buffer[--stack->size];
- gn = ref_genodev(gg->nodes, T.gnidx);
- bn = ref_bdpnodev(bnodes, T.gnidx);
- if(bn->y > (int)end) end = bn->y;
- dn = (T.dnidx == 0xFFFFFFFFU)? NULL : ref_dagnodev(dg->nodes, T.dnidx);
- geg_beg_iter_edges(gg, gn, 0, gf, ge);
- if(ge->closed) continue;
- be = ref_bdpedgev(bedges, offset_geedgev(gg->edges, ge));
- gn2 = ref_genodev(gg->nodes, gf->flg? ge->node1 : ge->node2);
- bn2 = ref_bdpnodev(bnodes, gf->flg? ge->node1 : ge->node2);
- if(gn2->bt_visit == 0){
- gn2->bt_visit = 1;
- open ++;
- gn2->unvisit = gn2->edges[1].cnt;
- }
- if(gn2->unvisit) gn2->unvisit --;
- if(gn2->unvisit == 0){
- open --;
- }
- if(open < 0){
- fprintf(stderr, " -- something wrong in %s -- %s:%d --\n", __FUNCTION__, __FILE__, __LINE__); fflush(stderr);
- }
- if(bn2->dnidx == MAX_VALUE_U4){
- if(be->cigar == 2){
- dn2 = dn;
- } else if(dn && (open || be->cigar == 1)){
- dn2 = ref_dagnodev(dg->nodes, prepare_node_dagcns(dg, bn2->y, be->base));
- dn = dn? ref_dagnodev(dg->nodes, T.dnidx) : NULL;
- } else if((dn || open == 0) && be->cigar == 0){
- dn2 = ref_dagnodev(dg->nodes, bn2->y);
- } else dn2 = NULL;
- bn2->dnidx = dn2? offset_dagnodev(dg->nodes, dn2) : MAX_VALUE_U4;
- } else {
- dn2 = ref_dagnodev(dg->nodes, bn2->dnidx);
- }
- if(dn && dn2 && dn != dn2){
- de = prepare_edge_dagcns(dg, offset_dagnodev(dg->nodes, dn), offset_dagnodev(dg->nodes, dn2), 0);
- de->score += be->prob;
- }
- if(gn2->unvisit == 0){
- push_bdplinkv(stack, (bdp_link_t){offset_genodev(gg->nodes, gn2), dn2? offset_dagnodev(dg->nodes, dn2) : 0xFFFFFFFFU});
- }
- geg_end_iter_edges();
- }
- for(j=beg;j<end;j++) dg->deps->buffer[j] ++;
- return;
- }
- static inline void merge_nodes_core_dagcns(DAGCNS *g, uint32_t nid, u32list *stack, u32list *cache[4], int dir){
- dagnode_t *n0, *n2, *n;
- dagedge_t *e, *e2, *e1;
- uint32_t base, eid, nid1, i, ret;
- clear_u32list(stack);
- push_u32list(stack, nid);
- ret = 0;
- while(pop_u32list(stack, &nid)){
- n0 = ref_dagnodev(g->nodes, nid);
- if((eid = n0->edges[dir]) == 0xFFFFFFFFU) continue;
- clear_u32list(cache[0]);
- clear_u32list(cache[1]);
- clear_u32list(cache[2]);
- clear_u32list(cache[3]);
- while(1){
- e = ref_dagedgev(g->edges, eid);
- n = ref_dagnodev(g->nodes, e->nodes[dir]);
- e2 = ref_dagedgev(g->edges, n->edges[!dir]);
- if(e2->links[!dir] == 0xFFFFFFFFU){
- push_u32list(cache[n->base], eid);
- }
- if((eid = e->links[dir]) == 0xFFFFFFFFU) break;
- }
- for(base=0;base<4;base++){
- if(cache[base]->size < 2) continue;
- for(i=0;i<cache[base]->size;i++) ref_dagedgev(g->edges, cache[base]->buffer[i])->visit = 1;
- e1 = ref_dagedgev(g->edges, cache[base]->buffer[0]);
- n = ref_dagnodev(g->nodes, e1->nodes[dir]);
- eid = n->edges[dir];
- nid1 = e1->nodes[dir];
- while(eid != 0xFFFFFFFFU){
- e = ref_dagedgev(g->edges, eid);
- e->visit = 1;
- eid = e->links[dir];
- }
- for(i=1;i<cache[base]->size;i++){
- e2 = ref_dagedgev(g->edges, cache[base]->buffer[i]);
- n2 = ref_dagnodev(g->nodes, e2->nodes[dir]);
- e1->cov += e2->cov;
- e1->score += e2->score;
- remove_edge_dagcns(g, cache[base]->buffer[i]);
- eid = n2->edges[dir];
- while(eid != 0xFFFFFFFFU){
- e2 = ref_dagedgev(g->edges, eid);
- e = prepare_edge_dagcns(g, nid1, e2->nodes[dir], dir);
- {
- e1 = ref_dagedgev(g->edges, cache[base]->buffer[0]);
- }
- e->cov = e->cov - 1 + e2->cov;
- e->score = e->score + e2->score;
- e->visit = 1;
- eid = e2->links[dir];
- }
- eid = n2->edges[dir];
- while(eid != 0xFFFFFFFFU){
- e2 = ref_dagedgev(g->edges, eid);
- remove_edge_dagcns(g, eid);
- eid = e2->links[dir];
- }
- }
-
-
-
- push_u32list(stack, nid1);
- }
- }
- }
- static inline int has_non_visited_edge_dagcns(DAGCNS *g, uint32_t nid, int dir){
- dagnode_t *n;
- dagedge_t *e;
- uint32_t eid;
- n = ref_dagnodev(g->nodes, nid);
- eid = n->edges[dir];
- while(eid != 0xFFFFFFFFU){
- e = ref_dagedgev(g->edges, eid);
- if(e->visit == 0) return 1;
- eid = e->links[dir];
- }
- return 0;
- }
- static inline void print_local_dot_dagcns(DAGCNS *g, uint32_t nid, int distance, FILE *out){
- u32list *stack;
- u32hash *hash;
- dagnode_t *n, *n1, *n2;
- dagedge_t *e;
- uint32_t id1, id2, eid, *u;
- int lo, hi, dir, exists;
- n = ref_dagnodev(g->nodes, nid);
- stack = init_u32list(32);
- hash = init_u32hash(1023);
- lo = n->pos - distance;
- hi = n->pos + distance;
- push_u32list(stack, nid);
- put_u32hash(hash, nid);
- fprintf(out, "digraph {\nrankdir=LR\n");
- while(stack->size){
- id1 = stack->buffer[--stack->size];
- n1 = ref_dagnodev(g->nodes, id1);
- for(dir=0;dir<1;dir++){
- eid = n1->edges[dir];
- while(eid != 0xFFFFFFFFU){
- e = ref_dagedgev(g->edges, eid);
- id2 = e->nodes[dir];
- n2 = ref_dagnodev(g->nodes, id2);
- fprintf(out, "N%d_%d_%c -> N%d_%d_%c [label=\"%d:%0.6f\" color=%s]\n", id1, n1->pos, "ACGT"[n1->base], id2, n2->pos, "ACGT"[n2->base], e->cov, e->score, e->visit? "blue" : "black");
- if(n2->pos >= lo && n2->pos <= hi){
- u = prepare_u32hash(hash, id2, &exists);
- if(exists){
- } else {
- *u = id2;
- push_u32list(stack, id2);
- }
- }
- eid = e->links[dir];
- }
- }
- }
- fprintf(out, "}\n");
- fflush(out);
- free_u32list(stack);
- free_u32hash(hash);
- }
- static inline void fprint_local_dot_dagcns(DAGCNS *g, u4i nid, int level, char *prefix, char *suffix){
- FILE *out;
- out = open_file_for_write(prefix, suffix, 1);
- print_local_dot_dagcns(g, nid, level, out);
- fclose(out);
- }
- static inline void merge_nodes_dagcns(DAGCNS *g){
- dagnode_t *n;
- dagedge_t *e;
- u32list *stack, *cache[4];
- u32fifo *queue;
- uint32_t i, nid, eid;
- stack = init_u32list(1024);
- cache[0] = init_u32list(4);
- cache[1] = init_u32list(4);
- cache[2] = init_u32list(4);
- cache[3] = init_u32list(4);
- for(i=0;i<g->edges->size;i++) g->edges->buffer[i].visit = 0;
- for(i=0;i<g->nodes->size;i++) g->nodes->buffer[i].visit = 0;
- queue = init_u32fifo();
- for(i=0;i<g->nodes->size;i++){
- n = ref_dagnodev(g->nodes, i);
- if(n->edges[1] != 0xFFFFFFFFU) continue;
- push_u32fifo(queue, i);
- }
-
- while(pop_u32fifo(queue, &nid)){
- if(dagcns_debug > 1) fprintf(stdout, "\npop(%u) %u\n", (u4i)queue->size, nid);
- n = ref_dagnodev(g->nodes, nid);
- if(n->visit) continue;
- n->visit = 1;
- merge_nodes_core_dagcns(g, nid, stack, cache, 1);
- merge_nodes_core_dagcns(g, nid, stack, cache, 0);
- n = ref_dagnodev(g->nodes, nid);
- eid = n->edges[0];
- while(eid != 0xFFFFFFFFU){
- e = ref_dagedgev(g->edges, eid);
- e->visit = 1;
- eid = e->links[0];
- }
- eid = n->edges[0];
- while(eid != 0xFFFFFFFFU){
- e = ref_dagedgev(g->edges, eid);
- if(!has_non_visited_edge_dagcns(g, e->nodes[0], 1)){
- if(dagcns_debug > 1) fprintf(stdout, "push %u\n", e->nodes[0]);
- push_u32fifo(queue, e->nodes[0]);
- }
- eid = e->links[0];
- }
- }
- free_u32fifo(queue);
- free_u32list(stack);
- free_u32list(cache[0]);
- free_u32list(cache[1]);
- free_u32list(cache[2]);
- free_u32list(cache[3]);
- }
- static inline void print_seq_dagcns(DAGCNS *g, FILE *out){
- char buffer[100];
- uint32_t i, j;
- for(i=j=0;i<g->cns->size;i++){
- buffer[j++] = bit_base_table[g->cns->buffer[i]];
- if(j == 99){
- buffer[j] = '\0';
- fprintf(out, "%s", buffer);
- j = 0;
- }
- }
- buffer[j] = '\0';
- fprintf(out, "%s", buffer);
- }
- static inline void gen_consensus_dagcns(DAGCNS *g, u32list *map){
- dagnode_t *n1, *n2;
- dagedge_t *e;
- u32fifo *queue;
- uint32_t i, lst, nid, eid, best_e;
- f8i best_s, score;
- queue = init_u32fifo();
- if(queue == NULL){
- fprint_local_dot_dagcns(g, 0, 10, "test.dot", NULL);
- }
- for(i=0;i<g->nodes->size;i++){
- n1 = ref_dagnodev(g->nodes, i);
- if(n1->edges[0] == 0xFFFFFFFFU && n1->edges[1] != 0xFFFFFFFFU){
- push_u32fifo(queue, i);
- n1->fw_edge = NODE_MAX_FW_EDGE;
- n1->aux = 0;
- }
- }
- for(i=0;i<g->edges->size;i++) g->edges->buffer[i].visit = 0;
- while(pop_u32fifo(queue, &nid)){
- best_s = - FLT_MAX;
- best_e = 0xFFFFFFFFU;
- n1 = ref_dagnodev(g->nodes, nid);
- eid = n1->edges[0];
- while(eid != 0xFFFFFFFFU){
- e = ref_dagedgev(g->edges, eid);
- n2 = ref_dagnodev(g->nodes, e->nodes[0]);
- if(e->nodes[0] < g->backbone_size){
-
- score = n2->aux + e->score - g->ref_penalty * g->deps->buffer[n1->pos];
- } else {
-
- score = n2->aux + e->score - g->alt_penalty * g->deps->buffer[n1->pos];
- }
- if(score > best_s){
- best_s = score;
- best_e = eid;
- }
- eid = e->links[0];
- }
- if(best_s > - FLT_MAX) n1->aux = best_s;
- n1->fw_edge = best_e;
- eid = n1->edges[1];
- while(eid != 0xFFFFFFFFU){
- e = ref_dagedgev(g->edges, eid);
- e->visit = 1;
- if(!has_non_visited_edge_dagcns(g, e->nodes[1], 0)){
- push_u32fifo(queue, e->nodes[1]);
- }
- eid = e->links[1];
- }
- }
- free_u32fifo(queue);
- clear_u8list(g->cns);
- clear_u32list(g->deps);
- if(map) clear_u32list(map);
- g->cns_head = 0;
- if(g->nodes->size == 0) return;
- n1 = ref_dagnodev(g->nodes, g->cns_head);
- g->cns_score = n1->aux;
- n1->cns = 1;
- lst = 0;
- if(map && g->cns_head < g->backbone_size){
- while(lst < g->cns_head){ push_u32list(map, g->cns->size); lst ++; }
- }
- push_u8list(g->cns, n1->base);
- push_u32list(g->deps, 0);
- while(n1->fw_edge != NODE_MAX_FW_EDGE){
- e = ref_dagedgev(g->edges, n1->fw_edge);
- e->cns = 1;
- if(map && e->nodes[0] < g->backbone_size){
- while(lst < e->nodes[0]){ push_u32list(map, g->cns->size); lst ++; }
- }
- n1 = ref_dagnodev(g->nodes, e->nodes[0]);
- n1->cns = 1;
- push_u8list(g->cns, n1->base);
- push_u32list(g->deps, 0);
- }
- if(map) while(lst <= g->backbone_size){ push_u32list(map, g->cns->size); lst ++; }
- }
- #endif
|