general_graph.h 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387
  1. /*
  2. *
  3. * Copyright (c) 2011, Jue Ruan <ruanjue@gmail.com>
  4. *
  5. *
  6. * This program is free software: you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License as published by
  8. * the Free Software Foundation, either version 3 of the License, or
  9. * (at your option) any later version.
  10. *
  11. * This program is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU General Public License
  17. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  18. */
  19. #ifndef __GENERAL_GRAPH_RJ_H
  20. #define __GENERAL_GRAPH_RJ_H
  21. #include "list.h"
  22. #include "hashset.h"
  23. #define GEG_MAX_NODE 0xFFFFFFFFFFLLU
  24. #define GEG_MAX_EDGE_CNT 0x3FFFF
  25. #define GEG_MAX_EDGE_COV 0x7FFFF
  26. #define GEG_MAX_EDGE_OFF 0x7FFFFF
  27. #define GEG_MIN_EDGE_OFF -0x7FFFFF
  28. typedef struct {
  29. u8i node1:40, dir1:1, dir2:1, closed:2, cov:19, visit:1;
  30. u8i node2:40; b8i off:24;
  31. } ge_edge_t;
  32. define_list(geedgev, ge_edge_t);
  33. static inline uint64_t _ge_edge_hashcode(ge_edge_t e){
  34. const uint64_t m = 0xc6a4a7935bd1e995LLU;
  35. const int r = 47;
  36. uint64_t h = 1023 ^ (16 * m);
  37. uint64_t k = (e.node1 << 1) | e.dir1;
  38. k *= m;
  39. k ^= k >> r;
  40. k *= m;
  41. h ^= k;
  42. h *= m;
  43. k = (e.node2 << 1) | e.dir2;
  44. k *= m;
  45. k ^= k >> r;
  46. k *= m;
  47. h ^= k;
  48. h *= m;
  49. h ^= h >> r;
  50. h *= m;
  51. h ^= h >> r;
  52. return h;
  53. }
  54. #define GEEDGEHASH(idx) ((geedgev *)set->userdata)->buffer[idx]
  55. #define ge_edge_hashcode(E) _ge_edge_hashcode(GEEDGEHASH(E))
  56. #define ge_edge_hashequals(E1, E2) (GEEDGEHASH(E1).node1 == GEEDGEHASH(E2).node1 && GEEDGEHASH(E1).node2 == GEEDGEHASH(E2).node2 \
  57. && GEEDGEHASH(E1).dir1 == GEEDGEHASH(E2).dir1 && GEEDGEHASH(E1).dir2 == GEEDGEHASH(E2).dir2)
  58. define_hashset(geedgehash, u8i, ge_edge_hashcode, ge_edge_hashequals);
  59. typedef struct {
  60. u8i idx:63, flg:1;
  61. u8i next;
  62. } ge_edge_ref_t;
  63. define_list(geedgerefv, ge_edge_ref_t);
  64. typedef struct { uint64_t idx:46, cnt:18; } ge_ptr_ref_t;
  65. static const ge_ptr_ref_t GE_PTR_REF_NULL = (ge_ptr_ref_t){0, 0};
  66. define_list(geptrrefv, ge_ptr_ref_t);
  67. typedef struct { uint64_t idx:46, cnt:18; } ge_vec_ref_t;
  68. static const ge_vec_ref_t GE_VEC_REF_NULL = (ge_vec_ref_t){0, 0};
  69. define_list(gevecrefv, ge_vec_ref_t);
  70. typedef struct {
  71. u8i closed:1, bt_visit:40, bt_dir:1, bt_idx:18, status:4;
  72. u8i unvisit:18, aux:46;
  73. ge_ptr_ref_t edges[2];
  74. } ge_node_t;
  75. define_list(genodev, ge_node_t);
  76. #define GEG_TRACE_MSG_ZERO 0
  77. #define GEG_TRACE_MSG_ONE 1
  78. #define GEG_TRACE_MSG_MORE 2
  79. #define GEG_TRACE_MSG_VISITED 3
  80. #define GEG_TRACE_MSG_UNDEF 4
  81. typedef void (*geg_clr_node_callback)(void *userdata);
  82. typedef void (*geg_add_node_callback)(void *userdata, u8i nidx);
  83. typedef void (*geg_del_node_callback)(void *userdata, u8i nidx);
  84. typedef void (*geg_clr_edge_callback)(void *userdata);
  85. typedef void (*geg_add_edge_callback)(void *userdata, u8i eidx);
  86. typedef void (*geg_del_edge_callback)(void *userdata, u8i eidx);
  87. #define define_simple_geg_callback(tag, node_tag, node_tag_t, edge_tag, edge_tag_t) \
  88. void tag##nodeclr(void *aux){ clear_##node_tag((node_tag*)aux); } \
  89. void tag##nodeadd(void *aux, u8i idx){ \
  90. node_tag *nodes = (node_tag*)aux; \
  91. if(idx < nodes->size){ \
  92. memset(ref_##node_tag(nodes, idx), 0, sizeof(node_tag_t)); \
  93. } else { \
  94. memset(next_ref_##node_tag(nodes), 0, sizeof(node_tag_t)); \
  95. } \
  96. } \
  97. void tag##nodedel(void *aux, u8i idx){ UNUSED(aux); UNUSED(idx); } \
  98. void tag##edgeclr(void *aux){ clear_##edge_tag((edge_tag*)aux); } \
  99. void tag##edgeadd(void *aux, u8i idx){ \
  100. edge_tag *edges = (edge_tag*)aux; \
  101. if(idx < edges->size){ \
  102. memset(ref_##edge_tag(edges, idx), 0, sizeof(edge_tag_t)); \
  103. } else { \
  104. memset(next_ref_##edge_tag(edges), 0, sizeof(edge_tag_t)); \
  105. } \
  106. } \
  107. void tag##edgedel(void *aux, u8i idx){ UNUSED(aux); UNUSED(idx); } \
  108. static inline void tag##_set_callbacks_gegraph(GEGraph *g, node_tag *nodeaux, edge_tag *edgeaux){ \
  109. set_callbacks_gegraph(g, (void*)nodeaux, (void*)edgeaux, tag##nodeclr, tag##nodeadd, tag##nodedel, tag##edgeclr, tag##edgeadd, tag##edgedel); \
  110. }
  111. typedef struct {
  112. genodev *nodes;
  113. geedgev *edges;
  114. geedgehash *ehash;
  115. geedgerefv *erefs;
  116. void *nodeaux;
  117. void *edgeaux;
  118. geg_clr_node_callback nodeclr;
  119. geg_add_node_callback nodeadd;
  120. geg_del_node_callback nodedel;
  121. geg_clr_edge_callback edgeclr;
  122. geg_add_edge_callback edgeadd;
  123. geg_del_edge_callback edgedel;
  124. } GEGraph;
  125. static inline GEGraph* init_gegraph(){
  126. GEGraph *g;
  127. g = malloc(sizeof(GEGraph));
  128. g->nodes = init_genodev(32);
  129. g->edges = init_geedgev(32);
  130. g->ehash = init_geedgehash(1023);
  131. set_userdata_geedgehash(g->ehash, g->edges);
  132. g->erefs = init_geedgerefv(32);
  133. g->nodeaux = NULL;
  134. g->edgeaux = NULL;
  135. g->nodeclr = NULL;
  136. g->nodeadd = NULL;
  137. g->nodedel = NULL;
  138. g->edgeclr = NULL;
  139. g->edgeadd = NULL;
  140. g->edgedel = NULL;
  141. return g;
  142. }
  143. static inline void set_callbacks_gegraph(GEGraph *g, void *nodeaux, void *edgeaux, geg_clr_node_callback nodeclr, geg_add_node_callback nodeadd, geg_del_node_callback nodedel, geg_clr_edge_callback edgeclr, geg_add_edge_callback edgeadd, geg_del_edge_callback edgedel){
  144. g->nodeaux = nodeaux;
  145. g->edgeaux = edgeaux;
  146. g->nodeclr = nodeclr;
  147. g->nodeadd = nodeadd;
  148. g->nodedel = nodedel;
  149. g->edgeclr = edgeclr;
  150. g->edgeadd = edgeadd;
  151. g->edgedel = edgedel;
  152. }
  153. static inline void free_gegraph(GEGraph *g){
  154. free_genodev(g->nodes);
  155. free_geedgev(g->edges);
  156. free_geedgehash(g->ehash);
  157. free_geedgerefv(g->erefs);
  158. free(g);
  159. }
  160. static inline void reset_gegraph(GEGraph *g){
  161. clear_genodev(g->nodes);
  162. if(g->nodeclr) g->nodeclr(g->nodeaux);
  163. clear_geedgev(g->edges);
  164. memset(next_ref_geedgev(g->edges), 0, sizeof(ge_edge_t));
  165. if(g->edgeclr){
  166. g->edgeclr(g->edgeaux);
  167. g->edgeadd(g->edgeaux, 0);
  168. }
  169. clear_geedgehash(g->ehash);
  170. clear_geedgerefv(g->erefs);
  171. memset(next_ref_geedgerefv(g->erefs), 0, sizeof(ge_edge_ref_t));
  172. }
  173. static inline ge_node_t* add_node_gegraph(GEGraph *g){
  174. ge_node_t *n;
  175. n = next_ref_genodev(g->nodes);
  176. memset(n, 0, sizeof(ge_node_t));
  177. if(g->nodeadd) g->nodeadd(g->nodeaux, offset_genodev(g->nodes, n));
  178. return n;
  179. }
  180. static inline ge_edge_t* prepare_edge_gegraph(GEGraph *g, u8i node1, int dir1, u8i node2, int dir2, int *exists){
  181. ge_node_t *n;
  182. ge_edge_t *e;
  183. ge_edge_ref_t *f;
  184. u8i *u;
  185. e = ref_geedgev(g->edges, 0);
  186. if(node1 <= node2){
  187. e->node1 = node1;
  188. e->dir1 = dir1;
  189. e->node2 = node2;
  190. e->dir2 = dir2;
  191. } else {
  192. e->node1 = node2;
  193. e->dir1 = !dir2;
  194. e->node2 = node1;
  195. e->dir2 = !dir1;
  196. }
  197. e->cov = 0;
  198. e->off = 0;
  199. e->visit = 0;
  200. e->closed = 0;
  201. u = prepare_geedgehash(g->ehash, 0, exists);
  202. if(*exists){
  203. return g->edges->buffer + *u;
  204. } else {
  205. *u = g->edges->size;
  206. e = next_ref_geedgev(g->edges);
  207. *e = g->edges->buffer[0];
  208. n = g->nodes->buffer + e->node1;
  209. f = next_ref_geedgerefv(g->erefs);
  210. f->idx = *u;
  211. f->flg = 0;
  212. f->next = n->edges[e->dir1].idx;
  213. n->edges[e->dir1].idx = g->erefs->size - 1;
  214. n->edges[e->dir1].cnt ++;
  215. n = g->nodes->buffer + e->node2;
  216. f = next_ref_geedgerefv(g->erefs);
  217. f->idx = *u;
  218. f->flg = 1;
  219. f->next = n->edges[!e->dir2].idx;
  220. n->edges[!e->dir2].idx = g->erefs->size - 1;
  221. n->edges[!e->dir2].cnt ++;
  222. if(g->edgeadd) g->edgeadd(g->edgeaux, offset_geedgev(g->edges, e));
  223. return e;
  224. }
  225. }
  226. static inline void cut_edge_core_gegraph(GEGraph *g, ge_edge_t *e, int closed_val){
  227. if(e->closed) return;
  228. e->closed = closed_val;
  229. ref_genodev(g->nodes, e->node1)->edges[e->dir1].cnt --;
  230. ref_genodev(g->nodes, e->node2)->edges[!e->dir2].cnt --;
  231. if(g->edgedel) g->edgedel(g->edgeaux, offset_geedgev(g->edges, e));
  232. }
  233. #define cut_edge_gegraph(g, e) cut_edge_core_gegraph(g, e, 1)
  234. static inline void revive_edge_gegraph(GEGraph *g, ge_edge_t *e){
  235. if(e->closed == 0) return;
  236. e->closed = 0;
  237. ref_genodev(g->nodes, e->node1)->edges[e->dir1].cnt ++;
  238. ref_genodev(g->nodes, e->node2)->edges[!e->dir2].cnt ++;
  239. if(g->edgedel) g->edgeadd(g->edgeaux, offset_geedgev(g->edges, e));
  240. }
  241. static inline ge_edge_ref_t* single_edge_gegraph(GEGraph *g, ge_node_t *n, int dir, int *info){
  242. ge_edge_ref_t *f, *ret;
  243. uint64_t idx;
  244. ret = NULL;
  245. if(info){
  246. *info = GEG_TRACE_MSG_ZERO;
  247. if(n->edges[dir].cnt == 0) return NULL;
  248. idx = n->edges[dir].idx;
  249. while(idx){
  250. f = ref_geedgerefv(g->erefs, idx);
  251. idx = f->next;
  252. if(g->edges->buffer[f->idx].closed) continue;
  253. if(ret){ *info = GEG_TRACE_MSG_MORE; return NULL; }
  254. else { *info = GEG_TRACE_MSG_ONE; ret = f; }
  255. }
  256. } else {
  257. if(n->edges[dir].cnt == 0) return NULL;
  258. idx = n->edges[dir].idx;
  259. while(idx){
  260. f = ref_geedgerefv(g->erefs, idx);
  261. idx = f->next;
  262. if(g->edges->buffer[f->idx].closed) continue;
  263. if(ret){ return NULL; }
  264. else { ret = f; }
  265. }
  266. }
  267. return ret;
  268. }
  269. #define count_edges_gegraph(g, n, dir) (n)->edges[dir].cnt
  270. // dir = 2 means either strand
  271. static inline ge_edge_ref_t* edge_node2node_gegraph(GEGraph *g, u8i node1, int dir1, u8i node2, int dir2){
  272. ge_node_t *n;
  273. ge_edge_ref_t *f;
  274. ge_edge_t *e;
  275. uint64_t idx;
  276. int dire;
  277. n = ref_genodev(g->nodes, node1);
  278. if(dir1 > 1){
  279. dir1 = 0; dire = 2;
  280. } else {
  281. dire = dir1 + 1;
  282. }
  283. while(dir1 < dire){
  284. idx = n->edges[dir1].idx;
  285. while(idx){
  286. f = ref_geedgerefv(g->erefs, idx);
  287. idx = f->next;
  288. e = ref_geedgev(g->edges, f->idx);
  289. if(f->flg){
  290. if(e->node1 == node2 && (dir2 > 1? 1 : (dir2 == (!e->dir1)))) return f;
  291. } else {
  292. if(e->node2 == node2 && (dir2 > 1? 1 : (dir2 == e->dir2))) return f;
  293. }
  294. }
  295. dir1 ++;
  296. }
  297. return NULL;
  298. }
  299. static inline void del_node_edges_gegraph(GEGraph *g, ge_node_t *n, int closed_val){
  300. ge_edge_ref_t *f;
  301. ge_edge_t *e;
  302. uint64_t idx;
  303. uint32_t k;
  304. for(k=0;k<2;k++){
  305. idx = n->edges[k].idx;
  306. while(idx){
  307. f = ref_geedgerefv(g->erefs, idx);
  308. idx = f->next;
  309. e = g->edges->buffer + f->idx;
  310. cut_edge_core_gegraph(g, e, closed_val);
  311. }
  312. }
  313. }
  314. static inline void del_node_gegraph(GEGraph *g, ge_node_t *n){
  315. del_node_edges_gegraph(g, n, 1);
  316. n->closed = 1;
  317. if(g->nodedel) g->nodeadd(g->nodeaux, offset_genodev(g->nodes, n));
  318. }
  319. #define geg_beg_iter_edges(g, n, dir, f, e) \
  320. { \
  321. u8i _geg_iter_idx; \
  322. _geg_iter_idx = (n)->edges[dir].idx; \
  323. while(_geg_iter_idx){ \
  324. (f) = ref_geedgerefv((g)->erefs, _geg_iter_idx); \
  325. _geg_iter_idx = (f)->next; \
  326. (e) = (g)->edges->buffer + (f)->idx
  327. #define geg_end_iter_edges() \
  328. } \
  329. }
  330. static inline void print_dot_gegraph(GEGraph *g, FILE *out){
  331. static const char *colors[2][2] = {{"blue", "green"}, {"red", "gray"}};
  332. ge_node_t *n;
  333. ge_edge_t *e;
  334. u8i i;
  335. fprintf(out, "digraph {\n");
  336. for(i=0;i<g->nodes->size;i++){
  337. n = ref_genodev(g->nodes, i);
  338. if(n->closed) continue;
  339. fprintf(out, " N%llu\n", i);
  340. }
  341. for(i=1;i<g->edges->size;i++){
  342. e = ref_geedgev(g->edges, i);
  343. if(e->closed) continue;
  344. fprintf(out, " N%llu -> N%llu [label=\"%c%c:%d:%d\" color=%s]\n", (u8i)e->node1, (u8i)e->node2, "+-"[e->dir1], "+-"[e->dir2], e->cov, e->off, colors[e->dir1][e->dir2]);
  345. fprintf(out, " N%llu -> N%llu [label=\"%c%c:%d:%d\" color=%s]\n", (u8i)e->node2, (u8i)e->node1, "-+"[e->dir2], "-+"[e->dir1], e->cov, e->off, colors[!e->dir2][!e->dir1]);
  346. }
  347. fprintf(out, "}\n");
  348. fflush(out);
  349. }
  350. static inline void fprint_dot_gegraph(GEGraph *g, char *prefix, char *suffix){
  351. FILE *out;
  352. out = open_file_for_write(prefix, suffix, 1);
  353. print_dot_gegraph(g, out);
  354. fclose(out);
  355. }
  356. #endif