wtdbg-graph.h

  1. /*
  2. *
  3. * Copyright (c) 2011, Jue Ruan <ruanjue@gmail.com>
  4. *
  5. *
  6. * This program is free software: you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License as published by
  8. * the Free Software Foundation, either version 3 of the License, or
  9. * (at your option) any later version.
  10. *
  11. * This program is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU General Public License
  17. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  18. */
  19. #ifndef __WTDBG_GRAPH_RJ_H
  20. #define __WTDBG_GRAPH_RJ_H
  21. #include "wtdbg.h"
  22. #include "filewriter.h"
  23. #include "pgzf.h"
  24. static inline u8i print_local_dot_graph(Graph *g, char *prefix, char *suffix);
  25. static inline void print_node_edges_cov_graph(Graph *g, FILE *out){
  26. node_t *n;
  27. edge_t *e;
  28. edge_ref_t *f;
  29. u8i idx, nid;
  30. u4i k, i;
  31. u4v *covs;
  32. covs = init_u4v(32);
  33. for(nid=0;nid<g->nodes->size;nid++){
  34. n = ref_nodev(g->nodes, nid);
  35. clear_u4v(covs);
  36. for(k=0;k<2;k++){
  37. idx = n->edges[k].idx;
  38. while(idx){
  39. f = ref_edgerefv(g->erefs, idx);
  40. e = ref_edgev(g->edges, f->idx);
  41. push_u4v(covs, e->cov);
  42. idx = f->next;
  43. }
  44. }
  45. if(covs->size == 0) continue;
  46. sort_array(covs->buffer, covs->size, u4i, num_cmpgt(b, a));
  47. fprintf(out, "NODE_COV\tN%llu\t%u\t%u", nid, n->cov, n->regs.cnt);
  48. for(i=0;i<covs->size;i++){
  49. fprintf(out, "\t%u", covs->buffer[i]);
  50. }
  51. fprintf(out, "\n");
  52. }
  53. free_u4v(covs);
  54. }
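/*
 * The loop above is the adjacency-list idiom used throughout this file:
 * node_t keeps, per strand k ('+' for k=0, '-' for k=1), a head index
 * n->edges[k].idx into g->erefs; each edge_ref_t carries f->next, and 0
 * terminates the chain. f->idx points into g->edges and f->flg records
 * whether the edge is stored with this node as node2. A minimal sketch of
 * the same walk (example_count_living is hypothetical, not part of this
 * header):
 *
 *   static u4i example_count_living(Graph *g, u8i nid, int dir){
 *       node_t *n = ref_nodev(g->nodes, nid);
 *       u8i idx = n->edges[dir].idx;
 *       u4i cnt = 0;
 *       while(idx){
 *           edge_ref_t *f = ref_edgerefv(g->erefs, idx);
 *           edge_t *e = ref_edgev(g->edges, f->idx);
 *           idx = f->next;             // 0 ends the chain
 *           if(e->closed) continue;    // skip cut edges
 *           cnt ++;
 *       }
 *       return cnt;
 *   }
 */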
  55. static inline void cut_edge_core_graph(Graph *g, edge_t *e, int closed_val){
  56. //if(e->closed == closed_val) return;
  57. if(e->closed) return;
  58. e->closed = closed_val;
  59. ref_nodev(g->nodes, e->node1)->edges[e->dir1].cnt --;
  60. ref_nodev(g->nodes, e->node2)->edges[!e->dir2].cnt --;
  61. }
  62. #define cut_edge_graph(g, e) cut_edge_core_graph(g, e, 1)
  63. static inline void cut_lnk_core_graph(Graph *g, lnk_t *e, int closed_val){
  64. if(e->closed) return;
  65. e->closed = closed_val;
  66. ref_frgv(g->frgs, e->frg1)->lnks[e->dir1].cnt --;
  67. ref_frgv(g->frgs, e->frg2)->lnks[!e->dir2].cnt --;
  68. }
  69. #define cut_lnk_graph(g, e) cut_lnk_core_graph(g, e, 1)
  70. static inline void revive_edge_graph(Graph *g, edge_t *e){
  71. if(e->closed == WT_EDGE_CLOSED_NULL) return;
  72. e->closed = WT_EDGE_CLOSED_NULL;
  73. ref_nodev(g->nodes, e->node1)->edges[e->dir1].cnt ++;
  74. ref_nodev(g->nodes, e->node2)->edges[!e->dir2].cnt ++;
  75. }
  76. static inline void revive_lnk_graph(Graph *g, lnk_t *e){
  77. if(e->closed == WT_EDGE_CLOSED_NULL) return;
  78. e->closed = WT_EDGE_CLOSED_NULL;
  79. ref_frgv(g->frgs, e->frg1)->lnks[e->dir1].cnt ++;
  80. ref_frgv(g->frgs, e->frg2)->lnks[!e->dir2].cnt ++;
  81. }
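/*
 * cut_edge_core_graph and revive_edge_graph keep n->edges[dir].cnt in sync
 * with the number of open edges on that side, so callers can test local
 * connectivity without walking the whole ref chain. A cut returns early when
 * the edge is already closed; a revive re-opens the edge and restores both
 * endpoint counters. Round-trip sketch (some_idx is a placeholder for an
 * existing open edge):
 *
 *   edge_t *e = ref_edgev(g->edges, some_idx);
 *   cut_edge_graph(g, e);      // mark closed, decrement both endpoint cnts
 *   revive_edge_graph(g, e);   // back to WT_EDGE_CLOSED_NULL, cnts restored
 */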
  82. static inline void del_node_edges_graph(Graph *g, node_t *n){
  83. edge_ref_t *f;
  84. edge_t *e;
  85. u8i idx;
  86. u4i k;
  87. for(k=0;k<2;k++){
  88. idx = n->edges[k].idx;
  89. while(idx){
  90. f = ref_edgerefv(g->erefs, idx);
  91. idx = f->next;
  92. e = g->edges->buffer + f->idx;
  93. cut_edge_core_graph(g, e, WT_EDGE_CLOSED_HARD);
  94. }
  95. }
  96. }
  97. static inline void del_frg_lnks_graph(Graph *g, frg_t *n){
  98. edge_ref_t *f;
  99. lnk_t *e;
  100. u8i idx;
  101. u4i k;
  102. for(k=0;k<2;k++){
  103. idx = n->lnks[k].idx;
  104. while(idx){
  105. f = ref_edgerefv(g->lrefs, idx);
  106. idx = f->next;
  107. e = g->lnks->buffer + f->idx;
  108. cut_lnk_core_graph(g, e, WT_EDGE_CLOSED_HARD);
  109. }
  110. }
  111. }
  112. static inline void del_node_graph(Graph *g, node_t *n){
  113. del_node_edges_graph(g, n);
  114. n->closed = 1;
  115. }
  116. static inline u8i mask_nodes_by_edge_cov_graph(Graph *g, u4i min_node_cov, float min_edge_cov_ratio, FILE *out){
  117. node_t *n;
  118. edge_t *e;
  119. edge_ref_t *f;
  120. u8i idx, nid, ret;
  121. u4i max, k;
  122. ret = 0;
  123. for(nid=0;nid<g->nodes->size;nid++){
  124. n = ref_nodev(g->nodes, nid);
  125. if(n->closed) continue;
  126. //if(n->regs.cnt < min_node_cov) continue;
  127. if(n->cov < min_node_cov) continue;
  128. max = 0;
  129. for(k=0;k<2;k++){
  130. idx = n->edges[k].idx;
  131. while(idx){
  132. f = ref_edgerefv(g->erefs, idx);
  133. e = ref_edgev(g->edges, f->idx);
  134. if(e->cov > max) max = e->cov;
  135. idx = f->next;
  136. }
  137. }
  138. if(max < (u4i)(n->regs.cnt * min_edge_cov_ratio)){
  139. del_node_graph(g, n);
  140. if(out){
  141. fprintf(out, "NODE_EDGE_COV\tN%llu\t%u\t%u\n", nid, n->regs.cnt, max);
  142. }
  143. ret ++;
  144. }
  145. }
  146. return ret;
  147. }
  148. #define MAX_BT_NIDX 0xFFFFFFU
  149. typedef struct {
  150. u8i node:46, visit:1, closed:1, cov:11, fix:1, sub_dir:1;
  151. u8i flag;
  152. u8i bt_dir:1, bt_open:10, bt_nidx:24, bt_score:20, bt_step:8, bt_hit:1;
  153. ptr_ref_t edges[2];
  154. } subnode_t;
  155. define_list(subnodev, subnode_t);
  156. #define subnode_hashcode(E) u64hashcode((E).node)
  157. #define subnode_hashequals(E1, E2) (E1).node == (E2).node
  158. define_hashset(subnodehash, subnode_t, subnode_hashcode, subnode_hashequals);
  159. typedef struct {
  160. subnode_t *node;
  161. u4i cov:28, visit:1, fwd:1, dir:1, closed:1;
  162. int off;
  163. u4i next;
  164. } subedge_t;
  165. define_list(subedgev, subedge_t);
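/*
 * Scratch structures for the local subgraph built by
 * evaluate_node_connectivity_graph below: subnodehash is keyed by the node
 * id alone (see subnode_hashcode), and each subnode_t keeps per-direction
 * adjacency as an index-linked chain into a subedgev, with slot 0 reserved
 * as the terminator (one dummy element is pushed right after clear_subedgev
 * for exactly that reason). Hedged iteration sketch over one subnode's
 * forward edges:
 *
 *   u8i edx = n->edges[0].idx;          // n: a subnode_t*
 *   while(edx){
 *       subedge_t *e = ref_subedgev(edges, edx);
 *       edx = e->next;                  // 0 terminates
 *       // e->node is the target subnode, e->dir its strand, e->cov support
 *   }
 */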
  166. static inline int evaluate_node_connectivity_graph(Graph *g, u8i nid, u4v *rds, subnodehash *nodes, subedgev *edges, ptrrefv *stack){
  167. node_t *nd;
  168. read_t *rd;
  169. reg_t *rg;
  170. subnode_t N, *n, *n1, *n2;
  171. subedge_t *e;
  172. ptr_ref_t *p;
  173. u8i idx, edx, aim;
  174. u4i i, k, k1, k2, cnt;
  175. int exists;
  176. // collect reads containing nid
  177. clear_u4v(rds);
  178. nd = ref_nodev(g->nodes, nid);
  179. for(i=0;i<nd->regs.cnt;i++){
  180. rg = ref_regv(g->regs, nd->regs.idx + i);
  181. push_u4v(rds, rg->rid);
  182. }
  183. // prepare nodes in subgraph
  184. clear_subnodehash(nodes);
  185. clear_subedgev(edges);
  186. next_ref_subedgev(edges);
  187. memset(&N, 0, sizeof(subnode_t));
  188. N.cov = 1;
  189. for(i=0;i<rds->size;i++){
  190. rd = ref_readv(g->reads, rds->buffer[i]);
  191. rg = NULL;
  192. idx = rd->regs.idx;
  193. while(idx){
  194. rg = ref_regv(g->regs, idx);
  195. idx = rg->read_link;
  196. if(rg->closed) continue;
  197. N.node = rg->node;
  198. n = prepare_subnodehash(nodes, N, &exists);
  199. if(exists){
  200. n->cov ++;
  201. } else {
  202. *n = N;
  203. }
  204. }
  205. }
  206. // mask low cov nodes
  207. reset_iter_subnodehash(nodes);
  208. while((n = ref_iter_subnodehash(nodes))){
  209. if(n->cov < g->min_node_cov) n->closed = 1;
  210. }
  211. // build edges
  212. for(i=0;i<rds->size;i++){
  213. rd = ref_readv(g->reads, rds->buffer[i]);
  214. n1 = NULL;
  215. k1 = 0;
  216. idx = rd->regs.idx;
  217. while(idx){
  218. rg = ref_regv(g->regs, idx);
  219. idx = rg->read_link;
  220. if(rg->closed) continue;
  221. N.node = rg->node;
  222. n2 = get_subnodehash(nodes, N);
  223. k2 = rg->dir;
  224. if(n2->closed) continue;
  225. if(n1){
  226. // link n1 to n2
  227. edx = n1->edges[k1].idx;
  228. while(edx){
  229. e = ref_subedgev(edges, edx);
  230. if(e->node == n2 && e->dir == k2){
  231. e->cov ++;
  232. break;
  233. }
  234. edx = e->next;
  235. }
  236. if(edx == 0){
  237. edx = edges->size;
  238. e = next_ref_subedgev(edges);
  239. e->node = n2;
  240. e->dir = k2;
  241. e->cov = 1;
  242. e->next = n1->edges[k1].idx;
  243. n1->edges[k1].idx = edx;
  244. n1->edges[k1].cnt ++;
  245. }
  246. // link rev n2 to rev n1
  247. edx = n2->edges[!k2].idx;
  248. while(edx){
  249. e = ref_subedgev(edges, edx);
  250. if(e->node == n1 && e->dir == !k1){
  251. e->cov ++;
  252. break;
  253. }
  254. edx = e->next;
  255. }
  256. if(edx == 0){
  257. edx = edges->size;
  258. e = next_ref_subedgev(edges);
  259. e->node = n1;
  260. e->dir = !k1;
  261. e->cov = 1;
  262. e->next = n2->edges[!k2].idx;
  263. n2->edges[!k2].idx = edx;
  264. n2->edges[!k2].cnt ++;
  265. }
  266. }
  267. n1 = n2;
  268. k1 = k2;
  269. }
  270. }
  271. // find the nid node
  272. N.node = nid;
  273. n = get_subnodehash(nodes, N);
  274. n->visit = 1;
  275. // check whether its out-edges re-converge into one downstream node
  276. for(k=0;k<2;k++){
  277. if(n->edges[k].cnt > 64) return 0;
  278. if(n->edges[k].cnt < 2) continue;
  279. idx = n->edges[k].idx;
  280. cnt = 0;
  281. while(idx){
  282. e = ref_subedgev(edges, idx);
  283. idx = e->next;
  284. if(e->cov == 1) continue; // don't track low cov out-edges
  285. cnt ++;
  286. }
  287. aim = 0xFFFFFFFFFFFFFFFFLLU >> (64 - cnt);
  288. cnt = 0;
  289. exists = 0;
  290. if(k){
  291. reset_iter_subnodehash(nodes);
  292. while((n1 = ref_iter_subnodehash(nodes))){ n1->flag = 0; }
  293. }
  294. idx = n->edges[k].idx;
  295. while(idx){
  296. e = ref_subedgev(edges, idx);
  297. idx = e->next;
  298. if(e->cov == 1) continue; // don't track low cov out-edges
  299. e->node->flag |= 1LLU << cnt;
  300. cnt ++;
  301. reset_iter_subnodehash(nodes);
  302. while((n1 = ref_iter_subnodehash(nodes))){ n1->visit = 0; }
  303. n->visit = 1;
  304. clear_ptrrefv(stack);
  305. push_ptrrefv(stack, (ptr_ref_t){offset_subnodehash(nodes, e->node), e->dir});
  306. while(stack->size){
  307. p = peer_ptrrefv(stack);
  308. n1 = nodes->array + p->idx;
  309. k1 = p->cnt;
  310. stack->size --;
  311. if(n1->flag == aim){ exists = 1; break; }
  312. if(n1->visit) continue;
  313. n1->visit = 1;
  314. edx = n1->edges[k1].idx;
  315. while(edx){
  316. e = ref_subedgev(edges, edx);
  317. edx = e->next;
  318. if(e->node->visit) continue;
  319. e->node->flag |= n1->flag;
  320. push_ptrrefv(stack, (ptr_ref_t){offset_subnodehash(nodes, e->node), e->dir});
  321. }
  322. if(exists) break;
  323. }
  324. if(exists) break;
  325. }
  326. if(exists == 0) return 0;
  327. }
  328. return 1;
  329. }
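/*
 * How the check above decides connectivity: for each direction with two or
 * more well-supported out-edges (cov > 1), every such edge is assigned one
 * bit of a flag word, e.g. cnt == 3 gives
 * aim = 0xFFFFFFFFFFFFFFFFLLU >> (64 - 3) = 0x7. A DFS is started from each
 * strong out-edge, OR-ing its flag into every reachable subnode; the
 * direction passes only if some subnode accumulates all bits
 * (n1->flag == aim), i.e. the branches re-converge downstream. If either
 * direction fails, the function returns 0 and the caller masks the node as
 * likely repetitive.
 */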
  330. static inline void print_subgraph_dot(Graph *g, u8i id, subnodehash *nodes, subedgev *edges, FILE *out){
  331. subnode_t *n;
  332. subedge_t *e;
  333. u8i idx;
  334. int k;
  335. UNUSED(g);
  336. fprintf(out, "digraph N%llu {\n", id);
  337. fprintf(out, " N%llu [style=filled fillcolor=yellow]\n", id);
  338. reset_iter_subnodehash(nodes);
  339. while((n = ref_iter_subnodehash(nodes))){
  340. if(n->closed) continue;
  341. fprintf(out, "N%llu [label=\"N%llu%s(%u)\"]\n", (u8i)n->node, (u8i)n->node, n->closed? "*" : "", n->cov);
  342. for(k=0;k<2;k++){
  343. idx = n->edges[k].idx;
  344. while(idx){
  345. e = ref_subedgev(edges, idx);
  346. idx = e->next;
  347. fprintf(out, " N%llu -> N%llu [label=\"%c%c:%d:%d\"]\n", (u8i)n->node, (u8i)e->node->node, "+-"[k], "+-"[e->dir], e->cov, e->off);
  348. }
  349. }
  350. }
  351. fprintf(out, "}\n");
  352. }
  353. static inline void fprint_subgraph_dot(Graph *g, u8i id, subnodehash *nodes, subedgev *edges, char *filename){
  354. FILE *out;
  355. out = open_file_for_write(filename, NULL, 1);
  356. print_subgraph_dot(g, id, nodes, edges, out);
  357. fclose(out);
  358. }
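/*
 * The emitted file is plain Graphviz dot; assuming Graphviz is installed it
 * can be rendered with e.g. `dot -Tpng subgraph.dot -o subgraph.png`
 * (file names here are only illustrative). Hedged call sketch:
 *
 *   fprint_subgraph_dot(g, nid, nodes, edges, "subgraph.dot");
 */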
  359. thread_beg_def(mrep);
  360. Graph *g;
  361. u8i ret;
  362. u8v *reps;
  363. thread_end_def(mrep);
  364. thread_beg_func(mrep);
  365. subnodehash *nodes;
  366. subedgev *edges;
  367. u4v *rds;
  368. ptrrefv *stack;
  369. u8i nid, tidx, ncpu;
  370. nodes = init_subnodehash(1023);
  371. edges = init_subedgev(32);
  372. rds = init_u4v(32);
  373. stack = init_ptrrefv(32);
  374. tidx = mrep->t_idx;
  375. ncpu = mrep->n_cpu;
  376. thread_beg_loop(mrep);
  377. for(nid=tidx;nid<mrep->g->nodes->size;nid+=ncpu){
  378. if(mrep->g->nodes->buffer[nid].closed) continue;
  379. if(evaluate_node_connectivity_graph(mrep->g, nid, rds, nodes, edges, stack) == 0){
  380. if(0){
  381. print_subgraph_dot(mrep->g, nid, nodes, edges, stdout);
  382. }
  383. mrep->g->nodes->buffer[nid].closed = 1;
  384. push_u8v(mrep->reps, nid);
  385. mrep->ret ++;
  386. }
  387. }
  388. thread_end_loop(mrep);
  389. free_subnodehash(nodes);
  390. free_subedgev(edges);
  391. free_u4v(rds);
  392. free_ptrrefv(stack);
  393. thread_end_func(mrep);
  394. static inline u8i mask_nodes_by_connectivity_graph(Graph *g, int ncpu, FILE *out){
  395. node_t *n;
  396. u8i ret, i;
  397. thread_preprocess(mrep);
  398. ret = 0;
  399. thread_beg_init(mrep, ncpu);
  400. mrep->g = g;
  401. mrep->ret = 0;
  402. mrep->reps = init_u8v(32);
  403. thread_end_init(mrep);
  404. thread_wake_all(mrep);
  405. thread_wait_all(mrep);
  406. thread_beg_close(mrep);
  407. if(out){
  408. for(i=0;i<mrep->reps->size;i++){
  409. n = ref_nodev(g->nodes, mrep->reps->buffer[i]);
  410. fprintf(out, "N%llu\t%u\tconn\n", (u8i)mrep->reps->buffer[i], (u4i)n->regs.cnt);
  411. }
  412. }
  413. ret += mrep->ret;
  414. free_u8v(mrep->reps);
  415. thread_end_close(mrep);
  416. return ret;
  417. }
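/*
 * Work distribution in the mrep worker above is a static stripe: thread t of
 * ncpu handles nodes t, t + ncpu, t + 2*ncpu, ... Each worker only flips
 * closed = 1 on its own nodes and records their ids in its private reps
 * vector, so no locking is needed; the ids are printed and the per-thread
 * counts summed while the threads are being closed. Hedged call sketch:
 *
 *   u8i masked = mask_nodes_by_connectivity_graph(g, 4, stderr);
 */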
  418. // remove regs that have no high-cov edges to other regs within the same read
  419. thread_beg_def(mrdk);
  420. Graph *g;
  421. u4i ridx;
  422. u8v *masks;
  423. thread_end_def(mrdk);
  424. thread_beg_func(mrdk);
  425. Graph *g;
  426. u4i ridx;
  427. u8v *regs;
  428. u4v *gidxs, *gcnts;
  429. UUhash *hash;
  430. UUhash_t *UU;
  431. read_t *rd;
  432. reg_t *reg;
  433. node_t *n;
  434. edge_ref_t *f;
  435. edge_t *e;
  436. u8i idx, fidx, nidx, hidx;
  437. u4i i, k, gid, max;
  438. g = mrdk->g;
  439. regs = init_u8v(32);
  440. gidxs = init_u4v(32);
  441. gcnts = init_u4v(32);
  442. hash = init_UUhash(1023);
  443. thread_beg_loop(mrdk);
  444. clear_u8v(mrdk->masks);
  445. ridx = mrdk->ridx;
  446. if(ridx == MAX_U4) continue;
  447. clear_u8v(regs);
  448. clear_u4v(gidxs);
  449. clear_u4v(gcnts);
  450. clear_UUhash(hash);
  451. rd = ref_readv(g->reads, ridx);
  452. idx = rd->regs.idx;
  453. while(idx){
  454. push_u4v(gidxs, regs->size);
  455. push_u4v(gcnts, 0);
  456. reg = ref_regv(g->regs, idx);
  457. if(reg->closed == 0){
  458. put_UUhash(hash, (UUhash_t){reg->node, regs->size});
  459. push_u8v(regs, idx);
  460. }
  461. idx = reg->read_link;
  462. }
  463. for(i=0;i<regs->size;i++){
  464. idx = regs->buffer[i];
  465. gid = gidxs->buffer[i];
  466. reg = ref_regv(g->regs, idx);
  467. n = ref_nodev(g->nodes, reg->node);
  468. for(k=0;k<2;k++){
  469. fidx = n->edges[k].idx;
  470. while(fidx){
  471. f = ref_edgerefv(g->erefs, fidx);
  472. fidx = f->next;
  473. e = ref_edgev(g->edges, f->idx);
  474. if(e->cov < g->min_edge_cov) continue;
  475. nidx = f->flg? e->node1 : e->node2;
  476. if((UU = get_UUhash(hash, nidx)) == NULL) continue;
  477. if((hidx = UU->val) <= i) continue;
  478. gidxs->buffer[hidx] = gid;
  479. }
  480. }
  481. }
  482. for(i=0;i<gidxs->size;i++) gcnts->buffer[gidxs->buffer[i]] ++;
  483. max = 0;
  484. gid = 0;
  485. for(i=0;i<gcnts->size;i++){
  486. if(gcnts->buffer[i] > max){
  487. max = gcnts->buffer[i];
  488. gid = i;
  489. }
  490. }
  491. for(i=0;i<gidxs->size;i++){
  492. if(gidxs->buffer[i] != gid){
  493. push_u8v(mrdk->masks, regs->buffer[i]);
  494. }
  495. }
  496. thread_end_loop(mrdk);
  497. free_u8v(regs);
  498. free_u4v(gidxs);
  499. free_u4v(gcnts);
  500. free_UUhash(hash);
  501. thread_end_func(mrdk);
  502. static inline u8i mask_read_weak_regs_graph(Graph *g, int ncpu){
  503. u8i i, ret;
  504. u4i j;
  505. thread_preprocess(mrdk);
  506. thread_beg_init(mrdk, ncpu);
  507. mrdk->g = g;
  508. mrdk->ridx = MAX_U4;
  509. mrdk->masks = init_u8v(32);
  510. thread_end_init(mrdk);
  511. ret = 0;
  512. for(i=0;i<g->reads->size+ncpu;i++){
  513. if(i < g->reads->size){
  514. thread_wait_one(mrdk);
  515. } else {
  516. thread_wait_next(mrdk);
  517. }
  518. if(mrdk->masks->size){
  519. for(j=0;j<mrdk->masks->size;j++){
  520. ref_regv(g->regs, mrdk->masks->buffer[j])->closed = 1;
  521. }
  522. ret += mrdk->masks->size;
  523. clear_u8v(mrdk->masks);
  524. }
  525. if(i < g->reads->size){
  526. mrdk->ridx = i;
  527. thread_wake(mrdk);
  528. }
  529. }
  530. thread_beg_close(mrdk);
  531. free_u8v(mrdk->masks);
  532. thread_end_close(mrdk);
  533. return ret;
  534. }
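/*
 * Per read, the mrdk worker groups its open regs by strong edges: gidxs
 * starts out as the identity, and whenever two regs of the read are joined
 * by an edge with cov >= g->min_edge_cov, the later reg is relabelled with
 * the earlier reg's group id (a one-pass, forward-only merge rather than a
 * full union-find). Only the largest group survives; every other reg index
 * ends up in masks and is closed by the dispatcher. The dispatcher uses the
 * usual wait_one / wake pattern, iterating i up to reads->size + ncpu so the
 * last ncpu iterations only drain results. Hedged call sketch:
 *
 *   u8i masked = mask_read_weak_regs_graph(g, 4);
 */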
  535. static inline u8i mask_possible_tip_nodes_graph(Graph *g){
  536. node_t *n;
  537. reg_t *r;
  538. u8i ret, i;
  539. u4i j, cnt;
  540. ret = 0;
  541. for(i=0;i<g->nodes->size;i++){
  542. n = ref_nodev(g->nodes, i);
  543. if(n->closed) continue;
  544. cnt = 0;
  545. for(j=0;j<n->regs.cnt;j++){
  546. r = ref_regv(g->regs, n->regs.idx + j);
  547. if(r->read_link == 0) continue;
  548. if(n->regs.idx + j == g->reads->buffer[r->rid].regs.idx) continue;
  549. cnt ++;
  550. }
  551. if(cnt < g->min_edge_cov){
  552. n->closed = 1;
  553. ret ++;
  554. }
  555. }
  556. return ret;
  557. }
  558. static inline void print_node_edges_graph(Graph *g, u8i nid, int dir, FILE *out){
  559. node_t *n;
  560. edge_ref_t *f;
  561. edge_t *e;
  562. u8i idx;
  563. n = ref_nodev(g->nodes, nid);
  564. idx = n->edges[dir].idx;
  565. while(idx){
  566. f = ref_edgerefv(g->erefs, idx);
  567. idx = f->next;
  568. e = ref_edgev(g->edges, f->idx);
  569. if(f->flg){
  570. fprintf(out, "N%llu\t%c\tN%llu\t%c\t%d\t%d\n", (u8i)e->node2, "+-"[!e->dir2], (u8i)e->node1, "+-"[!e->dir1], e->cov, e->off); // reversed view: node2(!dir2) -> node1(!dir1)
  571. } else {
  572. fprintf(out, "N%llu\t%c\tN%llu\t%c\t%d\t%d\n", (u8i)e->node1, "+-"[e->dir1], (u8i)e->node2, "+-"[e->dir2], e->cov, e->off);
  573. }
  574. }
  575. }
  576. static inline edge_ref_t* first_living_edge_graph(Graph *g, node_t *n, int dir, int *info){
  577. edge_ref_t *f, *ret;
  578. u8i idx;
  579. ret = NULL;
  580. if(info){
  581. *info = WT_TRACE_MSG_ZERO;
  582. if(n->edges[dir].cnt == 0) return NULL;
  583. idx = n->edges[dir].idx;
  584. while(idx){
  585. f = ref_edgerefv(g->erefs, idx);
  586. idx = f->next;
  587. if(g->edges->buffer[f->idx].closed) continue;
  588. if(ret){ *info = WT_TRACE_MSG_MORE; return NULL; }
  589. else { *info = WT_TRACE_MSG_ONE; ret = f; }
  590. }
  591. } else {
  592. if(n->edges[dir].cnt == 0) return NULL;
  593. idx = n->edges[dir].idx;
  594. while(idx){
  595. f = ref_edgerefv(g->erefs, idx);
  596. idx = f->next;
  597. if(g->edges->buffer[f->idx].closed) continue;
  598. if(ret){ return NULL; }
  599. else { ret = f; }
  600. }
  601. }
  602. return ret;
  603. }
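/*
 * Typical use: ask for the unique open edge on one side of a node and branch
 * on the info code. WT_TRACE_MSG_ZERO means a dead end, WT_TRACE_MSG_ONE a
 * unique continuation, WT_TRACE_MSG_MORE a branch (NULL is returned even
 * though edges exist). Hedged sketch:
 *
 *   int info;
 *   edge_ref_t *f = first_living_edge_graph(g, n, dir, &info);
 *   if(info == WT_TRACE_MSG_ONE){
 *       edge_t *e = ref_edgev(g->edges, f->idx);
 *       u8i next = f->flg? e->node1 : e->node2;   // node on the far side
 *       int ndir = f->flg? !e->dir1 : e->dir2;    // direction entering it
 *   }
 */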
  604. static inline edge_ref_t* first_living_lnk_graph(Graph *g, frg_t *n, int dir, int *info){
  605. edge_ref_t *f, *ret;
  606. u8i idx;
  607. ret = NULL;
  608. if(info){
  609. *info = WT_TRACE_MSG_ZERO;
  610. if(n->lnks[dir].cnt == 0) return NULL;
  611. idx = n->lnks[dir].idx;
  612. while(idx){
  613. f = ref_edgerefv(g->lrefs, idx);
  614. idx = f->next;
  615. if(g->lnks->buffer[f->idx].closed) continue;
  616. if(ret){ *info = WT_TRACE_MSG_MORE; return NULL; }
  617. else { *info = WT_TRACE_MSG_ONE; ret = f; }
  618. }
  619. } else {
  620. if(n->lnks[dir].cnt == 0) return NULL;
  621. idx = n->lnks[dir].idx;
  622. while(idx){
  623. f = ref_edgerefv(g->lrefs, idx); // lnk refs live in g->lrefs
  624. idx = f->next;
  625. if(g->lnks->buffer[f->idx].closed) continue;
  626. if(ret){ return NULL; }
  627. else { ret = f; }
  628. }
  629. }
  630. return ret;
  631. }
  632. #define count_living_edges_graph(g, n, dir) (n)->edges[dir].cnt
  633. #define count_living_lnks_graph(g, n, dir) (n)->lnks[dir].cnt
  634. // dir = 2 means either strand
  635. static inline edge_ref_t* edge_node2node_graph(Graph *g, u8i node1, int dir1, u8i node2, int dir2){
  636. node_t *n;
  637. edge_ref_t *f;
  638. edge_t *e;
  639. u8i idx;
  640. int dire;
  641. n = ref_nodev(g->nodes, node1);
  642. if(dir1 > 1){
  643. dir1 = 0; dire = 2;
  644. } else {
  645. dire = dir1 + 1;
  646. }
  647. while(dir1 < dire){
  648. idx = n->edges[dir1].idx;
  649. while(idx){
  650. f = ref_edgerefv(g->erefs, idx);
  651. idx = f->next;
  652. e = ref_edgev(g->edges, f->idx);
  653. if(f->flg){
  654. if(e->node1 == node2 && (dir2 > 1? 1 : (dir2 == (!e->dir1)))) return f;
  655. } else {
  656. if(e->node2 == node2 && (dir2 > 1? 1 : (dir2 == e->dir2))) return f;
  657. }
  658. }
  659. dir1 ++;
  660. }
  661. return NULL;
  662. }
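/*
 * Direction 2 acts as a wildcard on either side, so an existence test for
 * any edge between two nodes regardless of strand can be written as below
 * (hedged sketch; n1 and n2 are placeholder node ids):
 *
 *   edge_ref_t *f = edge_node2node_graph(g, n1, 2, n2, 2);
 *   if(f){ // some edge n1 -> n2 exists in g->erefs (open or closed)
 *       edge_t *e = ref_edgev(g->edges, f->idx);
 *   }
 */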
  663. static inline u8i linear_trace_graph(Graph *g, tracev *path, u8i max_step, int *msg){
  664. trace_t *t;
  665. node_t *n;
  666. edge_t *e;
  667. edge_ref_t *f;
  668. u8i step;
  669. int dir, info;
  670. if(path->size == 0){
  671. if(msg) *msg = 3;
  672. return 0;
  673. }
  674. t = ref_tracev(path, path->size - 1);
  675. step = 0;
  676. while(step < max_step){
  677. f = first_living_edge_graph(g, ref_nodev(g->nodes, t->node), t->dir, &info);
  678. if(info != WT_TRACE_MSG_ONE){ if(msg) *msg = info; break; }
  679. e = g->edges->buffer + f->idx;
  680. n = ref_nodev(g->nodes, f->flg? e->node1 : e->node2);
  681. dir = f->flg? !e->dir1 : e->dir2;
  682. t->edges[t->dir] = *f;
  683. t = next_ref_tracev(path);
  684. t->node = n - g->nodes->buffer;
  685. t->dir = dir;
  686. t->edges[!t->dir] = (edge_ref_t){f->idx, !f->flg, 0};
  687. t->edges[t->dir] = EDGE_REF_NULL;
  688. f = first_living_edge_graph(g, n, !dir, &info);
  689. if(info != WT_TRACE_MSG_ONE){ path->size --; if(msg) *msg = -1 - info; break; }
  690. step ++;
  691. }
  692. return step;
  693. }
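/*
 * The path must be seeded with one trace_t before calling; the function then
 * extends it while both the current node and the next node have exactly one
 * open edge in the walking direction (the unitig condition), filling in
 * t->edges[] as it goes. This mirrors its use in rescue_high_cov_edges_graph
 * further down. Hedged seeding sketch (nid is a placeholder start node;
 * free_tracev is assumed from the define_list convention):
 *
 *   tracev *path = init_tracev(8);
 *   trace_t *t = next_ref_tracev(path);
 *   t->node = nid;
 *   t->dir  = 0;                       // walk the '+' strand
 *   linear_trace_graph(g, path, 1024, NULL);
 *   free_tracev(path);
 */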
  694. static inline u8i linear_path_graph(Graph *g, pathv *path, int max_len, int *msg){
  695. path_t *t;
  696. frg_t *n;
  697. lnk_t *e;
  698. edge_ref_t *f;
  699. int len;
  700. int dir, info;
  701. if(path->size == 0){
  702. if(msg) *msg = 3;
  703. return 0;
  704. }
  705. t = ref_pathv(path, path->size - 1);
  706. len = ref_frgv(g->frgs, t->frg)->len;
  707. while(len < max_len){
  708. f = first_living_lnk_graph(g, ref_frgv(g->frgs, t->frg), t->dir, &info);
  709. if(info != WT_TRACE_MSG_ONE){ if(msg) *msg = info; break; }
  710. e = g->lnks->buffer + f->idx;
  711. len += e->off;
  712. n = ref_frgv(g->frgs, f->flg? e->frg1 : e->frg2);
  713. dir = f->flg? !e->dir1 : e->dir2;
  714. t->lnks[t->dir] = *f;
  715. t = next_ref_pathv(path);
  716. t->frg = n - g->frgs->buffer;
  717. t->dir = dir;
  718. t->lnks[!t->dir] = (edge_ref_t){f->idx, !f->flg, 0};
  719. t->lnks[t->dir] = EDGE_REF_NULL;
  720. f = first_living_lnk_graph(g, n, !dir, &info);
  721. if(info != WT_TRACE_MSG_ONE){ path->size --; if(msg) *msg = -1 - info; break; }
  722. len += n->len;
  723. }
  724. return len;
  725. }
  726. static inline int wander_path_graph(Graph *g, u4i frg_idx, int frg_dir, pathv *heap, u8i vst, int max_len){
  727. path_t T, S;
  728. frg_t *n, *w;
  729. lnk_t *e;
  730. edge_ref_t *f;
  731. u8i idx;
  732. int len;
  733. clear_pathv(heap);
  734. T.off = 0;
  735. T.frg = frg_idx;
  736. T.dir = frg_dir;
  737. ref_frgv(g->frgs, frg_idx)->bt_visit = vst;
  738. push_pathv(heap, T);
  739. while(heap->size){
  740. S = heap->buffer[0];
  741. array_heap_remove(heap->buffer, heap->size, heap->cap, path_t, 0, num_cmp(a.off, b.off));
  742. n = ref_frgv(g->frgs, S.frg);
  743. idx = n->lnks[S.dir].idx;
  744. while(idx){
  745. f = ref_edgerefv(g->lrefs, idx);
  746. idx = f->next;
  747. e = ref_lnkv(g->lnks, f->idx);
  748. if(e->closed) continue;
  749. if(e->weak) continue;
  750. w = ref_frgv(g->frgs, f->flg? e->frg1: e->frg2);
  751. if(w->bt_visit == vst) continue;
  752. w->bt_visit = vst;
  753. len = S.off + e->off + w->len;
  754. if(len >= max_len){
  755. return len;
  756. }
  757. T.frg = f->flg? e->frg1: e->frg2;
  758. T.dir = f->flg? !e->dir1 : e->dir2;
  759. T.off = len;
  760. array_heap_push(heap->buffer, heap->size, heap->cap, path_t, T, num_cmp(a.off, b.off));
  761. }
  762. }
  763. return 0;
  764. }
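/*
 * wander_path_graph is a bounded, Dijkstra-style expansion over fragments:
 * heap is kept as a binary min-heap ordered by path_t.off via array_heap_push
 * and array_heap_remove, weak and closed lnks are skipped, and bt_visit
 * (stamped with the caller-supplied vst token) prevents revisiting. It
 * returns the first accumulated length reaching max_len, or 0 if the whole
 * reachable component is shorter, so it serves as a cheap "is there a long
 * continuation this way" test. Hedged call sketch (fid, vst are placeholders;
 * init_pathv is assumed from the define_list convention):
 *
 *   pathv *heap = init_pathv(32);
 *   int len = wander_path_graph(g, fid, 0, heap, vst, 50000);
 */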
  765. static inline int cal_offset_traces_graph(Graph *g, tracev *path, u8i beg, u8i end, int offset){
  766. trace_t *t;
  767. node_t *n;
  768. reg_t *r;
  769. edge_t *e;
  770. u8i i;
  771. int off;
  772. off = offset;
  773. for(i=beg;i<end;i++){
  774. t = ref_tracev(path, i);
  775. t->off = off;
  776. n = ref_nodev(g->nodes, t->node);
  777. r = ref_regv(g->regs, n->regs.idx);
  778. if(t->edges[t->dir].idx == EDGE_REF_NULL.idx){
  779. off += r->end - r->beg;
  780. } else {
  781. e = ref_edgev(g->edges, t->edges[t->dir].idx);
  782. off += r->end - r->beg + e->off;
  783. }
  784. }
  785. return off;
  786. }
  787. static inline int cal_offset_paths_graph(Graph *g, pathv *path, u8i beg, u8i end){
  788. path_t *t;
  789. frg_t *n;
  790. lnk_t *e;
  791. u8i i;
  792. int off;
  793. off = 0;
  794. for(i=beg;i<end;i++){
  795. t = ref_pathv(path, i);
  796. t->off = off;
  797. n = ref_frgv(g->frgs, t->frg);
  798. if(t->lnks[t->dir].idx == EDGE_REF_NULL.idx){
  799. off += n->length;
  800. } else {
  801. e = ref_lnkv(g->lnks, t->lnks[t->dir].idx);
  802. off += ((i == beg)? n->length : n->len) + e->off;
  803. }
  804. }
  805. return off;
  806. }
  807. static inline u8i true_linear_unique_trace_graph(Graph *g, tracev *path, u8i max_step, u8i visit, int *msg){
  808. trace_t *t;
  809. node_t *n;
  810. edge_t *e;
  811. edge_ref_t *f;
  812. u8i step;
  813. int dir, info;
  814. if(path->size == 0){
  815. if(msg) *msg = WT_TRACE_MSG_ZERO;
  816. return 0;
  817. }
  818. step = 0;
  819. if(msg) *msg = WT_TRACE_MSG_ONE;
  820. while(step < max_step){
  821. t = ref_tracev(path, path->size - 1);
  822. f = first_living_edge_graph(g, ref_nodev(g->nodes, t->node), !t->dir, &info);
  823. if(info == WT_TRACE_MSG_MORE){ if(path->size > 1) path->size --; if(msg) *msg = -1 - info; break; }
  824. n = ref_nodev(g->nodes, t->node);
  825. n->bt_visit = visit;
  826. f = first_living_edge_graph(g, ref_nodev(g->nodes, t->node), t->dir, &info);
  827. if(info == WT_TRACE_MSG_ZERO){ if(msg) *msg = info; break; }
  828. else if(info == WT_TRACE_MSG_MORE){ if(path->size > 1) path->size --; if(msg) *msg = info; break; }
  829. e = g->edges->buffer + f->idx;
  830. n = ref_nodev(g->nodes, f->flg? e->node1 : e->node2);
  831. if(n->bt_visit == visit){ if(msg) *msg = WT_TRACE_MSG_VISITED; break; }
  832. dir = f->flg? !e->dir1 : e->dir2;
  833. t->edges[t->dir] = *f;
  834. t = next_ref_tracev(path);
  835. t->node = n - g->nodes->buffer;
  836. t->dir = dir;
  837. t->edges[!t->dir] = (edge_ref_t){f->idx, !f->flg, 0};
  838. t->edges[t->dir] = EDGE_REF_NULL;
  839. step ++;
  840. }
  841. return step;
  842. }
  843. static inline u8i true_linear_unique_path_graph(Graph *g, pathv *path, u8i max_step, u8i visit, int *msg){
  844. path_t *t;
  845. frg_t *n;
  846. lnk_t *e;
  847. edge_ref_t *f;
  848. u8i step;
  849. int dir, info;
  850. if(path->size == 0){
  851. if(msg) *msg = WT_TRACE_MSG_ZERO;
  852. return 0;
  853. }
  854. if(msg) *msg = WT_TRACE_MSG_ONE;
  855. step = 0;
  856. while(step < max_step){
  857. t = ref_pathv(path, path->size - 1);
  858. f = first_living_lnk_graph(g, ref_frgv(g->frgs, t->frg), !t->dir, &info);
  859. if(info == WT_TRACE_MSG_MORE){ if(path->size > 1) path->size --; if(msg) *msg = -1 - info; break; }
  860. n = ref_frgv(g->frgs, t->frg);
  861. n->bt_visit = visit;
  862. f = first_living_lnk_graph(g, ref_frgv(g->frgs, t->frg), t->dir, &info);
  863. if(info == WT_TRACE_MSG_ZERO){ if(msg) *msg = info; break; }
  864. else if(info == WT_TRACE_MSG_MORE){ if(path->size > 1) path->size --; if(msg) *msg = info; break; }
  865. e = g->lnks->buffer + f->idx;
  866. n = ref_frgv(g->frgs, f->flg? e->frg1 : e->frg2);
  867. if(n->bt_visit == visit){ if(msg) *msg = WT_TRACE_MSG_VISITED; break; }
  868. dir = f->flg? !e->dir1 : e->dir2;
  869. t->lnks[t->dir] = *f;
  870. t = next_ref_pathv(path);
  871. t->frg = n - g->frgs->buffer;
  872. t->dir = dir;
  873. t->lnks[!t->dir] = (edge_ref_t){f->idx, !f->flg, 0};
  874. t->lnks[t->dir] = EDGE_REF_NULL;
  875. step ++;
  876. }
  877. return step;
  878. }
  879. static inline u8i count_linear_trace_graph(Graph *g, trace_t *t, u8i max_step, int *msg){
  880. node_t *n;
  881. edge_t *e;
  882. edge_ref_t *f;
  883. u8i step;
  884. int dir, info;
  885. step = 0;
  886. if(msg) *msg = WT_TRACE_MSG_ONE;
  887. while(step < max_step){
  888. f = first_living_edge_graph(g, ref_nodev(g->nodes, t->node), t->dir, &info);
  889. if(info != WT_TRACE_MSG_ONE){ if(msg) *msg = info; break; }
  890. e = g->edges->buffer + f->idx;
  891. n = ref_nodev(g->nodes, f->flg? e->node1 : e->node2);
  892. dir = f->flg? !e->dir1 : e->dir2;
  893. t->node = n - g->nodes->buffer;
  894. t->dir = dir;
  895. f = first_living_edge_graph(g, n, !dir, &info);
  896. if(info != WT_TRACE_MSG_ONE){ if(msg) *msg = -1 - info; break; }
  897. step ++;
  898. }
  899. return step;
  900. }
  901. static inline int count_linear_path_graph(Graph *g, path_t *t, int max_len, int *msg){
  902. frg_t *n;
  903. lnk_t *e;
  904. edge_ref_t *f;
  905. int len;
  906. int dir, info;
  907. if(msg) *msg = WT_TRACE_MSG_ONE;
  908. len = ref_frgv(g->frgs, t->frg)->len;
  909. while(len < max_len){
  910. f = first_living_lnk_graph(g, ref_frgv(g->frgs, t->frg), t->dir, &info);
  911. if(info != WT_TRACE_MSG_ONE){ if(msg) *msg = info; break; }
  912. e = g->lnks->buffer + f->idx;
  913. len += e->off;
  914. n = ref_frgv(g->frgs, f->flg? e->frg1 : e->frg2);
  915. dir = f->flg? !e->dir1 : e->dir2;
  916. t->frg = n - g->frgs->buffer;
  917. t->dir = dir;
  918. f = first_living_lnk_graph(g, n, !dir, &info);
  919. if(info != WT_TRACE_MSG_ONE){ if(msg) *msg = -1 - info; break; }
  920. len += n->len;
  921. }
  922. return len;
  923. }
  924. static inline u4i del_isolated_nodes_graph(Graph *g, FILE *log){
  925. node_t *n;
  926. u4i ret, i;
  927. int f, r;
  928. ret = 0;
  929. for(i=0;i<g->nodes->size;i++){
  930. n = ref_nodev(g->nodes, i);
  931. if(n->closed) continue;
  932. first_living_edge_graph(g, n, 0, &f);
  933. if(f != WT_TRACE_MSG_ZERO) continue;
  934. first_living_edge_graph(g, n, 1, &r);
  935. if(r != WT_TRACE_MSG_ZERO) continue;
  936. n->closed = 1;
  937. if(log) fprintf(log, "DEL_ISO\tN%u\n", i);
  938. ret ++;
  939. }
  940. return ret;
  941. }
  942. static inline u8i cut_binary_edges_graph(Graph *g){
  943. UUhash *hash;
  944. UUhash_t *u;
  945. node_t *n;
  946. edge_ref_t *f;
  947. edge_t *e, *p;
  948. u8i idx, nid, ret;
  949. ret = 0;
  950. hash = init_UUhash(15);
  951. for(nid=0;nid<g->nodes->size;nid++){
  952. n = ref_nodev(g->nodes, nid);
  953. if(n->closed) continue;
  954. clear_UUhash(hash);
  955. idx = n->edges[0].idx;
  956. while(idx){
  957. f = ref_edgerefv(g->erefs, idx);
  958. idx = f->next;
  959. e = ref_edgev(g->edges, f->idx);
  960. if(e->closed < WT_EDGE_CLOSED_LESS){
  961. put_UUhash(hash, (UUhash_t){f->flg? e->node1 : e->node2, f->idx});
  962. }
  963. }
  964. idx = n->edges[1].idx;
  965. while(idx){
  966. f = ref_edgerefv(g->erefs, idx);
  967. idx = f->next;
  968. e = ref_edgev(g->edges, f->idx);
  969. if(e->closed >= WT_EDGE_CLOSED_LESS) continue;
  970. if((u = get_UUhash(hash, f->flg? e->node1 : e->node2)) == NULL) continue;
  971. p = ref_edgev(g->edges, u->val);
  972. if(0){
  973. if(p->cov > e->cov) cut_edge_core_graph(g, e, WT_EDGE_CLOSED_HARD);
  974. else cut_edge_core_graph(g, p, WT_EDGE_CLOSED_HARD);
  975. ret ++;
  976. } else {
  977. cut_edge_core_graph(g, e, WT_EDGE_CLOSED_HARD);
  978. cut_edge_core_graph(g, p, WT_EDGE_CLOSED_HARD);
  979. ret += 2;
  980. }
  981. }
  982. }
  983. free_UUhash(hash);
  984. return ret;
  985. }
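/*
 * "Binary" here means a node pair linked from both sides of the same node:
 * side 0 edges are hashed by their far node, and any side 1 edge reaching
 * the same far node indicates an inconsistent orientation, so both edges are
 * hard-closed (the dead if(0) branch above would instead drop only the
 * weaker one). The return value counts cut edges, not pairs.
 */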
  986. static inline u8i cut_binary_lnks_graph(Graph *g, FILE *info){
  987. UUhash *hash;
  988. UUhash_t *u;
  989. frg_t *n;
  990. edge_ref_t *f;
  991. lnk_t *e, *p;
  992. u8i idx, nid, wid, ret;
  993. u4i k;
  994. int exists;
  995. ret = 0;
  996. hash = init_UUhash(15);
  997. for(nid=0;nid<g->frgs->size;nid++){
  998. n = ref_frgv(g->frgs, nid);
  999. if(n->closed) continue;
  1000. clear_UUhash(hash);
  1001. for(k=0;k<2;k++){
  1002. idx = n->lnks[k].idx;
  1003. while(idx){
  1004. f = ref_edgerefv(g->lrefs, idx);
  1005. idx = f->next;
  1006. e = ref_lnkv(g->lnks, f->idx);
  1007. if(e->closed != WT_EDGE_CLOSED_HARD){
  1008. wid = f->flg? e->frg1 : e->frg2;
  1009. u = prepare_UUhash(hash, wid, &exists);
  1010. if(exists){
  1011. p = ref_lnkv(g->lnks, u->val);
  1012. if(info){
  1013. fprintf(info, "BINARY_LINK\tF%d\t%c\tF%d\t%c\t%d\t%d\n", e->frg1, "+-"[e->dir1], e->frg2, "+-"[e->dir2], e->cov, e->off);
  1014. fprintf(info, "BINARY_LINK\tF%d\t%c\tF%d\t%c\t%d\t%d\n", p->frg1, "+-"[p->dir1], p->frg2, "+-"[p->dir2], p->cov, p->off);
  1015. }
  1016. if(p->cov < e->cov){
  1017. cut_lnk_core_graph(g, p, WT_EDGE_CLOSED_HARD);
  1018. u->val = f->idx;
  1019. } else if(p->cov > e->cov){
  1020. cut_lnk_core_graph(g, e, WT_EDGE_CLOSED_HARD);
  1021. } else {
  1022. cut_lnk_core_graph(g, p, WT_EDGE_CLOSED_HARD);
  1023. cut_lnk_core_graph(g, e, WT_EDGE_CLOSED_HARD);
  1024. ret ++;
  1025. }
  1026. ret ++;
  1027. } else {
  1028. u->key = wid;
  1029. u->val = f->idx;
  1030. }
  1031. }
  1032. }
  1033. }
  1034. }
  1035. free_UUhash(hash);
  1036. return ret;
  1037. }
  1038. static inline u8i cut_low_cov_lnks_graph(Graph *g, int low_cov){
  1039. frg_t *n;
  1040. edge_ref_t *f;
  1041. lnk_t *e;
  1042. u8i idx, nid, ret;
  1043. u4i k;
  1044. int max_cov;
  1045. ret = 0;
  1046. for(nid=0;nid<g->frgs->size;nid++){
  1047. n = ref_frgv(g->frgs, nid);
  1048. if(n->closed) continue;
  1049. for(k=0;k<2;k++){
  1050. max_cov = 0;
  1051. idx = n->lnks[k].idx;
  1052. while(idx){
  1053. f = ref_edgerefv(g->lrefs, idx);
  1054. idx = f->next;
  1055. e = ref_lnkv(g->lnks, f->idx);
  1056. if(e->cov > max_cov) max_cov = e->cov;
  1057. }
  1058. if(max_cov <= low_cov || max_cov < (int)g->min_edge_cov) continue;
  1059. idx = n->lnks[k].idx;
  1060. while(idx){
  1061. f = ref_edgerefv(g->lrefs, idx);
  1062. idx = f->next;
  1063. e = ref_lnkv(g->lnks, f->idx);
  1064. if(e->cov <= low_cov){
  1065. cut_lnk_core_graph(g, e, WT_EDGE_CLOSED_HARD);
  1066. ret ++;
  1067. }
  1068. }
  1069. }
  1070. }
  1071. return ret;
  1072. }
  1073. static inline u4i rescue_low_cov_transitive_edges_graph(Graph *g, u8i nid, u8v *edges, UUhash *hash){
  1074. node_t *n, *w, *v;
  1075. edge_ref_t *f, *f2, *f3;
  1076. edge_t *e, *e1, *e2;
  1077. reg_t *r;
  1078. UUhash_t *u;
  1079. u8i idx, nid2, nid3;
  1080. u4i i, k, k2, k3, k4, ret;
  1081. int off1, off2, yes;
  1082. n = ref_nodev(g->nodes, nid);
  1083. if(n->closed) return 0;
  1084. ret = 0;
  1085. for(k=0;k<2;k++){
  1086. clear_UUhash(hash);
  1087. clear_u8v(edges);
  1088. idx = n->edges[k].idx;
  1089. while(idx){
  1090. f = ref_edgerefv(g->erefs, idx);
  1091. e = ref_edgev(g->edges, f->idx);
  1092. //if(e->closed < WT_EDGE_CLOSED_LESS){
  1093. if(e->closed == 0){
  1094. put_UUhash(hash, (UUhash_t){f->flg? e->node1: e->node2, idx});
  1095. } else if(e->closed == WT_EDGE_CLOSED_LESS){
  1096. push_u8v(edges, idx);
  1097. }
  1098. idx = f->next;
  1099. }
  1100. if(edges->size <= 1) continue;
  1101. for(i=0;i<edges->size;i++){
  1102. f = ref_edgerefv(g->erefs, edges->buffer[i]);
  1103. e = ref_edgev(g->edges, f->idx);
  1104. //if(e->status) continue;
  1105. if(e->closed != WT_EDGE_CLOSED_LESS) continue;
  1106. if(f->flg){ nid2 = e->node1; k2 = !e->dir1; }
  1107. else { nid2 = e->node2; k2 = e->dir2; }
  1108. yes = 0;
  1109. w = ref_nodev(g->nodes, nid2);
  1110. idx = w->edges[k2].idx;
  1111. while(idx){
  1112. f2 = ref_edgerefv(g->erefs, idx);
  1113. idx = f2->next;
  1114. if(f2->flg){ nid3 = g->edges->buffer[f2->idx].node1; k3 = !g->edges->buffer[f2->idx].dir1; }
  1115. else { nid3 = g->edges->buffer[f2->idx].node2; k3 = g->edges->buffer[f2->idx].dir2; }
  1116. e2 = ref_edgev(g->edges, f2->idx);
  1117. //if(e2->closed >= WT_EDGE_CLOSED_LESS) continue;
  1118. if(e2->closed) continue;
  1119. if((u = get_UUhash(hash, nid3)) == NULL) continue;
  1120. f3 = ref_edgerefv(g->erefs, u->val);
  1121. e1 = ref_edgev(g->edges, f3->idx);
  1122. k4 = f3->flg? !e1->dir1 : e1->dir2;
  1123. if(k3 != k4) continue;
  1124. v = ref_nodev(g->nodes, nid3);
  1125. off1 = off2 = (g->regs->buffer[n->regs.idx].end - g->regs->buffer[n->regs.idx].beg) + (g->regs->buffer[v->regs.idx].end - g->regs->buffer[v->regs.idx].beg);
  1126. off1 += e1->off;
  1127. off2 += e->off + e2->off + (g->regs->buffer[w->regs.idx].end - g->regs->buffer[w->regs.idx].beg);
  1128. r = ref_regv(g->regs, w->regs.idx);
  1129. //if(e->off + e2->off + (r->end - r->beg) >= longest) continue;
  1130. if(num_diff(off1, off2) >= num_min(off1, off2)) continue;
  1131. yes = 1;
  1132. //revive_edge_graph(g, e);
  1133. e->flag |= (1 << f->flg);
  1134. ret ++;
  1135. break;
  1136. }
  1137. if(0){
  1138. if(yes) continue;
  1139. idx = w->edges[!k2].idx;
  1140. while(idx){
  1141. f2 = ref_edgerefv(g->erefs, idx);
  1142. idx = f2->next;
  1143. if(f2->flg){ nid3 = g->edges->buffer[f2->idx].node1; k3 = !g->edges->buffer[f2->idx].dir1; }
  1144. else { nid3 = g->edges->buffer[f2->idx].node2; k3 = g->edges->buffer[f2->idx].dir2; }
  1145. e2 = ref_edgev(g->edges, f2->idx);
  1146. if(e2->closed >= WT_EDGE_CLOSED_LESS) continue;
  1147. if((u = get_UUhash(hash, nid3)) == NULL) continue;
  1148. f3 = ref_edgerefv(g->erefs, u->val);
  1149. e1 = ref_edgev(g->edges, f3->idx);
  1150. k4 = f3->flg? !e1->dir1 : e1->dir2;
  1151. if(k3 != k4) continue;
  1152. v = ref_nodev(g->nodes, nid3);
  1153. off1 = off2 = (g->regs->buffer[n->regs.idx].end - g->regs->buffer[n->regs.idx].beg) + (g->regs->buffer[w->regs.idx].end - g->regs->buffer[w->regs.idx].beg);
  1154. off1 += e->off;
  1155. off2 += e1->off + e2->off + (g->regs->buffer[v->regs.idx].end - g->regs->buffer[v->regs.idx].beg);
  1156. r = ref_regv(g->regs, w->regs.idx);
  1157. if(num_diff(off1, off2) >= num_min(off1, off2)) continue;
  1158. yes = 1;
  1159. //revive_edge_graph(g, e);
  1160. e->flag |= (1 << f->flg);
  1161. ret ++;
  1162. break;
  1163. }
  1164. }
  1165. }
  1166. }
  1167. return ret;
  1168. }
  1169. static inline u8i rescue_low_cov_edges_graph(Graph *g){
  1170. u8v *edges;
  1171. UUhash *hash;
  1172. u8i i, nid, ret;
  1173. ret = 0;
  1174. edges = init_u8v(32);
  1175. hash = init_UUhash(13);
  1176. for(i=0;i<g->edges->size;i++) g->edges->buffer[i].flag = 0;
  1177. for(nid=0;nid<g->nodes->size;nid++){
  1178. rescue_low_cov_transitive_edges_graph(g, nid, edges, hash);
  1179. }
  1180. for(i=0;i<g->edges->size;i++){
  1182. if(g->edges->buffer[i].flag == 3){
  1183. revive_edge_graph(g, g->edges->buffer + i);
  1184. ret ++;
  1185. }
  1186. g->edges->buffer[i].flag = 0;
  1187. }
  1188. free_u8v(edges);
  1189. free_UUhash(hash);
  1190. return ret;
  1191. }
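// rescue_high_cov_edges_graph: for a node whose single living edge on one side has coverage
// below cov_cutoff, trace the linear path ahead and look for a closed edge from this node,
// with higher coverage, that lands on a node of that path; if found, cut the weak edge and
// the living edge occupying the landing node's side, then revive the closed edge.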
  1192. static inline u8i rescue_high_cov_edges_graph(Graph *g, u4i max_step, u4i cov_cutoff){
  1193. tracev *path;
  1194. trace_t *t;
  1195. node_t *v, *w;
  1196. edge_t *e;
  1197. edge_ref_t *f;
  1198. u8i node, vst, idx, fidx[3], ret;
  1199. u4i k, d, i, cov[2];
  1200. for(node=0;node<g->nodes->size;node++){
  1201. v = ref_nodev(g->nodes, node);
  1202. v->bt_visit = 0;
  1203. }
  1204. ret = 0;
  1205. vst = 0;
  1206. path = init_tracev(8);
  1207. for(node=0;node<g->nodes->size;node++){
  1208. v = ref_nodev(g->nodes, node);
  1209. if(v->closed) continue;
  1210. for(k=0;k<2;k++){
  1211. if(v->edges[k].cnt != 1) continue;
  1212. idx = v->edges[k].idx;
  1213. cov[0] = cov[1] = 0;
  1214. fidx[0] = fidx[1] = fidx[2] = 0;
  1215. while(idx){
  1216. f = ref_edgerefv(g->erefs, idx);
  1217. idx = f->next;
  1218. e = ref_edgev(g->edges, f->idx);
  1219. if(e->closed){
  1220. if(e->cov > cov[1]){
  1221. cov[1] = e->cov;
  1222. fidx[1] = offset_edgerefv(g->erefs, f);
  1223. }
  1224. } else {
  1225. cov[0] = e->cov;
  1226. fidx[0] = offset_edgerefv(g->erefs, f);
  1227. if(cov[0] >= cov_cutoff) break;
  1228. }
  1229. }
  1230. if(cov[0] >= cov_cutoff){
  1231. continue;
  1232. }
  1233. if(cov[0] >= cov[1]){
  1234. continue;
  1235. }
  1236. clear_tracev(path);
  1237. t = next_ref_tracev(path);
  1238. t->node = node;
  1239. t->dir = k;
  1240. linear_trace_graph(g, path, max_step, NULL);
  1241. if(path->size <= 2){
  1242. continue;
  1243. }
  1244. vst ++;
  1245. for(i=1;i<path->size;i++){
  1246. t = ref_tracev(path, i);
  1247. w = ref_nodev(g->nodes, t->node);
  1248. w->bt_visit = vst;
  1249. }
  1250. cov[1] = 0;
  1251. fidx[1] = 0;
  1252. idx = v->edges[k].idx;
  1253. while(idx){
  1254. f = ref_edgerefv(g->erefs, idx);
  1255. idx = f->next;
  1256. e = ref_edgev(g->edges, f->idx);
  1257. if(e->closed == 0) continue;
  1258. w = ref_nodev(g->nodes, f->flg? e->node1 : e->node2);
  1259. if(w->bt_visit == vst){
  1260. if(e->cov > cov[0]){
  1261. if(e->cov > cov[1]){
  1262. cov[1] = e->cov;
  1263. fidx[1] = offset_edgerefv(g->erefs, f);
  1264. }
  1265. }
  1266. }
  1267. }
  1268. if(fidx[1] == 0){
  1269. continue;
  1270. }
  1271. f = ref_edgerefv(g->erefs, fidx[1]);
  1272. e = ref_edgev(g->edges, f->idx);
  1273. if(f->flg){
  1274. w = ref_nodev(g->nodes, e->node1);
  1275. d = e->dir1;
  1276. } else {
  1277. w = ref_nodev(g->nodes, e->node2);
  1278. d = !e->dir2;
  1279. }
  1280. f = first_living_edge_graph(g, w, d, NULL); // assert f != NULL
  1281. if(f == NULL){
  1282. continue;
  1283. }
  1284. fidx[2] = offset_edgerefv(g->erefs, f);
  1285. {
  1286. e = ref_edgev(g->edges, ref_edgerefv(g->erefs, fidx[0])->idx);
  1287. cut_edge_graph(g, e);
  1288. e = ref_edgev(g->edges, ref_edgerefv(g->erefs, fidx[2])->idx);
  1289. cut_edge_graph(g, e);
  1290. e = ref_edgev(g->edges, ref_edgerefv(g->erefs, fidx[1])->idx);
  1291. revive_edge_graph(g, e);
  1292. ret ++;
  1293. }
  1294. }
  1295. }
  1296. free_tracev(path);
  1297. return ret;
  1298. }
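// cut_relative_low_cov_edges_graph: for every node, collect the coverages of its living edges
// (paired with their offsets) and cut edges whose coverage falls below lowf times the maximum
// coverage of competing edges at that node.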
  1299. static inline u8i cut_relative_low_cov_edges_graph(Graph *g, float lowf){
  1300. node_t *n;
  1301. edge_t *e;
  1302. edge_ref_t *f;
  1303. u4v *ecovs, *emaxs;
  1304. u8i idx, x, ret;
  1305. u4i k, max, min, val, i;
  1306. for(idx=0;idx<g->edges->size;idx++){
  1307. e = ref_edgev(g->edges, idx);
  1308. e->flag = 0;
  1309. }
  1310. ecovs = init_u4v(64);
  1311. emaxs = init_u4v(64);
  1312. ret = 0;
  1313. for(idx=0;idx<g->nodes->size;idx++){
  1314. n = ref_nodev(g->nodes, idx);
  1315. if(n->closed) continue;
  1316. clear_u4v(ecovs);
  1317. max = 0;
  1318. min = WT_MAX_EDGE_COV;
  1319. for(k=0;k<2;k++){
  1320. x = n->edges[k].idx;
  1321. while(x){
  1322. f = ref_edgerefv(g->erefs, x);
  1323. x = f->next;
  1324. e = ref_edgev(g->edges, f->idx);
  1325. if(e->closed) continue;
  1326. if(e->cov > max) max = e->cov;
  1327. if(e->cov < min) min = e->cov;
  1328. push_u4v(ecovs, (Int(e->off) << 12) | (e->cov));
  1329. }
  1330. }
  1331. if(min >= max * lowf){
  1332. continue;
  1333. }
  1334. min = max * lowf;
  1335. sort_array(ecovs->buffer, ecovs->size, u4i, num_cmpgt(Int(a) >> 12, Int(b) >> 12));
  1336. clear_u4v(emaxs);
  1337. max = 0;
  1338. for(i=ecovs->size;i>0;i--){
  1339. val = ecovs->buffer[i - 1] & 0xFFF;
  1340. if(val > max) max = val;
  1341. push_u4v(emaxs, max);
  1342. }
  1343. reverse_u4v(emaxs);
  1344. for(k=0;k<2;k++){
  1345. x = n->edges[k].idx;
  1346. while(x){
  1347. f = ref_edgerefv(g->erefs, x);
  1348. x = f->next;
  1349. e = ref_edgev(g->edges, f->idx);
  1350. if(e->closed) continue;
  1351. if(e->cov >= min) continue;
  1352. for(i=0;i<ecovs->size;i++){
  1353. if(e->off >= (Int(ecovs->buffer[i]) >> 12)){
  1354. break;
  1355. }
  1356. }
  1357. if(i == ecovs->size) continue; // Never happen
  1358. if(e->cov < lowf * emaxs->buffer[i]){
  1359. cut_edge_graph(g, e);
  1360. ret ++;
  1361. }
  1362. }
  1363. }
  1364. }
  1365. free_u4v(ecovs);
  1366. free_u4v(emaxs);
  1367. return ret;
  1368. }
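// rescue_low_cov_tip_edges_core: for a node that has living edges on only one side, pick the
// best closed edge on the dead side (highest coverage, ties broken by the partner node's reg
// count), restricted to partners that can be extended further, and revive it.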
  1369. static inline u4i rescue_low_cov_tip_edges_core(Graph *g, u8i nid){
  1370. node_t *n, *w, *ww;
  1371. edge_t *e, *ee;
  1372. edge_ref_t *f;
  1373. u8i idx, wid;
  1374. u4i k, dir, ret;
  1375. n = ref_nodev(g->nodes, nid);
  1376. if(n->closed) return 0;
  1377. if(n->edges[0].cnt == 0 && n->edges[1].cnt == 0) return 0;
  1378. ret = 0;
  1379. for(k=0;k<2;k++){
  1380. if(n->edges[k].cnt) continue;
  1381. idx = n->edges[k].idx;
  1382. ee = NULL;
  1383. ww = NULL;
  1384. while(idx){
  1385. f = ref_edgerefv(g->erefs, idx);
  1386. idx = f->next;
  1387. e = ref_edgev(g->edges, f->idx);
  1388. if(f->flg){
  1389. wid = e->node1; dir = !e->dir1;
  1390. } else {
  1391. wid = e->node2; dir = e->dir2;
  1392. }
  1393. w = ref_nodev(g->nodes, wid);
  1394. //if(w->edges[!dir].cnt) continue;
  1395. if(w->edges[dir].cnt == 0) continue;
  1396. if(ee == NULL || e->cov > ee->cov || (e->cov == ee->cov && w->regs.cnt > ww->regs.cnt)){ ee = e; ww = w; }
  1397. }
  1398. if(ee){ revive_edge_graph(g, ee); ret ++; }
  1399. }
  1400. return ret;
  1401. }
  1402. static inline u8i rescue_low_cov_tip_edges_graph(Graph *g){
  1403. u8v *edges;
  1404. UUhash *hash;
  1405. u8i nid, ret;
  1406. ret = 0;
  1407. edges = init_u8v(32);
  1408. hash = init_UUhash(13);
  1409. for(nid=0;nid<g->nodes->size;nid++){
  1410. ret += rescue_low_cov_tip_edges_core(g, nid);
  1411. }
  1412. free_u8v(edges);
  1413. free_UUhash(hash);
  1414. return ret;
  1415. }
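// rescue_mercy_edge_core_graph: walk one read's regs in read order, tracking nodes that are
// graph tips (no living edges on one side, flagged in tips[]); when two such dangling nodes
// occur on the read in compatible orientations, the edge between them is looked up, revived,
// and their tip flags are cleared ('mercy' edges recovered from single-read support).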
  1416. static inline int rescue_mercy_edge_core_graph(Graph *g, u4i rid, BitVec *tips[2]){
  1417. read_t *rd;
  1418. edge_t *e;
  1419. edge_ref_t *f;
  1420. reg_t *r, *s;
  1421. u8i idx;
  1422. int c, d, ret;
  1423. rd = ref_readv(g->reads, rid);
  1424. if(rd->regs.cnt < 2) return 0;
  1425. idx = rd->regs.idx;
  1426. s = NULL;
  1427. c = 0;
  1428. ret = 0;
  1429. while(idx){
  1430. r = ref_regv(g->regs, idx);
  1431. idx = r->read_link;
  1432. if(r->closed) continue;
  1433. if(!(r->beg >= rd->clps[0] && r->end <= rd->clps[1])) continue;
  1434. if(g->nodes->buffer[r->node].closed) continue;
  1435. d = 0;
  1436. if(get_bitvec(tips[0], r->node)){
  1437. d |= 1;
  1438. }
  1439. if(get_bitvec(tips[1], r->node)){
  1440. d |= 2;
  1441. }
  1442. if(d == 3) continue;
  1443. if(s){
  1444. if(d == (1 << (!r->dir))){
  1445. f = edge_node2node_graph(g, s->node, (0b100 >> c) & 0x01, r->node, !((0b100 >> d) & 0x01));
  1446. if(f){
  1447. e = ref_edgev(g->edges, f->idx);
  1448. revive_edge_graph(g, e);
  1449. zero_bitvec(tips[e->dir1], e->node1);
1450. zero_bitvec(tips[!e->dir2], e->node2); // clear the tip flag of the second endpoint (node2), not node1 twice
  1451. ret ++;
  1452. #ifdef __DEBUG__
  1453. } else {
  1454. fprintf(stderr, " -- something wrong in %s -- %s:%d --\n", __FUNCTION__, __FILE__, __LINE__); fflush(stderr);
  1455. #endif
  1456. }
  1457. s = NULL;
  1458. continue;
  1459. } else {
  1460. s = NULL;
  1461. }
  1462. }
  1463. if(d == (1 << (r->dir))){
  1464. s = r;
  1465. c = d;
  1466. }
  1467. }
  1468. return ret;
  1469. }
  1470. static inline u8i rescue_mercy_edges_graph(Graph *g){
  1471. BitVec *tips[2];
  1472. node_t *n;
  1473. u8i i, ret;
  1474. ret = 0;
  1475. tips[0] = init_bitvec(g->nodes->size);
  1476. tips[1] = init_bitvec(g->nodes->size);
  1477. for(i=0;i<g->nodes->size;i++){
  1478. n = ref_nodev(g->nodes, i);
  1479. if(n->closed) continue;
  1480. if(n->edges[0].cnt == 0) one_bitvec(tips[0], i);
  1481. if(n->edges[1].cnt == 0) one_bitvec(tips[1], i);
  1482. }
  1483. for(i=0;i<g->reads->size;i++){
  1484. ret += rescue_mercy_edge_core_graph(g, i, tips);
  1485. }
  1486. free_bitvec(tips[0]);
  1487. free_bitvec(tips[1]);
  1488. return ret;
  1489. }
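// rescue_weak_tip_lnks_core: for a unitig side with no living links, look for closed weak
// links whose partner is also a dead end facing this link; revive the candidate only when it
// is unique, otherwise give up.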
  1490. static inline u4i rescue_weak_tip_lnks_core(Graph *g, u8i nid){
  1491. frg_t *n, *w, *ww;
  1492. lnk_t *e, *ee;
  1493. edge_ref_t *f;
  1494. u8i idx, wid;
  1495. u4i k, dir, ret;
  1496. n = ref_frgv(g->frgs, nid);
  1497. if(n->closed) return 0;
  1498. if(n->lnks[0].cnt == 0 && n->lnks[1].cnt == 0) return 0;
  1499. ret = 0;
  1500. for(k=0;k<2;k++){
  1501. if(n->lnks[k].cnt) continue;
  1502. idx = n->lnks[k].idx;
  1503. ee = NULL;
  1504. ww = NULL;
  1505. while(idx){
  1506. f = ref_edgerefv(g->lrefs, idx);
  1507. idx = f->next;
  1508. e = ref_lnkv(g->lnks, f->idx);
1509. if(e->closed == 0) continue; // only closed weak links are candidates for revival
  1510. if(e->weak == 0) continue;
  1511. if(f->flg){
  1512. wid = e->frg1; dir = !e->dir1;
  1513. } else {
  1514. wid = e->frg2; dir = e->dir2;
  1515. }
  1516. w = ref_frgv(g->frgs, wid);
  1517. if(w->lnks[!dir].cnt) continue;
  1518. if(ee == NULL){ ee = e; ww = w; }
  1519. else { ee = NULL; break; }
  1520. }
  1521. if(ee){ revive_lnk_graph(g, ee); ret ++; }
  1522. }
  1523. return ret;
  1524. }
  1525. static inline u8i rescue_weak_tip_lnks2_graph(Graph *g){
  1526. u8i nid, ret;
  1527. ret = 0;
  1528. for(nid=0;nid<g->frgs->size;nid++){
  1529. ret += rescue_weak_tip_lnks_core(g, nid);
  1530. }
  1531. return ret;
  1532. }
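// rescue_weak_tip_lnks_graph: every dead-end unitig side nominates its unique closed weak
// link (ambiguous sides are dropped); bit 63 marks tips whose gap can already be spanned by
// wandering an alternative path. A weak link is revived when both endpoints nominate it and
// at least one of them has no such alternative.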
  1533. static inline u8i rescue_weak_tip_lnks_graph(Graph *g){
  1534. pathv *heap;
  1535. u8v *weaks[2];
  1536. frg_t *n;
  1537. lnk_t *e;
  1538. edge_ref_t *f;
  1539. u8i nid, i, vst, idx, eidx, ret;
  1540. u4i k;
  1541. int max_len;
  1542. weaks[0] = init_u8v(g->frgs->size);
  1543. weaks[1] = init_u8v(g->frgs->size);
  1544. ret = 0;
  1545. for(nid=0;nid<g->frgs->size;nid++){
  1546. n = ref_frgv(g->frgs, nid);
  1547. n->bt_visit = 0;
  1548. }
  1549. heap = init_pathv(32);
  1550. vst = 1;
  1551. for(nid=0;nid<g->frgs->size;nid++){
  1552. n = ref_frgv(g->frgs, nid);
  1553. for(k=0;k<2;k++){
  1554. if(n->lnks[k].cnt){
  1555. eidx = 0;
  1556. } else {
  1557. idx = n->lnks[k].idx;
  1558. eidx = 0;
  1559. while(idx){
  1560. f = ref_edgerefv(g->lrefs, idx);
  1561. e = ref_lnkv(g->lnks, f->idx);
  1562. if(e->closed && e->weak){
  1563. if(eidx == 0) eidx = f->idx;
  1564. else eidx = MAX_VALUE_U8;
  1565. }
  1566. idx = f->next;
  1567. }
  1568. }
  1569. if(eidx != 0 && eidx != MAX_VALUE_U8){
  1570. e = ref_lnkv(g->lnks, eidx);
  1571. max_len = wander_path_graph(g, nid, k, heap, vst, e->off);
  1572. if(max_len >= e->off){
  1573. eidx |= 1LLU << 63;
  1574. }
  1575. }
  1576. push_u8v(weaks[k], eidx);
  1577. }
  1578. }
  1579. free_pathv(heap);
  1580. for(k=0;k<2;k++){
  1581. for(i=0;i<weaks[k]->size;i++){
  1582. if(weaks[k]->buffer[i] == 0 || weaks[k]->buffer[i] == MAX_VALUE_U8) continue;
  1583. e = ref_lnkv(g->lnks, weaks[k]->buffer[i] & (MAX_VALUE_U8 >> 1));
  1584. if(i != e->frg1) continue;
  1585. if((weaks[0]->buffer[e->frg2] & (MAX_VALUE_U8 >> 1)) == (weaks[k]->buffer[i] & (MAX_VALUE_U8 >> 1))){
1586. if(!(weaks[0]->buffer[e->frg2] & (1LLU << 63)) || !(weaks[k]->buffer[i] & (1LLU << 63))){
  1587. ret ++;
  1588. revive_lnk_graph(g, e);
  1589. }
  1590. }
  1591. if((weaks[1]->buffer[e->frg2] & (MAX_VALUE_U8 >> 1)) == (weaks[k]->buffer[i] & (MAX_VALUE_U8 >> 1))){
1592. if(!(weaks[1]->buffer[e->frg2] & (1LLU << 63)) || !(weaks[k]->buffer[i] & (1LLU << 63))){
  1593. ret ++;
  1594. revive_lnk_graph(g, e);
  1595. }
  1596. }
  1597. }
  1598. }
  1599. free_u8v(weaks[0]);
  1600. free_u8v(weaks[1]);
  1601. return ret;
  1602. }
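// _scoring_edge_orders: ranking score for an out-edge (smaller is better); shorter offsets,
// more supporting regs at the target node and higher edge coverage all lower the score.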
  1603. static inline int _scoring_edge_orders(Graph *g, u8i fidx){
  1604. edge_ref_t *f;
  1605. edge_t *e;
  1606. node_t *n;
  1607. int score;
  1608. f = ref_edgerefv(g->erefs, fidx);
  1609. e = ref_edgev(g->edges, f->idx);
  1610. n = ref_nodev(g->nodes, f->flg? e->node1 : e->node2);
  1611. score = e->off + (n->regs.cnt * -5) + (e->cov * -5);
  1612. return score;
  1613. }
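// reduce_transitive_edges_core_graph: transitive reduction around one node. Direct neighbours
// reachable by edges with closed < closed_val are hashed; a direct edge n->v is marked when a
// two-step path n->w->v reaches the same neighbour in the same orientation with a consistent
// total length, and marked edges that are still fully open are cut.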
  1614. static inline u4i reduce_transitive_edges_core_graph(Graph *g, u8i nid, u8v *edges, UUhash *hash, u4i closed_val){
  1615. node_t *n, *w, *v;
  1616. edge_ref_t *f, *f2, *f3;
  1617. edge_t *e, *e1, *e2;
  1618. UUhash_t *u;
  1619. u8i idx, nid2, nid3;
  1620. u4i i, k, k2, k3, k4, ret;
  1621. int off1, off2;
  1622. n = ref_nodev(g->nodes, nid);
  1623. if(n->closed) return 0;
  1624. ret = 0;
  1625. for(k=0;k<2;k++){
  1626. clear_UUhash(hash);
  1627. clear_u8v(edges);
  1628. idx = n->edges[k].idx;
  1629. while(idx){
  1630. f = ref_edgerefv(g->erefs, idx);
  1631. e = ref_edgev(g->edges, f->idx);
  1632. if(e->closed < closed_val){
  1633. put_UUhash(hash, (UUhash_t){f->flg? e->node1: e->node2, (idx << 1) | e->closed});
  1634. push_u8v(edges, idx);
  1635. }
  1636. idx = f->next;
  1637. }
  1638. if(edges->size <= 1) continue;
  1639. // sort the edges by composition of e->off and e->cov
  1640. //sort_array(edges->buffer, edges->size, u8i, num_cmpgt(_scoring_edge_orders(g, a), _scoring_edge_orders(g, b)));
  1641. for(i=0;i<edges->size;i++){
  1642. f = ref_edgerefv(g->erefs, edges->buffer[i]);
  1643. e = ref_edgev(g->edges, f->idx);
  1644. if(f->flg){ nid2 = e->node1; k2 = !e->dir1; }
  1645. else { nid2 = e->node2; k2 = e->dir2; }
  1646. w = ref_nodev(g->nodes, nid2);
  1647. idx = w->edges[k2].idx;
  1648. while(idx){
  1649. f2 = ref_edgerefv(g->erefs, idx);
  1650. idx = f2->next;
  1651. if(f2->flg){ nid3 = g->edges->buffer[f2->idx].node1; k3 = !g->edges->buffer[f2->idx].dir1; }
  1652. else { nid3 = g->edges->buffer[f2->idx].node2; k3 = g->edges->buffer[f2->idx].dir2; }
  1653. e2 = ref_edgev(g->edges, f2->idx);
  1654. //if(e2->closed) continue;
  1655. if(e2->closed >= closed_val) continue;
  1656. if((u = get_UUhash(hash, nid3)) == NULL) continue;
  1657. if(u->val & 0x01) continue; // already deleted
  1658. f3 = ref_edgerefv(g->erefs, u->val >> 1);
  1659. e1 = ref_edgev(g->edges, f3->idx);
  1660. k4 = f3->flg? !e1->dir1 : e1->dir2;
  1661. if(k3 != k4) continue;
  1662. v = ref_nodev(g->nodes, nid3);
  1663. off1 = off2 = (g->regs->buffer[n->regs.idx].end - g->regs->buffer[n->regs.idx].beg) + (g->regs->buffer[v->regs.idx].end - g->regs->buffer[v->regs.idx].beg);
  1664. off1 += e1->off;
  1665. off2 += e->off + e2->off + (g->regs->buffer[w->regs.idx].end - g->regs->buffer[w->regs.idx].beg);
1666. // check whether off1 and off2 differ too much
  1667. if(num_diff(off1, off2) >= num_min(off1, off2)) continue;
  1668. u->val |= 1;
  1669. }
  1670. }
  1671. reset_iter_UUhash(hash);
  1672. while((u = ref_iter_UUhash(hash))){
  1673. if(u->val & 0x01){
  1674. e = ref_edgev(g->edges, ref_edgerefv(g->erefs, u->val >> 1)->idx);
  1675. if(e->closed == WT_EDGE_CLOSED_NULL){
  1676. cut_edge_graph(g, e);
  1677. ret ++;
  1678. }
  1679. }
  1680. }
  1681. }
  1682. return ret;
  1683. }
  1684. static inline u8i reduce_transitive_edges_graph(Graph *g){
  1685. u8v *edges;
  1686. UUhash *hash;
  1687. u8i nid, ret;
  1688. ret = 0;
  1689. edges = init_u8v(32);
  1690. hash = init_UUhash(13);
  1691. for(nid=0;nid<g->nodes->size;nid++){
  1692. ret += reduce_transitive_edges_core_graph(g, nid, edges, hash, 2);
  1693. }
  1694. free_u8v(edges);
  1695. free_UUhash(hash);
  1696. return ret;
  1697. }
  1698. static inline void set_init_ends_graph(Graph *g){
  1699. node_t *n;
  1700. u8i nid;
  1701. for(nid=0;nid<g->nodes->size;nid++){
  1702. n = ref_nodev(g->nodes, nid);
  1703. if(n->edges[0].cnt == 0 || n->edges[1].cnt == 0){
  1704. n->init_end = 1;
  1705. }
  1706. }
  1707. }
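// myers_transitive_reduction_core_graph: per-node step of Myers-style transitive reduction.
// Direct out-neighbours are marked inplay; any neighbour also reachable through another
// inplay neighbour within longest+fuzz (or via a second edge not longer than fuzz) is
// eliminated, and the direct edge leading to it is flagged. The caller cuts edges flagged
// from both ends. The integer part of _fuzz is an absolute slack; its fractional part, times
// the longest direct edge, can enlarge it.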
  1708. // node_t->unvisit is used to indicate vacant, inplay and eliminated
  1709. static inline u4i myers_transitive_reduction_core_graph(Graph *g, u8i nid, float _fuzz, int closed){
  1710. node_t *n, *w, *x;
  1711. edge_ref_t *f, *f2;
  1712. edge_t *e, *e2;
  1713. u8i idx, idx2;
  1714. u4i k, k2, ret;
  1715. int longest, len, fuzz;
  1716. n = ref_nodev(g->nodes, nid);
  1717. if(n->closed) return 0;
  1718. ret = 0;
  1719. for(k=0;k<2;k++){
  1720. longest = 0;
  1721. idx = n->edges[k].idx;
  1722. while(idx){
  1723. f = ref_edgerefv(g->erefs, idx);
  1724. e = ref_edgev(g->edges, f->idx);
  1725. //if(e->closed == 0 && !(e->flag & (1 << f->flg))){
  1726. if(e->closed <= closed){
  1727. w = ref_nodev(g->nodes, f->flg? e->node1 : e->node2);
  1728. w->unvisit = 1; // inplay
  1729. len = e->off + g->reglen;
  1730. if(len > longest) longest = len;
  1731. }
  1732. idx = f->next;
  1733. }
  1734. fuzz = (int)_fuzz;
  1735. if(longest * (_fuzz - fuzz) > fuzz) fuzz = longest * (_fuzz - fuzz);
  1736. longest += fuzz;
  1737. idx = n->edges[k].idx;
  1738. while(idx){
  1739. f = ref_edgerefv(g->erefs, idx);
  1740. e = ref_edgev(g->edges, f->idx);
  1741. //if(e->closed == 0 && !(e->flag & (1 << f->flg))){
  1742. if(e->closed <= closed){
  1743. w = ref_nodev(g->nodes, f->flg? e->node1 : e->node2);
  1744. if(w->unvisit == 1){
  1745. k2 = f->flg? !e->dir1 : e->dir2;
  1746. idx2 = w->edges[k2].idx;
  1747. while(idx2){
  1748. f2 = ref_edgerefv(g->erefs, idx2);
  1749. e2 = ref_edgev(g->edges, f2->idx);
  1750. // TODO: check the strand
  1751. //if(e2->closed == 0 && !(e2->flag & (1 << f2->flg))){
  1752. if(e2->closed <= closed){
  1753. x = ref_nodev(g->nodes, f2->flg? e2->node1 : e2->node2);
  1754. if(x->unvisit == 1){
  1755. len = e->off + e2->off + g->reglen;
  1756. if(len <= longest){
  1757. x->unvisit = 2; // eliminated
  1758. }
  1759. }
  1760. }
  1761. idx2 = f2->next;
  1762. }
  1763. }
  1764. }
  1765. idx = f->next;
  1766. }
  1767. idx = n->edges[k].idx;
  1768. while(idx){
  1769. f = ref_edgerefv(g->erefs, idx);
  1770. e = ref_edgev(g->edges, f->idx);
  1771. //if(e->closed == 0 && !(e->flag & (1 << f->flg))){
  1772. if(e->closed <= closed){
  1773. w = ref_nodev(g->nodes, f->flg? e->node1 : e->node2);
  1774. //if(w->unvisit == 1){
  1775. k2 = f->flg? !e->dir1 : e->dir2;
  1776. idx2 = w->edges[k2].idx;
  1777. while(idx2){
  1778. f2 = ref_edgerefv(g->erefs, idx2);
  1779. e2 = ref_edgev(g->edges, f2->idx);
  1780. //if(e2->closed == 0 && !(e2->flag & (1 << f2->flg))){
  1781. if(e2->closed <= closed){
  1782. x = ref_nodev(g->nodes, f2->flg? e2->node1 : e2->node2);
  1783. if(x->unvisit == 1){
  1784. if(e2->off <= fuzz || idx2 == w->edges[k2].idx){
  1785. x->unvisit = 2; // eliminated
  1786. }
  1787. }
  1788. }
  1789. idx2 = f2->next;
  1790. }
  1791. //}
  1792. }
  1793. idx = f->next;
  1794. }
  1795. idx = n->edges[k].idx;
  1796. while(idx){
  1797. f = ref_edgerefv(g->erefs, idx);
  1798. e = ref_edgev(g->edges, f->idx);
  1799. //if(e->closed == 0 && !(e->flag & (1 << f->flg))){
  1800. if(e->closed <= closed){
  1801. w = ref_nodev(g->nodes, f->flg? e->node1 : e->node2);
  1802. if(w->unvisit == 2){
  1803. e->flag |= 1 << f->flg;
  1804. ret ++;
  1805. }
  1806. w->unvisit = 0;
  1807. }
  1808. idx = f->next;
  1809. }
  1810. }
  1811. return ret;
  1812. }
  1813. static inline u8i myers_transitive_reduction_graph(Graph *g, float fuzz){
  1814. node_t *n;
  1815. edge_t *e;
  1816. u8i nid, eid, ret;
  1817. for(nid=0;nid<g->nodes->size;nid++){
  1818. n = ref_nodev(g->nodes, nid);
  1819. n->unvisit = 0; // vacant
  1820. }
  1821. for(eid=0;eid<g->edges->size;eid++){
  1822. e = ref_edgev(g->edges, eid);
  1823. e->flag = 0;
  1824. }
  1825. for(nid=0;nid<g->nodes->size;nid++){
  1826. n = ref_nodev(g->nodes, nid);
  1827. if(n->closed) continue;
  1828. myers_transitive_reduction_core_graph(g, nid, fuzz, WT_EDGE_CLOSED_NULL);
  1829. }
  1830. ret = 0;
  1831. for(eid=0;eid<g->edges->size;eid++){
  1832. e = ref_edgev(g->edges, eid);
  1833. if(e->flag == 3){
  1834. cut_edge_graph(g, e);
  1835. ret ++;
  1836. }
  1837. e->flag = 0;
  1838. }
  1839. return ret;
  1840. }
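// myers_transitive_reduction_core_frg_graph: the same Myers-style per-node reduction applied
// to the unitig graph (frgs/lnks) instead of the node/edge graph.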
  1841. static inline u4i myers_transitive_reduction_core_frg_graph(Graph *g, u8i nid, float _fuzz){
  1842. frg_t *n, *w, *x;
  1843. edge_ref_t *f, *f2;
  1844. lnk_t *e, *e2;
  1845. u8i idx, idx2;
  1846. u4i k, k2, ret;
  1847. int longest, len, fuzz;
  1848. n = ref_frgv(g->frgs, nid);
  1849. if(n->closed) return 0;
  1850. ret = 0;
  1851. for(k=0;k<2;k++){
  1852. longest = 0;
  1853. idx = n->lnks[k].idx;
  1854. while(idx){
  1855. f = ref_edgerefv(g->lrefs, idx);
  1856. e = ref_lnkv(g->lnks, f->idx);
  1857. //if(e->closed == 0 && !(e->flag & (1 << f->flg))){
  1858. if(e->closed == 0){
  1859. w = ref_frgv(g->frgs, f->flg? e->frg1 : e->frg2);
  1860. w->unvisit = 1; // inplay
  1861. len = e->off + g->reglen;
  1862. if(len > longest) longest = len;
  1863. }
  1864. idx = f->next;
  1865. }
  1866. fuzz = (int)_fuzz;
  1867. if(longest * (_fuzz - fuzz) > fuzz) fuzz = longest * (_fuzz - fuzz);
  1868. longest += fuzz;
  1869. idx = n->lnks[k].idx;
  1870. while(idx){
  1871. f = ref_edgerefv(g->lrefs, idx);
  1872. e = ref_lnkv(g->lnks, f->idx);
  1873. //if(e->closed == 0 && !(e->flag & (1 << f->flg))){
  1874. if(e->closed == 0){
  1875. w = ref_frgv(g->frgs, f->flg? e->frg1 : e->frg2);
  1876. if(w->unvisit == 1){
  1877. k2 = f->flg? !e->dir1 : e->dir2;
  1878. idx2 = w->lnks[k2].idx;
  1879. while(idx2){
  1880. f2 = ref_edgerefv(g->lrefs, idx2);
  1881. e2 = ref_lnkv(g->lnks, f2->idx);
  1882. // TODO: check the strand
  1883. //if(e2->closed == 0 && !(e2->flag & (1 << f2->flg))){
  1884. if(e2->closed == 0){
  1885. x = ref_frgv(g->frgs, f2->flg? e2->frg1 : e2->frg2);
  1886. if(x->unvisit == 1){
  1887. len = e->off + e2->off + g->reglen;
  1888. if(len <= longest){
  1889. x->unvisit = 2; // eliminated
  1890. }
  1891. }
  1892. }
  1893. idx2 = f2->next;
  1894. }
  1895. }
  1896. }
  1897. idx = f->next;
  1898. }
  1899. idx = n->lnks[k].idx;
  1900. while(idx){
  1901. f = ref_edgerefv(g->lrefs, idx);
  1902. e = ref_lnkv(g->lnks, f->idx);
  1903. //if(e->closed == 0 && !(e->flag & (1 << f->flg))){
  1904. if(e->closed == 0){
  1905. w = ref_frgv(g->frgs, f->flg? e->frg1 : e->frg2);
  1906. //if(w->unvisit == 1){
  1907. k2 = f->flg? !e->dir1 : e->dir2;
  1908. idx2 = w->lnks[k2].idx;
  1909. while(idx2){
  1910. f2 = ref_edgerefv(g->lrefs, idx2);
  1911. e2 = ref_lnkv(g->lnks, f2->idx);
  1912. //if(e2->closed == 0 && !(e2->flag & (1 << f2->flg))){
  1913. if(e2->closed == 0){
  1914. x = ref_frgv(g->frgs, f2->flg? e2->frg1 : e2->frg2);
  1915. if(x->unvisit == 1){
  1916. if(e2->off <= fuzz || idx2 == w->lnks[k2].idx){
  1917. x->unvisit = 2; // eliminated
  1918. }
  1919. }
  1920. }
  1921. idx2 = f2->next;
  1922. }
  1923. //}
  1924. }
  1925. idx = f->next;
  1926. }
  1927. idx = n->lnks[k].idx;
  1928. while(idx){
  1929. f = ref_edgerefv(g->lrefs, idx);
  1930. e = ref_lnkv(g->lnks, f->idx);
  1931. //if(e->closed == 0 && !(e->flag & (1 << f->flg))){
  1932. if(e->closed == 0){
  1933. w = ref_frgv(g->frgs, f->flg? e->frg1 : e->frg2);
  1934. if(w->unvisit == 2){
  1935. e->flag |= 1 << f->flg;
  1936. ret ++;
  1937. }
  1938. w->unvisit = 0;
  1939. }
  1940. idx = f->next;
  1941. }
  1942. }
  1943. return ret;
  1944. }
  1945. static inline u8i myers_transitive_reduction_frg_graph(Graph *g, float fuzz){
  1946. frg_t *n;
  1947. lnk_t *e;
  1948. u8i nid, eid, ret;
  1949. for(nid=0;nid<g->frgs->size;nid++){
  1950. n = ref_frgv(g->frgs, nid);
  1951. n->unvisit = 0; // vacant
  1952. }
  1953. for(eid=0;eid<g->lnks->size;eid++){
  1954. e = ref_lnkv(g->lnks, eid);
  1955. e->flag = 0;
  1956. }
  1957. for(nid=0;nid<g->frgs->size;nid++){
  1958. n = ref_frgv(g->frgs, nid);
  1959. if(n->closed) continue;
  1960. myers_transitive_reduction_core_frg_graph(g, nid, fuzz);
  1961. }
  1962. ret = 0;
  1963. for(eid=0;eid<g->lnks->size;eid++){
  1964. e = ref_lnkv(g->lnks, eid);
  1965. if(e->flag == 3){
  1966. cut_lnk_graph(g, e);
  1967. ret ++;
  1968. }
  1969. e->flag = 0;
  1970. }
  1971. return ret;
  1972. }
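// detach_repetitive_frg_core_graph: treat unitig nid as a potential repeat. Branch unitigs on
// both of its sides are collected up to max_dist, then cross links (anything not hard-closed)
// that join a left branch directly to a right branch are tabulated, keeping the best-covered
// link per pair. For every unambiguous one-to-one pairing, the repeat's own links into that
// pair are hard-cut and the cross link is revived, letting paths bypass the repeat.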
  1973. static inline u4i detach_repetitive_frg_core_graph(Graph *g, u8i nid, u4i max_dist, u8i visit, u8v *nds, u8v *heap){
  1974. frg_t *n, *w, *x;
  1975. edge_ref_t *f;
  1976. lnk_t *e;
  1977. u8i idx, wid, xid, eid;
  1978. u4i k, i, j, bidx, bcnt[2];
  1979. int off;
  1980. n = ref_frgv(g->frgs, nid);
  1981. n->bt_visit = visit;
  1982. clear_u8v(nds);
  1983. clear_u8v(heap);
  1984. for(k=0;k<2;k++){
  1985. bcnt[k] = 0;
  1986. idx = n->lnks[k].idx;
  1987. while(idx){
  1988. f = ref_edgerefv(g->lrefs, idx);
  1989. e = ref_lnkv(g->lnks, f->idx);
  1990. idx = f->next;
  1991. if(e->closed) continue;
  1992. wid = f->flg? e->frg1 : e->frg2;
  1993. w = ref_frgv(g->frgs, wid);
  1994. if(w->bt_visit == visit){
  1995. // TODO: remove the fprintf after debug
  1996. fprintf(stderr, " -- F%llu:%c -> F%llu:%c already visited in %s -- %s:%d --\n", nid, "+-"[k], wid, "+-"[w->rep_dir], __FUNCTION__, __FILE__, __LINE__); fflush(stderr);
  1997. return 0;
  1998. }
  1999. push_u8v(nds, wid);
  2000. w->bt_visit = visit;
  2001. w->rep_dir = f->flg? !e->dir1 : e->dir2;
  2002. w->bt_idx = (bcnt[k] << 1) | k;
  2003. bcnt[k] ++;
  2004. off = n->len / 2 + e->off;
  2005. if(off < 0) off = 0;
  2006. w->rep_idx = off + w->len; // rep_idx is used as dist
  2007. if(w->rep_idx < max_dist){
  2008. array_heap_push(heap->buffer, heap->size, heap->cap, u8i, wid, num_cmp(g->frgs->buffer[a].rep_idx, g->frgs->buffer[b].rep_idx));
  2009. }
  2010. }
  2011. }
  2012. if(bcnt[0] == 0 || bcnt[1] == 0) return 0;
  2013. if(bcnt[0] < 2 && bcnt[1] < 2) return 0;
  2014. // extending branches to max_dist in length
  2015. while(heap->size){
  2016. wid = heap->buffer[0];
  2017. array_heap_remove(heap->buffer, heap->size, heap->cap, u8i, 0, num_cmp(g->frgs->buffer[a].rep_idx, g->frgs->buffer[b].rep_idx));
  2018. w = ref_frgv(g->frgs, wid);
  2019. idx = w->lnks[w->rep_dir].idx;
  2020. while(idx){
  2021. f = ref_edgerefv(g->lrefs, idx);
  2022. e = ref_lnkv(g->lnks, f->idx);
  2023. idx = f->next;
  2024. if(e->closed) continue;
  2025. xid = f->flg? e->frg1 : e->frg2;
  2026. x = ref_frgv(g->frgs, xid);
  2027. if(x->bt_visit == visit) continue;
  2028. off = w->rep_idx + e->off;
  2029. if(off < 0) off = 0;
  2030. if(off > (int)max_dist) continue;
  2031. push_u8v(nds, xid);
  2032. x->bt_visit = visit;
  2033. x->rep_dir = f->flg? !e->dir1 : e->dir2;
  2034. x->bt_idx = w->bt_idx;
  2035. x->rep_idx = off + x->len;
  2036. if(x->rep_idx < max_dist){
  2037. array_heap_push(heap->buffer, heap->size, heap->cap, u8i, xid, num_cmp(g->frgs->buffer[a].rep_idx, g->frgs->buffer[b].rep_idx));
  2038. }
  2039. }
  2040. }
  2041. // find cross links
  2042. encap_and_zeros_u8v(heap, bcnt[0] * bcnt[1]); // reuse heap as matrix
  2043. for(i=0;i<nds->size;i++){
  2044. wid = nds->buffer[i];
  2045. w = ref_frgv(g->frgs, wid);
  2046. idx = w->lnks[!w->rep_dir].idx;
  2047. while(idx){
  2048. f = ref_edgerefv(g->lrefs, idx);
  2049. e = ref_lnkv(g->lnks, f->idx);
  2050. idx = f->next;
  2051. if(e->closed == WT_EDGE_CLOSED_HARD) continue;
  2052. xid = f->flg? e->frg1 : e->frg2;
  2053. if(xid == nid) continue;
  2054. x = ref_frgv(g->frgs, xid);
  2055. if(x->bt_visit != visit) continue;
  2056. if(((w->bt_idx ^ x->bt_idx) & 0x01) == 0) continue;
  2057. if(w->bt_idx & 0x01){
  2058. if(x->bt_idx & 0x01){
  2059. continue;
  2060. } else {
  2061. bidx = (x->bt_idx >> 1) * bcnt[1] + (w->bt_idx >> 1);
  2062. }
  2063. } else {
  2064. if(x->bt_idx & 0x01){
  2065. bidx = (w->bt_idx >> 1) * bcnt[1] + (x->bt_idx >> 1);
  2066. } else {
  2067. continue;
  2068. }
  2069. }
  2070. eid = heap->buffer[bidx];
  2071. if(eid == 0 || e->cov > g->lnks->buffer[eid].cov){
  2072. heap->buffer[bidx] = eid = f->idx;
  2073. }
  2074. }
  2075. }
  2076. // find one-left to one-right links
  2077. clear_u8v(nds); // will reuse nds to store bidx
  2078. for(i=0;i<bcnt[0];i++){
  2079. k = bcnt[1];
  2080. for(j=0;j<bcnt[1];j++){
  2081. if(heap->buffer[i * bcnt[1] + j]){
  2082. if(k < bcnt[1]){
  2083. k = bcnt[1];
  2084. break;
  2085. } else {
  2086. k = j;
  2087. }
  2088. }
  2089. }
  2090. for(j=0;k<bcnt[1]&&j<bcnt[0];j++){
  2091. if(j == i) continue;
  2092. if(heap->buffer[j * bcnt[1] + k]){
  2093. k = bcnt[1];
  2094. break;
  2095. }
  2096. }
  2097. if(k == bcnt[1]) continue;
  2098. push_u8v(nds, i * bcnt[1] + k);
  2099. }
  2100. // detach repeats
  2101. for(i=0;i<nds->size;i++){
2104. for(k=0;k<2;k++){
2105. idx = n->lnks[k].idx;
2106. bidx = k? (nds->buffer[i] % bcnt[1]) : (nds->buffer[i] / bcnt[1]); // pairs were encoded above as left_idx * bcnt[1] + right_idx, so decode with bcnt[1]
  2107. while(idx){
  2108. f = ref_edgerefv(g->lrefs, idx);
  2109. e = ref_lnkv(g->lnks, f->idx);
  2110. idx = f->next;
  2111. if(e->closed) continue;
  2112. wid = f->flg? e->frg1 : e->frg2;
  2113. w = ref_frgv(g->frgs, wid);
  2114. if((w->bt_idx >> 1) == bidx){
  2115. //cut_lnk_graph(g, e);
  2116. cut_lnk_core_graph(g, e, WT_EDGE_CLOSED_HARD); // in case of loop cut and revive
  2117. }
  2118. }
  2119. }
  2120. }
  2121. for(i=0;i<nds->size;i++){
  2122. eid = heap->buffer[nds->buffer[i]];
  2123. e = ref_lnkv(g->lnks, eid);
  2124. revive_lnk_graph(g, e);
  2125. }
  2126. return nds->size;
  2127. }
  2128. static inline u4i detach_repetitive_frg_graph(Graph *g, u4i max_dist){
  2129. u8v *nds;
  2130. u8v *heap;
  2131. frg_t *n;
  2132. u8i nid, visit;
  2133. u4i ret;
  2134. nds = init_u8v(32);
  2135. heap = init_u8v(32);
  2136. ret = 0;
  2137. for(nid=0;nid<g->frgs->size;nid++){
  2138. n = ref_frgv(g->frgs, nid);
  2139. n->bt_visit = 0;
  2140. }
  2141. visit = 0;
  2142. ret = 0;
  2143. for(nid=0;nid<g->frgs->size;nid++){
  2144. n = ref_frgv(g->frgs, nid);
  2145. if(n->lnks[0].cnt == 0 || n->lnks[1].cnt == 0) continue;
  2146. if(n->lnks[0].cnt < 2 && n->lnks[1].cnt < 2) continue;
  2147. ret += detach_repetitive_frg_core_graph(g, nid, max_dist, ++visit, nds, heap);
  2148. }
  2149. free_u8v(nds);
  2150. free_u8v(heap);
  2151. for(nid=0;nid<g->frgs->size;nid++){
  2152. n = ref_frgv(g->frgs, nid);
  2153. n->bt_visit = 0;
  2154. }
  2155. return ret;
  2156. }
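// reduce_transitive_lnks_core_graph: unitig-level counterpart of
// reduce_transitive_edges_core_graph, using unitig lengths for the length-consistency check.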
  2157. static inline u4i reduce_transitive_lnks_core_graph(Graph *g, u8i nid, u8v *lnks, UUhash *hash, u4i closed_val){
  2158. frg_t *n, *w, *v;
  2159. edge_ref_t *f, *f2, *f3;
  2160. lnk_t *e, *e1, *e2;
  2161. UUhash_t *u;
  2162. u8i idx, nid2, nid3;
  2163. u4i i, k, k2, k3, k4, ret;
  2164. int off1, off2;
  2165. n = ref_frgv(g->frgs, nid);
  2166. if(n->closed) return 0;
  2167. ret = 0;
  2168. for(k=0;k<2;k++){
  2169. clear_UUhash(hash);
  2170. clear_u8v(lnks);
  2171. idx = n->lnks[k].idx;
  2172. while(idx){
  2173. f = ref_edgerefv(g->lrefs, idx);
  2174. e = ref_lnkv(g->lnks, f->idx);
  2175. if(e->closed < closed_val){
  2176. put_UUhash(hash, (UUhash_t){f->flg? e->frg1: e->frg2, (idx << 1) | e->closed});
  2177. push_u8v(lnks, idx);
  2178. }
  2179. idx = f->next;
  2180. }
  2181. if(lnks->size <= 1) continue;
  2182. // sort the lnks by composition of e->off and e->cov
  2183. //sort_array(lnks->buffer, lnks->size, u8i, num_cmpgt(_scoring_lnk_orders(g, a), _scoring_lnk_orders(g, b)));
  2184. for(i=0;i<lnks->size;i++){
  2185. f = ref_edgerefv(g->lrefs, lnks->buffer[i]);
  2186. e = ref_lnkv(g->lnks, f->idx);
  2187. if(f->flg){ nid2 = e->frg1; k2 = !e->dir1; }
  2188. else { nid2 = e->frg2; k2 = e->dir2; }
  2189. w = ref_frgv(g->frgs, nid2);
  2190. idx = w->lnks[k2].idx;
  2191. while(idx){
  2192. f2 = ref_edgerefv(g->lrefs, idx);
  2193. idx = f2->next;
  2194. if(f2->flg){ nid3 = g->lnks->buffer[f2->idx].frg1; k3 = !g->lnks->buffer[f2->idx].dir1; }
  2195. else { nid3 = g->lnks->buffer[f2->idx].frg2; k3 = g->lnks->buffer[f2->idx].dir2; }
  2196. e2 = ref_lnkv(g->lnks, f2->idx);
  2197. //if(e2->closed) continue;
  2198. if(e2->closed >= closed_val) continue;
  2199. if((u = get_UUhash(hash, nid3)) == NULL) continue;
  2200. if(u->val & 0x01) continue; // already deleted
  2201. f3 = ref_edgerefv(g->lrefs, u->val >> 1);
  2202. e1 = ref_lnkv(g->lnks, f3->idx);
  2203. k4 = f3->flg? !e1->dir1 : e1->dir2;
  2204. if(k3 != k4) continue;
  2205. v = ref_frgv(g->frgs, nid3);
  2206. off1 = off2 = n->len + v->len;
  2207. off1 += e1->off;
  2208. off2 += e->off + e2->off + w->len;
2210. // check whether off1 and off2 differ too much
  2210. if(num_diff(off1, off2) >= num_min(off1, off2)) continue;
  2211. u->val |= 1;
  2212. }
  2213. }
  2214. reset_iter_UUhash(hash);
  2215. while((u = ref_iter_UUhash(hash))){
  2216. if(u->val & 0x01){
  2217. e = ref_lnkv(g->lnks, ref_edgerefv(g->lrefs, u->val >> 1)->idx);
  2218. if(e->closed == WT_EDGE_CLOSED_NULL){
  2219. cut_lnk_graph(g, e);
  2220. ret ++;
  2221. }
  2222. }
  2223. }
  2224. }
  2225. return ret;
  2226. }
  2227. static inline u8i reduce_transitive_lnks_graph(Graph *g){
  2228. u8v *lnks;
  2229. UUhash *hash;
  2230. u8i nid, ret;
  2231. ret = 0;
  2232. lnks = init_u8v(32);
  2233. hash = init_UUhash(13);
  2234. for(nid=0;nid<g->frgs->size;nid++){
  2235. ret += reduce_transitive_lnks_core_graph(g, nid, lnks, hash, 2);
  2236. }
  2237. free_u8v(lnks);
  2238. free_UUhash(hash);
  2239. return ret;
  2240. }
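// trim_tip_core_graph: from a node with living edges on only one side, follow the linear path
// for at most max_step nodes. The path is removed as a tip when the node it merges into has a
// competing branch at least as long (ties broken by edge coverage and offset); with hard_trim,
// a path that stops in front of a branching node is removed as well.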
  2241. static inline u8i trim_tip_core_graph(Graph *g, uint16_t max_step, tracev *path, u8i nid, int hard_trim){
  2242. trace_t *t, T;
  2243. node_t *n;
  2244. edge_ref_t *f;
  2245. edge_t *e;
  2246. u8i ret, idx;
  2247. u4i i, dir, step, step2, found, n_in;
  2248. int msg1, msg2;
  2249. if(g->cut_tip == 0) return 0;
  2250. ret = 0;
  2251. n = ref_nodev(g->nodes, nid);
  2252. if(n->closed) return 0;
  2253. if(n->edges[0].cnt == 1 && n->edges[1].cnt == 0){
  2254. dir = 0;
  2255. } else if(n->edges[0].cnt == 0 && n->edges[1].cnt == 1){
  2256. dir = 1;
  2257. } else {
  2258. return 0;
  2259. }
  2260. clear_tracev(path);
  2261. t = next_ref_tracev(path);
  2262. t->node = nid;
  2263. t->edges[0] = EDGE_REF_NULL;
  2264. t->edges[1] = EDGE_REF_NULL;
  2265. t->dir = dir;
  2266. msg1 = WT_TRACE_MSG_ZERO;
  2267. step = linear_trace_graph(g, path, max_step, &msg1) + 1;
  2268. if(step > max_step) return 0;
  2269. //if(msg1 != -1 - WT_TRACE_MSG_MORE && msg1 != WT_TRACE_MSG_MORE) return 0;
  2270. if(msg1 == WT_TRACE_MSG_MORE){
  2271. if(!hard_trim) return 0;
  2272. path->size --;
  2273. } else if(msg1 == -1 - WT_TRACE_MSG_MORE){
  2274. t = ref_tracev(path, path->size); // please see linear_trace_graph
  2275. n = ref_nodev(g->nodes, t->node);
  2276. dir = !t->dir;
  2277. n_in = 0;
  2278. idx = n->edges[dir].idx;
  2279. found = 0;
  2280. while(idx){
  2281. f = ref_edgerefv(g->erefs, idx);
  2282. idx = f->next;
  2283. if(f->idx == t->edges[dir].idx) continue;
  2284. e = g->edges->buffer + f->idx;
  2285. if(e->closed) continue;
  2286. n_in ++;
  2287. T.node = f->flg? e->node1 : e->node2;
  2288. T.dir = f->flg? !e->dir1 : e->dir2;
  2289. step2 = count_linear_trace_graph(g, &T, step + 1, &msg2) + 1;
  2290. if(msg2 != WT_TRACE_MSG_ZERO) step2 ++;
  2291. if(step2 > step){
  2292. found = 1;
  2293. break;
  2294. } else if(step2 == step && (e->cov > g->edges->buffer[t->edges[dir].idx].cov || (step == 1 && e->cov == g->edges->buffer[t->edges[dir].idx].cov && e->off >= g->edges->buffer[t->edges[dir].idx].off))){
  2295. found = 1;
  2296. break;
  2297. }
  2298. //if(step2 + 1 >= step && msg2 != WT_TRACE_MSG_ZERO){ found = 1; break; }
  2299. }
  2300. if(!found) return 0;
  2301. } else return 0;
  2302. for(i=0;i<path->size;i++){
  2303. del_node_graph(g, ref_nodev(g->nodes, path->buffer[i].node));
  2304. //del_node_edges_graph(g, ref_nodev(g->nodes, path->buffer[i].node));
  2305. ret ++;
  2306. }
  2307. return ret;
  2308. }
  2309. static inline u8i trim_tips_graph(Graph *g, uint16_t max_step, int hard_trim){
  2310. tracev *path;
  2311. node_t *n;
  2312. u8i ret, nid;
  2313. ret = 0;
  2314. path = init_tracev(32);
  2315. for(nid=0;nid<g->nodes->size;nid++){
  2316. n = ref_nodev(g->nodes, nid);
  2317. if(n->closed) continue;
  2318. if(trim_tip_core_graph(g, max_step, path, nid, hard_trim)) ret ++;
  2319. }
  2320. free_tracev(path);
  2321. return ret;
  2322. }
2323. // careful: trimming can alternate sharp_tip -> blunt_tip -> sharp_tip, and so on
  2324. static inline u4i trim_blunt_tip_core_graph(Graph *g, u8i nid){
  2325. node_t *n;
  2326. int k;
  2327. if(g->cut_tip == 0) return 0;
  2328. n = ref_nodev(g->nodes, nid);
  2329. if(n->edges[0].cnt && n->edges[1].cnt) return 0;
  2330. k = (n->edges[0].cnt == 0);
  2331. if(n->edges[k].cnt < 2) return 0;
  2332. del_node_graph(g, n);
  2333. return 1;
  2334. }
  2335. static inline u8i trim_blunt_tips_graph(Graph *g){
  2336. node_t *n;
  2337. u8i ret, nid;
  2338. ret = 0;
  2339. for(nid=0;nid<g->nodes->size;nid++){
  2340. n = ref_nodev(g->nodes, nid);
  2341. if(n->closed) continue;
  2342. if(trim_blunt_tip_core_graph(g, nid)) ret ++;
  2343. }
  2344. return ret;
  2345. }
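// trim_frgtip_core_graph: unitig-level tip trimming; follows a linear unitig path of at most
// max_len from a one-sided unitig and deletes it when a competing branch at the merge point
// is at least as long.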
  2346. static inline u8i trim_frgtip_core_graph(Graph *g, int max_len, pathv *path, u8i nid){
  2347. path_t *t, T;
  2348. frg_t *n;
  2349. edge_ref_t *f;
  2350. lnk_t *e;
  2351. u8i ret, idx;
  2352. u4i i, dir, found, n_in;
  2353. int len, len2;
  2354. int msg1, msg2;
  2355. if(g->cut_tip == 0) return 0;
  2356. ret = 0;
  2357. n = ref_frgv(g->frgs, nid);
  2358. if(n->closed) return 0;
  2359. first_living_lnk_graph(g, n, 0, &msg1);
  2360. first_living_lnk_graph(g, n, 1, &msg2);
  2361. if(msg1 != WT_TRACE_MSG_ZERO){
  2362. if(msg2 != WT_TRACE_MSG_ZERO) return 0;
  2363. dir = 0;
  2364. } else if(msg2 != WT_TRACE_MSG_ZERO){
  2365. dir = 1;
  2366. } else return 0;
  2367. clear_pathv(path);
  2368. t = next_ref_pathv(path);
  2369. t->frg = nid;
  2370. t->lnks[0] = EDGE_REF_NULL;
  2371. t->lnks[1] = EDGE_REF_NULL;
  2372. t->dir = dir;
  2373. len = linear_path_graph(g, path, max_len, &msg1) + 1;
  2374. if(len > max_len) return 0;
  2375. //if(msg1 != -1 - WT_TRACE_MSG_MORE && msg1 != WT_TRACE_MSG_MORE) return 0;
  2376. if(msg1 == WT_TRACE_MSG_MORE){
  2377. path->size --;
  2378. return 0;
  2379. } else if(msg1 == -1 - WT_TRACE_MSG_MORE){
  2380. t = ref_pathv(path, path->size); // please see linear_path_graph
  2381. n = ref_frgv(g->frgs, t->frg);
  2382. dir = !t->dir;
  2383. n_in = 0;
  2384. idx = n->lnks[dir].idx;
  2385. found = 0;
  2386. while(idx){
  2387. f = ref_edgerefv(g->lrefs, idx);
  2388. idx = f->next;
  2389. if(f->idx == t->lnks[dir].idx) continue;
  2390. e = g->lnks->buffer + f->idx;
  2391. if(e->closed) continue;
  2392. n_in ++;
  2393. T.frg = f->flg? e->frg1 : e->frg2;
  2394. T.dir = f->flg? !e->dir1 : e->dir2;
  2395. len2 = count_linear_path_graph(g, &T, len + 1, &msg2) + 1;
  2396. if(msg2 != WT_TRACE_MSG_ZERO) len2 ++;
  2397. if(len2 >= len){ found = 1; break; }
  2398. }
  2399. if(!found) return 0;
  2400. } else return 0;
  2401. for(i=0;i<path->size;i++){
  2402. del_frg_lnks_graph(g, ref_frgv(g->frgs, path->buffer[i].frg));
  2403. ret ++;
  2404. }
  2405. return ret;
  2406. }
  2407. static inline u8i trim_frgtips_graph(Graph *g, int max_len){
  2408. pathv *path;
  2409. frg_t *n;
  2410. u8i ret, nid;
  2411. ret = 0;
  2412. path = init_pathv(32);
  2413. for(nid=0;nid<g->frgs->size;nid++){
  2414. n = ref_frgv(g->frgs, nid);
  2415. if(n->closed) continue;
  2416. if(trim_frgtip_core_graph(g, max_len, path, nid)) ret ++;
  2417. }
  2418. free_pathv(path);
  2419. return ret;
  2420. }
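// bt_t records one step of the bubble search: the node reached, its incoming edge, direction,
// step count, a backtrack index (bt), the index of the path it merged into (ending), and an
// accumulated coverage score. pop_bubble_backtrace_graph keeps the edges on the chosen path
// (their steps are reset to 0) and cuts every other edge collected during the search.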
  2421. typedef struct {
  2422. node_t *n;
  2423. edge_t *e; // incoming edge
  2424. u8i dir:1, ind:1, step:8, bt:16, ending:16, keep:2;
  2425. int score:20;
  2426. } bt_t;
  2427. define_list(btv, bt_t);
  2428. #define WT_MAX_BTIDX 0xFFFF
  2429. static inline u4i pop_bubble_backtrace_graph(Graph *g, btv *bts, u4i idx){
  2430. bt_t *bt;
  2431. u4i ret;
  2432. while(idx){
  2433. bt = ref_btv(bts, idx);
  2434. bt->step = 0;
  2435. idx = bt->bt;
  2436. }
  2437. ret = 0;
  2438. for(idx=1;idx<bts->size;idx++){
  2439. bt = ref_btv(bts, idx);
  2440. if(bt->step == 0) continue;
  2441. cut_edge_graph(g, bt->e);
  2442. ret ++;
  2443. }
  2444. return ret;
  2445. }
  2446. typedef struct {
  2447. frg_t *n;
  2448. lnk_t *e; // incoming edge
  2449. u8i dir:1, ind:1, step:8, bt:16, ending:16, score:22;
  2450. } frg_bt_t;
  2451. define_list(frgbtv, frg_bt_t);
2452. static inline u4i pop_frg_bubble_backtrace_graph(Graph *g, frgbtv *bts, u4i idx){
  2453. frg_bt_t *bt;
  2454. u4i ret;
  2455. while(idx){
  2456. bt = ref_frgbtv(bts, idx);
  2457. bt->step = 0;
  2458. idx = bt->bt;
  2459. }
  2460. ret = 0;
  2461. for(idx=1;idx<bts->size;idx++){
  2462. bt = ref_frgbtv(bts, idx);
  2463. if(bt->step == 0) continue;
  2464. cut_lnk_graph(g, bt->e);
  2465. ret ++;
  2466. }
  2467. return ret;
  2468. }
  2469. static inline u4i pop_bubble2_backtrace_graph(Graph *g, btv *bts, u4i _idx){
  2470. bt_t *bt;
  2471. u4i ret, i, idx;
  2472. for(i=1;i<bts->size;i++){
  2473. bt = ref_btv(bts, i);
  2474. bt->keep = 2;
  2475. }
  2476. idx = _idx;
  2477. while(idx){
  2478. bt = ref_btv(bts, idx);
  2479. bt->keep = 1;
  2480. idx = bt->bt;
  2481. }
  2482. for(i=1;i<bts->size;i++){
  2483. bt = ref_btv(bts, i);
  2484. if(bt->keep == 1) continue;
  2485. if(bt->ending == 0) continue;
  2486. if(bts->buffer[bt->ending].keep == 1){
  2487. idx = i;
  2488. while(idx){
  2489. bt = ref_btv(bts, idx);
  2490. if(bt->keep != 2) break;
  2491. idx = bt->bt;
  2492. bt->keep = 0;
  2493. }
  2494. }
  2495. }
  2496. ret = 0;
  2497. for(idx=1;idx<bts->size;idx++){
  2498. bt = ref_btv(bts, idx);
  2499. if(bt->keep) continue;
  2500. cut_edge_graph(g, bt->e);
  2501. ret ++;
  2502. }
  2503. return ret;
  2504. }
  2505. static inline u4i pop_frg_bubble2_backtrace_graph(Graph *g, frgbtv *bts, u4i _idx){
  2506. frg_bt_t *bt;
  2507. u4i ret, i, idx;
  2508. for(i=1;i<bts->size;i++){
  2509. bt = ref_frgbtv(bts, i);
  2510. if(bt->ending == _idx){
  2511. idx = i;
  2512. while(idx){
  2513. bt = ref_frgbtv(bts, idx);
  2514. bt->step = 0;
  2515. idx = bt->bt;
  2516. }
  2517. }
  2518. }
  2519. idx = _idx;
  2520. while(idx){
  2521. bt = ref_frgbtv(bts, idx);
  2522. bt->step = 1;
  2523. idx = bt->bt;
  2524. }
  2525. ret = 0;
  2526. for(idx=1;idx<bts->size;idx++){
  2527. bt = ref_frgbtv(bts, idx);
  2528. if(bt->step != 0) continue;
  2529. cut_lnk_graph(g, bt->e);
  2530. ret ++;
  2531. }
  2532. return ret;
  2533. }
  2534. static inline u4i safe_cut_redundant_edges_graph(Graph *g, btv *bts, bt_t *b1, bt_t *b2){
  2535. u4i ret;
  2536. ret = 0;
  2537. if(0){
  2538. ret = 1;
  2539. cut_edge_graph(g, b1->e);
  2540. b1 = ref_btv(bts, b1->bt);
  2541. }
  2542. while(1){
  2543. if(b1->step >= b2->step){
  2544. if(b1 == b2) break;
  2545. ret ++;
  2546. cut_edge_graph(g, b1->e);
  2547. b1 = ref_btv(bts, b1->bt);
  2548. } else {
  2549. b2 = ref_btv(bts, b2->bt);
  2550. }
  2551. }
  2552. return ret;
  2553. }
  2554. static inline u4i safe_cut_redundant_lnks_graph(Graph *g, frgbtv *bts, frg_bt_t *b1, frg_bt_t *b2){
  2555. u4i ret;
  2556. ret = 0;
  2557. if(0){
  2558. ret = 1;
  2559. cut_lnk_graph(g, b1->e);
  2560. b1 = ref_frgbtv(bts, b1->bt);
  2561. }
  2562. while(1){
  2563. if(b1->step >= b2->step){
  2564. if(b1 == b2) break;
  2565. ret ++;
  2566. cut_lnk_graph(g, b1->e);
  2567. b1 = ref_frgbtv(bts, b1->bt);
  2568. } else {
  2569. b2 = ref_frgbtv(bts, b2->bt);
  2570. }
  2571. }
  2572. return ret;
  2573. }
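// pop_bubble_core_graph: breadth-first search from (nid,dir) over living edges, re-expanding
// a node only after all of its incoming branches have been seen. When the frontier collapses
// to a single node with nothing left unresolved, pop_bubble2_backtrace_graph keeps the
// backtraced path and cuts the side branches that rejoin it; paths meeting in the same
// direction with single-in ('ind') histories are resolved early by
// safe_cut_redundant_edges_graph.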
  2574. static inline u4i pop_bubble_core_graph(Graph *g, uint16_t max_step, btv *bts, u4v *heap, u8i nid, u4i dir, u8i visit, int safe){
  2575. bt_t *bt, *tb;
  2576. node_t *n;
  2577. edge_ref_t *f;
  2578. edge_t *e;
  2579. u8i ret, idx;
  2580. u4i bidx, i, lst, unclosed;
  2581. ret = 0;
  2582. n = ref_nodev(g->nodes, nid);
  2583. if(n->closed) return 0;
  2584. if(count_living_edges_graph(g, n, dir) < 2) return 0;
  2585. clear_btv(bts);
  2586. next_ref_btv(bts);
  2587. bt = next_ref_btv(bts);
  2588. bt->n = n;
  2589. bt->e = NULL;
  2590. bt->dir = dir;
  2591. bt->ind = safe? 0 : 1;
  2592. bt->step = 0;
  2593. bt->bt = 0;
  2594. bt->score = 0;
  2595. bt->ending = 0;
  2596. clear_u4v(heap);
  2597. array_heap_push(heap->buffer, heap->size, heap->cap, u4i, bts->size - 1, num_cmpx(bts->buffer[a].step, bts->buffer[b].step, bts->buffer[b].score, bts->buffer[a].score));
  2598. n->bt_visit = visit;
  2599. n->bt_idx = bts->size - 1;
  2600. n->single_in = 1;
  2601. unclosed = 0;
  2602. while(heap->size && heap->size < WT_MAX_BTIDX){
  2603. bidx = array_heap_pop(heap->buffer, heap->size, heap->cap, u4i, num_cmpx(bts->buffer[a].step, bts->buffer[b].step, bts->buffer[b].score, bts->buffer[a].score));
  2604. encap_btv(bts, bts->buffer[bidx].n->edges[bts->buffer[bidx].dir].cnt);
  2605. bt = ref_btv(bts, bidx);
  2606. if(bt->step >= max_step) return 0;
  2607. if(bt->ind && bt->n->single_in == 0) bt->ind = 0;
  2608. lst = bts->size;
  2609. idx = bt->n->edges[bt->dir].idx;
  2610. while(idx){
  2611. f = ref_edgerefv(g->erefs, idx);
  2612. idx = f->next;
  2613. e = g->edges->buffer + f->idx;
  2614. if(e->closed) continue;
  2615. tb = next_ref_btv(bts);
  2616. tb->n = ref_nodev(g->nodes, f->flg? e->node1 : e->node2);
  2617. if(tb->n == bts->buffer[1].n) return 0;
  2618. tb->e = e;
  2619. tb->dir = f->flg? !e->dir1 : e->dir2;
  2620. tb->step = bt->step + 1;
  2621. tb->bt = bidx;
  2622. tb->ind = 0;
  2623. //tb->score = bt->score + num_min(0, e->cov - 20); // set normal e->cov = 20
  2624. tb->score = bt->score + e->cov;
  2625. tb->ending = 0;
  2626. }
  2627. if(bt->ind && (bt->bt == 0 || lst + 1 == bts->size)){
  2628. for(i=lst;i<bts->size;i++) bts->buffer[i].ind = 1;
  2629. }
  2630. for(i=lst;i<bts->size;i++){
  2631. tb = ref_btv(bts, i);
  2632. if(tb->n->bt_visit != visit){
  2633. tb->n->bt_visit = visit;
  2634. tb->n->unvisit = count_living_edges_graph(g, tb->n, !tb->dir);
  2635. if(tb->n->unvisit == 1) tb->n->single_in = 1;
  2636. else tb->n->single_in = 0;
  2637. tb->n->bt_idx = i;
  2638. unclosed ++;
  2639. } else {
  2640. tb->ending = tb->n->bt_idx;
  2641. if(tb->dir == bts->buffer[tb->n->bt_idx].dir){
  2642. if(tb->ind && bts->buffer[tb->n->bt_idx].ind){
  2643. if(tb->step == bts->buffer[tb->n->bt_idx].step){
  2644. return safe_cut_redundant_edges_graph(g, bts, tb, ref_btv(bts, tb->n->bt_idx));
  2645. } else {
  2646. return safe_cut_redundant_edges_graph(g, bts, ref_btv(bts, tb->n->bt_idx), tb);
  2647. }
  2648. } else if(tb->ind){
  2649. return safe_cut_redundant_edges_graph(g, bts, tb, ref_btv(bts, tb->n->bt_idx));
  2650. } else if(bts->buffer[tb->n->bt_idx].ind){
  2651. return safe_cut_redundant_edges_graph(g, bts, ref_btv(bts, tb->n->bt_idx), tb);
  2652. }
  2653. } else {
  2654. // circle
  2655. return 0;
  2656. }
  2657. }
  2658. tb->n->unvisit --;
  2659. if(tb->n->unvisit == 0){
  2660. //if(num_cmpgt(tb->score, bts->buffer[tb->n->bt_idx].score)){
  2661. if(tb->step > bts->buffer[tb->n->bt_idx].step){
  2662. bts->buffer[tb->n->bt_idx].ending = i;
  2663. tb->n->bt_idx = i;
  2664. tb->ending = 0;
  2665. }
  2666. if(count_living_edges_graph(g, tb->n, tb->dir)){
  2667. array_heap_push(heap->buffer, heap->size, heap->cap, u4i, tb->n->bt_idx, num_cmpx(bts->buffer[a].step, bts->buffer[b].step, bts->buffer[b].score, bts->buffer[a].score));
  2668. }
  2669. unclosed --;
  2670. }
  2671. }
  2672. if(heap->size == 1 && unclosed == 0){
  2673. return pop_bubble2_backtrace_graph(g, bts, heap->buffer[0]);
  2674. }
  2675. }
  2676. return 0;
  2677. }
  2678. static inline u8i pop_bubbles_graph(Graph *g, uint16_t max_step, int safe){
  2679. btv *bts;
  2680. u4v *heap;
  2681. node_t *n;
  2682. u8i nid, visit, ret, _ret;
  2683. int dir;
  2684. ret = 0;
  2685. for(nid=0;nid<g->nodes->size;nid++) g->nodes->buffer[nid].bt_visit = 0;
  2686. bts = init_btv(32);
  2687. heap = init_u4v(32);
  2688. visit = 0;
  2689. for(nid=0;nid<g->nodes->size;nid++){
  2690. n = ref_nodev(g->nodes, nid);
  2691. if(n->closed) continue;
  2692. for(dir=0;dir<2;dir++){
  2693. _ret = pop_bubble_core_graph(g, max_step, bts, heap, nid, dir, ++visit, safe);
  2694. if(_ret) ret ++;
  2695. }
  2696. }
  2697. free_btv(bts);
  2698. free_u4v(heap);
  2699. return ret;
  2700. }
  2701. static inline u4i pop_frg_bubble_core_graph(Graph *g, uint16_t max_step, frgbtv *bts, u4v *heap, u8i nid, u4i dir, u8i visit){
  2702. frg_bt_t *bt, *tb;
  2703. frg_t *n;
  2704. edge_ref_t *f;
  2705. lnk_t *e;
  2706. u8i ret, idx;
  2707. u4i bidx, i, lst, unclosed;
  2708. ret = 0;
  2709. n = ref_frgv(g->frgs, nid);
  2710. if(n->closed) return 0;
  2711. if(count_living_lnks_graph(g, n, dir) < 2) return 0;
  2712. clear_frgbtv(bts);
  2713. next_ref_frgbtv(bts);
  2714. bt = next_ref_frgbtv(bts);
  2715. bt->n = n;
  2716. bt->e = NULL;
  2717. bt->dir = dir;
  2718. bt->ind = 1;
  2719. //bt->ind = 0;
  2720. bt->step = 0;
  2721. bt->bt = 0;
  2722. bt->score = 0;
  2723. bt->ending = 0;
  2724. clear_u4v(heap);
  2725. array_heap_push(heap->buffer, heap->size, heap->cap, u4i, bts->size - 1, num_cmpx(bts->buffer[a].step, bts->buffer[b].step, bts->buffer[b].score, bts->buffer[a].score));
  2726. n->bt_visit = visit;
  2727. n->bt_idx = bts->size - 1;
  2728. n->single_in = 1;
  2729. unclosed = 0;
  2730. while(heap->size && heap->size < WT_MAX_BTIDX){
  2731. bidx = array_heap_pop(heap->buffer, heap->size, heap->cap, u4i, num_cmpx(bts->buffer[a].step, bts->buffer[b].step, bts->buffer[b].score, bts->buffer[a].score));
  2732. encap_frgbtv(bts, bts->buffer[bidx].n->lnks[bts->buffer[bidx].dir].cnt);
  2733. bt = ref_frgbtv(bts, bidx);
  2734. if(bt->step >= max_step) return 0;
  2735. if(bt->ind && bt->n->single_in == 0) bt->ind = 0;
  2736. lst = bts->size;
  2737. idx = bt->n->lnks[bt->dir].idx;
  2738. while(idx){
  2739. f = ref_edgerefv(g->lrefs, idx);
  2740. idx = f->next;
  2741. e = g->lnks->buffer + f->idx;
  2742. if(e->closed) continue;
  2743. tb = next_ref_frgbtv(bts);
  2744. tb->n = ref_frgv(g->frgs, f->flg? e->frg1 : e->frg2);
  2745. if(tb->n == bts->buffer[1].n) return 0;
  2746. tb->e = e;
  2747. tb->dir = f->flg? !e->dir1 : e->dir2;
  2748. tb->step = bt->step + 1;
  2749. tb->bt = bidx;
  2750. tb->ind = 0;
  2751. tb->score = bt->score + e->cov;
  2752. tb->ending = 0;
  2753. }
  2754. if(bt->ind && (bt->bt == 0 || lst + 1 == bts->size)){
  2755. for(i=lst;i<bts->size;i++) bts->buffer[i].ind = 1;
  2756. }
  2757. for(i=lst;i<bts->size;i++){
  2758. tb = ref_frgbtv(bts, i);
  2759. if(tb->n->bt_visit != visit){
  2760. tb->n->bt_visit = visit;
  2761. tb->n->unvisit = count_living_lnks_graph(g, tb->n, !tb->dir);
  2762. if(tb->n->unvisit == 1) tb->n->single_in = 1;
  2763. else tb->n->single_in = 0;
  2764. tb->n->bt_idx = i;
  2765. unclosed ++;
  2766. } else {
  2767. tb->ending = tb->n->bt_idx;
  2768. if(tb->dir == bts->buffer[tb->n->bt_idx].dir){
  2769. if(tb->ind && bts->buffer[tb->n->bt_idx].ind){
  2770. if(tb->step == bts->buffer[tb->n->bt_idx].step){
  2771. return safe_cut_redundant_lnks_graph(g, bts, tb, ref_frgbtv(bts, tb->n->bt_idx));
  2772. } else {
  2773. return safe_cut_redundant_lnks_graph(g, bts, ref_frgbtv(bts, tb->n->bt_idx), tb);
  2774. }
  2775. } else if(tb->ind){
  2776. return safe_cut_redundant_lnks_graph(g, bts, tb, ref_frgbtv(bts, tb->n->bt_idx));
  2777. } else if(bts->buffer[tb->n->bt_idx].ind){
  2778. return safe_cut_redundant_lnks_graph(g, bts, ref_frgbtv(bts, tb->n->bt_idx), tb);
  2779. }
  2780. }
  2781. }
  2782. tb->n->unvisit --;
  2783. if(tb->n->unvisit == 0){
  2784. if(tb->step > bts->buffer[tb->n->bt_idx].step) tb->n->bt_idx = i;
  2785. if(count_living_lnks_graph(g, tb->n, tb->dir)){
  2786. array_heap_push(heap->buffer, heap->size, heap->cap, u4i, tb->n->bt_idx, num_cmpx(bts->buffer[a].step, bts->buffer[b].step, bts->buffer[b].score, bts->buffer[a].score));
  2787. }
  2788. unclosed --;
  2789. }
  2790. }
  2791. if(heap->size == 1 && unclosed == 0){
  2792. return pop_frg_bubble2_backtrace_graph(g, bts, heap->buffer[0]);
  2793. }
  2794. }
  2795. return 0;
  2796. }
  2797. static inline u8i pop_frg_bubbles_graph(Graph *g, uint16_t max_step){
  2798. frgbtv *bts;
  2799. u4v *heap;
  2800. frg_t *n;
  2801. u8i nid, visit, ret, _ret;
  2802. int dir;
  2803. ret = 0;
  2804. for(nid=0;nid<g->frgs->size;nid++) g->frgs->buffer[nid].bt_visit = 0;
  2805. bts = init_frgbtv(32);
  2806. heap = init_u4v(32);
  2807. visit = 0;
  2808. for(nid=0;nid<g->frgs->size;nid++){
  2809. n = ref_frgv(g->frgs, nid);
  2810. if(n->closed) continue;
  2811. for(dir=0;dir<2;dir++){
  2812. _ret = pop_frg_bubble_core_graph(g, max_step, bts, heap, nid, dir, ++visit);
  2813. if(_ret) ret ++;
  2814. }
  2815. }
  2816. free_frgbtv(bts);
  2817. free_u4v(heap);
  2818. return ret;
  2819. }
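// resolve_yarn_core_graph: bubble-like search for tangled regions entered through a single
// edge; it succeeds when the tangle narrows back to a single exit (or one internal tip), in
// which case only the backtraced path is kept and the other traversed edges are cut.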
  2820. static inline u4i resolve_yarn_core_graph(Graph *g, u4i max_step, btv *bts, u4v *heap, u8i nid, u4i dir, u8i visit){
  2821. bt_t *bt, *tb;
  2822. node_t *n, *m;
  2823. edge_ref_t *f;
  2824. edge_t *e;
  2825. u8i ret, idx, tip_idx;
  2826. u4i bidx, i, lst, tip;
  2827. int n_in;
  2828. ret = 0;
  2829. n = ref_nodev(g->nodes, nid);
  2830. if(n->closed) return 0;
  2831. if(count_living_edges_graph(g, n, dir) < 2) return 0;
  2832. clear_btv(bts);
  2833. next_ref_btv(bts);
  2834. bt = next_ref_btv(bts);
  2835. bt->n = n;
  2836. bt->e = NULL;
  2837. bt->dir = dir;
  2838. bt->step = 0;
  2839. bt->bt = 0;
  2840. bt->score = 0;
  2841. bt->ending = 0;
  2842. clear_u4v(heap);
  2843. array_heap_push(heap->buffer, heap->size, heap->cap, u4i, bts->size - 1, num_cmpx(bts->buffer[a].step, bts->buffer[b].step, bts->buffer[b].score, bts->buffer[a].score));
  2844. n->bt_visit = visit;
  2845. n->bt_idx = bts->size - 1;
  2846. tip = 0; tip_idx = WT_MAX_BTIDX;
  2847. n_in = 1;
  2848. while(heap->size && bts->size < WT_MAX_BTIDX){
  2849. bidx = array_heap_pop(heap->buffer, heap->size, heap->cap, u4i, num_cmpx(bts->buffer[a].step, bts->buffer[b].step, bts->buffer[b].score, bts->buffer[a].score));
  2850. encap_btv(bts, bts->buffer[bidx].n->edges[bts->buffer[bidx].dir].cnt);
  2851. bt = ref_btv(bts, bidx);
  2852. if(bt->step >= max_step){
  2853. return 0;
  2854. }
  2855. lst = bts->size;
  2856. idx = bt->n->edges[bt->dir].idx;
  2857. while(idx){
  2858. f = ref_edgerefv(g->erefs, idx);
  2859. idx = f->next;
  2860. e = g->edges->buffer + f->idx;
  2861. if(e->closed) continue;
  2862. tb = next_ref_btv(bts);
  2863. tb->n = ref_nodev(g->nodes, f->flg? e->node1 : e->node2);
  2864. //if(tb->n == bts->buffer[1].n){
  2865. //return 0;
  2866. //}
  2867. tb->e = e;
  2868. tb->dir = f->flg? !e->dir1 : e->dir2;
  2869. tb->step = bt->step + 1;
  2870. tb->bt = bidx;
  2871. tb->score = bt->score + e->cov;
  2872. tb->ending = 0;
  2873. }
  2874. for(i=lst;i<bts->size;i++){
  2875. tb = ref_btv(bts, i);
  2876. if(tb->n->bt_visit != visit){
  2877. tb->n->bt_visit = visit;
  2878. tb->n->unvisit = count_living_edges_graph(g, tb->n, !tb->dir);
  2879. n_in += tb->n->unvisit - 1;
  2880. tb->n->bt_idx = i;
  2881. if(count_living_edges_graph(g, tb->n, tb->dir)){
  2882. array_heap_push(heap->buffer, heap->size, heap->cap, u4i, tb->n->bt_idx, num_cmpx(bts->buffer[a].step, bts->buffer[b].step, bts->buffer[b].score, bts->buffer[a].score));
  2883. } else if(tip == 0){
  2884. tip = 1;
  2885. tip_idx = i;
  2886. } else {
  2887. return 0;
  2888. }
  2889. } else {
  2890. tb->ending = tb->n->bt_idx;
  2891. n_in --;
  2892. }
  2893. }
  2894. if(n_in == 1){
  2895. if(heap->size == 0 && tip == 1){
  2896. return pop_bubble_backtrace_graph(g, bts, tip? tip_idx : heap->buffer[0]);
  2897. } else if(heap->size == 1 && tip == 0){
  2898. tip_idx = heap->buffer[0];
  2899. tb = ref_btv(bts, heap->buffer[0]);
  2900. idx = tb->n->edges[tb->dir].idx;
  2901. while(idx){
  2902. f = ref_edgerefv(g->erefs, idx);
  2903. idx = f->next;
  2904. e = g->edges->buffer + f->idx;
  2905. if(e->closed) continue;
  2906. m = ref_nodev(g->nodes, f->flg? e->node1 : e->node2);
  2907. if(m->bt_visit == visit) continue;
  2908. else tip ++;
  2909. }
  2910. if(tip == 0) return 0;
  2911. return pop_bubble_backtrace_graph(g, bts, tip_idx);
  2912. }
  2913. }
  2914. }
  2915. return 0;
  2916. }
2917. // a very complicated local region, like a yarn, but with a single in-edge and a single out-edge
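// Schematic example of a yarn (node ids are illustrative only):
//
//                +--> N2 --+--> N4 --+
//   N0 --> N1 ---|          \        |---> N6 --> N7
//                +--> N3 ---+--> N5 -+
//
// N1 is entered through a single in-edge but fans out into a tangle that reconverges at N6;
// resolve_yarns_graph() below tries to collapse such a region onto one retained path.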
  2918. static inline u8i resolve_yarns_graph(Graph *g, u4i max_step){
  2919. btv *bts;
  2920. u4v *heap;
  2921. tracev *path;
  2922. trace_t *t;
  2923. node_t *n;
  2924. u8i nid, visit, ret, _ret;
  2925. int dir;
  2926. ret = 0;
  2927. for(nid=0;nid<g->nodes->size;nid++) g->nodes->buffer[nid].bt_visit = 0;
  2928. bts = init_btv(32);
  2929. heap = init_u4v(32);
  2930. path = init_tracev(4);
  2931. visit = 0;
  2932. #if DEBUG
2933. if(max_step == 1000000){ // never happens
  2934. print_local_dot_graph(g, "1.dot", NULL);
  2935. }
  2936. #endif
  2937. for(nid=0;nid<g->nodes->size;nid++){
  2938. n = ref_nodev(g->nodes, nid);
  2939. if(n->closed) continue;
  2940. if(n->edges[0].cnt <= 1 && n->edges[1].cnt > 1){
  2941. if(n->edges[0].cnt == 1){
  2942. clear_tracev(path);
  2943. t = next_ref_tracev(path);
  2944. t->node = nid;
  2945. t->dir = 0;
  2946. if(linear_trace_graph(g, path, 4, NULL) < 4){
  2947. // solve yarn from linear nodes
  2948. continue;
  2949. }
  2950. }
  2951. dir = 1;
  2952. } else if(n->edges[1].cnt <= 1 && n->edges[0].cnt > 1){
2953. if(n->edges[1].cnt == 1){
  2954. clear_tracev(path);
  2955. t = next_ref_tracev(path);
  2956. t->node = nid;
  2957. t->dir = 1;
  2958. if(linear_trace_graph(g, path, 4, NULL) < 4){
  2959. // solve yarn from linear nodes
  2960. continue;
  2961. }
  2962. }
  2963. dir = 0;
  2964. } else continue;
  2965. _ret = resolve_yarn_core_graph(g, max_step, bts, heap, nid, dir, ++visit);
  2966. if(_ret) ret ++;
  2967. }
  2968. free_btv(bts);
  2969. free_u4v(heap);
  2970. free_tracev(path);
  2971. return ret;
  2972. }
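// remove_boomerangs_frg_graph: a "boomerang" is a short frg (len <= max_frg_len) that is
// dead-ended on one side (no lnks) while branching (more than one lnk) on the other; its lnks
// are deleted so it no longer disturbs the neighbouring branch.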
  2973. static inline u8i remove_boomerangs_frg_graph(Graph *g, u4i max_frg_len){
  2974. frg_t *frg;
  2975. u8i i, ret;
  2976. ret = 0;
  2977. for(i=0;i<g->frgs->size;i++){
  2978. frg = ref_frgv(g->frgs, i);
  2979. if(frg->closed) continue;
  2980. if(frg->len > max_frg_len) continue;
  2981. if(frg->lnks[0].cnt == 0 && frg->lnks[1].cnt > 1){
  2982. } else if(frg->lnks[1].cnt == 0 && frg->lnks[0].cnt > 1){
  2983. } else continue;
  2984. ret ++;
  2985. del_frg_lnks_graph(g, frg);
  2986. }
  2987. return ret;
  2988. }
  2989. static inline u8i cut_weak_branches_frg_graph(Graph *g){
  2990. frg_t *frg1, *frg2;
  2991. lnk_t *lnk;
  2992. u8v *cuts;
  2993. u8i i, ret;
  2994. cuts = init_u8v(32);
  2995. for(i=0;i<g->lnks->size;i++){
  2996. lnk = ref_lnkv(g->lnks, i);
  2997. if(lnk->weak == 0) continue;
  2998. frg1 = ref_frgv(g->frgs, lnk->frg1);
  2999. frg2 = ref_frgv(g->frgs, lnk->frg2);
  3000. if(frg1->lnks[lnk->dir1].cnt > 1){
  3001. push_u8v(cuts, i);
  3002. } else if(frg2->lnks[!lnk->dir2].cnt > 1){
  3003. push_u8v(cuts, i);
  3004. }
  3005. }
  3006. ret = cuts->size;
  3007. for(i=0;i<cuts->size;i++){
  3008. cut_lnk_graph(g, ref_lnkv(g->lnks, cuts->buffer[i]));
  3009. }
  3010. free_u8v(cuts);
  3011. return ret;
  3012. }
  3013. static inline u8i mask_all_branching_nodes_graph(Graph *g){
  3014. node_t *n;
  3015. u8i node, ret;
  3016. ret = 0;
  3017. for(node=0;node<g->nodes->size;node++){
  3018. n = ref_nodev(g->nodes, node);
  3019. if(n->closed) continue;
  3020. if(n->edges[0].cnt > 1 || n->edges[1].cnt > 1){
  3021. n->rep_idx = 1;
  3022. ret ++;
  3023. } else {
  3024. n->rep_idx = 0;
  3025. }
  3026. }
  3027. for(node=0;node<g->nodes->size;node++){
  3028. n = ref_nodev(g->nodes, node);
  3029. if(n->closed) continue;
  3030. if(n->rep_idx == 0) continue;
  3031. del_node_graph(g, n);
  3032. }
  3033. return ret;
  3034. }
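// gen_unitigs_graph: from every unvisited node, extend a strictly linear and unique trace in
// both directions (true_linear_unique_trace_graph), store the traversal as a unitig in g->utgs,
// record each member node's rep_idx as the unitig index, and log an N50 summary of the
// estimated unitig lengths.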
  3035. static inline u8i gen_unitigs_graph(Graph *g){
  3036. tracev *path;
  3037. u4v *lens;
  3038. trace_t *t;
  3039. node_t *n;
  3040. u8i nid, nutg, i;
  3041. for(i=0;i<g->utgs->size;i++) free_tracev(g->utgs->buffer[i]);
  3042. clear_vplist(g->utgs);
  3043. lens = init_u4v(1024);
  3044. nutg = 0;
  3045. for(nid=0;nid<g->nodes->size;nid++){
  3046. g->nodes->buffer[nid].bt_visit = 0;
  3047. g->nodes->buffer[nid].rep_idx = MAX_REP_IDX;
  3048. }
  3049. for(nid=0;nid<g->nodes->size;nid++){
  3050. n = ref_nodev(g->nodes, nid);
  3051. if(n->closed) continue;
  3052. if(n->bt_visit) continue;
  3053. path = init_tracev(4);
  3054. nutg ++;
  3055. t = next_ref_tracev(path);
  3056. t->node = nid;
  3057. t->edges[0] = EDGE_REF_NULL;
  3058. t->edges[1] = EDGE_REF_NULL;
  3059. t->dir = 0;
  3060. true_linear_unique_trace_graph(g, path, 0xFFFFFFFFFFFFFFFFLLU, nutg, NULL);
  3061. reverse_tracev(path);
  3062. for(i=0;i<path->size;i++) path->buffer[i].dir = !path->buffer[i].dir;
  3063. true_linear_unique_trace_graph(g, path, 0xFFFFFFFFFFFFFFFFLLU, nutg, NULL);
  3064. push_u4v(lens, cal_offset_traces_graph(g, path, 0, path->size, 0) * KBM_BIN_SIZE);
  3065. for(i=0;i<path->size;i++){
  3066. ref_nodev(g->nodes, path->buffer[i].node)->rep_idx = g->utgs->size;
  3067. }
  3068. push_vplist(g->utgs, path);
  3069. }
  3070. fprintf(KBM_LOGF, "[%s] ", date()); num_n50(lens, KBM_LOGF); fprintf(KBM_LOGF, "\n");
  3071. free_u4v(lens);
  3072. return nutg;
  3073. }
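// path2seqlets_graph: flatten a path of frgs into node-to-node seqlets. The first pass trims
// each frg's usable trace range [tx, ty] using the trace indices (tidx1/tidx2) stored on the
// connecting lnk; the second pass emits one seqlet per junction between consecutive frgs and
// one per adjacent trace pair inside each frg, with offsets and lengths in KBM bin units.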
  3074. static inline seqletv* path2seqlets_graph(Graph *g, pathv *path){
  3075. seqletv *qs;
  3076. path_t *p1, *p2;
  3077. frg_t *frg1, *frg2;
  3078. edge_ref_t *f;
  3079. lnk_t *l;
  3080. trace_t *t1, *t2;
  3081. b8i off;
  3082. int len;
  3083. u4i i, j;
  3084. qs = init_seqletv(4);
  3085. cal_offset_paths_graph(g, path, 0, path->size);
3086. off = 0; p1 = NULL;
  3087. frg1 = NULL;
  3088. for(i=0;i<path->size;i++){
  3089. p2 = ref_pathv(path, i);
  3090. frg2 = ref_frgv(g->frgs, p2->frg);
  3091. cal_offset_traces_graph(g, g->traces, frg2->toff, frg2->toff + frg2->tcnt, 0);
  3092. p2->tx = 0;
  3093. p2->ty = frg2->tcnt - 1;
  3094. if(p1){
  3095. f = p1->lnks + p1->dir;
  3096. l = ref_lnkv(g->lnks, f->idx);
  3097. if(f->flg){
  3098. if(p1->dir){
  3099. p1->tx = l->tidx2;
  3100. } else {
  3101. p1->ty = frg1->tcnt - 1 - l->tidx2;
  3102. }
  3103. if(p2->dir){
  3104. p2->ty = frg2->tcnt - 1 - l->tidx1;
  3105. } else {
  3106. p2->tx = l->tidx1;
  3107. }
  3108. } else {
  3109. if(p1->dir){
  3110. p1->tx = l->tidx1;
  3111. } else {
  3112. p1->ty = frg1->tcnt - 1 - l->tidx1;
  3113. }
  3114. if(p2->dir){
  3115. p2->ty = frg2->tcnt - 1 - l->tidx2;
  3116. } else {
  3117. p2->tx = l->tidx2;
  3118. }
  3119. }
  3120. if(p1->ty >= frg1->tcnt || p2->ty >= frg2->tcnt){
  3121. fprintf(stderr, " -- something wrong in %s -- %s:%d --\n", __FUNCTION__, __FILE__, __LINE__); fflush(stderr);
  3122. }
  3123. off += l->off;
  3124. }
  3125. p1 = p2;
  3126. frg1 = frg2;
  3127. }
  3128. p1 = NULL;
  3129. frg1 = NULL;
  3130. for(i=0;i<path->size;i++){
  3131. p2 = ref_pathv(path, i);
  3132. frg2 = ref_frgv(g->frgs, p2->frg);
  3133. if(p1){
  3134. t1 = ref_tracev(g->traces, frg1->toff + (p1->dir? p1->tx : p1->ty));
  3135. t2 = ref_tracev(g->traces, frg2->toff + (p2->dir? p2->ty : p2->tx));
  3136. off = p1->off + (p1->dir? (int)(frg1->len - (t1->off + g->reglen)) : t1->off);
  3137. f = p1->lnks + p1->dir;
  3138. l = ref_lnkv(g->lnks, f->idx);
  3139. len = l->off + (p1->dir? t1->off + g->reglen : frg1->len - t1->off) + (p2->dir? frg2->len - t2->off : t2->off + g->reglen);
  3140. push_seqletv(qs, (seqlet_t){t1->node, p1->dir ^ t1->dir, t2->node, p2->dir ^ t2->dir, off, len});
  3141. }
  3142. if(p2->dir){
  3143. //for(j=p2->tx+1;j<=p2->ty;j++){
  3144. //t1 = ref_tracev(g->traces, frg2->toff + frg2->tcnt - 1 - j);
  3145. //t2 = ref_tracev(g->traces, frg2->toff + frg2->tcnt - 0 - j);
  3146. for(j=p2->ty;j>p2->tx;j--){
  3147. t1 = ref_tracev(g->traces, frg2->toff + j - 1);
  3148. t2 = ref_tracev(g->traces, frg2->toff + j);
  3149. off = p2->off + frg2->len - (t2->off + g->reglen);
  3150. len = t2->off - t1->off + g->reglen;
  3151. push_seqletv(qs, (seqlet_t){t2->node, !t2->dir, t1->node, !t1->dir, off, len});
  3152. }
  3153. } else {
  3154. for(j=p2->tx+1;j<=p2->ty;j++){
  3155. t1 = ref_tracev(g->traces, frg2->toff + j - 1);
  3156. t2 = ref_tracev(g->traces, frg2->toff + j);
  3157. off = p2->off + t1->off;
  3158. len = t2->off - t1->off + g->reglen;
  3159. push_seqletv(qs, (seqlet_t){t1->node, t1->dir, t2->node, t2->dir, off, len});
  3160. }
  3161. }
  3162. p1 = p2;
  3163. frg1 = frg2;
  3164. }
  3165. return qs;
  3166. }
  3167. static inline u8i gen_contigs_graph(Graph *g, FILE *out){
  3168. pathv *path;
  3169. seqletv *qs;
  3170. seqlet_t *q;
  3171. path_t *t;
  3172. frg_t *n;
  3173. u8i nid, nctg, i, off;
  3174. for(i=0;i<g->ctgs->size;i++) free_tracev(g->ctgs->buffer[i]);
  3175. clear_vplist(g->ctgs);
  3176. nctg = 0;
  3177. for(nid=0;nid<g->frgs->size;nid++) g->frgs->buffer[nid].bt_visit = 0;
  3178. path = init_pathv(4);
  3179. for(nid=0;nid<g->frgs->size;nid++){
  3180. n = ref_frgv(g->frgs, nid);
  3181. if(n->closed) continue;
  3182. if(n->bt_visit) continue;
  3183. nctg ++;
  3184. clear_pathv(path);
  3185. t = next_ref_pathv(path);
  3186. t->frg = nid;
  3187. t->lnks[0] = EDGE_REF_NULL;
  3188. t->lnks[1] = EDGE_REF_NULL;
  3189. t->dir = 0;
  3190. true_linear_unique_path_graph(g, path, 0xFFFFFFFFFFFFFFFFLLU, nctg, NULL);
  3191. reverse_pathv(path);
  3192. for(i=0;i<path->size;i++) path->buffer[i].dir = !path->buffer[i].dir;
  3193. true_linear_unique_path_graph(g, path, 0xFFFFFFFFFFFFFFFFLLU, nctg, NULL);
  3194. qs = path2seqlets_graph(g, path);
  3195. if(qs->size + 1 < (u4i)g->min_ctg_nds){
  3196. free_seqletv(qs);
  3197. continue;
  3198. }
  3199. q = ref_seqletv(qs, qs->size - 1);
  3200. if(((int)q->off + (int)q->len) * KBM_BIN_SIZE < (int)g->min_ctg_len){
  3201. free_seqletv(qs);
  3202. continue;
  3203. }
  3204. off = 0;
  3205. for(i=0;i<qs->size;i++){
  3206. q = ref_seqletv(qs, i);
  3207. q->off = off;
  3208. off += q->len - g->reglen;
  3209. }
  3210. if(out){
  3211. for(i=0;i<path->size;i++){
  3212. t = ref_pathv(path, i);
  3213. fprintf(out, "ctg%d\tF%d\t%c\t%d\n", (int)g->ctgs->size, t->frg, "+-*@"[t->dir], t->off * KBM_BIN_SIZE);
  3214. }
  3215. }
  3216. push_vplist(g->ctgs, qs);
  3217. }
  3218. free_pathv(path);
  3219. g->major_nctg = g->ctgs->size;
  3220. return nctg;
  3221. }
  3222. static inline u8i gen_complex_contigs_graph(Graph *g){
  3223. tracev *ts;
  3224. trace_t *t;
  3225. seqletv *qs;
  3226. seqlet_t *q;
  3227. node_t *n;
  3228. edge_ref_t *f;
  3229. edge_t *e;
  3230. u8i i, idx, mi, cnt;
  3231. u4i j, k, mk, mc;
  3232. int mf;
  3233. for(i=0;i<g->nodes->size;i++) g->nodes->buffer[i].unvisit = 1;
  3234. cnt = 0;
  3235. for(i=0;i<g->utgs->size;i++){
  3236. ts = (tracev*)get_vplist(g->utgs, i);
  3237. for(j=0;j<ts->size;j++){
  3238. t = ref_tracev(ts, j);
  3239. n = ref_nodev(g->nodes, t->node);
  3240. n->unvisit = 0;
  3241. }
  3242. }
  3243. for(i=0;i<g->nodes->size;i++){
  3244. n = ref_nodev(g->nodes, i);
  3245. if(n->unvisit == 0) continue;
  3246. if(n->regs.cnt < g->min_node_cov){
  3247. n->unvisit = 0;
  3248. continue;
  3249. }
  3250. }
  3251. cnt = 0;
  3252. for(i=0;i<g->nodes->size;i++){
  3253. n = ref_nodev(g->nodes, i);
  3254. if(n->unvisit == 0) continue;
  3255. for(k=0;k<2;k++){
  3256. idx = n->edges[k].idx;
  3257. mi = mk = mc = 0; mf = MAX_VALUE_B4;
  3258. while(idx){
  3259. f = ref_edgerefv(g->erefs, idx);
  3260. idx = f->next;
  3261. e = g->edges->buffer + f->idx;
  3262. if(e->node1 == e->node2) continue;
  3263. if(e->cov < g->min_edge_cov) continue;
  3264. if(e->off < mf){
  3265. mi = f - g->erefs->buffer; mk = k; mc = e->cov; mf = e->off;
  3266. }
  3267. }
  3268. if(mf == MAX_VALUE_B4) continue;
  3269. f = ref_edgerefv(g->erefs, mi);
  3270. if(f->flg) continue;
  3271. e = ref_edgev(g->edges, f->idx);
  3272. if(g->nodes->buffer[e->node2].unvisit == 0) continue;
  3273. qs = init_seqletv(1);
  3274. q = next_ref_seqletv(qs);
  3275. q->node1 = i;
  3276. q->dir1 = k;
  3277. q->node2 = e->node2;
  3278. q->dir2 = e->dir2;
  3279. q->off = 0;
  3280. q->len = g->reglen * 2 + e->off;
  3281. push_vplist(g->ctgs, qs);
  3282. cnt ++;
  3283. }
  3284. }
  3285. return cnt;
  3286. }
  3287. static inline void n50_stat_contigs_graph(Graph *g){
  3288. seqletv *qs;
  3289. seqlet_t *q;
  3290. u4v *lens;
  3291. int len;
  3292. u8i i;
  3293. lens = init_u4v(g->major_nctg + 1);
  3294. for(i=0;i<g->major_nctg;i++){
  3295. qs = (seqletv*)get_vplist(g->ctgs, i);
  3296. q = ref_seqletv(qs, qs->size - 1);
  3297. len = ((int)q->off + (int)q->len) * KBM_BIN_SIZE;
  3298. if(len <= 0){
  3299. fprintf(stderr, " -- seqlet[ctg%llu off=%d, len=%d, sum=%d] in %s -- %s:%d --\n", i, (int)q->off, (int)q->len, len, __FUNCTION__, __FILE__, __LINE__); fflush(stderr);
  3300. continue;
  3301. }
  3302. push_u4v(lens, len);
  3303. }
  3304. fprintf(KBM_LOGF, "[%s] Estimated: ", date()); num_n50(lens, KBM_LOGF); fprintf(KBM_LOGF, "\n");
  3305. free_u4v(lens);
  3306. }
3307. // to be called after gen_contigs_graph(): counts reads whose nodes appear in no contig
  3308. static inline u4i count_isolated_reads_graph(Graph *g){
  3309. seqletv *ts;
  3310. seqlet_t *t;
  3311. node_t *n;
  3312. read_t *rd;
  3313. reg_t *r;
  3314. u8i i, j, cnt, idx;
  3315. int fnd;
  3316. for(i=0;i<g->nodes->size;i++) g->nodes->buffer[i].unvisit = 1;
  3317. cnt = 0;
  3318. for(i=0;i<g->ctgs->size;i++){
  3319. ts = (seqletv*)get_vplist(g->ctgs, i);
  3320. for(j=0;j<ts->size;j++){
  3321. t = ref_seqletv(ts, j);
  3322. n = ref_nodev(g->nodes, t->node1);
  3323. n->unvisit = 0;
  3324. n = ref_nodev(g->nodes, t->node2);
  3325. n->unvisit = 0;
  3326. }
  3327. }
  3328. for(i=0;i<g->reads->size;i++){
  3329. rd = ref_readv(g->reads, i);
  3330. fnd = 0;
  3331. idx = rd->regs.idx;
  3332. while(idx){
  3333. r = ref_regv(g->regs, idx);
  3334. idx = r->read_link;
  3335. n = ref_nodev(g->nodes, r->node);
  3336. if(n->unvisit == 0){ fnd = 1; break; }
  3337. }
  3338. if(fnd == 0) cnt ++;
  3339. }
  3340. return cnt;
  3341. }
  3342. static inline void print_dot_subgraph(Graph *g, subnodehash *nodes, subedgev *edges, FILE *out){
  3343. subnode_t *n1, *n2;
  3344. subedge_t *e;
  3345. u4i k, idx;
  3346. fprintf(out, "digraph {\n");
  3347. fprintf(out, " rankdir=LR\n");
  3348. reset_iter_subnodehash(nodes);
  3349. while((n1 = ref_iter_subnodehash(nodes))){
  3350. if(n1->closed) continue;
  3351. fprintf(out, "N%llu [label=\"N%llu(%llu) %d:%d:%d\" style=filled fillcolor=\"%s\" color=\"%s\"]\n", (u8i)n1->node, (u8i)n1->node, (u8i)g->nodes->buffer[n1->node].rep_idx, n1->cov, n1->visit, n1->bt_open, n1->fix? "yellow" : "white", n1->visit? "green" : (n1->cov > 2? "blue" : "black"));
  3352. }
  3353. reset_iter_subnodehash(nodes);
  3354. while((n1 = ref_iter_subnodehash(nodes))){
  3355. if(n1->closed) continue;
  3356. for(k=0;k<2;k++){
  3357. idx = n1->edges[k].idx;
  3358. while(idx){
  3359. e = ref_subedgev(edges, idx);
  3360. idx = e->next;
  3361. if(e->fwd == 0) continue;
  3362. if(e->closed) continue;
  3363. n2 = e->node;
  3364. fprintf(out, " N%llu -> N%llu [label=\"%c%c:%d:%d\" color=\"%s\" %s]\n", (u8i)n1->node, (u8i)n2->node, "+-"[k], "+-"[e->dir], e->cov, e->visit, e->cov > 1? "blue" : "black", e->visit? "style=dashed":"");
  3365. }
  3366. }
  3367. }
  3368. fprintf(out, "}\n");
  3369. fflush(out);
  3370. }
  3371. static inline void fprintf_dot_subgraph(Graph *g, subnodehash *nodes, subedgev *edges, char *name_prefix, char *name_suffix){
  3372. FILE *out;
  3373. out = open_file_for_write(name_prefix, name_suffix, 1);
  3374. print_dot_subgraph(g, nodes, edges, out);
  3375. fclose(out);
  3376. }
  3377. typedef struct {
  3378. u4i node:31, dir:1;
  3379. u4i flag;
  3380. u4i prev;
  3381. u4i step;
  3382. int score;
  3383. } sg_heap_t;
  3384. define_list(sgheapv, sg_heap_t);
  3385. typedef struct {
  3386. u4i node:31, dir:1;
  3387. u4i group:30, solid:1, closed:1;
  3388. } sg_tip_t;
  3389. define_list(sgtipv, sg_tip_t);
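// Sub-graph edges live in per-node singly linked lists (edges[dir].idx chained via
// subedge_t.next). find_edge_subgraph() scans one list for a given (node2, dir2) target, and
// cut_edge_subgraph() below removes an edge from both endpoints by calling the core routine
// once per direction.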
  3390. static inline subedge_t* find_edge_subgraph(subnodehash *nodes, subedgev *edges, u4i node1, int dir1, u4i node2, int dir2){
  3391. subnode_t *n;
  3392. subedge_t *e;
  3393. u8i idx;
  3394. n = ref_subnodehash(nodes, node1);
  3395. idx = n->edges[dir1].idx;
  3396. e = ref_subedgev(edges, idx);
  3397. if(offset_subnodehash(nodes, e->node) == node2 && e->dir == dir2){
  3398. return e;
  3399. }
  3400. while((idx = e->next)){
  3401. e = ref_subedgev(edges, idx);
  3402. if(offset_subnodehash(nodes, e->node) == node2 && e->dir == dir2){
  3403. return e;
  3404. }
  3405. }
  3406. return NULL;
  3407. }
  3408. static inline int cut_edge_core_subgraph(subnodehash *nodes, subedgev *edges, u4i node1, int dir1, u4i node2, int dir2){
  3409. subnode_t *n;
  3410. subedge_t *e, *p;
  3411. u8i idx;
  3412. n = ref_subnodehash(nodes, node1);
  3413. idx = n->edges[dir1].idx;
  3414. e = ref_subedgev(edges, idx);
  3415. if(offset_subnodehash(nodes, e->node) == node2 && e->dir == dir2){
  3416. e->closed = 1;
  3417. n->edges[dir1].idx = e->next;
  3418. n->edges[dir1].cnt --;
  3419. return 1;
  3420. }
  3421. while((idx = e->next)){
  3422. p = e;
  3423. e = ref_subedgev(edges, idx);
  3424. if(offset_subnodehash(nodes, e->node) == node2 && e->dir == dir2){
  3425. e->closed = 1;
  3426. p->next = e->next;
  3427. n->edges[dir1].cnt --;
  3428. return 1;
  3429. }
  3430. }
  3431. return 0;
  3432. }
  3433. static inline int cut_edge_subgraph(subnodehash *nodes, subedgev *edges, u4i node1, int dir1, u4i node2, int dir2){
  3434. return cut_edge_core_subgraph(nodes, edges, node1, dir1, node2, dir2)
  3435. + cut_edge_core_subgraph(nodes, edges, node2, !dir2, node1, !dir1);
  3436. }
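// unitigs2frgs_graph: convert every unitig trace into a frg, sort the frgs by length in
// descending order, then stamp each member node with rep_idx (owning frg), rep_dir (orientation
// within the frg) and bt_visit (trace index within the frg); the scan_*_lnk_core routines below
// depend on these fields when building lnks between frgs.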
  3437. static inline u4i unitigs2frgs_graph(Graph *g, int ncpu){
  3438. frg_t *frg;
  3439. node_t *n;
  3440. tracev *ts;
  3441. trace_t *t;
  3442. u4i i, j, tid, ret;
  3443. {
  3444. clear_frgv(g->frgs);
  3445. clear_lnkv(g->lnks);
  3446. clear_edgerefv(g->lrefs);
  3447. clear_tracev(g->traces);
  3448. }
  3449. ret = 0;
  3450. for(tid=0;tid<g->utgs->size;tid++){
  3451. ret ++;
  3452. frg = next_ref_frgv(g->frgs);
  3453. frg->toff = g->traces->size;
  3454. frg->lnks[0] = PTR_REF_NULL;
  3455. frg->lnks[1] = PTR_REF_NULL;
  3456. frg->closed = 0;
  3457. ts = (tracev*)get_vplist(g->utgs, tid);
  3458. frg->tx = 0;
  3459. frg->ty = ts->size;
  3460. for(i=frg->tx;i<frg->ty;i++) ts->buffer[i].cov = 0;
  3461. append_array_tracev(g->traces, ts->buffer + frg->tx, frg->ty - frg->tx);
  3462. frg->tcnt = frg->ty - frg->tx;
  3463. frg->length = frg->len = cal_offset_traces_graph(g, g->traces, frg->toff + frg->tx, frg->toff + frg->ty, 0);
  3464. }
  3465. psort_array(g->frgs->buffer, g->frgs->size, frg_t, ncpu, num_cmpgt(b.length, a.length));
  3466. for(i=0;i<g->nodes->size;i++){
  3467. n = ref_nodev(g->nodes, i);
  3468. n->unvisit = 1;
  3469. n->rep_idx = MAX_REP_IDX;
  3470. n->bt_visit = 0;
  3471. }
  3472. for(i=0;i<g->frgs->size;i++){
  3473. frg = ref_frgv(g->frgs, i);
  3474. for(j=frg->tx;j<frg->ty;j++){
  3475. t = ref_tracev(g->traces, frg->toff + j);
  3476. n = ref_nodev(g->nodes, t->node);
  3477. n->rep_idx = i;
  3478. n->rep_dir = t->dir;
  3479. n->bt_visit = j;
  3480. }
  3481. }
  3482. return ret;
  3483. }
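// scan_rd_lnk_core: scan one read's regs from both ends for the outermost regs whose nodes are
// anchored in a frg (rep_idx != MAX_REP_IDX). If the two anchors belong to different frgs, fill
// *lnk with a candidate direct link: lnk->off is the gap between the two regs on the read minus
// the overhangs of both frgs beyond their anchoring traces; the pair is then canonicalised so
// that frg1 < frg2.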
  3484. static inline int scan_rd_lnk_core(Graph *g, u4i rid, lnk_t *lnk, u8v *regids){
  3485. read_t *rd;
  3486. reg_t *r, *rl, *rr;
  3487. node_t *n, *nl, *nr;
  3488. frg_t *fl, *fr;
  3489. trace_t *tl, *tr;
  3490. u8i idx;
  3491. u4i i, tmp;
  3492. rd = ref_readv(g->reads, rid);
  3493. idx = rd->regs.idx;
  3494. clear_u8v(regids);
  3495. while(idx){
  3496. push_u8v(regids, idx);
  3497. idx = ref_regv(g->regs, idx)->read_link;
  3498. }
  3499. if(regids->size < 2) return 0;
  3500. rl = NULL; nl = NULL;
  3501. for(i=0;i<regids->size;i++){
  3502. r = ref_regv(g->regs, regids->buffer[i]);
  3503. n = ref_nodev(g->nodes, r->node);
  3504. if(n->rep_idx == MAX_REP_IDX) continue;
  3505. if(rl && n->rep_idx != nl->rep_idx) break;
  3506. rl = r;
  3507. nl = n;
  3508. }
  3509. if(i == regids->size) return 0;
  3510. rr = NULL; nr = NULL;
  3511. for(i=regids->size;i>0;i--){
  3512. r = ref_regv(g->regs, regids->buffer[i - 1]);
  3513. n = ref_nodev(g->nodes, r->node);
  3514. if(n->rep_idx == MAX_REP_IDX) continue;
  3515. if(rr && n->rep_idx != nr->rep_idx) break;
  3516. rr = r;
  3517. nr = n;
  3518. }
  3519. fl = ref_frgv(g->frgs, nl->rep_idx);
  3520. tl = ref_tracev(g->traces, fl->toff + nl->bt_visit);
  3521. fr = ref_frgv(g->frgs, nr->rep_idx);
  3522. tr = ref_tracev(g->traces, fr->toff + nr->bt_visit);
  3523. lnk->frg1 = nl->rep_idx;
  3524. lnk->frg2 = nr->rep_idx;
  3525. if(lnk->frg1 == lnk->frg2) return 0;
  3526. lnk->dir1 = rl->dir ^ nl->rep_dir;
  3527. lnk->dir2 = rr->dir ^ nr->rep_dir;
  3528. lnk->tidx1 = lnk->dir1? nl->bt_visit : fl->tcnt - 1 - nl->bt_visit;
  3529. lnk->tidx2 = lnk->dir2? fr->tcnt - 1 - nr->bt_visit : nr->bt_visit;
3530. lnk->cov = 1; // direct link, supported by this read
  3531. //ln->weak = 0;
  3532. //lnk->closed = 0;
  3533. lnk->off = rr->beg - rl->end;
  3534. lnk->off -= lnk->dir1? tl->off : ((b4i)fl->len) - (tl->off + rl->end - rl->beg);
  3535. lnk->off -= lnk->dir2? ((b4i)fr->len) - (tr->off + rr->end - rr->beg) : tr->off;
  3536. if(lnk->off + (int)g->reglen < 0) return 0;
  3537. if(lnk->frg1 > lnk->frg2){
  3538. swap_tmp(lnk->frg1, lnk->frg2, tmp);
  3539. swap_tmp(lnk->dir1, lnk->dir2, tmp);
  3540. lnk->dir1 = !lnk->dir1;
  3541. lnk->dir2 = !lnk->dir2;
  3542. swap_tmp(lnk->tidx1, lnk->tidx2, tmp);
  3543. }
  3544. return 1;
  3545. }
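// scan_nd_lnk_core: for a node that belongs to no frg, check whether the living edges on each
// side lead consistently (same frg, same orientation) to exactly one frg, with both each edge
// and the consistent-edge count passing min_edge_cov; if the two sides reach two different
// frgs, emit an indirect link that bridges them through this node.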
  3546. static inline int scan_nd_lnk_core(Graph *g, u8i nid, lnk_t *lnk){
  3547. node_t *n, *w;
  3548. edge_ref_t *f;
  3549. edge_t *e;
  3550. frg_t *fl, *fr;
  3551. trace_t *tl, *tr;
  3552. u8i idx, wid;
  3553. u4i fids[2], dirs[2], dir, covs[2], tidx[2], k;
  3554. int offs[2];
  3555. n = ref_nodev(g->nodes, nid);
  3556. if(n->rep_idx != MAX_REP_IDX) return 0;
  3557. for(k=0;k<2;k++){
  3558. fids[k] = 0;
  3559. dirs[k] = 0;
  3560. covs[k] = 0;
  3561. tidx[k] = 0;
  3562. offs[k] = 0;
  3563. idx = n->edges[k].idx;
  3564. while(idx){
  3565. f = ref_edgerefv(g->erefs, idx);
  3566. idx = f->next;
  3567. e = ref_edgev(g->edges, f->idx);
  3568. wid = f->flg? e->node1 : e->node2;
  3569. w = ref_nodev(g->nodes, wid);
  3570. if(w->rep_idx == MAX_REP_IDX) continue;
  3571. dir = f->flg? !e->dir1 : e->dir2;
  3572. dir = w->rep_dir ^ dir;
  3573. if(fids[k] == 0){
  3574. fids[k] = w->rep_idx;
  3575. dirs[k] = dir;
  3576. covs[k] = 1;
3577. tidx[k] = w->bt_visit; // bt_visit stores the node's trace index (tidx) within its frg
  3578. offs[k] = e->off;
  3579. } else if(fids[k] == w->rep_idx && dirs[k] == dir){
  3580. covs[k] ++;
  3581. } else {
  3582. fids[k] = MAX_U4;
  3583. break;
  3584. }
  3585. }
  3586. if(fids[k] == 0) return 0;
  3587. if(fids[k] == MAX_U4) return 0;
  3588. if(covs[k] < g->min_edge_cov) return 0;
  3589. }
  3590. if(fids[0] == fids[1]) return 0;
3591. lnk->cov = 0; // indirect link, inferred from edges rather than a spanning read
  3592. if(fids[0] < fids[1]){
  3593. lnk->frg1 = fids[0];
  3594. lnk->frg2 = fids[1];
  3595. lnk->dir1 = !dirs[0];
  3596. lnk->dir2 = dirs[1];
  3597. lnk->tidx1 = tidx[0];
  3598. lnk->tidx2 = tidx[1];
  3599. } else {
  3600. lnk->frg1 = fids[1];
  3601. lnk->frg2 = fids[0];
  3602. lnk->dir1 = !dirs[1];
  3603. lnk->dir2 = dirs[0];
  3604. lnk->tidx1 = tidx[1];
  3605. lnk->tidx2 = tidx[0];
  3606. }
  3607. lnk->off = offs[0] + offs[1] + g->reglen;
  3608. fl = ref_frgv(g->frgs, lnk->frg1);
  3609. fr = ref_frgv(g->frgs, lnk->frg2);
  3610. tl = ref_tracev(g->traces, fl->toff + lnk->tidx1);
  3611. tr = ref_tracev(g->traces, fr->toff + lnk->tidx2);
  3612. lnk->off -= lnk->dir1? tl->off : ((int)fl->len) - (int)(tl->off + g->reglen);
  3613. lnk->off -= lnk->dir2? ((int)fr->len) - (int)(tr->off + g->reglen) : tr->off;
  3614. if(lnk->off + (int)g->reglen < 0) return 0;
  3615. return 1;
  3616. }
  3617. thread_beg_def(mlnk);
  3618. Graph *g;
  3619. lnkv *lnks;
  3620. int task;
  3621. thread_end_def(mlnk);
  3622. thread_beg_func(mlnk);
  3623. u8v *regids;
  3624. u8i nid;
  3625. u4i rid;
  3626. lnk_t LNK;
  3627. memset(&LNK, 0, sizeof(lnk_t));
  3628. LNK.cov = 1;
  3629. LNK.weak = 0;
  3630. LNK.closed = 0;
  3631. regids = init_u8v(32);
  3632. thread_beg_loop(mlnk);
  3633. if(mlnk->task == 1){
  3634. for(rid=mlnk->t_idx;rid<mlnk->g->reads->size;rid+=mlnk->n_cpu){
  3635. if(scan_rd_lnk_core(mlnk->g, rid, &LNK, regids)){
  3636. push_lnkv(mlnk->lnks, LNK);
  3637. }
  3638. }
  3639. } else if(mlnk->task == 2){
  3640. for(nid=mlnk->t_idx;nid<mlnk->g->nodes->size;nid+=mlnk->n_cpu){
  3641. if(scan_nd_lnk_core(mlnk->g, nid, &LNK)){
  3642. push_lnkv(mlnk->lnks, LNK);
  3643. }
  3644. }
  3645. }
  3646. thread_end_loop(mlnk);
  3647. free_u8v(regids);
  3648. thread_end_func(mlnk);
  3649. static inline u4i gen_lnks_graph(Graph *g, int ncpu, FILE *log){
  3650. frg_t *frg;
  3651. trace_t *t;
  3652. node_t *n;
  3653. lnkv *lnks;
  3654. lnk_t *l;
  3655. edge_ref_t F1, F2;
  3656. u8i lst, idx;
  3657. u4i i, j, m, v1, v2, cnt, cov;
  3658. int x;
  3659. thread_preprocess(mlnk);
  3660. clear_lnkv(g->lnks);
  3661. memset(next_ref_lnkv(g->lnks), 0, sizeof(lnk_t));
  3662. clear_edgerefv(g->lrefs);
  3663. push_edgerefv(g->lrefs, EDGE_REF_NULL);
  3664. for(i=0;i<g->nodes->size;i++){
  3665. n = ref_nodev(g->nodes, i);
  3666. n->unvisit = 1;
  3667. n->rep_idx = MAX_REP_IDX;
  3668. n->bt_visit = 0;
  3669. }
  3670. for(i=0;i<g->frgs->size;i++){
  3671. frg = ref_frgv(g->frgs, i);
  3672. for(j=frg->tx;j<frg->ty;j++){
  3673. t = ref_tracev(g->traces, frg->toff + j);
  3674. n = ref_nodev(g->nodes, t->node);
  3675. n->rep_idx = i;
  3676. n->rep_dir = t->dir;
  3677. n->bt_visit = j;
  3678. }
  3679. }
  3680. thread_beg_init(mlnk, ncpu);
  3681. mlnk->g = g;
  3682. mlnk->lnks = init_lnkv(1024);
  3683. mlnk->task = 0;
  3684. thread_end_init(mlnk);
  3685. thread_apply_all(mlnk, mlnk->task = 1);
  3686. //thread_apply_all(mlnk, mlnk->task = 2);
  3687. lnks = init_lnkv(1024);
  3688. thread_beg_close(mlnk);
  3689. append_lnkv(lnks, mlnk->lnks);
  3690. free_lnkv(mlnk->lnks);
  3691. thread_end_close(mlnk);
  3692. psort_array(lnks->buffer, lnks->size, lnk_t, ncpu, num_cmpgtx(a.key, b.key, a.off, b.off));
  3693. if(log){
  3694. for(i=0;i<lnks->size;i++){
  3695. l = ref_lnkv(lnks, i);
  3696. fprintf(log, "F%d[%c:%d] -> F%d[%c:%d] = %d, cov=%d\n", l->frg1, "+-"[l->dir1], l->tidx1, l->frg2, "+-"[l->dir2], l->tidx2, l->off, l->cov);
  3697. }
  3698. }
  3699. cov = 0;
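// Merge lnks sharing the same key (frg pair plus orientation): sum their cov, then pick the
// member nearest the median offset as the representative; if that median entry is an indirect
// link (cov == 0), shift to the closest read-supported (cov != 0) neighbour so the stored
// offset comes from a spanning read whenever possible. Groups whose summed cov stays below
// max_node_cov_sg are flagged weak.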
  3700. for(i=j=0;i<=lnks->size;i++){
  3701. if(i == lnks->size || lnks->buffer[i].key != lnks->buffer[j].key){
  3702. push_edgerefv(g->lrefs, (edge_ref_t){g->lnks->size, 0, 0});
  3703. push_edgerefv(g->lrefs, (edge_ref_t){g->lnks->size, 1, 0});
  3704. l = next_ref_lnkv(g->lnks);
  3705. m = (((u8i)i) + j) / 2;
  3706. if(cov && lnks->buffer[m].cov == 0){
  3707. for(v1=1;v1+j<=m;v1++){
  3708. if(lnks->buffer[m-v1].cov){
  3709. v1 |= 0x80000000U;
  3710. break;
  3711. }
  3712. }
  3713. for(v2=1;m+v2<i;v2++){
  3714. if(lnks->buffer[m+v2].cov){
  3715. v2 |= 0x80000000U;
  3716. break;
  3717. }
  3718. }
  3719. if(v1 & 0x80000000U){
  3720. if(v2 & 0x80000000U){
  3721. if((v1 & 0x7FFFFFFFU) <= (v2 & 0x7FFFFFFFU)){
  3722. m = m - (v1 & 0x7FFFFFFFU);
  3723. } else {
  3724. m = m + (v2 & 0x7FFFFFFFU);
  3725. }
  3726. } else {
  3727. m = m - (v1 & 0x7FFFFFFFU);
  3728. }
  3729. } else {
  3730. if(v2 & 0x80000000U){
  3731. m = m + (v2 & 0x7FFFFFFFU);
  3732. } else {
  3733. fprintf(stderr, " -- something wrong in %s -- %s:%d --\n", __FUNCTION__, __FILE__, __LINE__); fflush(stderr);
  3734. }
  3735. }
  3736. }
  3737. *l = lnks->buffer[m];
  3738. l->cov = cov;
  3739. if(l->cov < g->max_node_cov_sg){
  3740. l->weak = 1;
  3741. //l->closed = WT_EDGE_CLOSED_LESS;
  3742. l->closed = 0;
  3743. } else {
  3744. l->weak = 0;
  3745. l->closed = 0;
  3746. }
  3747. j = i;
  3748. cov = lnks->buffer[i].cov;
  3749. } else {
  3750. cov += lnks->buffer[i].cov;
  3751. }
  3752. }
  3753. free_lnkv(lnks);
  3754. // sort lrefs
  3755. psort_array(g->lrefs->buffer + 1, g->lrefs->size - 1, edge_ref_t, ncpu, num_cmpgt(
  3756. (a.flg? ((g->lnks->buffer[a.idx].frg2 << 1) | !g->lnks->buffer[a.idx].dir2) : ((g->lnks->buffer[a.idx].frg1 << 1) | g->lnks->buffer[a.idx].dir1)),
  3757. (b.flg? ((g->lnks->buffer[b.idx].frg2 << 1) | !g->lnks->buffer[b.idx].dir2) : ((g->lnks->buffer[b.idx].frg1 << 1) | g->lnks->buffer[b.idx].dir1))
  3758. ));
  3759. push_edgerefv(g->lrefs, (edge_ref_t){g->lnks->size, 0, 0}); memset(next_ref_lnkv(g->lnks), 0, sizeof(lnk_t)); g->lnks->size --;
  3760. g->lrefs->size --;
  3761. F1.idx = g->lnks->size; F1.flg = 0;
  3762. cnt = 0;
  3763. // update frg->lnks
  3764. for(lst=idx=1;idx<=g->lrefs->size;idx++){
  3765. if(g->lrefs->buffer[idx].flg){
  3766. F2.idx = g->lnks->buffer[g->lrefs->buffer[idx].idx].frg2;
  3767. F2.flg = !g->lnks->buffer[g->lrefs->buffer[idx].idx].dir2;
  3768. } else {
  3769. F2.idx = g->lnks->buffer[g->lrefs->buffer[idx].idx].frg1;
  3770. F2.flg = g->lnks->buffer[g->lrefs->buffer[idx].idx].dir1;
  3771. }
  3772. if(F1.idx == F2.idx && F1.flg == F2.flg) continue;
  3773. if(lst < idx){
  3774. frg = ref_frgv(g->frgs, F1.idx);
  3775. frg->lnks[F1.flg].idx = lst;
  3776. for(x=lst;x+1<(int)idx;x++){
  3777. g->lrefs->buffer[x].next = x + 1;
  3778. if(g->lnks->buffer[g->lrefs->buffer[x].idx].closed == WT_EDGE_CLOSED_NULL) frg->lnks[F1.flg].cnt ++;
  3779. }
  3780. if(g->lnks->buffer[g->lrefs->buffer[x].idx].closed == WT_EDGE_CLOSED_NULL) frg->lnks[F1.flg].cnt ++;
  3781. }
  3782. lst = idx;
  3783. F1 = F2;
  3784. }
  3785. return g->lnks->size - 1;
  3786. }
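// gen_seq_traces_graph: emit an uncorrected sequence along a trace path. For each adjacent node
// pair it looks for a read carrying both nodes and copies the read sequence between them; if no
// such read exists, the gap is padded with 'N' according to the edge offset, and a negative
// offset trims the sequence back. Each node region itself is copied from the node's first reg.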
  3787. static inline int gen_seq_traces_graph(Graph *g, tracev *path, String *seq){
  3788. trace_t *t1, *t2;
  3789. reg_t *reg, *r1, *r2;
  3790. edge_t *e;
  3791. u4i i;
  3792. int inc, found;
  3793. clear_string(seq);
  3794. t1 = NULL;
  3795. for(i=0;i<path->size;i++){
  3796. t2 = ref_tracev(path, i);
  3797. if(t1){
  3798. inc = 0;
  3799. r1 = ref_regv(g->regs, ref_nodev(g->nodes, t1->node)->regs.idx);
  3800. r2 = ref_regv(g->regs, ref_nodev(g->nodes, t2->node)->regs.idx);
  3801. e = ref_edgev(g->edges, t1->edges[t1->dir].idx);
  3802. do {
  3803. inc = 0;
  3804. found = 0;
  3805. while(r1->node == t1->node && r2->node == t2->node){
  3806. if(r1->rid > r2->rid){
  3807. r2 ++;
  3808. } else if(r1->rid < r2->rid){
  3809. r1 ++;
  3810. } else {
  3811. if(r1->beg < r2->beg){
  3812. if(t1->dir ^ r1->dir){ r1++; r2++; continue; }
  3813. inc = (r2->beg - r1->end) * KBM_BIN_SIZE;
  3814. if(inc <= 0) break;
  3815. encap_string(seq, inc);
  3816. seq_basebank(g->kbm->rdseqs, g->kbm->reads->buffer[r1->rid].binoff * KBM_BIN_SIZE+ (r1->end * KBM_BIN_SIZE), inc, seq->string + seq->size);
  3817. seq->size += inc;
  3818. } else {
  3819. if(!(t1->dir ^ r1->dir)){ r1++; r2++; continue; }
  3820. inc = (r1->beg - r2->end) * KBM_BIN_SIZE;
  3821. if(inc <= 0) break;
  3822. encap_string(seq, inc);
  3823. revseq_basebank(g->kbm->rdseqs, (g->kbm->reads->buffer[r1->rid].seqoff + r2->end) * KBM_BIN_SIZE, inc, seq->string + seq->size);
  3824. seq->size += inc;
  3825. }
  3826. inc = 0;
  3827. found = 1; break;
  3828. }
  3829. }
  3830. if(found == 0){ inc = e->off; break; }
  3831. } while(0);
  3832. if(inc > 0){ inc = 0; while(inc++ < e->off * KBM_BIN_SIZE) add_char_string(seq, 'N'); }
  3833. else if(inc < 0){
  3834. if(seq->size + inc < 0) seq->size = 0;
  3835. else seq->size += inc;
  3836. seq->string[seq->size] = '\0';
  3837. }
  3838. }
  3839. t1 = t2;
  3840. reg = ref_regv(g->regs, ref_nodev(g->nodes, t1->node)->regs.idx);
  3841. inc = (reg->end - reg->beg) * KBM_BIN_SIZE;
  3842. encap_string(seq, inc);
  3843. if(t1->dir ^ reg->dir) revseq_basebank(g->kbm->rdseqs, (g->kbm->reads->buffer[reg->rid].seqoff + reg->beg) * KBM_BIN_SIZE, inc, seq->string + seq->size);
  3844. else seq_basebank(g->kbm->rdseqs, (g->kbm->reads->buffer[reg->rid].seqoff + reg->beg) * KBM_BIN_SIZE, inc, seq->string + seq->size);
  3845. seq->size += inc;
  3846. }
  3847. return seq->size;
  3848. }
  3849. typedef struct {
  3850. u8i rid:26, dir:1, beg:18, end:18, view:1;
  3851. } lay_reg_t;
  3852. define_list(layregv, lay_reg_t);
  3853. typedef struct {
  3854. seqlet_t edge;
  3855. u8i roff:48, rcnt:16;
  3856. } lay_t;
  3857. define_list(layv, lay_t);
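// gen_lay_regs_core_graph: collect reads supporting one seqlet by a two-pointer merge over the
// rid-sorted reg lists of its two nodes; a read is reported when it covers both nodes in an
// orientation consistent with the seqlet, optionally restricted to non-closed regs and/or to
// reads flagged in rdbits.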
  3858. static inline void gen_lay_regs_core_graph(Graph *g, seqlet_t *q, layregv *regs, BitVec *rdbits, int closed){
  3859. node_t *n1, *n2;
  3860. reg_t *r1, *r2;
  3861. u4i rid, beg, end;
  3862. n1 = ref_nodev(g->nodes, q->node1);
  3863. n2 = ref_nodev(g->nodes, q->node2);
  3864. r1 = ref_regv(g->regs, n1->regs.idx);
  3865. r2 = ref_regv(g->regs, n2->regs.idx);
  3866. while(r1->node == q->node1 && r2->node == q->node2){
  3867. if(r1->rid > r2->rid){
  3868. r2 ++;
  3869. } else if(r1->rid < r2->rid){
  3870. r1 ++;
  3871. } else {
  3872. rid = r1->rid;
  3873. if((closed || (r1->closed == 0 && r2->closed == 0)) && (rdbits == NULL || get_bitvec(rdbits, rid))){
  3874. if(r1->beg < r2->beg){
  3875. if(q->dir1 ^ r1->dir){ r1 ++; r2 ++; continue; }
  3876. beg = r1->beg; end = r2->end;
  3877. push_layregv(regs, (lay_reg_t){rid, 0, beg, end, 0});
  3878. } else {
  3879. if(!(q->dir1 ^ r1->dir)){ r1 ++; r2 ++; continue; }
  3880. beg = r2->beg; end = r1->end;
  3881. push_layregv(regs, (lay_reg_t){rid, 1, beg, end, 0});
  3882. }
  3883. }
  3884. r1 ++; r2 ++;
  3885. }
  3886. }
  3887. }
  3888. typedef struct {
  3889. u4i rid, closed;
  3890. u8i nodes[2];
  3891. } readreg_t;
  3892. define_list(readregv, readreg_t);
  3893. static inline u4i densify_seqlet_graph(Graph *g, seqlet_t *q, seqletv *qs, int minoff, readregv *rds, subnodehash *nodes, subedgev *edges, subnodev *heap, FILE *log){
  3894. seqlet_t *q2;
  3895. node_t *nd;
  3896. read_t *rd;
  3897. reg_t *rg;
  3898. readreg_t *rr;
  3899. subnode_t N, *n, *n1, *n2;
  3900. subedge_t *e;
  3901. u8i idx, edx;
  3902. u4i i, k, k1, k2, d, cov, flg, ret;
  3903. int exists, off;
  3904. if(q->len < minoff){
  3905. push_seqletv(qs, *q);
  3906. return 1;
  3907. }
  3908. clear_readregv(rds);
  3909. for(k=0;k<2;k++){
  3910. nd = ref_nodev(g->nodes, k? q->node2 : q->node1);
  3911. d = k? !q->dir2 : q->dir1;
  3912. for(i=0;i<nd->regs.cnt;i++){
  3913. rg = ref_regv(g->regs, nd->regs.idx + i);
  3914. rr = next_ref_readregv(rds);
  3915. rr->rid = rg->rid;
  3916. rr->closed = 0;
  3917. rr->nodes[d ^ rg->dir] = rg->node;
  3918. rr->nodes[!d ^ rg->dir] = MAX_U8;
  3919. }
  3920. }
  3921. sort_array(rds->buffer, rds->size, readreg_t, num_cmpgt(a.rid, b.rid));
  3922. for(i=1;i<rds->size;i++){
  3923. rr = ref_readregv(rds, i - 1);
  3924. if(rr->rid == rds->buffer[i].rid){
  3925. if(rds->buffer[i].nodes[0] != MAX_U8) rr->nodes[0] = rds->buffer[i].nodes[0];
  3926. if(rds->buffer[i].nodes[1] != MAX_U8) rr->nodes[1] = rds->buffer[i].nodes[1];
3927. rds->buffer[i].closed = 1; // a rid occurs at most twice (once per flanking node)
  3928. }
  3929. }
  3930. // prepare nodes in subgraph
  3931. clear_subnodehash(nodes);
  3932. clear_subedgev(edges);
  3933. next_ref_subedgev(edges);
  3934. memset(&N, 0, sizeof(subnode_t));
  3935. N.cov = 1;
  3936. N.bt_nidx = MAX_BT_NIDX;
  3937. cov = 0;
  3938. for(i=0;i<rds->size;i++){
  3939. rr = ref_readregv(rds, i);
  3940. if(rr->closed) continue;
  3941. if(rr->nodes[0] != MAX_U8 && rr->nodes[1] != MAX_U8){
  3942. cov ++;
  3943. }
  3944. rd = ref_readv(g->reads, rr->rid);
  3945. flg = (rr->nodes[0] == MAX_U8);
  3946. idx = rd->regs.idx;
  3947. while(idx){
  3948. rg = ref_regv(g->regs, idx);
  3949. idx = rg->read_link;
  3950. if(flg == 0){
  3951. if(rg->node == rr->nodes[0]){
  3952. flg = 1;
  3953. }
  3954. }
  3955. if(!flg) continue;
  3956. N.node = rg->node;
  3957. n = prepare_subnodehash(nodes, N, &exists);
  3958. if(exists){
  3959. n->cov ++;
  3960. } else {
  3961. *n = N;
  3962. }
  3963. if(rg->node == rr->nodes[1]){
  3964. flg = 0;
  3965. break;
  3966. }
  3967. }
  3968. }
  3969. // mask low cov nodes
  3970. reset_iter_subnodehash(nodes);
  3971. while((n = ref_iter_subnodehash(nodes))){
  3972. if(n->cov < cov) n->closed = 1;
  3973. }
  3974. // build edges
  3975. for(i=0;i<rds->size;i++){
  3976. rr = ref_readregv(rds, i);
  3977. if(rr->closed) continue;
  3978. rd = ref_readv(g->reads, rr->rid);
  3979. flg = (rr->nodes[0] == MAX_U8);
  3980. n1 = NULL;
  3981. k1 = 0;
  3982. off = 0;
  3983. idx = rd->regs.idx;
  3984. while(idx){
  3985. rg = ref_regv(g->regs, idx);
  3986. idx = rg->read_link;
  3987. if(flg == 0){
  3988. if(rg->node == rr->nodes[0]){
  3989. flg = 1;
  3990. }
  3991. }
  3992. if(!flg) continue;
  3993. do {
  3994. N.node = rg->node;
  3995. n2 = get_subnodehash(nodes, N);
  3996. k2 = rg->dir;
  3997. if(n2->closed) break;
  3998. if(n1){
  3999. // link n1 to n2
  4000. edx = n1->edges[k1].idx;
  4001. while(edx){
  4002. e = ref_subedgev(edges, edx);
  4003. if(e->node == n2 && e->dir == k2){
  4004. e->cov ++;
  4005. break;
  4006. }
  4007. edx = e->next;
  4008. }
  4009. if(edx == 0){
  4010. edx = edges->size;
  4011. e = next_ref_subedgev(edges);
  4012. e->node = n2;
  4013. e->dir = k2;
  4014. e->cov = 1;
  4015. e->off = rg->beg - off;
  4016. e->next = n1->edges[k1].idx;
  4017. n1->edges[k1].idx = edx;
  4018. n1->edges[k1].cnt ++;
  4019. }
  4020. // link rev n2 to rev n1
  4021. edx = n2->edges[!k2].idx;
  4022. while(edx){
  4023. e = ref_subedgev(edges, edx);
  4024. if(e->node == n1 && e->dir == !k1){
  4025. e->cov ++;
  4026. break;
  4027. }
  4028. edx = e->next;
  4029. }
  4030. if(edx == 0){
  4031. edx = edges->size;
  4032. e = next_ref_subedgev(edges);
  4033. e->node = n1;
  4034. e->dir = !k1;
  4035. e->cov = 1;
  4036. e->off = rg->beg - off;
  4037. e->next = n2->edges[!k2].idx;
  4038. n2->edges[!k2].idx = edx;
  4039. n2->edges[!k2].cnt ++;
  4040. }
  4041. }
  4042. } while(0);
  4043. n1 = n2;
  4044. k1 = k2;
  4045. off = rg->end;
  4046. if(rg->node == rr->nodes[1]){
  4047. flg = 0;
  4048. break;
  4049. }
  4050. }
  4051. }
4052. // search for the most dense path from q->node1 to q->node2
  4053. N.node = q->node1;
  4054. n = get_subnodehash(nodes, N);
  4055. n->visit = 0;
  4056. n->bt_step = 0;
  4057. n->bt_score = 0;
  4058. n->bt_dir = !q->dir1;
  4059. n->bt_nidx = MAX_BT_NIDX;
4060. // NB: the code below is not guaranteed to find the most dense path, but it is useful in many cases
  4061. clear_subnodev(heap);
  4062. array_heap_push(heap->buffer, heap->size, heap->cap, subnode_t, *n, num_cmp(a.bt_score, b.bt_score));
  4063. while(heap->size){
  4064. N = heap->buffer[0];
  4065. array_heap_remove(heap->buffer, heap->size, heap->cap, subnode_t, 0, num_cmp(a.bt_score, b.bt_score));
  4066. n1 = get_subnodehash(nodes, N);
  4067. if(n1->visit){
  4068. if(n1->node == q->node2){
  4069. if(n1->bt_step < N.bt_step){
  4070. *n1 = N;
  4071. n1->visit = 1;
  4072. }
  4073. }
  4074. continue;
  4075. } else {
  4076. *n1 = N;
  4077. n1->visit = 1;
  4078. }
  4079. k1 = !n1->bt_dir;
  4080. idx = n1->edges[k1].idx;
  4081. while(idx){
  4082. e = ref_subedgev(edges, idx);
  4083. idx = e->next;
  4084. n2 = e->node;
  4085. if(n2->bt_step >= n1->bt_step + 1){ // init n2->bt_step = 0
  4086. continue;
  4087. }
  4088. if(n2->visit && n2->node != q->node2){
  4089. continue;
  4090. }
  4091. N = *n2;
  4092. N.bt_dir = !e->dir;
  4093. N.bt_nidx = offset_subnodehash(nodes, n1);
  4094. N.bt_step = n1->bt_step + 1;
  4095. N.bt_score = n1->bt_score + g->reglen + num_max(0, e->off);
  4096. array_heap_push(heap->buffer, heap->size, heap->cap, subnode_t, N, num_cmp(a.bt_score, b.bt_score));
  4097. }
  4098. }
  4099. N.node = q->node2;
  4100. n = get_subnodehash(nodes, N);
  4101. if(n->visit == 0){
  4102. #if DEBUG
  4103. if(nodes->count == 1000000){
  4104. fprint_subgraph_dot(g, 0, nodes, edges, "1.dot");
  4105. }
  4106. #endif
  4107. push_seqletv(qs, *q);
  4108. return 1;
  4109. }
  4110. if(n->bt_dir == q->dir2){
  4111. push_seqletv(qs, *q);
  4112. return 1;
  4113. }
  4114. n1 = NULL;
  4115. while(1){
  4116. if(n->bt_nidx == MAX_BT_NIDX){
  4117. push_seqletv(qs, *q);
  4118. return 1;
  4119. }
  4120. n2 = ref_subnodehash(nodes, n->bt_nidx);
  4121. n->bt_dir = !n->bt_dir;
  4122. n->bt_nidx = n1? offset_subnodehash(nodes, n1) : MAX_BT_NIDX;
  4123. if(n->node == q->node1){
  4124. break;
  4125. }
  4126. n1 = n;
  4127. n = n2;
  4128. }
  4129. ret = 0;
  4130. while(1){
  4131. n2 = ref_subnodehash(nodes, n->bt_nidx);
  4132. ret ++;
  4133. q2 = next_ref_seqletv(qs);
  4134. q2->node1 = n->node;
  4135. q2->dir1 = n->bt_dir;
  4136. q2->node2 = n2->node;
  4137. q2->dir2 = n2->bt_dir;
  4138. q2->off = 0;
  4139. off = 0;
  4140. edx = n->edges[n->bt_dir].idx;
  4141. while(edx){
  4142. e = ref_subedgev(edges, edx);
  4143. edx = e->next;
  4144. if(e->node->node == q2->node2){
  4145. off = e->off;
  4146. break;
  4147. }
  4148. }
  4149. q2->len = 2 * g->reglen + off;
  4150. if(n2->node == q->node2){
  4151. break;
  4152. }
  4153. n = n2;
  4154. }
  4155. if(ret > 1 && log){
  4156. fprintf(log, "N%llu -> N%llu edge_len=%d densified into %u\n", (u8i)q->node1, (u8i)q->node2, q->len, ret);
  4157. }
  4158. return ret;
  4159. }
  4160. thread_beg_def(mlay);
  4161. Graph *g;
  4162. seqletv *path;
  4163. u8i div_idx;
  4164. u8i pb, pe;
  4165. layv *lays;
  4166. layregv *regs;
  4167. int all_regs;
  4168. FILE *log;
  4169. thread_end_def(mlay);
  4170. thread_beg_func(mlay);
  4171. subnodehash *nodes;
  4172. subnodev *heap;
  4173. subedgev *edges;
  4174. readregv *rds;
  4175. seqletv *lets;
  4176. BitVec *rdbits;
  4177. seqlet_t *_let, *let;
  4178. lay_t *lay;
  4179. u8i i;
  4180. u4i j;
  4181. lets = init_seqletv(4);
  4182. nodes = init_subnodehash(13);
  4183. edges = init_subedgev(32);
  4184. rds = init_readregv(32);
  4185. heap = init_subnodev(32);
  4186. rdbits = init_bitvec(mlay->g->reads->size);
  4187. thread_beg_loop(mlay);
  4188. clear_layv(mlay->lays);
  4189. clear_layregv(mlay->regs);
  4190. for(i=mlay->pb;i<mlay->pe;i++){
  4191. _let = ref_seqletv(mlay->path, i);
  4192. clear_seqletv(lets);
  4193. densify_seqlet_graph(mlay->g, _let, lets, 12, rds, nodes, edges, heap, mlay->log);
  4194. if(lets->size > 1){
  4195. for(j=0;j<rds->size;j++){
  4196. if(rds->buffer[j].closed == 0){
  4197. one_bitvec(rdbits, rds->buffer[j].rid);
  4198. }
  4199. }
  4200. }
  4201. for(j=0;j<lets->size;j++){
  4202. let = ref_seqletv(lets, j);
  4203. lay = next_ref_layv(mlay->lays);
  4204. lay->edge = *let;
  4205. lay->roff = mlay->regs->size;
  4206. gen_lay_regs_core_graph(mlay->g, let, mlay->regs, lets->size > 1? rdbits : NULL, mlay->all_regs);
  4207. lay->rcnt = mlay->regs->size - lay->roff;
  4208. sort_array(mlay->regs->buffer + lay->roff, lay->rcnt, lay_reg_t, num_cmpgt(b.end - b.beg, a.end - a.beg));
  4209. if(lay->rcnt == 0 && mlay->log){
  4210. thread_beg_syn(mlay);
  4211. fprintf(mlay->log, " -- N%llu(%c) -> N%llu(%c) has no read path --\n", (u8i)let->node1, "+-"[let->dir1], (u8i)let->node2, "+-"[let->dir2]); fflush(mlay->log);
  4212. thread_end_syn(mlay);
  4213. }
  4214. }
  4215. if(lets->size > 1){
  4216. for(j=0;j<rds->size;j++){
  4217. if(rds->buffer[j].closed == 0){
  4218. zero_bitvec(rdbits, rds->buffer[j].rid);
  4219. }
  4220. }
  4221. }
  4222. }
  4223. thread_end_loop(mlay);
  4224. free_subnodev(heap);
  4225. free_readregv(rds);
  4226. free_subedgev(edges);
  4227. free_subnodehash(nodes);
  4228. thread_end_func(mlay);
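// print_ctgs_graph writes the <prefix><lay_suffix> layout file. A sketch of the record layout
// produced by the fprintf calls below (all field values are illustrative only):
//
//   >ctg1 nodes=137 len=543210              one header per contig
//   E  0  N12  +  N34  -                    one line per seqlet: offset (bp), the two nodes
//   S  read01  +  2048  5120  ACGT...       one line per supporting read slice: tag, strand,
//                                           start and length in bp, then the raw sequence
//
// Seqlets are densified and laid out in blocks of bsize entries by the mlay worker threads and
// stitched back in order through the divs index.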
  4229. static inline u8i print_ctgs_graph(Graph *g, u8i uid, u8i beg, u8i end, char *prefix, char *lay_suffix, u4i ncpu, FILE *log){
  4230. FILE *o_lay;
  4231. BufferedWriter *bw;
  4232. layv *lays;
  4233. layregv *regs;
  4234. seqletv *path;
  4235. u8v *divs;
  4236. seqlet_t *t;
  4237. lay_t *lay;
  4238. lay_reg_t *reg;
  4239. u8i i, pb, pe, d, div_idx, ret;
  4240. u4i j, c, len, bsize, nrun;
  4241. thread_preprocess(mlay);
  4242. o_lay = open_file_for_write(prefix, lay_suffix, 1);
  4243. bw = zopen_bufferedwriter(o_lay, 1024 * 1024, ncpu, 0);
  4244. lays = init_layv(32);
  4245. regs = init_layregv(32);
  4246. thread_beg_init(mlay, ncpu);
  4247. mlay->g = g;
  4248. mlay->path = NULL;
  4249. mlay->lays = init_layv(32);
  4250. mlay->regs = init_layregv(32);
  4251. mlay->pb = 0;
  4252. mlay->pe = 0;
  4253. mlay->div_idx = MAX_U8;
  4254. mlay->all_regs = 1;
  4255. mlay->log = log;
  4256. thread_end_init(mlay);
  4257. ret = 0;
  4258. bsize = 100;
  4259. divs = init_u8v(1024);
  4260. for(i=beg;i<end;i++){
  4261. path = (seqletv*)get_vplist(g->ctgs, i);
  4262. if(path->size == 0) continue;
  4263. len = path->buffer[path->size - 1].off + path->buffer[path->size - 1].len;
  4264. len = len * KBM_BIN_SIZE;
  4265. //clear_and_inc_layv(lays, path->size);
  4266. clear_layv(lays);
  4267. clear_u8v(divs);
  4268. clear_layregv(regs);
  4269. div_idx = 0;
  4270. pe = 0;
  4271. nrun = 0;
  4272. while(1){
  4273. pb = pe;
  4274. pe = num_min(pb + bsize, path->size);
  4275. thread_wait_one(mlay);
  4276. if(mlay->div_idx != MAX_U8){
  4277. nrun --;
  4278. divs->buffer[mlay->div_idx * 2 + 0] = lays->size;
  4279. divs->buffer[mlay->div_idx * 2 + 1] = lays->size + mlay->lays->size;
  4280. for(j=0;j<mlay->lays->size;j++){
  4281. mlay->lays->buffer[j].roff += regs->size;
  4282. }
  4283. append_layv(lays, mlay->lays);
  4284. clear_layv(mlay->lays);
  4285. append_layregv(regs, mlay->regs);
  4286. clear_layregv(mlay->regs);
  4287. mlay->pb = 0;
  4288. mlay->pe = 0;
  4289. mlay->div_idx = MAX_U8;
  4290. }
  4291. if(pb < pe){
  4292. inc_u8v(divs, 2);
  4293. mlay->div_idx = div_idx ++;
  4294. mlay->path = path;
  4295. mlay->pb = pb;
  4296. mlay->pe = pe;
  4297. thread_wake(mlay);
  4298. nrun ++;
  4299. } else if(nrun == 0){
  4300. break;
  4301. }
  4302. }
  4303. //thread_apply_all(mlay, EXPR(mlay->path = path));
  4304. uid ++;
  4305. ret ++;
  4306. {
  4307. beg_bufferedwriter(bw);
  4308. fprintf(bw->out, ">ctg%llu nodes=%llu len=%u\n", uid, (u8i)path->size + 1, len);
  4309. if(log) fprintf(log, "OUTPUT_CTG\tctg%d -> ctg%d nodes=%llu len=%u\n", (int)i, (int)uid, (u8i)path->size + 1, len);
  4310. for(d=0;d<div_idx;d++){
  4311. pb = divs->buffer[d * 2 + 0];
  4312. pe = divs->buffer[d * 2 + 1];
  4313. flush_bufferedwriter(bw);
  4314. for(j=pb;j<pe;j++){
  4315. lay = ref_layv(lays, j);
  4316. if(lay->rcnt == 0) continue;
  4317. t = &lay->edge;
  4318. fprintf(bw->out, "E\t%d\tN%llu\t%c\tN%llu\t%c\n", (int)t->off * KBM_BIN_SIZE, (u8i)t->node1, "+-"[t->dir1], (u8i)t->node2, "+-"[t->dir2]);
  4319. for(c=0;c<lay->rcnt;c++){
  4320. reg = ref_layregv(regs, lay->roff + c);
  4321. fprintf(bw->out, "%c\t%s\t%c\t%d\t%d\t", "Ss"[reg->view], g->kbm->reads->buffer[reg->rid].tag, "+-"[reg->dir], reg->beg * KBM_BIN_SIZE, (reg->end - reg->beg) * KBM_BIN_SIZE);
  4322. if(reg->dir){
  4323. print_revseq_basebank(g->kbm->rdseqs, (g->kbm->reads->buffer[reg->rid].seqoff + reg->beg) * KBM_BIN_SIZE, (reg->end - reg->beg) * KBM_BIN_SIZE, bw->out);
  4324. } else {
  4325. print_seq_basebank(g->kbm->rdseqs, (g->kbm->reads->buffer[reg->rid].seqoff + reg->beg) * KBM_BIN_SIZE, (reg->end - reg->beg) * KBM_BIN_SIZE, bw->out);
  4326. }
  4327. fprintf(bw->out, "\n");
  4328. }
  4329. }
  4330. }
  4331. end_bufferedwriter(bw);
  4332. }
  4333. }
  4334. close_bufferedwriter(bw);
  4335. fclose(o_lay);
  4336. thread_beg_close(mlay);
  4337. free_layregv(mlay->regs);
  4338. free_layv(mlay->lays);
  4339. thread_end_close(mlay);
  4340. fprintf(KBM_LOGF, "[%s] output %u contigs\n", date(), (u4i)ret);
  4341. free_layv(lays);
  4342. free_layregv(regs);
  4343. free_u8v(divs);
  4344. return uid;
  4345. }
  4346. static inline u4i print_traces_graph(Graph *g, tracev *path, FILE *out){
  4347. String *str;
  4348. trace_t *t1, *t2;
  4349. node_t *n1, *n2;
  4350. reg_t *r1, *r2;
  4351. edge_ref_t *f;
  4352. edge_t *e;
  4353. int offset, fst;
  4354. u8i beg, end;
  4355. u4i i, rid;
  4356. if(path->size < 2) return 0;
  4357. str = init_string(1024);
  4358. offset = 0;
  4359. t1 = ref_tracev(path, 0);
  4360. for(i=1;i<path->size;i++){
  4361. t2 = ref_tracev(path, i);
  4362. {
  4363. n1 = ref_nodev(g->nodes, t1->node);
  4364. n2 = ref_nodev(g->nodes, t2->node);
  4365. f = t1->edges + t1->dir;
  4366. e = ref_edgev(g->edges, f->idx);
  4367. r1 = ref_regv(g->regs, n1->regs.idx);
  4368. r2 = ref_regv(g->regs, n2->regs.idx);
  4369. fprintf(out, "E\t%d\tN%llu\t%c\tN%llu\t%c\n", offset, t1->node, "+-"[t1->dir], t2->node, "+-"[t2->dir]);
  4370. fst = 1;
  4371. while(r1->node == t1->node && r2->node == t2->node){
  4372. if(r1->rid > r2->rid){
  4373. r2 ++;
  4374. } else if(r1->rid < r2->rid){
  4375. r1 ++;
  4376. } else {
  4377. rid = r1->rid;
  4378. if(r1->beg < r2->beg){
  4379. if(t1->dir ^ r1->dir){ r1 ++; r2 ++; continue; }
  4380. beg = r1->beg * KBM_BIN_SIZE; end = r2->end * KBM_BIN_SIZE;
  4381. fprintf(out, "S\t%s\t", g->kbm->reads->buffer[rid].tag);
  4382. fprintf(out, "+\t%d\t%d\t", (int)beg, (int)(end - beg));
  4383. encap_string(str, end - beg);
  4384. fwdseq_basebank(g->kbm->rdseqs, g->kbm->reads->buffer[rid].seqoff * KBM_BIN_SIZE + beg, end - beg, str->string);
  4385. fputs(str->string, out);
  4386. } else {
  4387. if(!(t1->dir ^ r1->dir)){ r1 ++; r2 ++; continue; }
  4388. beg = r2->beg * KBM_BIN_SIZE; end = r1->end * KBM_BIN_SIZE;
  4389. fprintf(out, "S\t%s\t", g->kbm->reads->buffer[rid].tag);
  4390. fprintf(out, "-\t%d\t%d\t", (int)beg, (int)(end - beg));
  4391. encap_string(str, end - beg);
  4392. revseq_basebank(g->kbm->rdseqs, g->kbm->reads->buffer[rid].seqoff * KBM_BIN_SIZE + beg, end - beg, str->string);
  4393. fputs(str->string, out);
  4394. }
  4395. fputc('\n', out);
  4396. if(fst){
  4397. offset += end - beg; fst = 0;
  4398. }
  4399. r1 ++; r2 ++;
  4400. }
  4401. }
  4402. }
  4403. t1 = t2;
  4404. }
  4405. free_string(str);
  4406. return offset;
  4407. }
  4408. static inline u8i print_utgs_graph(Graph *g, char *prefix, char *utg, char *lay){
  4409. FILE *o_seq, *o_lay, *files[4];
  4410. tracev *path;
  4411. String *seq;
  4412. char *str;
  4413. u8i i, uid, cnt, tot;
  4414. char ch;
  4415. int beg, end;
  4416. files[0] = open_file_for_write(prefix, utg, 1);
  4417. str = catstr(2, utg, ".filtered");
  4418. files[1] = open_file_for_write(prefix, str, 1);
  4419. free(str);
  4420. files[2] = open_file_for_write(prefix, lay, 1);
  4421. str = catstr(2, lay, ".filtered");
  4422. files[3] = open_file_for_write(prefix, str, 1);
  4423. free(str);
  4424. seq = init_string(1024);
  4425. tot = cnt = 0;
  4426. for(i=uid=0;i<g->utgs->size;i++){
  4427. path = (tracev*)get_vplist(g->utgs, i);
  4428. if(gen_seq_traces_graph(g, path, seq) < g->min_ctg_len || (int)path->size < g->min_ctg_nds){
  4429. o_seq = files[1];
  4430. o_lay = files[3];
  4431. } else {
  4432. o_seq = files[0];
  4433. o_lay = files[2];
  4434. cnt ++;
  4435. tot += seq->size;
  4436. }
  4437. uid ++;
  4438. fprintf(o_seq, ">utg%llu len=%d nodes=%llu beg=N%llu end=N%llu\n", (unsigned long long)uid, seq->size, (unsigned long long)path->size,
  4439. path->buffer[0].node, path->buffer[path->size - 1].node);
  4440. for(beg=0;beg<seq->size;beg+=100){
  4441. end = beg + 100;
  4442. if(end > seq->size) end = seq->size;
  4443. ch = seq->string[end];
  4444. seq->string[end] = '\0';
  4445. fprintf(o_seq, "%s\n", seq->string + beg);
  4446. seq->string[end] = ch;
  4447. }
  4448. fprintf(o_lay, ">utg%llu len=%d nodes=%llu\n", (unsigned long long)uid, seq->size, (unsigned long long)path->size);
  4449. print_traces_graph(g, path, o_lay);
  4450. }
  4451. free_string(seq);
  4452. fprintf(KBM_LOGF, "[%s] %llu unitigs (>= %d bp), total %llu bp\n", date(), (unsigned long long)cnt, g->min_ctg_len, (unsigned long long)tot);
  4453. fclose(files[0]);
  4454. fclose(files[1]);
  4455. fclose(files[2]);
  4456. fclose(files[3]);
  4457. return uid;
  4458. }
  4459. /*
4460. * For debugging in GDB:
4461. * set local_dot_node and local_dot_step, then call print_local_dot_graph()
  4462. */
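// Minimal GDB usage sketch (the node id is illustrative; mirrors the in-code call
// print_local_dot_graph(g, "1.dot", NULL) used above):
//   (gdb) set var local_dot_node = 12345
//   (gdb) set var local_dot_step = 20
//   (gdb) call print_local_dot_graph(g, "local.dot", (char *)0)
// This writes the sub-graph within local_dot_step steps of N12345 to the named dot file.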
  4463. static u8i local_dot_node = 1;
  4464. static u4i local_dot_step = 10;
  4465. static inline void get_subgraph_nodes_graph(Graph *g, ptrrefhash *nodes, u8v *stack, uint16_t max_step, u4i closed_val){
  4466. node_t *n;
  4467. edge_ref_t *f;
  4468. edge_t *e;
  4469. ptr_ref_t *p, *pp;
  4470. u8i nid, idx;
  4471. u4i k, cnt;
  4472. int exists;
  4473. clear_u8v(stack);
  4474. reset_iter_ptrrefhash(nodes);
  4475. while((p = ref_iter_ptrrefhash(nodes))){
  4476. p->cnt = 0;
  4477. push_u8v(stack, p->idx);
  4478. }
  4479. while(stack->size){
  4480. p = get_ptrrefhash(nodes, (ptr_ref_t){stack->buffer[--stack->size], 0});
  4481. if(p->cnt >> 16) continue;
  4482. if((p->cnt & 0xFFFF) >= max_step) continue;
  4483. n = ref_nodev(g->nodes, p->idx);
  4484. cnt = p->cnt;
  4485. p->cnt |= 1U << 16;
  4486. for(k=0;k<2;k++){
  4487. idx = n->edges[k].idx;
  4488. while(idx){
  4489. f = ref_edgerefv(g->erefs, idx);
  4490. idx = f->next;
  4491. e = ref_edgev(g->edges, f->idx);
  4492. if(e->closed >= closed_val) continue;
  4493. nid = f->flg? e->node1 : e->node2;
  4494. pp = prepare_ptrrefhash(nodes, (ptr_ref_t){nid, 0}, &exists);
  4495. if(exists) continue;
  4496. pp->idx = nid; pp->cnt = cnt + 1;
  4497. push_u8v(stack, nid);
  4498. }
  4499. }
  4500. }
  4501. }
  4502. static inline u8i print_local_dot_graph(Graph *g, char *prefix, char *suffix){
  4503. FILE *out;
  4504. ptrrefhash *hash;
  4505. u8v *stack;
  4506. ptr_ref_t *p;
  4507. node_t *n;
  4508. reg_t *r, *rr;
  4509. edge_ref_t *f;
  4510. edge_t *e;
  4511. unsigned long long i, idx;
  4512. u4i j, k, max;
  4513. out = open_file_for_write(prefix, suffix, 1);
  4514. hash = init_ptrrefhash(1023);
  4515. stack = init_u8v(32);
  4516. put_ptrrefhash(hash, (ptr_ref_t){local_dot_node, 0});
  4517. get_subgraph_nodes_graph(g, hash, stack, local_dot_step, 1);
  4518. fprintf(out, "digraph {\n");
  4519. fprintf(out, "node [shape=record]\n");
  4520. reset_iter_ptrrefhash(hash);
  4521. while((p = ref_iter_ptrrefhash(hash))){
  4522. i = p->idx;
  4523. n = ref_nodev(g->nodes, i);
  4524. r = NULL; max = 0;
  4525. for(j=0;j<n->regs.cnt;j++){
  4526. rr = ref_regv(g->regs, n->regs.idx + j);
  4527. if(g->reads->buffer[rr->rid].regs.cnt > max){
  4528. r = rr;
  4529. max = g->reads->buffer[rr->rid].regs.cnt;
  4530. }
  4531. }
  4532. if(r == NULL) continue;
  4533. fprintf(out, "N%llu [label=\"{N%llu %d | %s | %c_%d_%d}\"]\n", i, i, n->regs.cnt, g->kbm->reads->buffer[r->rid].tag, "FR"[r->dir], r->beg, r->end - r->beg);
  4534. }
  4535. reset_iter_ptrrefhash(hash);
  4536. while((p = ref_iter_ptrrefhash(hash))){
  4537. i = p->idx;
  4538. n = ref_nodev(g->nodes, i);
  4539. for(k=0;k<2;k++){
  4540. idx = n->edges[k].idx;
  4541. while(idx){
  4542. f = ref_edgerefv(g->erefs, idx);
  4543. idx = f->next;
  4544. e = g->edges->buffer + f->idx;
  4545. if(e->closed) continue;
  4546. if(f->flg){
  4547. //if(!exists_ptrrefhash(hash, (ptr_ref_t){e->node1, 0})) continue;
  4548. fprintf(out, "N%llu -> N%llu [label=\"%c%c:%d:%d\" color=%s]\n", i, (unsigned long long)e->node1, "+-"[k], "+-"[!e->dir1], e->cov, e->off, colors[k][!e->dir1]);
  4549. } else {
  4550. //if(!exists_ptrrefhash(hash, (ptr_ref_t){e->node2, 0})) continue;
  4551. fprintf(out, "N%llu -> N%llu [label=\"%c%c:%d:%d\" color=%s]\n", i, (unsigned long long)e->node2, "+-"[k], "+-"[e->dir2], e->cov, e->off, colors[k][e->dir2]);
  4552. }
  4553. }
  4554. }
  4555. }
  4556. fprintf(out, "}\n");
  4557. fclose(out);
  4558. return 0;
  4559. }
static inline u8i print_dot_full_graph(Graph *g, FILE *out){
	BufferedWriter *bw;
	node_t *n;
	reg_t *r, *rr;
	edge_ref_t *f;
	edge_t *e;
	unsigned long long i, idx;
	u4i j, k, max;
	bw = zopen_bufferedwriter(out, 1024 * 1024, 8, 0);
	beg_bufferedwriter(bw);
	fprintf(bw->out, "digraph {\n");
	fprintf(bw->out, "node [shape=record]\n");
	for(i=0;i<g->nodes->size;i++){
		if((i % 1000) == 0) flush_bufferedwriter(bw);
		n = ref_nodev(g->nodes, i);
		//if(n->closed) continue;
		r = NULL; max = 0;
		for(j=0;j<n->regs.cnt;j++){
			rr = ref_regv(g->regs, n->regs.idx + j);
			if(g->reads->buffer[rr->rid].regs.cnt > max){
				r = rr;
				max = g->reads->buffer[rr->rid].regs.cnt;
			}
		}
		if(r == NULL) continue;
		fprintf(bw->out, "N%llu [label=\"{N%llu %d | %s | %c_%d_%d}\"]\n", i, i, n->regs.cnt, g->kbm->reads->buffer[r->rid].tag, "FR"[r->dir], r->beg, r->end - r->beg);
	}
	for(i=0;i<g->nodes->size;i++){
		if((i % 1000) == 0) flush_bufferedwriter(bw);
		n = ref_nodev(g->nodes, i);
		//if(n->closed) continue;
		for(k=0;k<2;k++){
			idx = n->edges[k].idx;
			while(idx){
				f = ref_edgerefv(g->erefs, idx);
				idx = f->next;
				e = g->edges->buffer + f->idx;
				//if(e->closed) continue;
				if(f->flg){
					fprintf(bw->out, "N%llu -> N%llu [label=\"%c%c:%d:%d\" color=%s%s]\n", i, (unsigned long long)e->node1, "+-"[k], "+-"[!e->dir1], e->cov, e->off, colors[k][!e->dir1], e->closed? " style=dashed" : "");
				} else {
					fprintf(bw->out, "N%llu -> N%llu [label=\"%c%c:%d:%d\" color=%s%s]\n", i, (unsigned long long)e->node2, "+-"[k], "+-"[e->dir2], e->cov, e->off, colors[k][e->dir2], e->closed? " style=dashed" : "");
				}
			}
		}
	}
	fprintf(bw->out, "}\n");
	end_bufferedwriter(bw);
	close_bufferedwriter(bw);
	return 0;
}
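// Dump only the live part of the graph in DOT format: closed nodes and closed
// edges are skipped. Node and edge labels follow the same layout as
// print_dot_full_graph().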
static inline u8i print_dot_graph(Graph *g, FILE *out){
	BufferedWriter *bw;
	node_t *n;
	reg_t *r, *rr;
	edge_ref_t *f;
	edge_t *e;
	unsigned long long i, idx;
	u4i j, k, max;
	bw = zopen_bufferedwriter(out, 1024 * 1024, 8, 0);
	beg_bufferedwriter(bw);
	fprintf(bw->out, "digraph {\n");
	fprintf(bw->out, "node [shape=record]\n");
	for(i=0;i<g->nodes->size;i++){
		if((i % 1000) == 0) flush_bufferedwriter(bw);
		n = ref_nodev(g->nodes, i);
		if(n->closed) continue;
		r = NULL; max = 0;
		for(j=0;j<n->regs.cnt;j++){
			rr = ref_regv(g->regs, n->regs.idx + j);
			if(g->reads->buffer[rr->rid].regs.cnt > max){
				r = rr;
				max = g->reads->buffer[rr->rid].regs.cnt;
			}
		}
		if(r == NULL) continue;
		fprintf(bw->out, "N%llu [label=\"{N%llu %d | %s | %c_%d_%d}\"]\n", i, i, n->regs.cnt, g->kbm->reads->buffer[r->rid].tag, "FR"[r->dir], r->beg, r->end - r->beg);
	}
	for(i=0;i<g->nodes->size;i++){
		if((i % 1000) == 0) flush_bufferedwriter(bw);
		n = ref_nodev(g->nodes, i);
		if(n->closed) continue;
		for(k=0;k<2;k++){
			idx = n->edges[k].idx;
			while(idx){
				f = ref_edgerefv(g->erefs, idx);
				idx = f->next;
				e = g->edges->buffer + f->idx;
				if(e->closed) continue;
				if(f->flg){
					fprintf(bw->out, "N%llu -> N%llu [label=\"%c%c:%d:%d\" color=%s]\n", i, (unsigned long long)e->node1, "+-"[k], "+-"[!e->dir1], e->cov, e->off, colors[k][!e->dir1]);
				} else {
					fprintf(bw->out, "N%llu -> N%llu [label=\"%c%c:%d:%d\" color=%s]\n", i, (unsigned long long)e->node2, "+-"[k], "+-"[e->dir2], e->cov, e->off, colors[k][e->dir2]);
				}
			}
		}
	}
	fprintf(bw->out, "}\n");
	end_bufferedwriter(bw);
	close_bufferedwriter(bw);
	return 0;
}
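// Tab-separated listing of every node: "N<idx>[*]\t<reg_count>" followed by one
// "<read_tag>_<F|R>_<beg>_<len>" column per reg; '*' marks closed nodes and regs.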
static inline u8i print_nodes_graph(Graph *g, FILE *out){
	node_t *n;
	reg_t *r;
	unsigned long long i;
	u4i j;
	for(i=0;i<g->nodes->size;i++){
		n = ref_nodev(g->nodes, i);
		if(n->closed){
			fprintf(out, "N%llu*\t%u", i, n->regs.cnt);
		} else {
			fprintf(out, "N%llu\t%u", i, n->regs.cnt);
		}
		for(j=0;j<n->regs.cnt;j++){
			r = ref_regv(g->regs, n->regs.idx + j);
			fprintf(out, "\t%s_%c_%d_%d", g->kbm->reads->buffer[r->rid].tag, "FR"[r->dir], r->beg, r->end - r->beg);
			if(r->closed) fputc('*', out);
		}
		fprintf(out, "\n");
	}
	return i;
}
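// Tab-separated listing of every read: tag, length in bases, reg count, then the
// chain of regs along the read as "N<idx>:<F|R>_<beg>_<len>"; '*' marks a closed
// reg, '!' a reg whose node is closed.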
static inline u8i print_reads_graph(Graph *g, FILE *out){
	read_t *rd;
	reg_t *r;
	u8i idx;
	u4i i;
	for(i=0;i<g->kbm->reads->size;i++){
		rd = ref_readv(g->reads, i);
		fprintf(out, "%s\t%d\t%u", g->kbm->reads->buffer[i].tag, g->kbm->reads->buffer[i].bincnt * KBM_BIN_SIZE, rd->regs.cnt);
		idx = rd->regs.idx;
		while(idx){
			r = ref_regv(g->regs, idx);
			fprintf(out, "\tN%llu%s:%c_%d_%d", (unsigned long long)r->node, (r->closed? "*" : (g->nodes->buffer[r->node].closed? "!" : "")), "FR"[r->dir], r->beg, r->end - r->beg);
			idx = r->read_link;
		}
		fprintf(out, "\n");
	}
	return i;
}
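// Listing of every open fragment: index, length fields, trace count, and core
// trace span, then one column per trace node. Nodes outside the [tx, ty) core are
// printed in lowercase ("n...") together with their rep_idx assignment when set.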
static inline u8i print_frgs_nodes_graph(Graph *g, FILE *out){
	frg_t *frg;
	trace_t *t;
	node_t *n;
	u4i i, j;
	for(i=0;i<g->frgs->size;i++){
		frg = ref_frgv(g->frgs, i);
		if(frg->closed) continue;
		fprintf(out, "F%u\t%d\t%d\t%u\t%u", i, frg->length, frg->len, frg->tcnt, frg->ty - frg->tx);
		for(j=0;j<frg->tcnt;j++){
			t = ref_tracev(g->traces, frg->toff + j);
			if(j < frg->tx || j >= frg->ty){
				n = ref_nodev(g->nodes, t->node);
				if(n->rep_idx == MAX_REP_IDX){
					fprintf(out, "\tn%llu:%c:%d:::%d", t->node, "+-"[t->dir], t->off, t->cov);
				} else {
					fprintf(out, "\tn%llu:%c:%d:F%llu:%c:%d", t->node, "+-"[t->dir], t->off, (u8i)n->rep_idx, "+-"[n->rep_dir], t->cov);
				}
			} else {
				fprintf(out, "\tN%llu:%c:%d", t->node, "+-"[t->dir], t->off);
			}
		}
		fprintf(out, "\n");
	}
	return i;
}
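// DOT dump of the fragment graph: each open fragment becomes a record showing its
// first and last core trace node, and open links between fragments become colored
// edges, drawn dashed when the link is weak.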
static inline u8i print_frgs_dot_graph(Graph *g, FILE *_out){
	BufferedWriter *bw;
	frg_t *frg;
	trace_t *t1, *t2;
	node_t *n1, *n2;
	reg_t *r1, *r2, *rr;
	edge_ref_t *f;
	lnk_t *e;
	unsigned long long i, idx;
	u4i j, k, max;
	bw = zopen_bufferedwriter(_out, 1024 * 1024, 8, 0);
	beg_bufferedwriter(bw);
	fprintf(bw->out, "digraph {\n");
	fprintf(bw->out, "node [shape=record]\n");
	for(i=0;i<g->frgs->size;i++){
		if((i % 1000) == 0) flush_bufferedwriter(bw);
		frg = ref_frgv(g->frgs, i);
		if(frg->closed){
			continue;
		}
		//if(frg->ty - frg->tx < (u4i)g->min_ctg_nds) continue;
		t1 = ref_tracev(g->traces, frg->toff + frg->tx);
		t2 = ref_tracev(g->traces, frg->toff + frg->ty - 1);
		n1 = ref_nodev(g->nodes, t1->node);
		n2 = ref_nodev(g->nodes, t2->node);
		r1 = NULL; max = 0;
		for(j=0;j<n1->regs.cnt;j++){
			rr = ref_regv(g->regs, n1->regs.idx + j);
			if(g->reads->buffer[rr->rid].regs.cnt > max){
				r1 = rr;
				max = g->reads->buffer[rr->rid].regs.cnt;
			}
		}
		if(r1 == NULL){
			continue;
		}
		r2 = NULL; max = 0;
		for(j=0;j<n2->regs.cnt;j++){
			rr = ref_regv(g->regs, n2->regs.idx + j);
			if(g->reads->buffer[rr->rid].regs.cnt > max){
				r2 = rr;
				max = g->reads->buffer[rr->rid].regs.cnt;
			}
		}
		if(r2 == NULL){
			continue;
		}
		fprintf(bw->out, "F%llu [label=\"{F%llu %u %u/%u | { {N%llu:%c | %s | %c_%d_%d} | {N%llu:%c | %s | %c_%d_%d}}}\"]\n", i, i, frg->ty - frg->tx, frg->len, frg->length,
			t1->node, "+-"[t1->dir], g->kbm->reads->buffer[r1->rid].tag, "FR"[r1->dir], r1->beg, r1->end - r1->beg,
			t2->node, "+-"[t2->dir], g->kbm->reads->buffer[r2->rid].tag, "FR"[r2->dir], r2->beg, r2->end - r2->beg);
	}
	for(i=0;i<g->frgs->size;i++){
		if((i % 1000) == 0) flush_bufferedwriter(bw);
		frg = ref_frgv(g->frgs, i);
		if(frg->closed) continue;
		//if(frg->ty - frg->tx < (u4i)g->min_ctg_nds) continue;
		for(k=0;k<2;k++){
			idx = frg->lnks[k].idx;
			while(idx){
				f = ref_edgerefv(g->lrefs, idx);
				idx = f->next;
				e = g->lnks->buffer + f->idx;
				if(e->closed) continue;
				if(f->flg){
					//if(g->frgs->buffer[e->frg1].ty - g->frgs->buffer[e->frg1].tx < (u4i)g->min_ctg_nds) continue;
					fprintf(bw->out, "F%llu -> F%llu [label=\"%c%c:%d:%d\" color=%s style=%s]\n", i, (u8i)e->frg1, "+-"[k], "+-"[!e->dir1], e->cov, e->off, colors[k][!e->dir1], e->weak? "dashed" : "solid");
				} else {
					//if(g->frgs->buffer[e->frg2].ty - g->frgs->buffer[e->frg2].tx < (u4i)g->min_ctg_nds) continue;
					fprintf(bw->out, "F%llu -> F%llu [label=\"%c%c:%d:%d\" color=%s style=%s]\n", i, (u8i)e->frg2, "+-"[k], "+-"[e->dir2], e->cov, e->off, colors[k][e->dir2], e->weak? "dashed" : "solid");
				}
			}
		}
	}
	fprintf(bw->out, "}\n");
	end_bufferedwriter(bw);
	close_bufferedwriter(bw);
	return 0;
}
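// graph_print_func is the common signature of the print_*_graph() dumpers above;
// generic_print_graph() opens "<prefix><suffix>" for writing, runs one of them on
// that file, and logs start/finish to KBM_LOGF.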
typedef u8i (*graph_print_func)(Graph *g, FILE *out);
static inline u8i generic_print_graph(Graph *g, graph_print_func func, char *prefix, char *suffix){
	FILE *out;
	char *file;
	u8i cnt;
	{
		fprintf(KBM_LOGF, "[%s] output \"%s%s\".", date(), prefix, suffix? suffix : ""); fflush(KBM_LOGF);
		file = malloc(strlen(prefix) + (suffix? strlen(suffix) : 0) + 2);
		sprintf(file, "%s%s", prefix, suffix? suffix : "");
		out = fopen(file, "w");
		cnt = func(g, out);
		fclose(out);
		free(file);
		fprintf(KBM_LOGF, " Done.\n"); fflush(KBM_LOGF);
	}
	return cnt;
}
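/*
 * Usage sketch (illustrative only, not part of the original source). It assumes
 * a Graph *g that has already been built by the surrounding wtdbg code; the file
 * names below are placeholders.
 *
 *   generic_print_graph(g, print_nodes_graph, "dbg", ".nodes"); // node table
 *   generic_print_graph(g, print_reads_graph, "dbg", ".reads"); // per-read regs
 *   FILE *dot = fopen("dbg.dot", "w");
 *   print_dot_graph(g, dot); // live nodes and edges only, in DOT format
 *   fclose(dot);
 */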
#endif