ctg_link_analysis.py 2.9 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485
  1. from falcon_kit import fc_asm_graph
  2. import sys
  3. def main(argv=sys.argv):
  4. AsmGraph = fc_asm_graph.AsmGraph
  5. G_asm = AsmGraph("sg_edges_list", "utg_data", "ctg_paths")
  6. sg_edges = G_asm.sg_edges
  7. node_to_ctg = G_asm.node_to_ctg
  8. node_to_utg = G_asm.node_to_utg
  9. ctg_data = G_asm.ctg_data
  10. utg_data = G_asm.utg_data
  11. ctg_pair_links = {}
  12. for (v, w) in list(sg_edges.keys()):
  13. if v in node_to_ctg and w in node_to_ctg:
  14. for ctg1 in list(node_to_ctg[v]):
  15. for ctg2 in list(node_to_ctg[w]):
  16. if ctg1 == ctg2:
  17. continue
  18. ctg_pair_links.setdefault((ctg1, ctg2), set())
  19. ctg_pair_links[(ctg1, ctg2)].add((v, w))
  20. utg_pair_links = {}
  21. for (v, w) in list(sg_edges.keys()):
  22. if v in node_to_utg and w in node_to_utg:
  23. for u1 in list(node_to_utg[v]):
  24. for u2 in list(node_to_utg[w]):
  25. if u1 == u2:
  26. continue
  27. utg_pair_links.setdefault((u1, u2), set())
  28. utg_pair_links[(u1, u2)].add((v, w))
  29. for ctg1, ctg2 in ctg_pair_links:
  30. links = ctg_pair_links[(ctg1, ctg2)]
  31. count = len(links)
  32. if count > 0:
  33. path1 = ctg_data[ctg1][-1][-5:]
  34. path2 = ctg_data[ctg2][-1][:5]
  35. utg1 = []
  36. utg2 = []
  37. for s1, v1, t1 in path1:
  38. u1 = (s1, t1, v1)
  39. type_, length, score, path_or_edges = utg_data[u1]
  40. if type_ == "compound":
  41. for u in path_or_edges.split("|"):
  42. ss, vv, tt = u.split("~")
  43. utg1.append((ss, tt, vv))
  44. else:
  45. utg1.append(u1)
  46. for s2, v2, t2 in path2:
  47. u2 = (s2, t2, v2)
  48. type_, length, score, path_or_edges = utg_data[u2]
  49. if type_ == "compound":
  50. for u in path_or_edges.split("|"):
  51. ss, vv, tt = u.split("~")
  52. utg2.append((ss, tt, vv))
  53. else:
  54. utg2.append(u2)
  55. # print path1
  56. # print path2
  57. # print len(utg1), len(utg2)
  58. for u1 in utg1:
  59. for u2 in utg2:
  60. u1 = tuple(u1)
  61. u2 = tuple(u2)
  62. c = utg_pair_links.get((u1, u2), set())
  63. if len(c) == 0:
  64. continue
  65. s1, t1, v1 = u1
  66. s2, t2, v2 = u2
  67. len_1 = ctg_data[ctg1][3]
  68. len_2 = ctg_data[ctg2][3]
  69. print('{} {} {:7d}\t{:7d}\t{}\t{}\t{}\t{} {} {}'.format(
  70. ctg1, ctg2, len_1, len_2, len(utg1), len(utg2), len(links), "~".join((s1, v1, t1)), "~".join((s2, v2, t2)), len(c)))
  71. if __name__ == "__main__":
  72. main(sys.argv)