generate_read_to_ctg_map.py 2.5 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697
  1. import argparse
  2. import logging
  3. import sys
  4. from ..util import io
  5. from ..fc_asm_graph import AsmGraph
  6. def run(rawread_id_fn, pread_id_fn, sg_edges_list_fn, utg_data_fn, ctg_paths_fn, output_fn):
  7. read_to_contig_map = output_fn
  8. pread_did_to_rid = open(pread_id_fn).read().split('\n')
  9. rid_to_oid = open(rawread_id_fn).read().split('\n')
  10. asm_G = AsmGraph(sg_edges_list_fn,
  11. utg_data_fn,
  12. ctg_paths_fn)
  13. pread_to_contigs = {}
  14. with open(read_to_contig_map, 'w') as f:
  15. for ctg in asm_G.ctg_data:
  16. if ctg[-1] == 'R':
  17. continue
  18. ctg_g = asm_G.get_sg_for_ctg(ctg)
  19. for n in ctg_g.nodes():
  20. pid = int(n.split(':')[0])
  21. rid = pread_did_to_rid[pid].split('/')[1]
  22. rid = int(int(rid) // 10)
  23. oid = rid_to_oid[rid]
  24. k = (pid, rid, oid)
  25. pread_to_contigs.setdefault(k, set())
  26. pread_to_contigs[k].add(ctg)
  27. for k in pread_to_contigs:
  28. pid, rid, oid = k
  29. for ctg in list(pread_to_contigs[k]):
  30. print('%09d %09d %s %s' % (pid, rid, oid, ctg), file=f)
  31. class HelpF(argparse.RawTextHelpFormatter, argparse.ArgumentDefaultsHelpFormatter):
  32. pass
  33. def parse_args(argv):
  34. description = 'Generate read_to_ctg_map from rawread_id file and pread_id file'
  35. epilog = ''
  36. parser = argparse.ArgumentParser(
  37. description=description,
  38. epilog=epilog,
  39. formatter_class=HelpF,
  40. )
  41. parser.add_argument(
  42. '--rawread-id-fn',
  43. required=True,
  44. help='From TASK_DUMP_RAWREAD_IDS_SCRIPT',
  45. )
  46. parser.add_argument(
  47. '--pread-id-fn',
  48. required=True,
  49. help='From TASK_DUMP_PREAD_IDS_SCRIPT',
  50. )
  51. parser.add_argument(
  52. '--sg-edges-list-fn',
  53. required=True,
  54. help='From Falcon stage 2-asm-falcon',
  55. )
  56. parser.add_argument(
  57. '--utg-data-fn',
  58. required=True,
  59. help='From Falcon stage 2-asm-falcon',
  60. )
  61. parser.add_argument(
  62. '--ctg-paths-fn',
  63. required=True,
  64. help='From Falcon stage 2-asm-falcon',
  65. )
  66. parser.add_argument(
  67. '--output-fn',
  68. required=True,
  69. help='read-to-ctg-map',
  70. )
  71. args = parser.parse_args(argv[1:])
  72. return args
  73. def main(argv=sys.argv):
  74. args = parse_args(argv)
  75. logging.basicConfig(level=logging.INFO)
  76. run(**vars(args))
  77. if __name__ == '__main__': # pragma: no cover
  78. main()