# get_read_ctg_map.py

  1. from .. import pype_tasks
  2. # pylint: disable=no-name-in-module, import-error, fixme, line-too-long
  3. from pypeflow.simple_pwatcher_bridge import (PypeProcWatcherWorkflow, MyFakePypeThreadTaskBase,
  4. makePypeLocalFile, fn, PypeTask)
  5. PypeThreadTaskBase = MyFakePypeThreadTaskBase
  6. import argparse
  7. import glob
  8. import logging
  9. import sys
  10. import subprocess as sp
  11. import shlex
  12. import os
  13. LOG = logging.getLogger(__name__)
  14. def make_dirs(d):
  15. if not os.path.isdir(d):
  16. LOG.debug('mkdirs {}'.format(d))
  17. os.makedirs(d)
  18. def get_read_ctg_map(rawread_dir, pread_dir, asm_dir):
  19. read_map_dir = os.path.abspath(os.path.join(asm_dir, 'read_maps'))
  20. make_dirs(read_map_dir)
  21. wf = PypeProcWatcherWorkflow(
  22. max_jobs=12,
  23. )
  24. """
  25. job_type=config['job_type'],
  26. job_queue=config['job_queue'],
  27. sge_option=config.get('sge_option', ''),
  28. watcher_type=config['pwatcher_type'],
  29. watcher_directory=config['pwatcher_directory'])
  30. """
  31. rawread_db = makePypeLocalFile(os.path.join(rawread_dir, 'raw_reads.db'))
  32. rawread_id_file = makePypeLocalFile(os.path.join(
  33. read_map_dir, 'dump_rawread_ids', 'rawread_ids'))
  34. task = PypeTask(
  35. inputs={'rawread_db': rawread_db},
  36. outputs={'rawread_id_file': rawread_id_file},
  37. )
  38. wf.addTask(task(pype_tasks.task_dump_rawread_ids))
  39. pread_db = makePypeLocalFile(os.path.join(pread_dir, 'preads.db'))
  40. pread_id_file = makePypeLocalFile(os.path.join(
  41. read_map_dir, 'dump_pread_ids', 'pread_ids'))
  42. task = PypeTask(
  43. inputs={'pread_db': pread_db},
  44. outputs={'pread_id_file': pread_id_file},
  45. )
  46. wf.addTask(task(pype_tasks.task_dump_pread_ids))
  47. wf.refreshTargets() # block
  48. sg_edges_list = makePypeLocalFile(os.path.join(asm_dir, 'sg_edges_list'))
  49. utg_data = makePypeLocalFile(os.path.join(asm_dir, 'utg_data'))
  50. ctg_paths = makePypeLocalFile(os.path.join(asm_dir, 'ctg_paths'))
  51. inputs = {'rawread_id_file': rawread_id_file,
  52. 'pread_id_file': pread_id_file,
  53. 'sg_edges_list': sg_edges_list,
  54. 'utg_data': utg_data,
  55. 'ctg_paths': ctg_paths}
  56. read_to_contig_map = makePypeLocalFile(os.path.join(
  57. read_map_dir, 'get_ctg_read_map', 'read_to_contig_map'))
  58. task = PypeTask(
  59. inputs=inputs,
  60. outputs={'read_to_contig_map': read_to_contig_map},
  61. )
  62. wf.addTask(task(pype_tasks.task_generate_read_to_ctg_map))
  63. wf.refreshTargets() # block
  64. def parse_args(argv):
  65. parser = argparse.ArgumentParser(description='generate `2-asm-falcon/read_maps/read_to_contig_map` that contains the \
  66. information from the chain of mapping: (contig id) -> (internal p-read id) -> (internal raw-read id) -> (original read id)',
  67. formatter_class=argparse.ArgumentDefaultsHelpFormatter)
  68. parser.add_argument('--basedir', type=str, default='./',
  69. help='the base working dir of a FALCON assembly')
  70. args = parser.parse_args(argv[1:])
  71. return args
  72. def main(argv=sys.argv):
  73. logging.basicConfig()
  74. args = parse_args(argv)
  75. basedir = args.basedir
  76. rawread_dir = os.path.abspath(os.path.join(basedir, '0-rawreads'))
  77. pread_dir = os.path.abspath(os.path.join(basedir, '1-preads_ovl'))
  78. asm_dir = os.path.abspath(os.path.join(basedir, '2-asm-falcon'))
  79. get_read_ctg_map(rawread_dir=rawread_dir,
  80. pread_dir=pread_dir, asm_dir=asm_dir)
  81. if __name__ == '__main__':
  82. main()