# task_report_pre_assembly.py (2.7 KB)
  1. import argparse
  2. import logging
  3. import os
  4. import sys
  5. from .. import io
  6. from .. import bash
  7. from .. import run_support
  8. LOG = logging.getLogger()
  9. def script_run_report_pre_assembly(i_raw_reads_db_fn, i_preads_fofn_fn, genome_length, length_cutoff, o_json_fn):
  10. params = dict()
  11. params.update(locals())
  12. script = """\
  13. python3 -m falcon_kit.mains.report_pre_assembly --genome-length {genome_length} --length-cutoff {length_cutoff} --db {i_raw_reads_db_fn} --preads-fofn {i_preads_fofn_fn} --out {o_json_fn}
  14. """
  15. return script.format(**params)
  16. def run(config_fn, length_cutoff_fn, raw_reads_db_fn, preads_fofn_fn, pre_assembly_report_fn):
  17. config = io.deserialize(config_fn)
  18. genome_length = int(config['genome_size'])
  19. length_cutoff_user = int(config['length_cutoff'])
  20. # Update length_cutoff if auto-calc (when length_cutoff is negative).
  21. # length_cutoff_fn was created long ago, so no filesystem issues.
  22. length_cutoff = run_support.get_length_cutoff(
  23. length_cutoff_user, length_cutoff_fn)
  24. # Hmmm. Actually, I think we now write the user length_cutoff into the length_cutoff file,
  25. # if not -1. TODO(CD): Check on that, and simplify here if so.
  26. script = script_run_report_pre_assembly(
  27. raw_reads_db_fn, preads_fofn_fn, genome_length, length_cutoff, pre_assembly_report_fn)
  28. script_fn = 'run-report-pre-assembly.sh'
  29. job_done_fn = 'job.done'
  30. bash.write_script(script, script_fn, job_done_fn)
  31. io.syscall('bash -vex {}'.format(script_fn))
  32. class HelpF(argparse.RawTextHelpFormatter, argparse.ArgumentDefaultsHelpFormatter):
  33. pass
  34. def parse_args(argv):
  35. description = 'Prepare to run the pre-assembly report generator, and run it.'
  36. epilog = 'length_cutoff might be cleaned up someday. For now, yeah, it is confusing.'
  37. parser = argparse.ArgumentParser(
  38. description=description,
  39. epilog=epilog,
  40. formatter_class=HelpF,
  41. )
  42. parser.add_argument(
  43. '--config-fn',
  44. help='Input. JSON configuration. We use "length_cutoff" (if positive) and "genome_size".',
  45. )
  46. parser.add_argument(
  47. '--length-cutoff-fn',
  48. help='Input. File of a single number: the length-cutoff for raw reads.',
  49. )
  50. parser.add_argument(
  51. '--raw-reads-db-fn',
  52. help='Input. Dazzler DB of raw reads.',
  53. )
  54. parser.add_argument(
  55. '--preads-fofn-fn',
  56. help='Input. FOFN of preads las files.',
  57. )
  58. parser.add_argument(
  59. '--pre-assembly-report-fn',
  60. help='Output. In JSON format.',
  61. )
  62. args = parser.parse_args(argv[1:])
  63. return args
  64. def main(argv=sys.argv):
  65. args = parse_args(argv)
  66. logging.basicConfig(level=logging.INFO)
  67. run(**vars(args))
  68. if __name__ == '__main__': # pragma: no cover
  69. main()