consensus_gather_fasta_fofn.py 2.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990
"""Turn a gathered JSON file into a FOFN of fasta files (preads).
"""
  3. from future.utils import viewitems
  4. import argparse
  5. import logging
  6. import os
  7. import string
  8. import sys
  9. from ..util import io
  10. LOG = logging.getLogger()
  11. def post_hook(config_fn, db_fn, gathered_fn):
  12. # gathered_fn is needed only for this hacky bypass, for pbsmrtpipe.
  13. if os.path.samefile(gathered_fn, db_fn):
  14. return
  15. if os.path.samefile(gathered_fn, config_fn):
  16. return
  17. config = io.deserialize(config_fn)
  18. hook = config.get('LA4Falcon_post')
  19. if hook:
  20. LOG.warning('Found LA4Falcon_post in General section of cfg. About to run {!r}...'.format(hook))
  21. db = os.path.abspath(db_fn)
  22. parent = os.path.abspath(os.path.dirname(os.getcwd()))
  23. dbdir = os.path.join(config['LA4Falcon_dbdir'], 'fc-db') + parent
  24. cmd = string.Template(hook).substitute(DB=db, DBDIR=dbdir)
  25. io.syscall(cmd)
  26. def run(gathered_fn, db_fn, config_fn, preads_fofn_fn):
  27. gathered = io.deserialize(gathered_fn)
  28. d = os.path.abspath(os.path.realpath(os.path.dirname(gathered_fn)))
  29. def abspath(fn):
  30. if os.path.isabs(fn):
  31. return fn # I expect this never to happen though.
  32. return os.path.join(d, fn)
  33. fasta_fns = list()
  34. for desc in gathered:
  35. fn = abspath(desc['fasta'])
  36. if 0 == io.filesize(fn):
  37. LOG.warning('Skipping empty fasta {!r}'.format(fn))
  38. continue
  39. fasta_fns.append(fn)
  40. with open(preads_fofn_fn, 'w') as f:
  41. for filename in sorted(fasta_fns, key=lambda fn: (os.path.basename(fn), fn)):
  42. print(filename, file=f)
  43. post_hook(config_fn, db_fn, gathered_fn)
  44. class HelpF(argparse.RawTextHelpFormatter, argparse.ArgumentDefaultsHelpFormatter):
  45. pass
  46. def parse_args(argv):
  47. description = 'Turn gathered file into FOFN of fasta files.'
  48. epilog = ''
  49. parser = argparse.ArgumentParser(
  50. description=description,
  51. epilog=epilog,
  52. formatter_class=HelpF,
  53. )
  54. parser.add_argument(
  55. '--gathered-fn',
  56. help='Input. JSON list of output dicts.')
  57. parser.add_argument(
  58. '--db-fn',
  59. help='Input. Dazzler DB of raw_reads.')
  60. parser.add_argument(
  61. '--config-fn',
  62. help='Input. JSON of relevant configuration (currently from General section of full-prog config).')
  63. parser.add_argument(
  64. '--preads-fofn-fn',
  65. help='Output. FOFN of preads (fasta files).',
  66. )
  67. args = parser.parse_args(argv[1:])
  68. return args
  69. def main(argv=sys.argv):
  70. args = parse_args(argv)
  71. logging.basicConfig(level=logging.INFO)
  72. run(**vars(args))
  73. if __name__ == '__main__': # pragma: no cover
  74. main()