test_assembly.py 1.8 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061
  1. """
  2. This is meant to replace pysiv2.custom.test_assembly.
  3. In your testkit_cfg.json, instead of
  4. "pysiv2.custom": [
  5. you will now use
  6. "falcon_kit.testkit": [
  7. The values *might* need to be updated slightly, since we rely on the DB
  8. rather than on datasets now.
  9. """
  10. import os, re, unittest
  11. from falcon_kit.util.io import system
  12. from falcon_kit.io import capture
  13. from falcon_kit import functional
  14. # Someday, we might simplify this. For now, we
  15. # use the current 'test_values.json'.
  16. from pysiv2.custom.base import TestStatisticsBase, TestReportStatistics # pylint: disable=no-name-in-module, import-error
  17. class TestPreAssembly(TestReportStatistics):
  18. REPORT_ID = "preassembly"
  19. TEST_ID = "preassembly"
  20. METRIC_IDS = [
  21. "raw_reads",
  22. "raw_mean",
  23. "raw_n50",
  24. "raw_bases",
  25. "preassembled_reads",
  26. "preassembled_mean",
  27. "preassembled_n50",
  28. "preassembled_bases",
  29. "preassembled_yield"
  30. ]
  31. class TestPolishedAssembly(TestReportStatistics):
  32. """
  33. Test metrics in the output of pbreports.report.polished_assembly
  34. """
  35. REPORT_ID = "polished_assembly"
  36. TEST_ID = "polished_assembly"
  37. METRIC_IDS = [
  38. "polished_contigs",
  39. "max_contig_length",
  40. "n_50_contig_length",
  41. "sum_contig_lengths"
  42. ]
  43. class TestFalconAssembly(TestStatisticsBase):
  44. JSON_SCOPE = 'falcon_kit'
  45. TEST_ID = 'filter_subreads'
  46. METRIC_IDS = ['number_of_filtered_subreads']
  47. DEFAULT_VALUES = {}
  48. @classmethod
  49. def getMetrics(cls):
  50. db_fn = os.path.join(cls.job_dir, 'tasks', 'falcon_ns2.tasks.task_falcon0_dazzler_build_raw-0', 'raw_reads.db')
  51. #system('which DBdump', check=True)
  52. dump = capture('DBdump {}'.format(db_fn))
  53. cls.metric_dict['number_of_filtered_subreads'] = functional.dazzler_num_reads(dump)