wtdbg2.pl 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116
  1. #!/usr/bin/env perl
  2. use strict;
  3. use warnings;
  4. use Getopt::Std;
  5. my %opts = (t=>4, m=>"2g");
  6. getopts('t:x:o:a:g:X:M:OPp:k:', \%opts);
  7. die (qq/Usage: wtdbg2.pl [options] <reads.fa>
  8. Options:
  9. -o STR output prefix [first input]
  10. -t INT number of threads [$opts{t}]
  11. -x STR preset: rs, ont, sq, ccs []
  12. -g NUM estimated genome size []
  13. -X FLOAT coverage cutoff [wtdbg2 default]
  14. -k INT k-mer length [wtdbg2 default]
  15. -p INT length of HPC k-mers [wtdbg2 default]
  16. -a STR additional wtdbg2 options []
  17. -M STR minimap2 preset, according to -x when set []
  18. -m NUM samtools sort block size [$opts{m}]
  19. -P skip minimap2-based polishing
  20. -O output without execution
  21. /) if (@ARGV == 0);
  22. $opts{asm} = gwhich("wtdbg2") || die;
  23. $opts{cns} = gwhich("wtpoa-cns") || die;
  24. unless (defined $opts{P}) {
  25. $opts{smt} = gwhich("samtools") || die;
  26. $opts{mm2} = gwhich("minimap2") || die;
  27. }
  28. my $prefix = defined($opts{o})? $opts{o} : $ARGV[0];
  29. my $smt_threads = $opts{t} < 4? 4 : $opts{t};
  30. my $asm_opt = "";
  31. $asm_opt .= " -x $opts{x}" if defined($opts{x});
  32. $asm_opt .= " -g $opts{g}" if defined($opts{g}) && $opts{g} =~ /^\d/;
  33. $asm_opt .= " -X $opts{X}" if defined($opts{X});
  34. $asm_opt .= " -k $opts{k}" if defined($opts{k}) && $opts{k} =~ /^\d+$/;
  35. $asm_opt .= " -p $opts{p}" if defined($opts{p}) && $opts{p} =~ /^\d+$/;
  36. $asm_opt .= " $opts{a}" if defined($opts{a});
  37. $asm_opt .= " -t $opts{t} -fo $prefix";
  38. $asm_opt .= " -i $ARGV[$_]" for (0 .. @ARGV-1);
  39. my %map_opts = (
  40. rs=>'map-pb', rsII=>'map-pb', sq=>'map-pb', sequel=>'map-pb',
  41. ont=>'map-ont', nanopore=>'map-ont',
  42. ccs=>'asm20', corrected=>'asm20'
  43. );
  44. if (not defined $opts{M}) {
  45. if (defined $opts{x}) {
  46. $opts{M} = $map_opts{lc $opts{x}};
  47. $opts{M} = 'map-ont' unless(defined $opts{M});
  48. } else {
  49. $opts{M} = 'map-ont';
  50. }
  51. }
  52. my $mm2_input = join(' ', @ARGV);
  53. my @lines = ();
  54. push(@lines, qq/$opts{asm} $asm_opt/);
  55. push(@lines, qq/$opts{cns} -t $opts{t} -i $prefix.ctg.lay.gz -fo $prefix.raw.fa/);
  56. unless (defined $opts{P}) {
  57. push(@lines, qq/$opts{mm2} -ax $opts{M} -t$opts{t} -r2k $prefix.raw.fa $mm2_input | $opts{smt} sort -m $opts{m} -\@$smt_threads -o $prefix.bam/);
  58. push(@lines, qq/$opts{smt} view -F0x900 $prefix.bam | $opts{cns} -t $opts{t} -d $prefix.raw.fa -i - -fo $prefix.cns.fa/);
  59. }
  60. if (defined $opts{O}) {
  61. print "$_\n" for (@lines);
  62. } else {
  63. for (@lines) {
  64. system($_) == 0 || die "The following command returns non-zero code:\n $_\n";
  65. }
  66. }
  67. sub which {
  68. my $file = shift;
  69. my $path = (@_)? shift : $ENV{PATH};
  70. return if (!defined($path));
  71. foreach my $x (split(":", $path)) {
  72. $x =~ s/\/$//;
  73. return "$x/$file" if (-x "$x/$file") && (-f "$x/$file");
  74. }
  75. return;
  76. }
  77. sub gwhich {
  78. my $progname = shift;
  79. my $addtional_path = shift if (@_);
  80. my $dirname = &dirname($0);
  81. my $tmp;
  82. chomp($dirname);
  83. if ($progname =~ /^\// && (-x $progname) && (-f $progname)) {
  84. return $progname;
  85. } elsif (defined($addtional_path) && ($tmp = &which($progname, $addtional_path))) {
  86. return $tmp;
  87. } elsif (defined($dirname) && (-x "$dirname/$progname") && (-f "$dirname/$progname")) {
  88. return "$dirname/$progname";
  89. } elsif ((-x "./$progname") && (-f "./$progname")) {
  90. return "./$progname";
  91. } elsif (($tmp = &which($progname))) {
  92. return $tmp;
  93. } else {
  94. return;
  95. }
  96. }
  97. sub dirname {
  98. my $prog = shift;
  99. return '.' unless ($prog =~ /\//);
  100. $prog =~ s/\/[^\s\/]+$//g;
  101. return $prog;
  102. }