wtdbg-dot2gfa.pl 1013 B

123456789101112131415161718192021222324252627282930313233343536373839404142
  1. #!/usr/bin/perl -w
  2. #
  3. # Author: Jue Ruan <ruanjue@gmail.com>
  4. #
  5. use strict;
  6. =pod
  7. Transform <dbg>.[1/2/3/frg/ctg].dot into GFA format. <dbg>.[1/2/3/frg/ctg].dot is the assembly graph from wtdbg
  8. TAG:
  9. gl: gap length
  10. rc: read count to support the link
  11. sq: the sequence can be extracted (substr) from <rd_name>_<FR>_<off>_<len>; F = forward, R = reverse, and <off> is measured on the forward strand
  12. =cut
  # Stream the DOT graph from the files named on the command line (or STDIN)
  # and print one GFA record per recognised statement:
  #   node  "<id> [<attrs>]"          -> S (segment) line
  #   edge  "<id> -> <id> [<attrs>]"  -> L (link) line
  # Lines that have neither shape are skipped silently; a recognised statement
  # whose attributes cannot be parsed aborts the run with "Bad format".
  while (my $line = <>) {
      chomp $line;

      # Node ids are N<digits> or F<digits>; the "-> <id>" part is optional.
      next if $line !~ /^([NF]\d+)\s(->\s([NF]\d+)\s)?\[([^\]]+)\]$/;

      # Copy the captures now: the matches below overwrite $1..$5.
      my ($src, $dst, $attrs) = ($1, $3, $4);

      if (defined $dst) {
          # Edge label: two one-character orientations, read count, gap length.
          $attrs =~ /label=\"(\S)(\S):(\d+):(\-?\d+)\"/
              or die("Bad format: $line");
          my ($src_dir, $dst_dir, $reads, $gap) = ($1, $2, $3, $4);

          # A non-negative gap is reported through the gl tag with a 0S
          # overlap; a negative gap is written as an overlap of -gap matches.
          my $overlap_and_tags = $gap >= 0
              ? "0S\tgl:i:$gap\trc:i:$reads"
              : (0 - $gap) . "M\trc:i:$reads";

          print join("\t", 'L', $src, $src_dir, $dst, $dst_dir, $overlap_and_tags) . "\n";
          next;
      }

      if ($attrs =~ /\{N\d+\s(\d+)\s\|\s(\S+)\s\|\s([FR])_(\d+)_(\d+)\}/) {
          # N-style label: "{N<id> <num> | <rd_name> | <F|R>_<off>_<len>}".
          # The first number is matched but not used in the output.
          my ($read, $strand, $offset, $length) = ($2, $3, $4, $5);
          print "S\t$src\t*\tLN:i:$length\tsq:Z:${read}_${strand}_${offset}_${length}\n";
      }
      elsif ($attrs =~ /\{F\d+\s(\d+)\s(\d+)\/(\d+)\s\|/) {
          # F-style label: "{F<id> <num> <a>/<b> | ..."; LN is taken from <a>.
          my $length = $2;
          print "S\t$src\t*\tLN:i:$length\n";
      }
      else {
          die("Bad format: $line");
      }
  }
  39. 1;