longest_pacbio_subreads.pl 743 B

123456789101112131415161718192021222324252627282930313233343536373839404142434445
  1. #!/usr/bin/perl -w
  2. #
  3. # Author: Jue Ruan
  4. #
  5. use strict;
  # Stream FASTA records from the files named in @ARGV (or from STDIN) and,
  # for each run of consecutive records that share a base read name, emit only
  # the longest one via print_longest_seq().
  #
  # $tag  - base read name of the group currently being collected.
  # @seqs - records of that group; each is [subread_suffix, sequence, comment].
  #         The initial placeholder carries an empty comment so that sequence
  #         text seen before the first header can be printed without an
  #         "uninitialized value" warning.
  my $tag = '';
  my @seqs = (['', '', '']);

  while (my $line = <>) {
      # Strip the line terminator, tolerating CRLF input so that "\r" never
      # leaks into sequences (and their reported length) or header comments.
      $line =~ s/\r?\n?\z//;

      if ($line =~ /^>(\S+)/) {
          my $name = $1;
          # Everything after ">name" is kept verbatim, leading blank included.
          my $comm = substr($line, length($name) + 1);
          my $subr = '';

          # Split a trailing "/<digits>_<digits>" suffix off the read name.
          if ($name =~ /^(.+?)(\/\d+_\d+)$/) {
              $name = $1;
              $subr = $2;
          }

          if ($name eq $tag) {
              # Same base name as the current group: start another record.
              push @seqs, [$subr, '', $comm];
          }
          else {
              # A different read begins: flush the previous group first.
              print_longest_seq();
              $tag  = $name;
              @seqs = ([$subr, '', $comm]);
          }
      }
      else {
          # Sequence data may span several lines; append to the newest record.
          $seqs[-1][1] .= $line;
      }
  }

  # Flush the final group.
  print_longest_seq();
  1;
  # Print the longest sequence of the current group as one FASTA record.
  #
  # Takes no arguments. Reads the file-level $tag (base read name) and @seqs
  # (records of the form [subread_suffix, sequence, comment]). The header line
  # is ">" followed by $tag, the suffix and the comment concatenated without
  # separators, then " len=<length>"; the sequence follows on the next line.
  # When several records share the maximum length the earliest one is used.
  # Nothing is printed when every sequence in the group is empty.
  sub print_longest_seq {
      my $best;
      my $max = 0;
      for my $rec (@seqs) {
          my $len = length $rec->[1];
          # Strict '>' keeps the first record among equally long ones.
          if ($len > $max) {
              $best = $rec;
              $max  = $len;
          }
      }
      return unless $max;

      my ($subr, $seq, $comm) = @$best;
      # A record may lack the comment field (e.g. a two-element placeholder);
      # treat it as empty instead of interpolating undef.
      $comm = '' unless defined $comm;
      print ">$tag$subr$comm len=$max\n$seq\n";
  }