ksw.h 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114
  1. #ifndef __AC_KSW_H
  2. #define __AC_KSW_H
  3. #include <stdint.h>
  4. #define KSW_XBYTE 0x10000 // ksw_align() xtra flag: use an unsigned byte to store a score
  5. #define KSW_XSTOP 0x20000 // ksw_align() xtra flag: stop if the maximum score is above (xtra&0xffff)
  6. #define KSW_XSUBO 0x40000 // ksw_align() xtra flag: track the 2nd best score if it is higher than (xtra&0xffff)
  7. #define KSW_XSTART 0x80000 // ksw_align() xtra flag: find the start positions; all flags sit above the low 16 bits used for the threshold
  8. struct _kswq_t; // declared only; the definition is not part of this header
  9. typedef struct _kswq_t kswq_t; // query profile type; ksw_align() takes it as kswq_t **qry
  10. typedef struct { // alignment result returned by ksw_align() and ksw_align2(); unset values are -1
  11. int score; // best score
  12. int te, qe; // target end and query end
  13. int score2, te2; // second best score and ending position on the target (tracked when KSW_XSUBO is set)
  14. int tb, qb; // target start and query start (found when KSW_XSTART is set)
  15. } kswr_t;
  16. #ifdef __cplusplus
  17. extern "C" {
  18. #endif
  19. /**
  20. * Align two sequences
  21. *
  22. * @param qlen length of the query sequence (typically <tlen)
  23. * @param query query sequence with 0 <= query[i] < m
  24. * @param tlen length of the target sequence
  25. * @param target target sequence
  26. * @param m number of residue types
  27. * @param mat m*m scoring matrix stored as a one-dimensional array
  28. * @param gapo gap open penalty; a gap of length l costs "-(gapo+l*gape)"
  29. * @param gape gap extension penalty
  30. * @param xtra KSW_X* flags plus a threshold in the low 16 bits (see below)
  31. * @param qry pointer to the query profile pointer (see below)
  32. *
  33. * @return alignment information in a kswr_t struct; unset values are -1
  34. *
  35. * When xtra==0, ksw_align() uses a signed two-byte integer to store a
  36. * score and only finds the best score and the end positions. The 2nd best
  37. * score and the start positions are not computed. The default behavior can
  38. * be tuned by setting KSW_X* flags:
  39. *
  40. * KSW_XBYTE: use an unsigned byte to store a score. If overflow occurs,
  41. * kswr_t::score will be set to 255
  42. *
  43. * KSW_XSUBO: track the 2nd best score and the ending position on the
  44. * target if the 2nd best is higher than (xtra&0xffff)
  45. *
  46. * KSW_XSTOP: stop if the maximum score is above (xtra&0xffff)
  47. *
  48. * KSW_XSTART: find the start positions
  49. *
  50. * When *qry==NULL, ksw_align() will compute and allocate the query profile
  51. * and when the function returns, *qry will point to the profile, which can
  52. * be deallocated simply by free(). If one query is aligned against multiple
  53. * target sequences, *qry should be set to NULL before the first call and
  54. * freed after the last call. Note that qry itself (not *qry) can be NULL.
  55. * In this case, the query profile will be deallocated in ksw_align().
  56. */
  57. kswr_t ksw_align(int qlen, uint8_t *query, int tlen, uint8_t *target, int m, const int8_t *mat, int gapo, int gape, int xtra, kswq_t **qry);
  58. kswr_t ksw_align2(int qlen, uint8_t *query, int tlen, uint8_t *target, int m, const int8_t *mat, int o_del, int e_del, int o_ins, int e_ins, int xtra, kswq_t **qry); // NOTE(review): presumably ksw_align() with separate gap open/extension penalties for deletions (o_del, e_del) and insertions (o_ins, e_ins) - confirm against the implementation
  59. /**
  60. * Banded global alignment
  61. *
  62. * @param qlen query length
  63. * @param query query sequence with 0 <= query[i] < m
  64. * @param tlen target length
  65. * @param target target sequence with 0 <= target[i] < m
  66. * @param m number of residue types
  67. * @param mat m*m scoring matrix stored as a one-dimensional array
  68. * @param gapo gap open penalty; a gap of length l costs "-(gapo+l*gape)"
  69. * @param gape gap extension penalty
  70. * @param w band width
  71. * @param n_cigar (out) number of CIGAR elements
  72. * @param cigar (out) BAM-encoded CIGAR; the caller needs to deallocate it with free()
  73. *
  74. * @return score of the alignment
  75. */
  76. int ksw_global(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int gapo, int gape, int w, int *n_cigar, uint32_t **cigar);
  77. int ksw_global2(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int o_del, int e_del, int o_ins, int e_ins, int w, int *n_cigar, uint32_t **cigar); // NOTE(review): presumably ksw_global() with separate gap open/extension penalties for deletions (o_del, e_del) and insertions (o_ins, e_ins) - confirm against the implementation
  78. /**
  79. * Extend alignment
  80. *
  81. * The routine aligns $query and $target, assuming their upstream sequences,
  82. * which are not provided, have been aligned with score $h0. In return,
  83. * region [0,*qle) on the query and [0,*tle) on the target sequences are
  84. * aligned together. If *gscore>=0, *gscore keeps the best score such that
  85. * the entire query sequence is aligned; *gtle keeps the position on the
  86. * target where *gscore is achieved. Returning *gscore and *gtle helps the
  87. * caller to decide whether an end-to-end hit or a partial hit is preferred.
  88. *
  89. * The first 9 parameters are identical to those in ksw_global().
  90. * NOTE(review): end_bonus, zdrop and max_off are not described in this header - confirm against the implementation.
  91. * @param h0 alignment score of upstream sequences
  92. * @param qle (out) length of the query in the alignment
  93. * @param tle (out) length of the target in the alignment
  94. * @param gtle (out) length of the target if query is fully aligned
  95. * @param gscore (out) score of the best end-to-end alignment; negative if not found
  96. *
  97. * @return best semi-local alignment score
  98. */
  99. int ksw_extend(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int gapo, int gape, int w, int end_bonus, int zdrop, int h0, int *qle, int *tle, int *gtle, int *gscore, int *max_off);
  100. int ksw_extend2(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int o_del, int e_del, int o_ins, int e_ins, int w, int end_bonus, int zdrop, int h0, int *qle, int *tle, int *gtle, int *gscore, int *max_off); // NOTE(review): presumably ksw_extend() with separate gap open/extension penalties for deletions (o_del, e_del) and insertions (o_ins, e_ins) - confirm against the implementation
  101. #ifdef __cplusplus
  102. }
  103. #endif
  104. #endif