Class: Minimap2::Aligner

Inherits:
Object
  • Object
show all
Defined in:
lib/minimap2/aligner.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(fn_idx_in = nil, seq: nil, preset: nil, k: nil, w: nil, min_cnt: nil, min_chain_score: nil, min_dp_score: nil, bw: nil, bw_long: nil, best_n: nil, n_threads: 3, fn_idx_out: nil, max_frag_len: nil, extra_flags: nil, scoring: nil, sc_ambi: nil, max_chain_skip: nil, batch_size: nil) ⇒ Aligner

Create a new aligner.

Parameters:

  • fn_idx_in (String) (defaults to: nil)

    index or sequence file name.

  • seq (String) (defaults to: nil)

    a single sequence to index.

  • preset (String) (defaults to: nil)

    minimap2 preset.

    • map-pb : PacBio CLR genomic reads

    • map-ont : Oxford Nanopore genomic reads

    • map-hifi : PacBio HiFi/CCS genomic reads (v2.19 or later)

    • asm20 : PacBio HiFi/CCS genomic reads (v2.18 or earlier)

    • sr : short genomic paired-end reads

    • splice : spliced long reads (strand unknown)

    • splice:hq : Final PacBio Iso-seq or traditional cDNA

    • asm5 : intra-species asm-to-asm alignment

    • ava-pb : PacBio read overlap

    • ava-ont : Nanopore read overlap

  • k (Integer) (defaults to: nil)

    k-mer length, no larger than 28.

  • w (Integer) (defaults to: nil)

    minimizer window size, no larger than 255.

  • min_cnt (Integer) (defaults to: nil)

    minimum number of minimizers on a chain.

  • min_chain_score (Integer) (defaults to: nil)

    minimum chain score.

  • min_dp_score (defaults to: nil)
  • bw (Integer) (defaults to: nil)

    chaining and alignment band width. (initial chaining and extension)

  • bw_long (Integer) (defaults to: nil)

    chaining and alignment band width (RMQ-based rechaining and closing gaps)

  • best_n (Integer) (defaults to: nil)

    max number of alignments to return.

  • n_threads (Integer) (defaults to: 3)

    number of indexing threads.

  • fn_idx_out (String) (defaults to: nil)

    name of file to which the index is written. This parameter has no effect if seq is set.

  • max_frag_len (Integer) (defaults to: nil)
  • extra_flags (Integer) (defaults to: nil)

    additional flags defined in minimap.h.

  • scoring (Array) (defaults to: nil)

    scoring system. It is a tuple/list consisting of 4, 6 or 7 positive integers. The first 4 elements specify match scoring, mismatch penalty, gap open and gap extension penalty. The 5th and 6th elements, if present, set long-gap open and long-gap extension penalty. The 7th sets a mismatch penalty involving ambiguous bases.

Raises:

  • (ArgumentError)


41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
# File 'lib/minimap2/aligner.rb', line 41

def initialize(
  fn_idx_in = nil,
  seq: nil,
  preset: nil,
  k: nil,
  w: nil,
  min_cnt: nil,
  min_chain_score: nil,
  min_dp_score: nil,
  bw: nil,
  bw_long: nil,
  best_n: nil,
  n_threads: 3,
  fn_idx_out: nil,
  max_frag_len: nil,
  extra_flags: nil,
  scoring: nil,
  sc_ambi: nil,
  max_chain_skip: nil,
  batch_size: nil
)
  @idx_opt = FFI::IdxOpt.new
  @map_opt = FFI::MapOpt.new

  r = FFI.mm_set_opt(preset, idx_opt, map_opt)
  raise ArgumentError, "Unknown preset name: #{preset}" if r == -1

  # always perform alignment
  map_opt[:flag] |= 4

  # Keep a large batch_size by default (mappy-compatible behavior) to avoid
  # splitting indexes unless explicitly requested.
  idx_opt[:batch_size] = 0x7fffffffffffffff
  idx_opt[:batch_size] = batch_size if batch_size

  # override preset options
  idx_opt[:k] = k if k
  idx_opt[:w] = w if w
  map_opt[:min_cnt] = min_cnt if min_cnt
  map_opt[:min_chain_score] = min_chain_score if min_chain_score
  map_opt[:min_dp_max] = min_dp_score if min_dp_score
  map_opt[:bw] = bw if bw
  map_opt[:bw_long] = bw_long if bw_long
  map_opt[:best_n] = best_n if best_n
  map_opt[:max_frag_len] = max_frag_len if max_frag_len
  map_opt[:flag] |= extra_flags if extra_flags
  if scoring && scoring.size >= 4
    map_opt[:a] = scoring[0]
    map_opt[:b] = scoring[1]
    map_opt[:q] = scoring[2]
    map_opt[:e] = scoring[3]
    map_opt[:q2] = map_opt[:q]
    map_opt[:e2] = map_opt[:e]
    if scoring.size >= 6
      map_opt[:q2] = scoring[4]
      map_opt[:e2] = scoring[5]
      map_opt[:sc_ambi] = scoring[6] if scoring.size >= 7
    end
  end
  map_opt[:sc_ambi] = sc_ambi if sc_ambi
  map_opt[:max_chain_skip] = max_chain_skip if max_chain_skip

  if fn_idx_in
    warn "Since fn_idx_in is specified, the seq argument will be ignored." if seq
    reader = FFI.mm_idx_reader_open(fn_idx_in, idx_opt, fn_idx_out)

    # The Ruby version raises an error here
    raise "Cannot open : #{fn_idx_in}" if reader.null?

    @indexes = []
    begin
      loop do
        idx = FFI.mm_idx_reader_read(reader, n_threads)
        break if idx.nil? || idx.null?

        # Initialize sequence name index for each part
        FFI.mm_idx_index_name(idx)
        @indexes << idx
      end
    ensure
      FFI.mm_idx_reader_close(reader)
    end

    raise "Failed to read index parts from: #{fn_idx_in}" if @indexes.empty?

    # Keep backward-compatible accessor for a single index
    @index = @indexes[0]
    FFI.mm_mapopt_update(map_opt, index)
  elsif seq
    @index = FFI.mappy_idx_seq(
      idx_opt[:w], idx_opt[:k], idx_opt[:flag] & 1,
      idx_opt[:bucket_bits], seq, seq.size
    )
    @indexes = [@index]
    FFI.mm_mapopt_update(map_opt, index)
    map_opt[:mid_occ] = 1000 # don't filter high-occ seeds
  else
    @indexes = []
    @index = FFI::Idx.new(::FFI::Pointer::NULL)
  end
end

Instance Attribute Details

#idx_optObject (readonly)

Returns the value of attribute idx_opt.



5
6
7
# File 'lib/minimap2/aligner.rb', line 5

def idx_opt
  @idx_opt
end

#indexObject (readonly)

Returns the value of attribute index.



5
6
7
# File 'lib/minimap2/aligner.rb', line 5

def index
  @index
end

#map_optObject (readonly)

Returns the value of attribute map_opt.



5
6
7
# File 'lib/minimap2/aligner.rb', line 5

def map_opt
  @map_opt
end

Instance Method Details

#align(seq, seq2 = nil, name: nil, buf: nil, cs: false, md: false, max_frag_len: nil, extra_flags: nil) ⇒ Array

Note:

Name change: map -> align In the Ruby language, the name map means iterator. The original name is map, but here I use the method name align.

Note:

The use of Enumerator is being considered. The method names may change again.

Returns alignments.

Parameters:

  • seq (String)
  • seq2 (String) (defaults to: nil)
  • buf (FFI::TBuf) (defaults to: nil)
  • cs (true, false) (defaults to: false)
  • md (true, false) (defaults to: false)
  • max_frag_len (Integer) (defaults to: nil)
  • extra_flags (Integer) (defaults to: nil)

Returns:

  • (Array)

    alignments



172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
# File 'lib/minimap2/aligner.rb', line 172

def align(
  seq, seq2 = nil,
  name: nil,
  buf: nil,
  cs: false,
  md: false,
  max_frag_len: nil,
  extra_flags: nil
)
  return if index.null?
  return if (map_opt[:flag] & 4).zero? && (index[:flag] & 2).zero?

  orig_map_opt_bytes = map_opt.to_ptr.read_bytes(FFI::MapOpt.size)
  orig_best_n = map_opt[:best_n]

  owned_buf = false
  if buf.nil?
    buf = FFI.mm_tbuf_init
    owned_buf = true
  end

  km = FFI.mm_tbuf_get_km(buf)
  alignments = []

  idx_parts = @indexes
  idx_parts = [index] if idx_parts.nil? || idx_parts.empty?

  begin
    idx_parts.each do |idx_part|
      next if idx_part.nil? || idx_part.null?

      # Update options for this specific index part
      FFI.mm_mapopt_update(map_opt, idx_part)

      # Per-call options (do not leak across calls)
      map_opt[:flag] |= 4
      map_opt[:best_n] = orig_best_n
      map_opt[:max_frag_len] = max_frag_len if max_frag_len
      map_opt[:flag] |= extra_flags if extra_flags

      n_regs_ptr = ::FFI::MemoryPointer.new :int
      regs_ptr = FFI.mm_map_aux(idx_part, name, seq, seq2, n_regs_ptr, buf, map_opt)
      n_regs = n_regs_ptr.read_int

      next if regs_ptr.nil? || regs_ptr.null? || n_regs <= 0

      regs = Array.new(n_regs) do |i|
        FFI::Reg1.new(regs_ptr + i * FFI::Reg1.size)
      end

      hit = FFI::Hit.new

      cs_buf_ptr = nil
      m_cs_ptr = nil
      if cs || md
        cs_buf_ptr = ::FFI::MemoryPointer.new(:pointer)
        cs_buf_ptr.write_pointer(::FFI::Pointer::NULL)
        m_cs_ptr = ::FFI::MemoryPointer.new(:int)
        m_cs_ptr.write_int(0)
      end

      i = 0
      begin
        while i < n_regs
          FFI.mm_reg2hitpy(idx_part, regs[i], hit)

          c = hit[:cigar32].read_array_of_uint32(hit[:n_cigar32])
          cigar = c.map { |x| [x >> 4, x & 0xf] } # 32-bit CIGAR encoding -> Ruby array

          _cs = ""
          _md = ""
          if cs or md
            cur_seq = hit[:seg_id] > 0 && seq2 ? seq2 : seq

            if cs
              l_cs_str = FFI.mm_gen_cs(km, cs_buf_ptr, m_cs_ptr, idx_part, regs[i], cur_seq, 1)
              cs_ptr = cs_buf_ptr.read_pointer
              _cs = cs_ptr.null? || l_cs_str <= 0 ? "" : cs_ptr.read_string(l_cs_str)
            end

            if md
              l_cs_str = FFI.mm_gen_md(km, cs_buf_ptr, m_cs_ptr, idx_part, regs[i], cur_seq)
              cs_ptr = cs_buf_ptr.read_pointer
              _md = cs_ptr.null? || l_cs_str <= 0 ? "" : cs_ptr.read_string(l_cs_str)
            end
          end

          alignments << Alignment.new(hit, cigar, _cs, _md)

          FFI.mm_free_reg1(regs[i])
          i += 1
        end
      ensure
        while i < n_regs
          FFI.mm_free_reg1(regs[i])
          i += 1
        end

        if cs_buf_ptr
          cs_ptr = cs_buf_ptr.read_pointer
          FFI.mappy_free(cs_ptr) unless cs_ptr.nil? || cs_ptr.null?
        end

        # Free the mm_map/mm_map_aux return value array itself
        FFI.mappy_free(regs_ptr) unless regs_ptr.nil? || regs_ptr.null?
      end
    end
  ensure
    FFI.mm_tbuf_destroy(buf) if owned_buf

    # Restore map_opt to the state before this call
    map_opt.to_ptr.put_bytes(0, orig_map_opt_bytes)
  end

  if orig_best_n && orig_best_n > 0 && alignments.length > orig_best_n
    alignments.sort_by! do |aln|
      [
        aln.primary? ? 1 : 0,
        aln.mapq,
        aln.mlen,
        aln.blen,
        -aln.nm
      ]
    end
    alignments.reverse!
    alignments = alignments.take(orig_best_n)
  end

  alignments
end

#free_indexObject

Explicitly releases the memory of the index object.



145
146
147
148
149
150
151
152
153
154
155
156
157
# File 'lib/minimap2/aligner.rb', line 145

def free_index
  indexes = @indexes
  if indexes && !indexes.empty?
    indexes.each do |idx|
      FFI.mm_idx_destroy(idx) unless idx.nil? || idx.null?
    end
  elsif defined?(@index) && !@index.nil? && !@index.null?
    FFI.mm_idx_destroy(@index)
  end
ensure
  @indexes = []
  @index = FFI::Idx.new(::FFI::Pointer::NULL)
end

#kObject

k-mer length, no larger than 28



338
339
340
# File 'lib/minimap2/aligner.rb', line 338

def k
  index[:k]
end

#n_seqObject



348
349
350
351
352
353
354
355
# File 'lib/minimap2/aligner.rb', line 348

def n_seq
  return 0 if index.null?

  indexes = @indexes
  return index[:n_seq] if indexes.nil? || indexes.empty?

  indexes.sum { |idx| idx.nil? || idx.null? ? 0 : idx[:n_seq] }
end

#seq(name, start = 0, stop = 0x7fffffff) ⇒ Object

Retrieve a subsequence from the index.

Parameters:

  • name
  • start (defaults to: 0)
  • stop (defaults to: 0x7fffffff)


308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
# File 'lib/minimap2/aligner.rb', line 308

def seq(name, start = 0, stop = 0x7fffffff)
  return if index.null?
  return if (map_opt[:flag] & 4).zero? && (index[:flag] & 2).zero?

  idx_parts = @indexes
  idx_parts = [index] if idx_parts.nil? || idx_parts.empty?

  idx_parts.each do |idx_part|
    next if idx_part.nil? || idx_part.null?

    lp = ::FFI::MemoryPointer.new(:int)
    s = FFI.mappy_fetch_seq(idx_part, name, start, stop, lp)
    l = lp.read_int
    if l == 0
      FFI.mappy_free(s) unless s.nil? || s.null?
      next
    end

    begin
      return s.read_string(l)
    ensure
      FFI.mappy_free(s) unless s.nil? || s.null?
    end
  end

  nil
end

#seq_namesObject



357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
# File 'lib/minimap2/aligner.rb', line 357

def seq_names
  return [] if index.null?

  indexes = @indexes
  indexes = [index] if indexes.nil? || indexes.empty?

  names = []
  seen = {}
  indexes.each do |idx|
    next if idx.nil? || idx.null?

    ptr = idx[:seq].to_ptr
    idx[:n_seq].times do |i|
      name = FFI::IdxSeq.new(ptr + i * FFI::IdxSeq.size)[:name]
      next if seen[name]

      seen[name] = true
      names << name
    end
  end
  names
end

#wObject

minimizer window size, no larger than 255



344
345
346
# File 'lib/minimap2/aligner.rb', line 344

def w
  index[:w]
end