class Llama::Memory

Llama::Memory
Reference
Object

Overview

Modern memory management for llama.cpp contexts

This class provides a unified interface to various memory types:

Standard KV cache (llama_kv_cache_unified)
SWA (Sliding Window Attention) cache (llama_kv_cache_unified_iswa)
Recurrent layer memory (llama_memory_recurrent)
Hybrid attention/recurrent models (llama_memory_hybrid)

The Memory API replaces the deprecated KV cache API and provides better support for modern model architectures.

Defined in:

llama/memory.cr
llama/memory/error.cr

Constructors

.new(ctx : Context)
Creates a new Memory instance from a context

Instance Method Summary

#can_shift? : Bool
Check if memory supports shifting
#clear(data : Bool = false) : self
Clear memory contents
#seq_add(seq_id : Int32, p0 : Int32, p1 : Int32, delta : Int32) : self
Add relative position delta to tokens in sequence
#seq_cp(seq_id_src : Int32, seq_id_dst : Int32, p0 : Int32, p1 : Int32) : self
Copy tokens from one sequence to another
#seq_div(seq_id : Int32, p0 : Int32, p1 : Int32, d : Int32) : self
Divide positions of tokens in sequence by factor
#seq_keep(seq_id : Int32) : self
Keep only specified sequence, remove all others
#seq_pos_max(seq_id : Int32) : Int32
Get maximum position in sequence
#seq_pos_min(seq_id : Int32) : Int32
Get minimum position in sequence
#seq_rm(seq_id : Int32, p0 : Int32, p1 : Int32) : Bool
Remove tokens from sequence in specified position range
#to_unsafe : Pointer(Void)
Get raw pointer for internal use

Constructor Detail

def self.new(ctx : Context) #

Creates a new Memory instance from a context

Parameters:

ctx: The context to get memory from

Raises:

Memory::Error if memory handle cannot be obtained

[View source]

Instance Method Detail

def can_shift? : Bool #

Check if memory supports shifting

Returns:

true if shifting is supported, false otherwise

[View source]

def clear(data : Bool = false) : self #

Clear memory contents

Parameters:

data: If true, the data buffers are cleared along with the metadata (default: false)

Returns:

self for method chaining

[View source]

def seq_add(seq_id : Int32, p0 : Int32, p1 : Int32, delta : Int32) : self #

Add relative position delta to tokens in sequence

Parameters:

seq_id: Sequence ID
p0: Start position (if negative, the range is [0, p1])
p1: End position (if negative, the range is [p0, inf))
delta: Position delta to add

Returns:

self for method chaining

[View source]

def seq_cp(seq_id_src : Int32, seq_id_dst : Int32, p0 : Int32, p1 : Int32) : self #

Copy tokens from one sequence to another

Parameters:

seq_id_src: Source sequence ID
seq_id_dst: Destination sequence ID
p0: Start position (if negative, the range is [0, p1])
p1: End position (if negative, the range is [p0, inf))

Returns:

self for method chaining

[View source]

def seq_div(seq_id : Int32, p0 : Int32, p1 : Int32, d : Int32) : self #

Divide positions of tokens in sequence by factor

Parameters:

seq_id: Sequence ID
p0: Start position (if negative, the range is [0, p1])
p1: End position (if negative, the range is [p0, inf))
d: Divisor (must be > 1)

Returns:

self for method chaining

Raises:

ArgumentError if divisor is <= 1

[View source]

def seq_keep(seq_id : Int32) : self #

Keep only specified sequence, remove all others

Parameters:

seq_id: Sequence ID to keep

Returns:

self for method chaining

[View source]

def seq_pos_max(seq_id : Int32) : Int32 #

Get maximum position in sequence

All positions in the range [pos_min, pos_max] are guaranteed to be present.

Parameters:

seq_id: Sequence ID

Returns:

Maximum position, or -1 if sequence is empty

[View source]

def seq_pos_min(seq_id : Int32) : Int32 #

Get minimum position in sequence

This is typically non-zero only for SWA (Sliding Window Attention) caches. All positions in the range [pos_min, pos_max] are guaranteed to be present.

Parameters:

seq_id: Sequence ID

Returns:

Minimum position, or -1 if sequence is empty

[View source]

def seq_rm(seq_id : Int32, p0 : Int32, p1 : Int32) : Bool #

Remove tokens from sequence in specified position range

Parameters:

seq_id: Sequence ID (< 0 to match any sequence)
p0: Start position (if negative, the range is [0, p1])
p1: End position (if negative, the range is [p0, inf))

Returns:

true if successful, false if a partial sequence cannot be removed

Note: Removing a whole sequence never fails

[View source]

def to_unsafe : Pointer(Void) #

Get raw pointer for internal use

Returns:

Raw memory handle pointer

[View source]