class Llama::KvCacheView

Llama::KvCacheView
Reference
Object

Overview

Wrapper for the llama_kv_cache_view structure. Provides methods for visualizing and debugging the KV cache state.

Included Modules

Enumerable(Llama::KvCacheViewCell)

Defined in:

llama/kv_cache_view.cr
llama/kv_cache_view/error.cr

Constructors

.new(ctx : Context, n_seq_max : Int32 = 4)
Creates a new KvCacheView instance
.new(ctx : Context, n_seq_max : Int32 = 4, &)
Creates a new KvCacheView instance with a block

Instance Method Summary

#[](index : Int32) : KvCacheViewCell
Returns the cell at the specified index
#capacity : Int32
Alias for n_cells
#each(&)
Implements the Enumerable interface. Yields each cell in the KV cache
#empty? : Bool
Returns whether the KV cache is empty
#finalize
Frees the resources associated with this view
#free : self
Frees the resources associated with this view
#freed? : Bool
Returns whether the view has been freed
#full? : Bool
Returns whether the KV cache is full
#inspect(io : IO) : Nil
Writes a detailed string representation of the KV cache view to io
#max_contiguous : Int32
Returns the maximum number of contiguous empty slots
#max_contiguous_idx : Int32
Returns the starting index of the largest contiguous range of empty slots (see max_contiguous)
#n_cells : Int32
Returns the number of cells in the KV cache
#n_seq_max : Int32
Returns the maximum number of sequences per cell
#sequences(index : Int32) : Array(Int32)
Returns the sequences for the cell at the specified index
#size : Int32
Alias for token_count
#to_s(io : IO) : Nil
Writes a string representation of the KV cache view to io
#token_count : Int32
Returns the total number of tokens in the KV cache
#update : self
Updates the view with the current state of the KV cache
#used_cells : Int32
Returns the number of used cells in the KV cache

Constructor Detail

def self.new(ctx : Context, n_seq_max : Int32 = 4) #

Creates a new KvCacheView instance

Parameters:

ctx: The context to create the view for
n_seq_max: Maximum number of sequences per cell to track (default: 4)

Raises:

Llama::KvCacheView::Error if the view cannot be created

[View source]

def self.new(ctx : Context, n_seq_max : Int32 = 4, &) #

Creates a new KvCacheView instance with a block

Parameters:

ctx: The context to create the view for
n_seq_max: Maximum number of sequences per cell to track (default: 4)
block: The block to execute with the view

The view is automatically freed after the block finishes executing.

Raises:

Llama::KvCacheView::Error if the view cannot be created

[View source]

Instance Method Detail

def [](index : Int32) : KvCacheViewCell #

Returns the cell at the specified index

Parameters:

index: The index of the cell to get

Returns:

The cell at the specified index

Raises:

IndexError if the index is out of bounds
Llama::KvCacheView::Error if the view has been freed

[View source]

def capacity : Int32 #

Alias for n_cells

[View source]

def each(&) #

Implements the Enumerable interface. Yields each cell in the KV cache

Raises:

Llama::KvCacheView::Error if the view has been freed

[View source]

def empty? : Bool #

Returns whether the KV cache is empty

Returns:

true if the KV cache is empty, false otherwise

Raises:

Llama::KvCacheView::Error if the view has been freed

[View source]

def finalize #

Frees the resources associated with this view

[View source]

def free : self #

Frees the resources associated with this view

Returns:

self for method chaining

Raises:

Llama::KvCacheView::Error if the free operation fails

[View source]

def freed? : Bool #

Returns whether the view has been freed

Returns:

true if the view has been freed, false otherwise

[View source]

def full? : Bool #

Returns whether the KV cache is full

Returns:

true if the KV cache is full, false otherwise

Raises:

Llama::KvCacheView::Error if the view has been freed

[View source]

def inspect(io : IO) : Nil #

Writes a detailed string representation of the KV cache view to io

Returns:

Nil; the detailed string representation is written to io

[View source]

def max_contiguous : Int32 #

Returns the maximum number of contiguous empty slots

Returns:

The maximum number of contiguous empty slots

Raises:

Llama::KvCacheView::Error if the view has been freed

[View source]

def max_contiguous_idx : Int32 #

Returns the starting index of the largest contiguous range of empty slots (see max_contiguous)

Returns:

The starting index of the largest contiguous range of empty slots

Raises:

Llama::KvCacheView::Error if the view has been freed

[View source]

def n_cells : Int32 #

Returns the number of cells in the KV cache

Returns:

The number of cells

Raises:

Llama::KvCacheView::Error if the view has been freed

[View source]

def n_seq_max : Int32 #

Returns the maximum number of sequences per cell

Returns:

The maximum number of sequences per cell

Raises:

Llama::KvCacheView::Error if the view has been freed

[View source]

def sequences(index : Int32) : Array(Int32) #

Returns the sequences for the cell at the specified index

Parameters:

index: The index of the cell to get sequences for

Returns:

An array of sequence IDs for the cell

Raises:

IndexError if the index is out of bounds
Llama::KvCacheView::Error if the view has been freed

[View source]

def size : Int32 #

Alias for token_count

[View source]

def to_s(io : IO) : Nil #

Writes a string representation of the KV cache view to io

Returns:

Nil; the string representation is written to io

[View source]

def token_count : Int32 #

Returns the total number of tokens in the KV cache

Returns:

The number of tokens

Raises:

Llama::KvCacheView::Error if the view has been freed

[View source]

def update : self #

Updates the view with the current state of the KV cache

Returns:

self for method chaining

Raises:

Llama::KvCacheView::Error if the update fails

[View source]

def used_cells : Int32 #

Returns the number of used cells in the KV cache

Returns:

The number of used cells

Raises:

Llama::KvCacheView::Error if the view has been freed

[View source]