Class: RedAmber::Vector

Inherits:
Object
  • Object
show all
Includes:
Helper, VectorFunctions, VectorSelectable, VectorUpdatable
Defined in:
lib/red_amber/vector.rb

Overview

Holds the values of one variable (column) of a columnar data object.

@data : holds Arrow::ChunkedArray

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Methods included from VectorSelectable

#[], #drop_nil, #filter, #index, #is_in, #take

Methods included from VectorUpdatable

#if_else, #list_flatten, #list_separate, #list_sizes, #merge, #primitive_invert, #replace, #shift, #split, #split_to_columns, #split_to_rows

Methods included from VectorFunctions

#coerce, #is_na, #quantile, #quantiles, #sd, #unbiased_variance

Constructor Details

#initialize(*array) ⇒ Vector

Note:

By default the Vector is headless: the constructor assigns no key, so ‘@key == nil’.

Create a Vector.



26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# File 'lib/red_amber/vector.rb', line 26

# Create a Vector from one of several kinds of input.
#
# The splatted arguments are matched in this order:
# - a single Vector: its `data` is reused,
# - a single Range: expanded with `Array()` and wrapped in an Arrow::Array,
# - a single Arrow::Array or Arrow::ChunkedArray: stored as is,
# - a single object responding to `to_arrow_array`: converted with it,
# - anything else: the arguments are flattened and wrapped in an Arrow::Array.
#
# @param array [Array] the values, or a single array-like source object.
# @note `@key` is not assigned here.
def initialize(*array)
  @data =
    case array
    in [Vector => vector] then vector.data
    in [Range => range] then Arrow::Array.new(Array(range))
    in [Arrow::Array | Arrow::ChunkedArray => arrow] then arrow
    in [source] if source.respond_to?(:to_arrow_array)
      source.to_arrow_array
    else
      Arrow::Array.new(array.flatten)
    end
end

Instance Attribute Details

#dataObject (readonly) Also known as: to_arrow_array

Returns the value of attribute data.



42
43
44
# File 'lib/red_amber/vector.rb', line 42

# Reader for the object stored in `@data`.
#
# @return [Object] the current value of `@data` (nil when never assigned).
def data = @data

#keyObject

Returns the value of attribute key.



45
46
47
# File 'lib/red_amber/vector.rb', line 45

# Reader for the value stored in `@key`.
#
# @return [Object, nil] the current value of `@key` (nil when never assigned).
def key = @key

Class Method Details

.create(arrow_array) ⇒ Object

Quicker constructor of Vector.



17
18
19
20
21
# File 'lib/red_amber/vector.rb', line 17

# Quicker constructor of Vector.
#
# Allocates an instance without running `initialize` and stores the
# argument directly in `@data`; no conversion or checking is performed.
#
# @param arrow_array [Object] object to store as `@data`.
# @return [Object] the newly allocated instance.
def self.create(arrow_array)
  allocate.tap { |vector| vector.instance_variable_set(:@data, arrow_array) }
end

Instance Method Details

#boolean?Boolean

Returns:

  • (Boolean)


105
106
107
# File 'lib/red_amber/vector.rb', line 105

# Whether the underlying data reports itself as boolean.
#
# @return [Boolean] the result of `@data.boolean?`.
def boolean? = @data.boolean?

#chunked?Boolean

undocumented

Returns:

  • (Boolean)


157
158
159
# File 'lib/red_amber/vector.rb', line 157

# Whether the underlying data is an Arrow::ChunkedArray.
#
# @return [Boolean] true when `@data` is an Arrow::ChunkedArray.
def chunked? = @data.is_a?(Arrow::ChunkedArray)

#dictionary?Boolean

Returns:

  • (Boolean)


125
126
127
# File 'lib/red_amber/vector.rb', line 125

# Whether the underlying data reports itself as dictionary-typed.
#
# @return [Boolean] the result of `@data.dictionary?`.
def dictionary? = @data.dictionary?

#eachObject



141
142
143
144
145
146
147
# File 'lib/red_amber/vector.rb', line 141

# Iterate over the elements by position, from 0 up to `size - 1`.
#
# @yield [Object] each element, read as `data[index]`.
# @return [Enumerator] when no block is given.
# @return [Integer] the value of `size` when a block is given.
def each
  if block_given?
    size.times { |position| yield data[position] }
  else
    enum_for(:each)
  end
end

#empty?Boolean

Returns:

  • (Boolean)


97
98
99
# File 'lib/red_amber/vector.rb', line 97

# Whether the Vector has no elements.
#
# @return [Boolean] true when `size` is zero.
def empty? = size.zero?

#float?Boolean

Returns:

  • (Boolean)


113
114
115
# File 'lib/red_amber/vector.rb', line 113

# Whether the underlying data reports itself as floating point.
#
# @return [Boolean] the result of `@data.float?`.
def float? = @data.float?

#has_nil?Boolean

Returns:

  • (Boolean)


197
198
199
# File 'lib/red_amber/vector.rb', line 197

# Whether any element is nil.
#
# @return [Boolean] the result of calling `any` on `is_nil`.
def has_nil? = is_nil.any

#indicesObject Also known as: indexes, indeces



81
82
83
# File 'lib/red_amber/vector.rb', line 81

# Positions of all elements.
#
# @return [Array<Integer>] the integers from 0 up to `size - 1`, in order.
def indices = size.times.to_a

#inspect(limit: 80) ⇒ Object



51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# File 'lib/red_amber/vector.rb', line 51

def inspect(limit: 80)
  if ENV.fetch('RED_AMBER_OUTPUT_MODE', 'Table').casecmp('MINIMUM').zero?
    # Better performance than `.upcase == 'MINIMUM'`
    "#{self.class}(:#{type}, size=#{size})"
  else
    sio = StringIO.new << '['
    each.with_index do |e, i|
      next_str = "#{sio.size > 1 ? ', ' : ''}#{e.inspect}"
      if (sio.size + next_str.size) < limit
        sio << next_str
      else
        sio << ', ... ' if i < size
        break
      end
    end
    sio << ']'

    format "#<#{self.class}(:#{type}, size=#{size}):0x%016x>\n%s\n",
           object_id, sio.string
  end
end

#integer?Boolean

Returns:

  • (Boolean)


117
118
119
# File 'lib/red_amber/vector.rb', line 117

# Whether the underlying data reports itself as integer-typed.
#
# @return [Boolean] the result of `@data.integer?`.
def integer? = @data.integer?

#list?Boolean

Returns:

  • (Boolean)


133
134
135
# File 'lib/red_amber/vector.rb', line 133

# Whether the underlying data reports itself as list-typed.
#
# @return [Boolean] the result of `@data.list?`.
def list? = @data.list?

#map(&block) ⇒ Object Also known as: collect



149
150
151
152
153
# File 'lib/red_amber/vector.rb', line 149

# Transform every element with the block and wrap the results in a new Vector.
#
# @yield [Object] each element of `to_a`.
# @return [Vector] built from the block results when a block is given.
# @return [Enumerator] when no block is given.
def map(&block)
  if block
    Vector.new(to_a.map(&block))
  else
    enum_for(:map)
  end
end

#n_chunksObject

undocumented



162
163
164
# File 'lib/red_amber/vector.rb', line 162

# Number of chunks in the underlying data.
#
# @return [Integer] `@data.n_chunks` when `chunked?` is true, otherwise 0.
def n_chunks
  return 0 unless chunked?

  @data.n_chunks
end

#n_nansObject



193
194
195
# File 'lib/red_amber/vector.rb', line 193

# Number of NaN elements.
#
# @return [Integer] how many entries of `is_nan` are true when `numeric?`
#   holds; 0 for non-numeric data.
def n_nans
  return 0 unless numeric?

  is_nan.to_a.count(true)
end

#n_nullsObject Also known as: n_nils



188
189
190
# File 'lib/red_amber/vector.rb', line 188

# Number of null elements, as reported by the underlying data.
#
# @return [Integer] the result of `@data.n_nulls`.
def n_nulls = @data.n_nulls

#numeric?Boolean

Returns:

  • (Boolean)


109
110
111
# File 'lib/red_amber/vector.rb', line 109

# Whether the underlying data reports itself as numeric.
#
# @return [Boolean] the result of `@data.numeric?`.
def numeric? = @data.numeric?

#sizeObject Also known as: length, n_rows, nrow



88
89
90
91
# File 'lib/red_amber/vector.rb', line 88

# Number of elements.
#
# Delegates to `length` on the underlying data (the original author's note
# asks whether Arrow defines only :length).
#
# @return [Integer] the result of `@data.length`.
def size = @data.length

#string?Boolean

Returns:

  • (Boolean)


121
122
123
# File 'lib/red_amber/vector.rb', line 121

# Whether the underlying data reports itself as string-typed.
#
# @return [Boolean] the result of `@data.string?`.
def string? = @data.string?

#tallyObject

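Returns a Hash that maps each value to the number of times it occurs; for floating-point data containing NaN, all NaN entries are merged under a single Float::NAN key. (The source comment here is a leftover stub: def each_chunk() end)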



168
169
170
171
172
173
174
175
176
177
178
179
180
181
# File 'lib/red_amber/vector.rb', line 168

# Count how many times each value occurs.
#
# Separate NaN objects are not equal to each other, so a plain tally can
# list NaN under several keys. For floating-point data that contains NaN,
# those entries are merged into a single `Float::NAN` key placed after all
# other keys.
#
# @return [Hash{Object => Integer}] value => number of occurrences.
def tally
  counts = values.tally
  return counts unless (type_class < Arrow::FloatingPointDataType) && is_nan.any

  # Split instead of deleting from the hash while iterating over it.
  nan_pairs, other_pairs = counts.partition { |key, _| key.is_a?(Float) && key.nan? }
  merged = other_pairs.to_h
  merged[Float::NAN] = nan_pairs.sum { |_, count| count }
  merged
end

#temporal?Boolean

Returns:

  • (Boolean)


129
130
131
# File 'lib/red_amber/vector.rb', line 129

# Whether the underlying data reports itself as temporal.
#
# @return [Boolean] the result of `@data.temporal?`.
def temporal? = @data.temporal?

#to_aryObject Also known as: to_a, values, entries



73
74
75
# File 'lib/red_amber/vector.rb', line 73

# Elements of the Vector, as returned by the underlying data.
#
# @return [Object] the result of `@data.values`
#   (presumably a Ruby Array — confirm against Arrow).
def to_ary = @data.values

#to_sObject



47
48
49
# File 'lib/red_amber/vector.rb', line 47

# String form of the elements.
#
# @return [String] `inspect` of the underlying data converted with `to_a`.
def to_s = @data.to_a.inspect

#typeObject



101
102
103
# File 'lib/red_amber/vector.rb', line 101

# Short name of the element type.
#
# @return [Symbol] `:list` when `list?` is true; otherwise the nick of
#   `@data.value_type`, converted to a Symbol.
def type
  return :list if list?

  @data.value_type.nick.to_sym
end

#type_classObject



137
138
139
# File 'lib/red_amber/vector.rb', line 137

# Type class of the underlying data.
#
# @return [Object] the result of `@data.type_class`.
def type_class = @data.type_class

#value_countsObject



183
184
185
186
# File 'lib/red_amber/vector.rb', line 183

# Count occurrences of each value with Arrow's `value_counts` function.
#
# The function is looked up with `Arrow::Function.find`, executed on `data`,
# and the two fields of its result are paired up into a Hash.
#
# @return [Hash] first field entries as keys, second field entries as values.
def value_counts
  result = Arrow::Function.find(:value_counts).execute([data])
  uniques, occurrences = result.value.fields
  uniques.zip(occurrences).to_h
end