Module: RedAmber::DataFrameVariableOperation

Included in:
DataFrame
Defined in:
lib/red_amber/data_frame_variable_operation.rb

Overview

Mix-in methods for the DataFrame class.

Instance Method Summary collapse

Instance Method Details

#assign(*assigner, &block) ⇒ Object

assign variables to create a new DataFrame



166
167
168
# File 'lib/red_amber/data_frame_variable_operation.rb', line 166

# Assign variables to create a new DataFrame.
#
# Thin wrapper: forwards every argument and the optional block unchanged
# to #assign_update with `append_to_left: false`.
# NOTE(review): presumably this means newly created variables are placed
# on the right side of the existing ones - confirm in #assign_update.
#
# @param assigner [Array]
#   assignment specification(s), passed through as-is.
# @yield
#   optional block, passed through to #assign_update.
# @return [Object]
#   whatever #assign_update returns.
def assign(*assigner, &block)
  assign_update(*assigner, append_to_left: false, &block)
end

#assign_left(*assigner, &block) ⇒ Object



170
171
172
# File 'lib/red_amber/data_frame_variable_operation.rb', line 170

# Counterpart of #assign that forwards every argument and the optional
# block unchanged to #assign_update with `append_to_left: true`.
# NOTE(review): presumably this means newly created variables are placed
# on the left side of the existing ones - confirm in #assign_update.
#
# @param assigner [Array]
#   assignment specification(s), passed through as-is.
# @yield
#   optional block, passed through to #assign_update.
# @return [Object]
#   whatever #assign_update returns.
def assign_left(*assigner, &block)
  assign_update(*assigner, append_to_left: true, &block)
end

#drop(keys) ⇒ DataFrame #drop(booleans) ⇒ DataFrame #drop(indices) ⇒ DataFrame

Note:

DataFrame#drop creates a DataFrame even if it is a single column.

Drop some variables (columns) to create a DataFrame from the remainder.

Overloads:

  • #drop(keys) ⇒ DataFrame

    Drop variables by Symbols or Strings.

    Parameters:

    • keys (Symbol, String, <Symbol, String>)

      key name(s) of variables to drop.

    Returns:

  • #drop(booleans) ⇒ DataFrame

    Drop variables by booleans.

    Parameters:

    • booleans (<true, false, nil>)

      boolean array; variables at the positions marked true are dropped.

    Returns:

  • #drop(indices) ⇒ DataFrame

    Drop variables by column indices.

    Parameters:

    • indices (Integer, Float, Range<Integer>, Vector, Arrow::Array)

      numeric array of variables to drop by column index.

    Returns:



101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# File 'lib/red_amber/data_frame_variable_operation.rb', line 101

# Drop the specified variables (columns) and return the rest as a DataFrame.
#
# The variables to drop come either from the arguments or from the value
# of the block (evaluated with instance_eval), never from both.
#
# @param args [Array]
#   key names, booleans or column indices of the variables to drop.
#   Arguments that are not uniformly one of these are handed to
#   #parse_args and classified again.
# @yield
#   evaluated via instance_eval; its result becomes the sole argument.
# @return [DataFrame]
#   self when nothing is specified or the receiver is empty,
#   `DataFrame.new` when no key remains, otherwise a DataFrame created
#   from the remaining columns.
# @raise [DataFrameArgumentError]
#   if both arguments and a block are given, or if the parsed arguments
#   are neither booleans, symbols nor integers.
def drop(*args, &block)
  if block && !args.empty?
    raise DataFrameArgumentError, 'Must not specify both arguments and block.'
  end
  args = [instance_eval(&block)] if block
  return self if args.empty? || empty?

  remaining =
    case args
    in [*] if args.symbols?
      keys - args
    in [*] if args.booleans?
      keys.reject_by_booleans(args)
    in [*] if args.integers?
      keys.reject_by_indices(args)
    else
      # Not uniformly symbols/booleans/integers: hand the arguments to
      # parse_args and classify what it returns.
      parsed = parse_args(args, n_keys)
      if parsed.booleans?
        keys.reject_by_booleans(parsed)
      elsif parsed.symbols?
        keys - parsed
      else
        # nil entries are discarded before the integer check.
        parsed.compact!
        raise DataFrameArgumentError, "Invalid argument #{args}" unless parsed.integers?

        keys.reject_by_indices(parsed)
      end
    end

  if remaining.empty?
    DataFrame.new
  else
    DataFrame.create(@table.select_columns(*remaining))
  end
end

#pick(keys) ⇒ DataFrame #pick(booleans) ⇒ DataFrame #pick(indices) ⇒ DataFrame

Note:

DataFrame#pick creates a DataFrame even when a single key is specified, whereas DataFrame#[] creates a Vector if a single key is specified.

Pick up variables (columns) to create a new DataFrame

Overloads:

  • #pick(keys) ⇒ DataFrame

    Pick variables by Symbols or Strings.

    Parameters:

    • keys (Symbol, String, <Symbol, String>)

      key name(s) of variables to pick.

    Returns:

  • #pick(booleans) ⇒ DataFrame

    Pick variables by booleans.

    Parameters:

    • booleans (<true, false, nil>)

      boolean array; variables at the positions marked true are picked.

    Returns:

  • #pick(indices) ⇒ DataFrame

    Pick variables by column indices.

    Parameters:

    • indices (Integer, Float, Range<Integer>, Vector, Arrow::Array)

      numeric array to pick variables by column index.

    Returns:

Raises:



38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# File 'lib/red_amber/data_frame_variable_operation.rb', line 38

# Pick up variables (columns) to create a new DataFrame.
#
# The variables to pick come either from the arguments or from the value
# of the block (evaluated with instance_eval), never from both.
#
# @param args [Array]
#   key names, booleans or column indices of the variables to pick.
#   A single Vector / Arrow::Array / Arrow::ChunkedArray is expanded with
#   #to_a; any other form is handed to #parse_args.
# @yield
#   evaluated via instance_eval; its result becomes the sole argument.
# @return [DataFrame]
#   `DataFrame.new` when nothing (or only nil) is selected, otherwise a
#   DataFrame created from the selected columns.
# @raise [DataFrameArgumentError]
#   if both arguments and a block are given, or if the resolved keys
#   contain duplicates.
def pick(*args, &block)
  if block && !args.empty?
    raise DataFrameArgumentError, 'Must not specify both arguments and block.'
  end
  args = [instance_eval(&block)] if block

  # Plain symbol / boolean arguments are answered right away; every other
  # form is first turned into a flat selection array.
  selection =
    case args
    in [] | [nil]
      return DataFrame.new
    in [*] if args.symbols?
      return DataFrame.create(@table.select_columns(*args))
    in [*] if args.booleans?
      return DataFrame.create(@table.select_columns(*keys.select_by_booleans(args)))
    in [(Vector | Arrow::Array | Arrow::ChunkedArray) => array_like]
      array_like.to_a
    else
      parse_args(args, n_keys)
    end

  return DataFrame.new if selection.compact.empty?

  columns =
    if selection.booleans?
      keys.select_by_booleans(selection)
    else
      selection.compact!
      # uniq! returns nil unless it actually removed a duplicate.
      raise DataFrameArgumentError, "some keys are duplicated: #{args}" if selection.uniq!

      selection
    end

  DataFrame.create(@table.select_columns(*columns))
end

#rename(*renamer, &block) ⇒ Object

rename variables to create a new DataFrame



140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
# File 'lib/red_amber/data_frame_variable_operation.rb', line 140

# Rename variables to create a new DataFrame.
#
# The renaming rule comes either from the arguments or from the value of
# the block (evaluated with instance_eval), never from both.
#
# @param renamer [Array]
#   one of: a single Hash of key pairs, a pair of (Symbol | String)
#   values `from, to`, a single Array, or several arguments starting with
#   an Array. Array forms are converted with #try_convert_to_hash.
# @yield
#   evaluated via instance_eval; its result becomes the sole argument.
# @return [Object]
#   self when the rule is absent or empty (`nil`, `{}` or `[]`),
#   otherwise whatever #rename_by_hash returns.
# @raise [DataFrameArgumentError]
#   if both arguments and a block are given, or if the arguments match
#   none of the accepted forms.
def rename(*renamer, &block)
  if block && !renamer.empty?
    raise DataFrameArgumentError, 'Must not specify both arguments and a block'
  end
  renamer = [instance_eval(&block)] if block

  mapping =
    case renamer
    in [] | [nil] | [{}] | [[]]
      # Nothing to rename.
      return self
    in [Hash => pairs]
      pairs
    in [(Symbol | String) => old_key, (Symbol | String) => new_key]
      { old_key => new_key }
    in [Array => nested]
      # A single Array argument: convert that Array itself.
      try_convert_to_hash(nested)
    in [Array, *] => nested_list
      # Several arguments starting with an Array: convert the whole list.
      try_convert_to_hash(nested_list)
    else
      raise DataFrameArgumentError, "Invalid argument #{renamer}"
    end

  rename_by_hash(mapping)
end