Module: RedAmber::DataFrameReshaping

Included in:
DataFrame
Defined in:
lib/red_amber/data_frame_reshaping.rb

Overview

mix-ins for the class DataFrame

Instance Method Summary collapse

Instance Method Details

#to_long(*keep_keys, name: :NAME, value: :VALUE) ⇒ DataFrame

Reshape wide DataFrame to a longer DataFrame.

Parameters:

  • keep_keys (Array)

    keys to keep.

  • name (Symbol, String) (defaults to: :NAME)

    key of the new column which holds the **original key names** of the reshaped columns.

  • value (Symbol, String) (defaults to: :VALUE)

    key of the new column which holds the **values** of the reshaped columns.

Returns:



40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# File 'lib/red_amber/data_frame_reshaping.rb', line 40

# Reshape a wide DataFrame into a longer one.
#
# Each column whose key is not listed in +keep_keys+ is folded into a pair
# of columns: +name+ receives the original key (converted to a String) and
# +value+ receives the cell. Cells of kept columns are repeated once per
# folded column so that all resulting columns line up.
#
# @param keep_keys [Array] keys of the columns to keep as they are.
# @param name [Symbol, String] key of the new column built from the folded keys.
# @param value [Symbol, String] key of the new column built from the folded values.
# @return [DataFrame] the reshaped DataFrame.
# @raise [DataFrameArgumentError] if a key to keep does not exist in self,
#   or if +name+ / +value+ is also listed in +keep_keys+.
def to_long(*keep_keys, name: :NAME, value: :VALUE)
  warn('[Info] No key to keep is specified.') if keep_keys.empty?

  missing = keep_keys - keys
  raise DataFrameArgumentError, "Not have keys #{missing}" unless missing.empty?

  name = name.to_sym
  raise DataFrameArgumentError, "Can't specify the key: #{name} for the column from keys." if keep_keys.include?(name)

  value = value.to_sym
  raise DataFrameArgumentError, "Can't specify the key: #{value} for the column from values." if keep_keys.include?(value)

  # Number of folded columns; every kept cell is repeated this many times.
  repeat = keys.size - keep_keys.size
  columns = Hash.new { |store, column_key| store[column_key] = [] }
  each_row do |row|
    row.each do |key, cell|
      unless keep_keys.include?(key)
        columns[name] << key
        columns[value] << cell
        next
      end

      columns[key].concat([cell] * repeat)
    end
  end

  # The folded keys are stored as Strings in the resulting column.
  columns[name].map! { |folded_key| folded_key&.to_s }
  DataFrame.new(columns)
end

#to_wide(name: :NAME, value: :VALUE) ⇒ DataFrame

Reshape long DataFrame to a wide DataFrame.

Parameters:

  • name (Symbol, String) (defaults to: :NAME)

    key of the column which will be expanded **to key names**.

  • value (Symbol, String) (defaults to: :VALUE)

    key of the column which will be expanded **to values**.

Returns:



83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# File 'lib/red_amber/data_frame_reshaping.rb', line 83

# Reshape a long DataFrame into a wider one.
#
# Rows are grouped by the values of all columns other than +name+ and
# +value+. Each distinct entry of the +name+ column becomes a new column
# key (converted to a Symbol), filled with the matching +value+ cell of
# every group.
#
# Cells are aligned by new key, not by row order, so groups may list their
# names in any order. A group that has no row for some name gets +nil+ in
# that column.
#
# @param name [Symbol, String] key of the column expanded into new column keys.
# @param value [Symbol, String] key of the column expanded into the new cells.
# @return [DataFrame] the reshaped DataFrame.
# @raise [DataFrameArgumentError] if +name+ or +value+ is not a key of self.
def to_wide(name: :NAME, value: :VALUE)
  name = name.to_sym
  unless keys.include?(name)
    raise DataFrameArgumentError,
          "You are going to keep the key: #{name}. " \
          'You may need to specify the column name ' \
          'that gives the new keys by `:name` option.'
  end

  value = value.to_sym
  unless keys.include?(value)
    raise DataFrameArgumentError,
          "You are going to keep the key: #{value}. " \
          'You may need to specify the column name ' \
          'that gives the new values by `:value` option.'
  end

  keep_keys = keys - [name, value]

  # groups maps {kept key => kept cell, ...} to {new key => value cell, ...}.
  groups = Hash.new { |h, k| h[k] = {} }
  each_row do |row|
    kept, converted = row.partition { |k, _| keep_keys.include?(k) }
    cells = converted.to_h
    groups[kept.to_h][cells[name].to_s.to_sym] = cells[value]
  end

  # Union of the new keys over all groups, in order of first appearance.
  new_keys = groups.each_value.flat_map(&:keys).uniq

  # Build every column by key lookup so that each cell lands under its own
  # key regardless of the order in which rows were seen.
  columns =
    keep_keys.map { |k| [k, groups.each_key.map { |kept| kept[k] }] } +
    new_keys.map { |k| [k, groups.each_value.map { |cells| cells[k] }] }
  DataFrame.new(columns)
end

#transpose(key: keys.first, name: :NAME) ⇒ DataFrame

Transpose a wide DataFrame.

Parameters:

  • key (Symbol) (defaults to: keys.first)

    key of the index column whose values are transposed into the new keys. If it is not specified, the first key (`keys.first`) is used.

  • name (Symbol) (defaults to: :NAME)

    key name of the transposed index column. If it is not specified, :NAME is used. If that key already exists among the new keys, the first unused key counting up from :NAME1 (:NAME1, :NAME2, ...) is used instead.

Returns:



15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# File 'lib/red_amber/data_frame_reshaping.rb', line 15

# Transpose a wide DataFrame.
#
# The cells of the +key+ column become the keys of the result. The
# remaining original keys (as Strings) become the cells of a leading
# column named +name+.
#
# @param key [Symbol] key of the index column whose cells become the new
#   keys. Defaults to the first key of self.
# @param name [Symbol] key of the column that holds the original keys.
#   If it collides with one of the new keys, the first unused key counting
#   up from :NAME1 is used instead.
# @return [DataFrame] the transposed DataFrame.
# @raise [DataFrameArgumentError] if +key+ is not a key of self.
def transpose(key: keys.first, name: :NAME)
  unless keys.include?(key)
    raise DataFrameArgumentError, "Self does not include: #{key}"
  end

  # Find unused name
  new_keys = self[key].to_a.map { |e| e.to_s.to_sym }
  name = (:NAME1..).find { |k| !new_keys.include?(k) } if new_keys.include?(name)

  names = (keys - [key]).map { |x| x&.to_s }
  hash = { name => names }
  i = keys.index(key)
  each_row do |h|
    cells = h.values
    # Remove only the index cell by position; subtracting by value would
    # also drop any data cell that happens to equal the index value.
    new_key = cells.delete_at(i)
    hash[new_key] = cells
  end
  DataFrame.new(hash)
end