2 years ago

#43064

test-img

gm007

Using dot syntax in dplyr::across

This question is related to an earlier question, which still hasn't been answered.

I am trying to calculate summary statistics across multiple columns. The function dplyr::summarise_each has been deprecated so I get a warning, but it allowed me to pass in the calculation for skewness:

# Deprecated dplyr API. Inside funs(), the `.` in the skewness entry is the
# placeholder for the column being summarised.
summarise_each(funs(mean = mean,
                    median = median,
                    sd = sd,
                    skewness = moments::skewness(.)))

However, when I use dplyr::across and include skewness = moments::skewness(.) in its .fns list, I get this error:

Error: Problem with `summarise()` input `..1`.
ℹ `..1 = across(...)`.
x non-numeric argument to binary operator
Run `rlang::last_error()` to see where the error occurred.
In addition: Warning message:
In mean.default(x) : argument is not numeric or logical: returning NA

This is the working pipeline, which uses the deprecated summarise_each():

# Summary statistics (mean, median, sd, skewness) for each selected measure,
# reshaped to one row per measure. Uses the deprecated summarise_each()/funs()
# API.
ag_data %>%
    # keep rows whose fiscal year lies strictly between 1988 and 2021
    filter(data_year_fiscal < 2021 & data_year_fiscal > 1988) %>%
    mutate(g_k_it_to_K_int = g_k_it/K_int) %>%
    # keep only the columns to summarise
    select(K_int, K_phys, Intangibles_intensity, g_k_it_to_K_int,
           # new measures
           total_q, i_phys, i_int, i_tot,
           c_tot,
           operating_activities_net_cash_flow,
           investing_activities_net_cash_flow,
           financing_activities_net_cash_flow,
           # standard measures
           tobins_q_star,
           i_phys_star,
           c_star) %>%
    # inside funs(), `.` is the placeholder for the column being summarised
    summarise_each(funs(mean = mean,
                        median = median,
                        sd = sd,
                        skewness = moments::skewness(.))) %>%
    # the pattern splits each column name at its last underscore: the part
    # before it goes to `metric`, the part after it (".value") names the
    # output columns
    pivot_longer(
      cols = everything(),
      names_to = c("metric", ".value"),
      names_pattern = "(.*)_([^_]+)$"
    )

This is the problematic pipeline, which uses across():

# Attempted across() version of the summary pipeline; this is the code that
# raises the "non-numeric argument to binary operator" error.
ag_data %>%
  # keep rows whose fiscal year lies strictly between 1988 and 2021
  filter(data_year_fiscal < 2021 & data_year_fiscal > 1988) %>%
  mutate(g_k_it_to_K_int = g_k_it/K_int) %>%
  summarise(across(.cols = c(
    K_int, K_phys, Intangibles_intensity, g_k_it_to_K_int,
    # new measures
    total_q, i_phys, i_int, i_tot,
    c_tot,
    operating_activities_net_cash_flow,
    investing_activities_net_cash_flow,
    financing_activities_net_cash_flow,
    # standard measures
    tobins_q_star,
    i_phys_star,
    c_star
  ),
   .fns = list(
     mean = mean,
     median = median,
     sd = sd,
     # NOTE(review): unlike the three entries above, this is a call, not a
     # function. It is evaluated as soon as list() is built, so `.` is not
     # the current column; under %>% it presumably resolves to the whole
     # piped data frame, which would explain the non-numeric error. Passing
     # the function itself (moments::skewness) or a lambda
     # (~ moments::skewness(.x)) should defer the call to each column --
     # TODO confirm.
     skewness = moments::skewness(.)
     ),
   .names = "{.col}_{.fn}"))  %>%
  # the pattern splits each column name at its last underscore: the part
  # before it goes to `metric`, the part after it (".value") names the
  # output columns
  pivot_longer(
    cols = everything(),
    names_to = c("metric", ".value"),
    names_pattern = "(.*)_([^_]+)$"
  )

The data:

structure(list(K_int = c(11.85294, 16.859952, 20.3777616, 23.88320928, 
27.189167424, 30.5188339392, 4.5886, 5.63738, 6.653804, 7.9427432, 
9.94269456, 11.550355648, 22.5039845184, 23.47018761472, 23.837550091776, 
24.5622400734208, 639.19194, 927.419652, 1206.4772216, 1465.55267728, 
1750.485841824, 2048.8569734592, 2322.22127876736, 2584.04762301389, 
2881.76689841111, 3204.21311872889, 3729.00919498311, 7035.77875598649, 
8657.05820478919, 9907.84656383135), K_phys = c(25.927, 25.943, 
23.465, 24.516, 32.156, 36.745, 29.592, 30.947, 23.892, 24.28, 
25.529, 27.102, 29.979, 31.359, 31.111, 37.441, 1477.894, 1745.221, 
1959.444, 2166.205, 2727.27, 3109.172, 3496.257, 4063.378, 4622.655, 
5205.636, 6102.889, 13999.479, 15248, 15035), Intangibles_intensity = c(0.313736337326105, 
0.393896944304215, 0.464791925880873, 0.493462798985628, 0.458153015724821, 
0.453718323085569, 0.134245741736541, 0.154092538946949, 0.217830376964378, 
0.246494941498339, 0.280299395992544, 0.298826693855014, 0.428786295689993, 
0.428060101485417, 0.43381581592166, 0.39614446026265, 0.30192063908374, 
0.347004993471902, 0.381082515057108, 0.403538123275236, 0.390929274319465, 
0.397217034646695, 0.399111445898415, 0.388729076421362, 0.384009179843854, 
0.381007206371045, 0.379276628076346, 0.334475519035851, 0.362143364413763, 
0.39722196656569), g_k_it_to_K_int = c(0, 0, 0, 0, 0, 0, 0.0884801464498976, 
0.0872391075286747, 0.0976043177707068, 0.0889556645870157, 0.0911464789078264, 
0.0953211583740837, 0.060957854946904, 0.0669545435680467, 0.0742588291391029, 
0.0961687061288879, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
    total_q = c(0.0240471530658863, -0.0085391306655672, 0.0629693454346635, 
    1.66047909037245, 0.425325280821302, 0.137633397590273, -0.070555373515971, 
    0.0176373086000091, -0.128454631608322, -0.0928497918823993, 
    -0.0388020650570239, -0.0472448566040888, 0.0809952985526135, 
    0.00230944512418794, -0.0717967078914873, 0.767288531110069, 
    1.00664636930138, 1.226411413576, 1.38539170528803, 1.27369366600044, 
    1.38779172413936, 1.24355704146002, 1.18524890350901, 1.17589072346115, 
    1.13474100540682, 1.33294840867422, 1.57820945455047, 0.699618417835253, 
    0.521082186593649, 0.548471080275074), i_phys = c(0.0424278211532994, 
    0.0517205691697763, 0.0495526570223474, 0.103757150188276, 
    0.173184647532324, 0.0329934194306133, 0.0531255533911811, 
    0.0696008847123807, 0.0403724212355109, 0.0434102176521528, 
    0.0533474133263738, 0.0540994735042621, 0.0416533474611995, 
    0.0290760903558181, 0.0340876836099288, 0.0872998467109318, 
    0.214076329348485, 0.139884259965375, 0.0953581244860897, 
    0.08480943814019, 0.0911844978181534, 0.0972643474510191, 
    0.0917495815620865, 0.11280114293716, 0.101288835435623, 
    0.0898207762203129, 0.0992137894771306, 0.186840828044513, 
    0.0841919799863629, 0.062204408257918), i_int = c(0.218846950539408, 
    0.195278234957493, 0.160965533405266, 0.172913377792333, 
    0.166998596056402, 0.147737387567876, 0.0628942157541414, 
    0.0565554729876012, 0.0570762713485919, 0.0841915963318562, 
    0.110173735922024, 0.100519584537153, 0.110878106344387, 
    0.0828725736524215, 0.0711628271317389, 0.0828411303373278, 
    0.249586380024823, 0.195695929093932, 0.173879829169043, 
    0.157997140480711, 0.15941694117478, 0.144819932775935, 0.132441229685815, 
    0.124545553885529, 0.122563657903796, 0.11953640295596, 0.120770216642545, 
    0.26365921906339, 0.123079071815186, 0.113247998679109), 
    i_tot = c(0.261274771692708, 0.24699880412727, 0.210518190427613, 
    0.27667052798061, 0.340183243588727, 0.18073080699849, 0.116019769145323, 
    0.126156357699982, 0.0974486925841029, 0.127601813984009, 
    0.163521149248398, 0.154619058041415, 0.152531453805586, 
    0.11194866400824, 0.105250510741668, 0.17014097704826, 0.463662709373308, 
    0.335580189059307, 0.269237953655133, 0.242806578620901, 
    0.250601438992933, 0.242084280226954, 0.224190811247902, 
    0.237346696822689, 0.223852493339419, 0.209357179176273, 
    0.219984006119675, 0.450500047107903, 0.207271051801548, 
    0.175452406937027), c_tot = c(0.235017949030377, 0.0356887808715419, 
    0.207435692753154, 0.296188002901715, 0.180867004445408, 
    0.0717539470328953, 0.132632934623372, 0.118083649789647, 
    0.119003520081521, 0.155706819830311, 0.160044412357791, 
    0.174192974895756, 0.0943795000030938, 0.108922540371076, 
    0.087093393277233, 0.191968668552344, 0.341438953625522, 
    0.280888724810104, 0.255497801206086, 0.23431112086393, 0.23509651961126, 
    0.221183969154638, 0.217531346910507, 0.210308754518412, 
    0.204354743781864, 0.196356913290281, 0.196615835392053, 
    0.320768620408342, 0.17010963409667, 0.143151293365787), 
    operating_activities_net_cash_flow = c(-1.059, -0.176, 3.53, 
    7.191, 1.831, 2.618, 4.257, 4.593, 1.43, 2.134, -0.869, 0.396, 
    3.466, 5.082, 6.092, 7.263, 237.889, 291.86, 371.001, 408.02, 
    500.65, 584.515, 608.132, 793.464, 656.951, 864.672, 825.446, 
    1396.753, 1780, 2092), investing_activities_net_cash_flow = c(1.039, 
    -1.678, 0.235, -4.243, -8.178, -1.863, -1.502, -1.869, -1.447, 
    -1.208, -1.462, -1.883, -16.634, -1.168, -1.509, -4.793, 
    -320.051, -299.68, -263.264, -260.422, -756.775, -418.371, 
    -460.466, -665.71, -670.858, -646.132, -1055.687, -1476.338, 
    -1549, -1159), financing_activities_net_cash_flow = c(0.047, 
    1.802, -3.836, 0.102, 3.044, -0.734, -2.636, -1.993, 0.076, 
    -0.892, 1.708, 0.63, 13.12, -3.676, -2.461, -4.871, -37.578, 
    -30.113, -128.012, -136.627, 261.262, -143.222, -159.905, 
    -108.865, 35.659, -201.322, 202.804, 194.361, -419, -905), 
    tobins_q_star = c(0.0350406911713658, -0.0140885788073855, 
    0.117653952695504, 3.27809899657367, 0.784954596342829, 0.251945842971833, 
    -0.0814958434712084, 0.0208501631822148, -0.164228612087728, 
    -0.123223846787479, -0.0539141760350973, -0.0673797136742676, 
    0.141795089896261, 0.00403791574986449, -0.126808042171579, 
    1.27064915467001, 1.44202282098716, 1.87813291267983, 2.23841099822194, 
    2.13541504612906, 2.27853952853953, 2.06302618510652, 1.97249372686276, 
    1.92368175567225, 1.84213947396031, 2.15341506782264, 2.5425326738697, 
    1.05122867429566, 0.816926810073452, 0.909905553708015), 
    i_phys_star = c(0.0523374571241377, 0.0753654491456782, 0.081756157730409, 
    0.1938632005114, 0.341899167890357, 0.060890658042045, 0.0592034445640474, 
    0.0803933495539335, 0.0477267586518887, 0.0554997488699146, 
    0.0707990115321252, 0.0751694151748991, 0.0594052099476053, 
    0.0509022982754595, 0.0596001147995791, 0.154189836392273, 
    0.274087283221257, 0.200384466003651, 0.146031935210498, 
    0.13702866731583, 0.152875651196447, 0.159693026359693, 0.152209977447372, 
    0.187723900159513, 0.16570203412038, 0.145815121396687, 0.16028262444781, 
    0.301004983049831, 0.12650470778234, 0.0975209863588667), 
    c_star = c(0.100936524453694, -0.14718247386894, 0.156342751416567, 
    0.32725335606222, 0.126284875183554, -0.0584338848115437, 
    0.0987441693577323, 0.0906663963233306, 0.0934500920929331, 
    0.123723422065964, 0.11004942339374, 0.144267303850523, 0.0239096745627629, 
    0.0891290570065713, 0.0651806498931726, 0.236636559416284, 
    0.213466656230432, 0.206139276565166, 0.204874339696806, 
    0.199886294275315, 0.207062120159449, 0.196709530042863, 
    0.207076996705232, 0.204907991603592, 0.19395685067941, 0.182927127375934, 
    0.181062986347874, 0.219432960356972, 0.126147551633886, 
    0.100144281217209)), row.names = c(NA, -30L), class = c("tbl_df", 
"tbl", "data.frame"))

r

dplyr

summarize

0 Answers

Your Answer

Accepted video resources