sql - Why does this GROUP BY and NA combo produce a character type? -
why y
end character
class? seems should never happen sqldf
sum?
library(sqldf) # 3 similar data.frame objects x <- structure(list(size = c(1l, 2l), diff = c(1, na)) , .names = c("gb","diff"), row.names = 1:2, class = "data.frame") y <- structure(list(size = c(1l, 1l, 2l, 2l), diff = c(na, na, 1, na)) , .names = c("gb","diff"), row.names = 1:4, class = "data.frame") z <- structure(list(size = c(2l, 2l, 1l, 1l), diff = c(na, na, 1, na)) , .names = c("gb","diff"), row.names = 1:4, class = "data.frame") # when summed in sqldf: numeric, character, numeric sapply(sqldf("select sum(diff) x"),class) sapply(sqldf("select sum(diff) , gb y group gb"),class)[1] sapply(sqldf("select sum(diff) , gb z group gb"),class)[1] # despite both being numeric class( x$diff ) class( y$diff )
exclude na
, (i.e.: null
) before aggregating:
out1 <- sqldf("select sum(diff) diff_sum x diff not null") out2 <- sqldf("select sum(diff) diff_sum, gb y diff not null group gb") str(out1) # 'data.frame': 1 obs. of 1 variable: # $ diff_sum: num 1 str(out2) # 'data.frame': 1 obs. of 2 variables: # $ diff_sum: num 1 # $ gb : int 2
Comments
Post a Comment