# r读取数据框 - r选取数据

## 如何按列排序数据框? (10)

dd <- data.frame(b = factor(c("Hi", "Med", "Hi", "Low"),
levels = c("Low", "Med", "Hi"), ordered = TRUE),
x = c("A", "D", "A", "C"), y = c(8, 3, 9, 9),
z = c(1, 1, 1, 2))
dd
b x y z
1  Hi A 8 1
2 Med D 3 1
3  Hi A 9 1
4 Low C 9 2

### 您的选择

• base
• dplyr arrange
• 来自data.table setorderdata.table
• plyr arrange
• taRifx sort
• doBy
• 来自Deducer

dd <- data.frame(b = factor(c("Hi", "Med", "Hi", "Low"),
levels = c("Low", "Med", "Hi"), ordered = TRUE),
x = c("A", "D", "A", "C"), y = c(8, 3, 9, 9),
z = c(1, 1, 1, 2))
library(taRifx)
sort(dd, f= ~ -z + b )

library(plyr)
arrange(dd,desc(z),b)

dd <- data.frame(b = factor(c("Hi", "Med", "Hi", "Low"),
levels = c("Low", "Med", "Hi"), ordered = TRUE),
x = c("A", "D", "A", "C"), y = c(8, 3, 9, 9),
z = c(1, 1, 1, 2))
library(microbenchmark)

microbenchmark(dd[with(dd, order(-z, b)), ] ,
dd[order(-dd\$z, dd\$b),],
times=1000
)

dd[with(dd, order(-z, b)), ] 778

dd[order(-dd\$z, dd\$b),] 788

library(taRifx)
microbenchmark(sort(dd, f= ~-z+b ),times=1000)

library(plyr)
microbenchmark(arrange(dd,desc(z),b),times=1000)

library(doBy)
microbenchmark(orderBy(~-z+b, data=dd),times=1000)

library(Deducer)
microbenchmark(sortData(dd,c("z","b"),increasing= c(FALSE,TRUE)),times=1000)

esort <- function(x, sortvar, ...) {
attach(x)
x <- x[with(x,order(sortvar,...)),]
return(x)
detach(x)
}

microbenchmark(esort(dd, -z, b),times=1000)

m <- microbenchmark(
arrange(dd,desc(z),b),
sort(dd, f= ~-z+b ),
dd[with(dd, order(-z, b)), ] ,
dd[order(-dd\$z, dd\$b),],
times=1000
)

uq <- function(x) { fivenum(x)[4]}
lq <- function(x) { fivenum(x)[2]}

y_min <- 0 # min(by(m\$time,m\$expr,lq))
y_max <- max(by(m\$time,m\$expr,uq)) * 1.05

p <- ggplot(m,aes(x=expr,y=time)) + coord_cartesian(ylim = c( y_min , y_max ))
p + stat_summary(fun.y=median,fun.ymin = lq, fun.ymax = uq, aes(fill=expr))

（线从下四分位延伸到上四分位，点是中位数）

Dirk的答案很好，但如果你需要这种排序来坚持下去，你会希望将排序应用到该数据框的名称上。 使用示例代码：

dd <- dd[with(dd, order(-z, b)), ]

library(BBmisc)
sortByCol(dd, c("z", "b"), asc = c(FALSE, TRUE))
b x y z
4 Low C 9 2
2 Med D 3 1
1  Hi A 8 1
3  Hi A 9 1

library(microbenchmark)
microbenchmark(sortByCol(dd, c("z", "b"), asc = c(FALSE, TRUE)), times = 100000)
median 202.878

library(plyr)
microbenchmark(arrange(dd,desc(z),b),times=100000)
median 148.758

microbenchmark(dd[with(dd, order(-z, b)), ], times = 100000)
median 115.872

library(dplyr)
library(data.table)

# dplyr

df1 <- tbl_df(iris)
#using strings or formula
arrange_(df1, c('Petal.Length', 'Petal.Width'))
arrange_(df1, ~Petal.Length, ~Petal.Width)
Source: local data frame [150 x 5]

Sepal.Length Sepal.Width Petal.Length Petal.Width Species
(dbl)       (dbl)        (dbl)       (dbl)  (fctr)
1           4.6         3.6          1.0         0.2  setosa
2           4.3         3.0          1.1         0.1  setosa
3           5.8         4.0          1.2         0.2  setosa
4           5.0         3.2          1.2         0.2  setosa
5           4.7         3.2          1.3         0.2  setosa
6           5.4         3.9          1.3         0.4  setosa
7           5.5         3.5          1.3         0.2  setosa
8           4.4         3.0          1.3         0.2  setosa
9           5.0         3.5          1.3         0.3  setosa
10          4.5         2.3          1.3         0.3  setosa
..          ...         ...          ...         ...     ...

#Or using a variable
sortBy <- c('Petal.Length', 'Petal.Width')
arrange_(df1, .dots = sortBy)
Source: local data frame [150 x 5]

Sepal.Length Sepal.Width Petal.Length Petal.Width Species
(dbl)       (dbl)        (dbl)       (dbl)  (fctr)
1           4.6         3.6          1.0         0.2  setosa
2           4.3         3.0          1.1         0.1  setosa
3           5.8         4.0          1.2         0.2  setosa
4           5.0         3.2          1.2         0.2  setosa
5           4.7         3.2          1.3         0.2  setosa
6           5.5         3.5          1.3         0.2  setosa
7           4.4         3.0          1.3         0.2  setosa
8           4.4         3.2          1.3         0.2  setosa
9           5.0         3.5          1.3         0.3  setosa
10          4.5         2.3          1.3         0.3  setosa
..          ...         ...          ...         ...     ...

#Doing the same operation except sorting Petal.Length in descending order
sortByDesc <- c('desc(Petal.Length)', 'Petal.Width')
arrange_(df1, .dots = sortByDesc)

# data.table

dt1 <- data.table(iris) #not really required, as you can work directly on your data.frame
sortBy <- c('Petal.Length', 'Petal.Width')
sortType <- c(-1, 1)
setorderv(dt1, sortBy, sortType)
dt1
Sepal.Length Sepal.Width Petal.Length Petal.Width   Species
1:          7.7         2.6          6.9         2.3 virginica
2:          7.7         2.8          6.7         2.0 virginica
3:          7.7         3.8          6.7         2.2 virginica
4:          7.6         3.0          6.6         2.1 virginica
5:          7.9         3.8          6.4         2.0 virginica
---
146:          5.4         3.9          1.3         0.4    setosa
147:          5.8         4.0          1.2         0.2    setosa
148:          5.0         3.2          1.2         0.2    setosa
149:          4.3         3.0          1.1         0.1    setosa
150:          4.6         3.6          1.0         0.2    setosa

dd <- dd[order(dd\$b, decreasing = FALSE),]

dd <- dd[order(dd\$z, decreasing = TRUE),]

R> dd[with(dd, order(-z, b)), ]
b x y z
4 Low C 9 2
2 Med D 3 1
1  Hi A 8 1
3  Hi A 9 1

R> dd[ order(-dd[,4], dd[,1]), ]
b x y z
4 Low C 9 2
2 Med D 3 1
1  Hi A 8 1
3  Hi A 9 1
R>

set.seed(1234)

ID        = 1:10
Age       = round(rnorm(10, 50, 1))
diag      = c("Depression", "Bipolar")
Diagnosis = sample(diag, 10, replace=TRUE)

data = data.frame(ID, Age, Diagnosis)

databyAge = data[order(Age),]
databyAge

id age  diagnosis
1  49 Depression
2  50 Depression
3  51 Depression
4  48 Depression
5  50 Depression
6  51    Bipolar
7  49    Bipolar
8  49    Bipolar
9  49    Bipolar
10  49 Depression

databyage = my.data[order(age),]

databyage = my.data[order(my.data\$age),]

set.seed(1234)

v1  <- c(0,0,0,0, 0,0,0,0, 1,1,1,1, 1,1,1,1)
v2  <- c(0,0,0,0, 1,1,1,1, 0,0,0,0, 1,1,1,1)
v3  <- c(0,0,1,1, 0,0,1,1, 0,0,1,1, 0,0,1,1)
v4  <- c(0,1,0,1, 0,1,0,1, 0,1,0,1, 0,1,0,1)

df.1 <- data.frame(v1, v2, v3, v4)
df.1

rdf.1 <- df.1[sample(nrow(df.1), nrow(df.1), replace = FALSE),]
rdf.1

order.rdf.1 <- rdf.1[do.call(order, as.list(rdf.1)),]
order.rdf.1

order.rdf.2 <- rdf.1[do.call(order, rev(as.list(rdf.1))),]
order.rdf.2

rdf.3 <- data.frame(rdf.1\$v2, rdf.1\$v4, rdf.1\$v3, rdf.1\$v1)
rdf.3

order.rdf.3 <- rdf.1[do.call(order, as.list(rdf.3)),]
order.rdf.3

library(Deducer)
dd<- sortData(dd,c("z","b"),increasing= c(FALSE,TRUE))