# table用法 - r選取資料

## 如何按列排序數據框? (10)

``````dd <- data.frame(b = factor(c("Hi", "Med", "Hi", "Low"),
levels = c("Low", "Med", "Hi"), ordered = TRUE),
x = c("A", "D", "A", "C"), y = c(8, 3, 9, 9),
z = c(1, 1, 1, 2))
# Print the example data frame; the commented lines are the console output.
dd
#     b x y z
# 1  Hi A 8 1
# 2 Med D 3 1
# 3  Hi A 9 1
# 4 Low C 9 2
``````

### 您的選擇

• `base` `order`
• `dplyr` `arrange`
• `data.table` `setorder`
• `plyr` `arrange`
• `taRifx` `sort`
• `doBy` `orderBy`
• `Deducer` `sortData`

``````dd <- data.frame(b = factor(c("Hi", "Med", "Hi", "Low"),
levels = c("Low", "Med", "Hi"), ordered = TRUE),
x = c("A", "D", "A", "C"), y = c(8, 3, 9, 9),
z = c(1, 1, 1, 2))
library(taRifx)
sort(dd, f= ~ -z + b )
``````

``````library(plyr)
arrange(dd,desc(z),b)
``````

``````#Load each time
dd <- data.frame(b = factor(c("Hi", "Med", "Hi", "Low"),
levels = c("Low", "Med", "Hi"), ordered = TRUE),
x = c("A", "D", "A", "C"), y = c(8, 3, 9, 9),
z = c(1, 1, 1, 2))
library(microbenchmark)

# Time the two equivalent base R spellings of "z descending, then b ascending":
# columns resolved through with(), and columns referenced explicitly via `$`.
microbenchmark(
  dd[with(dd, order(-z, b)), ],
  dd[order(-dd$z, dd$b), ],
  times = 1000
)
``````

`dd[with(dd, order(-z, b)), ]` 778

`dd[order(-dd$z, dd$b),]` 788

``````library(taRifx)
microbenchmark(sort(dd, f= ~-z+b ),times=1000)
``````

``````library(plyr)
microbenchmark(arrange(dd,desc(z),b),times=1000)
``````

``````library(doBy)
microbenchmark(orderBy(~-z+b, data=dd),times=1000)
``````

``````library(Deducer)
microbenchmark(sortData(dd,c("z","b"),increasing= c(FALSE,TRUE)),times=1000)
``````

``````esort <- function(x, sortvar, ...) {
  # Sort the rows of data frame `x` by one or more expressions written in
  # terms of its columns, e.g. esort(dd, -z, b).
  #
  # x        data frame to sort.
  # sortvar  unquoted expression giving the primary sort key.
  # ...      further unquoted sort keys (or named arguments) passed to order().
  #
  # Returns `x` with its rows reordered.
  #
  # The keys are captured unevaluated and evaluated with the columns of `x`
  # taking precedence over the caller's variables. This replaces attach(x),
  # which was never undone (detach() sat after return()) and therefore left
  # one more copy of the data on the search path after every call.
  row_order <- eval(substitute(order(sortvar, ...)), x, parent.frame())
  x[row_order, ]
}

microbenchmark(esort(dd, -z, b),times=1000)
``````

``````m <- microbenchmark(
  # Time the four approaches in a single run; the result `m` is what the
  # plotting code below summarises per expression.
  arrange(dd, desc(z), b),
  sort(dd, f = ~ -z + b),
  dd[with(dd, order(-z, b)), ],
  dd[order(-dd$z, dd$b), ],
  times = 1000
)

# Upper hinge of x: the fourth of Tukey's five numbers.
uq <- function(x) {
  five <- fivenum(x)
  five[4]
}

# Lower hinge of x: the second of Tukey's five numbers.
lq <- function(x) {
  five <- fivenum(x)
  five[2]
}

# Plot, for each benchmarked expression, the median timing with a line that
# runs from the lower hinge to the upper hinge.
library(ggplot2) # provides ggplot(), coord_cartesian() and stat_summary()

y_min <- 0 # min(by(m$time, m$expr, lq))
# Upper axis limit: the largest upper hinge over all expressions, plus 5%.
y_max <- max(by(m$time, m$expr, uq)) * 1.05

p <- ggplot(m, aes(x = expr, y = time)) +
  coord_cartesian(ylim = c(y_min, y_max))
# fun / fun.min / fun.max replace fun.y / fun.ymin / fun.ymax, which were
# deprecated in ggplot2 3.3.0.
p + stat_summary(aes(fill = expr), fun = median, fun.min = lq, fun.max = uq)
``````

（線從下四分位延伸到上四分位，點是中位數）

Dirk 的答案很好，但如果你需要讓排序結果保留下來，就必須把排序後的結果重新賦值給該數據框的名稱。使用示例代碼：

``````dd <- dd[with(dd, order(-z, b)), ]
``````

``````library(dplyr)
library(data.table)
``````

# dplyr

``````df1 <- tbl_df(iris)
# NOTE(review): tbl_df() and the underscore verbs such as arrange_() are
# deprecated in current dplyr releases (as_tibble() and arrange() are the
# documented replacements) - confirm against the installed dplyr version.
# The sort columns can be given as strings or as one-sided formulas.
arrange_(df1, c('Petal.Length', 'Petal.Width'))
arrange_(df1, ~Petal.Length, ~Petal.Width)
Source: local data frame [150 x 5]

Sepal.Length Sepal.Width Petal.Length Petal.Width Species
(dbl)       (dbl)        (dbl)       (dbl)  (fctr)
1           4.6         3.6          1.0         0.2  setosa
2           4.3         3.0          1.1         0.1  setosa
3           5.8         4.0          1.2         0.2  setosa
4           5.0         3.2          1.2         0.2  setosa
5           4.7         3.2          1.3         0.2  setosa
6           5.4         3.9          1.3         0.4  setosa
7           5.5         3.5          1.3         0.2  setosa
8           4.4         3.0          1.3         0.2  setosa
9           5.0         3.5          1.3         0.3  setosa
10          4.5         2.3          1.3         0.3  setosa
..          ...         ...          ...         ...     ...

#Or using a variable
sortBy <- c('Petal.Length', 'Petal.Width')
arrange_(df1, .dots = sortBy)
Source: local data frame [150 x 5]

Sepal.Length Sepal.Width Petal.Length Petal.Width Species
(dbl)       (dbl)        (dbl)       (dbl)  (fctr)
1           4.6         3.6          1.0         0.2  setosa
2           4.3         3.0          1.1         0.1  setosa
3           5.8         4.0          1.2         0.2  setosa
4           5.0         3.2          1.2         0.2  setosa
5           4.7         3.2          1.3         0.2  setosa
6           5.5         3.5          1.3         0.2  setosa
7           4.4         3.0          1.3         0.2  setosa
8           4.4         3.2          1.3         0.2  setosa
9           5.0         3.5          1.3         0.3  setosa
10          4.5         2.3          1.3         0.3  setosa
..          ...         ...          ...         ...     ...

#Doing the same operation except sorting Petal.Length in descending order
sortByDesc <- c('desc(Petal.Length)', 'Petal.Width')
arrange_(df1, .dots = sortByDesc)
``````

# data.table

``````dt1 <- data.table(iris) #not really required, as you can work directly on your data.frame
# Columns to sort by, and the direction for each one:
# -1 sorts that column descending, 1 sorts it ascending.
sortBy <- c('Petal.Length', 'Petal.Width')
sortType <- c(-1, 1)
setorderv(dt1, sortBy, sortType)
# Print the reordered table; the commented lines are the console output.
dt1
# Sepal.Length Sepal.Width Petal.Length Petal.Width   Species
# 1:          7.7         2.6          6.9         2.3 virginica
# 2:          7.7         2.8          6.7         2.0 virginica
# 3:          7.7         3.8          6.7         2.2 virginica
# 4:          7.6         3.0          6.6         2.1 virginica
# 5:          7.9         3.8          6.4         2.0 virginica
# ---
# 146:          5.4         3.9          1.3         0.4    setosa
# 147:          5.8         4.0          1.2         0.2    setosa
# 148:          5.0         3.2          1.2         0.2    setosa
# 149:          4.3         3.0          1.1         0.1    setosa
# 150:          4.6         3.6          1.0         0.2    setosa
``````

``````newdata <- A[order(-A$x), ] # rows of A by x, descending
``````

``````newdata <- A[order(-A$x, A$y, -A$z), ] # x descending, then y ascending, then z descending
``````

`````` dd <- dd[order(dd$b, decreasing = FALSE), ] # ascending by b, stored back into dd
``````

``````dd <- dd[order(dd$z, decreasing = TRUE), ] # descending by z, stored back into dd
``````

``````## The data.frame way
dd[with(dd, order(-z, b)), ]

## The data.table way: (7 fewer characters, but that's not the important bit)
dd[order(-z, b)]
``````

``````quarterlyreport[with(quarterlyreport,order(-z,b)),]
``````

``````quarterlyreport[with(lastquarterlyreport,order(-z,b)),]
``````

在 `data.table` 中我們很在意這類細節，所以做了一件很簡單的事來避免把變量名輸入兩次：`i` 會自動在 `dd` 的範圍（frame）內求值，因此完全不需要 `with()`。

``````dd[with(dd, order(-z, b)), ]
``````

``````dd[order(-z, b)]
``````

``````quarterlyreport[with(lastquarterlyreport,order(-z,b)),]
``````

``````quarterlyreport[order(-z,b)]
``````

``````set.seed(1234)

# Build a ten-row example: an id, a rounded normal draw for age, and a
# randomly assigned diagnosis label.
ID <- seq_len(10)
Age <- round(rnorm(n = 10, mean = 50, sd = 1))
diag <- c("Depression", "Bipolar")
Diagnosis <- sample(diag, size = 10, replace = TRUE)

data <- data.frame(ID, Age, Diagnosis)

# order(Age) reads the free-standing vector `Age` created above; it is not
# looked up as a column of `data`.
databyAge <- data[order(Age), ]
databyAge
``````

``````my.data <- read.table(text = '

id age  diagnosis
1  49 Depression
2  50 Depression
3  51 Depression
4  48 Depression
5  50 Depression
6  51    Bipolar
7  49    Bipolar
8  49    Bipolar
9  49    Bipolar
10  49 Depression
', header = TRUE) # close the text literal; the first non-blank line holds the column names
``````

``````databyage = my.data[order(age),] # fails unless a free-standing `age` object exists: order() does not look inside my.data
``````

``````databyage = my.data[order(my.data$age),] # works: the column is referenced explicitly
``````

``````set.seed(1234)

# Sixteen rows covering every 0/1 combination of four columns.
v1 <- c(0,0,0,0, 0,0,0,0, 1,1,1,1, 1,1,1,1)
v2 <- c(0,0,0,0, 1,1,1,1, 0,0,0,0, 1,1,1,1)
v3 <- c(0,0,1,1, 0,0,1,1, 0,0,1,1, 0,0,1,1)
v4 <- c(0,1,0,1, 0,1,0,1, 0,1,0,1, 0,1,0,1)

df.1 <- data.frame(v1, v2, v3, v4)
df.1

# Shuffle the rows so there is something to sort.
rdf.1 <- df.1[sample(nrow(df.1), nrow(df.1), replace = FALSE), ]
rdf.1

# Sort by every column, left to right, without naming any of them:
# do.call() passes each column to order() as a separate sort key.
order.rdf.1 <- rdf.1[do.call(order, as.list(rdf.1)), ]
order.rdf.1

# Same idea with the column priority reversed (v4 first, v1 last).
order.rdf.2 <- rdf.1[do.call(order, rev(as.list(rdf.1))), ]
order.rdf.2

# Arbitrary column priority: build a helper frame whose columns are in the
# wanted key order (v2, v4, v3, v1) and use it only to compute the ordering.
rdf.3 <- data.frame(rdf.1$v2, rdf.1$v4, rdf.1$v3, rdf.1$v1)
rdf.3

order.rdf.3 <- rdf.1[do.call(order, as.list(rdf.3)), ]
order.rdf.3
``````

``````library(doBy)
dd <- orderBy(~-z+b, data=dd)
``````

``````sort(dd,by = ~ -z + b)
#     b x y z
# 4 Low C 9 2
# 2 Med D 3 1
# 1  Hi A 8 1
# 3  Hi A 9 1
``````

``````library(BBmisc)
# Sort by z descending, then b ascending; the commented lines are the console output.
sortByCol(dd, c("z", "b"), asc = c(FALSE, TRUE))
#     b x y z
# 4 Low C 9 2
# 2 Med D 3 1
# 1  Hi A 8 1
# 3  Hi A 9 1
``````

``````library(microbenchmark)
# Each call is followed by the median timing that was reported for it.
microbenchmark(sortByCol(dd, c("z", "b"), asc = c(FALSE, TRUE)), times = 100000)
# median 202.878

library(plyr)
microbenchmark(arrange(dd, desc(z), b), times = 100000)
# median 148.758

microbenchmark(dd[with(dd, order(-z, b)), ], times = 100000)
# median 115.872
``````