R is an OOP (object-oriented programming) language, remember?
Object is created by a “name” + <-...
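Naming rules
1. Do not start a name with a number (WRONG: 1stday).
2. Do not use special signs other than . and _ (WRONG: M&M).
3. Names are case sensitive (X != x).
4. Do not overwrite built-in names (WRONG: list <- c(1:5)).
# Creating a legit object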
# Try to build an illegal object
`=` is not “equals” in R! `=` is (almost) equivalent to `<-` in R (`<<-` also exists).
Shortcut key: Alt + -
# Leftward assignment: bind the value 12 to the name `a`.
a <- 12
# Rightward assignment: bind the value 25 to the name `b`.
# Valid R, but `<-` is the conventional form.
25 -> b
When you don’t want to create an object
# `<-` inside a call creates `x` in the calling environment AND passes the
# value on to median(), so `x` can be printed afterwards.
median(x <- 1:10); x
# With `=`, `x` is only the name of median()'s argument; the call itself
# does not create an object called `x`.
# median(x = 1:10); x
# `shadow` is bound only inside the function's own environment; it never
# shows up in the caller's workspace.
light <- function(finger) {
  # The assignment is the last expression, so its value (finger + 5) is
  # what the function returns, invisibly.
  shadow <- finger + 5
}

# Capture the returned value in a new object, then print it.
handShadow <- light(finger = 3)
handShadow
NB: An object can contain anything: data, functions, outcomes, or plots.
# Chained assignment: both `x` and `y` are bound to 42.
x <- y <- 42
# basic math
x + (1 - 2) * 3 / 4
# advanced math
x^2;sqrt(x);log(x);exp(x)
# matrix algebra
z <- matrix(1:4, ncol = 2)
# `+` and `-` work element by element on matrices of the same shape.
z + z - z
z %*% z # matrix (inner) product
z %o% z # outer product
# logical evaluation
x == y; x != y
# Non-zero numbers are treated as TRUE by `&` and `|`.
x & y; x | y
x > y; x <= y
See more about linear algebra in R here.
Hey, matrix and its friends!
Creating matrix in R
# Fill a 4-column matrix with -8..7; values are placed column by column
# (see the printed result below).
A <- matrix(-8:7, ncol = 4)
A
## [,1] [,2] [,3] [,4]
## [1,] -8 -4 0 4
## [2,] -7 -3 1 5
## [3,] -6 -2 2 6
## [4,] -5 -1 3 7
# Single element: row 2, column 3.
A[2, 3]
## [1] 1
A one-dimension matrix.
# Leaving an index empty keeps that whole dimension:
# all of row 2, then all of column 3.
A[2,]
A[,3]
Let’s know some common types of it
# Only c(1:3) is stored as integer; c(1, 2, 3) is stored as double
# (write 1L, 2L, 3L to get integers from literals).
c(1, 2, 3);c(1:3) # Integer (the 1:3 form)
c(1.5, -2.34, NA) # Double
# What does c(1.5:3) return?
Note: 1. NA: not available
# Character vector
c("R is hard.", "But I can nail it.")
# Logical vector
c(TRUE, TRUE, FALSE)
# c(TRUE, TRUE, FALSE) == c(1, 1, 0)
# Factor: the values 1, 2, 3 are displayed with the labels given, in order.
factor(c(1, 2, 2, 3), labels = c("Apple", "Pear", "Orange"))
# x == c(1, 2, 2, 3)?
x <- factor(c(1, 2, 2, 3), labels = c("Apple", "Pear", "Orange"))
levels(x)
x <- factor(c(1, 2, 2, 3), labels = c("Apple", "Pear", "Orange"))
levels(x)
# level change
# NOTE: assigning to levels() renames the existing levels position by
# position (Apple -> Pear, Pear -> Orange, Orange -> Apple); it does not
# reorder them.
x2 <- x
levels(x2) <- c("Pear", "Orange", "Apple")
levels(x2)
Learn more about levels, labels, and more sophisticated manipulations in ?factor and the vignette of the package forcats.
as.POSIXct vs. as.POSIXlt
# Current date-time.
Sys.time()
## [1] "2020-10-15 22:23:24 CST"
# The full pack
time1 <- Sys.time()
# POSIXlt stores the date-time as named components that `$` can extract.
time2 <- as.POSIXlt(Sys.time())
time2$wday # day of the week (0 = Sunday, so 4 is Thursday)
## [1] 4
## If I only care about the date?
Sys.Date()
## [1] "2020-10-15"
# Parse an ISO-formatted string into a Date.
date1 <- as.Date("2019-01-02")
lubridate
library(lubridate)
# The function name spells the order of the components in the input string
# (y = year, m = month, d = day); all three calls give the same date.
ymd("20110604")
## [1] "2011-06-04"
mdy("06-04-2011")
## [1] "2011-06-04"
dmy("04/06/2011")
## [1] "2011-06-04"
time1 <- Sys.time()
# Name of the time zone this session is running in.
Sys.timezone()
# with_tz(): the same instant, shown on another time zone's clock.
with_tz(time1, tzone = "America/Chicago")
# learn time zone names by `OlsonNames()`
# force_tz(): keeps the clock reading and relabels it with the new zone,
# so the result is a different instant.
time2 <- force_tz(time1, tzone = "America/Chicago")
start <- ymd("2019-09-09")
end <- start + weeks(15)
# An interval is the span between two specific points in time.
semester <- interval(start, end)
# how many days does the semester have?
semester/days(1)
# How about month?
semester/months(1)
# 1:5 repeated 300 times, i.e. a vector of 1500 integers.
x <- rep(1:5, times = 300)
# Class and number of elements.
class(x)
length(x)
# Element-wise test for missing values (one TRUE/FALSE per element).
is.na(x)
# Single TRUE/FALSE: is the whole vector of type character?
is.character(x)
# Distinct values, and how often each one occurs.
unique(x)
table(x)
Note: Most of the property functions can be used for all types of data in R.
“spreadsheet”
In Excel:
In R(studio):
# Load the built-in `mtcars` dataset into the workspace.
data(mtcars)
#data(gapminder::gapminder)
.RDS (single object)
.RData (multiple objects)
.txt (tab table)
.csv
# Native R format and plain-text tables (base R readers)
df_rds <- readRDS("<FileName>.rds")
df_txt <- read.table("<FileName>.txt")
df_csv <- read.csv("<FileName>.csv")

# SPSS, Stata, SAS
library(haven)
df_spss <- read_spss("<FileName>.sav")
df_stata <- read_dta("<FileName>.dta")
df_sas <- read_sas("<FileName>.sas7bdat")

# Excel sheets
library(readxl)
df_excel <- read_excel("<FileName>.xls")
df_excel2 <- read_excel("<FileName>.xlsx")

# JavaScript Object Notation
library(rjson)
df_json <- fromJSON(file = "<FileName>.json")

# XML/Html
# xmlTreeParse() and readHTMLTable() come from the XML package, so it has to
# be attached before they are called.
library(XML)
df_xml <- xmlTreeParse("<url>")
# Pass the address as a string: a bare `url` would refer to base R's url()
# function, not to a web address. `which = 3` picks the third table found.
df_html <- readHTMLTable("<url>", which = 3)

# Feather
feather::read_feather("<FileName>.feather")
# Build a small data frame by hand: an integer column `x` and a character
# column `y`, three rows each.
df_new <- data.frame(
  x = seq_len(3),
  y = c("x", "y", "z")
)
df_new
Demographic statistics popularized by Hans Rosling’s TED talks.
library(gapminder)
gapminder
Q: Assuming we have two separate data for Asia and Europe (how?), how could we combine them into one?
# Row subsetting: keep only the rows whose `continent` matches.
gapminder_asia <- gapminder[gapminder$continent == "Asia", ]
gapminder_europe <- gapminder[gapminder$continent == "Europe", ]
library(dplyr)
# Stack the two subsets on top of each other: Asia's rows, then Europe's.
gapminder_eurasia <- bind_rows(gapminder_asia, gapminder_europe)
Q: I have several columns and just want to merge them together
# Pull two columns out of gapminder as plain vectors.
gapminder_country <- gapminder$country
gapminder_year <- gapminder$year
# Bind them side by side. Naming the inputs gives the result meaningful
# column names instead of auto-generated ones.
bind_cols(country = gapminder_country, year = gapminder_year)
Q: How can I use two datasets jointly?
# Two tables that share the keys `country` and `year`.
gapminder_country <- select(gapminder, country, year)
gapminder_year <- select(gapminder, country, year, pop)
# Join on both keys so each country-year row on the left matches exactly one
# row on the right. Joining on `year` alone would pair every left-hand row
# with every country observed in that year.
gapminder_countryYear <- left_join(
  gapminder_country, gapminder_year,
  by = c("country", "year")
)
library(tidyr)
relig_income # wide
## # A tibble: 18 x 11
## religion `<$10k` `$10-20k` `$20-30k` `$30-40k` `$40-50k` `$50-75k` `$75-100k`
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Agnostic 27 34 60 81 76 137 122
## 2 Atheist 12 27 37 52 35 70 73
## 3 Buddhist 27 21 30 34 33 58 62
## 4 Catholic 418 617 732 670 638 1116 949
## 5 Don’t k… 15 14 15 11 10 35 21
## 6 Evangel… 575 869 1064 982 881 1486 949
## 7 Hindu 1 9 7 9 11 34 47
## 8 Histori… 228 244 236 238 197 223 131
## 9 Jehovah… 20 27 24 24 21 30 15
## 10 Jewish 19 19 25 25 30 95 69
## 11 Mainlin… 289 495 619 655 651 1107 939
## 12 Mormon 29 40 48 51 56 112 85
## 13 Muslim 6 7 9 10 9 23 16
## 14 Orthodox 13 17 23 32 32 47 38
## 15 Other C… 9 7 11 13 13 14 18
## 16 Other F… 20 33 40 46 49 63 46
## 17 Other W… 5 2 3 4 2 7 3
## 18 Unaffil… 217 299 374 365 341 528 407
## # … with 3 more variables: `$100-150k` <dbl>, `>150k` <dbl>, `Don't
## # know/refused` <dbl>
fish_encounters # long
## # A tibble: 114 x 3
## fish station seen
## <fct> <fct> <int>
## 1 4842 Release 1
## 2 4842 I80_1 1
## 3 4842 Lisbon 1
## 4 4842 Rstr 1
## 5 4842 Base_TD 1
## 6 4842 BCE 1
## 7 4842 BCW 1
## 8 4842 BCE2 1
## 9 4842 BCW2 1
## 10 4842 MAE 1
## # … with 104 more rows
# Wide -> long: every column except `religion` is folded into a pair of
# columns, `income` (the old column name) and `count` (its value).
pivot_longer(
  relig_income,
  cols = -religion,
  names_to = "income",
  values_to = "count"
)

# Long -> wide: one new column per `station`, filled with the `seen` values.
pivot_wider(
  fish_encounters,
  names_from = station,
  values_from = seen
)
# One object per .rds file.
saveRDS(gapminder_country, file = "gapminder_country.rds")
# Several objects stored together in one .rdata file.
save(gapminder_country, gapminder_year, file = "gapminder.rdata")
feather::write_feather(gapminder, path = "gapminder.feather")
# NOTE(review): write.csv() also writes row names by default; pass
# row.names = FALSE if the extra index column is not wanted.
write.csv(gapminder_country, file = "gapminder_country.csv")
Hint: Don’t open csv file with Excel if you have data in Chinese.
Of course you could save the data in the format of STATA, SPSS, Excel, but…
STATA (.dta, <14): 3.16 G = R (.rds): 0.05 G
Method | Average Time | Minimum | Maximum |
---|---|---|---|
base::readRDS | 19.65 | 18.64 | 21.01 |
fst::read_fst | 1.39 | 0.56 | 3.41 |
haven::read_sav | 104.78 | 101.00 | 111.85 |
qs::qread | 3.33 | 3.00 | 4.24 |
Method | Average Time | Minimum | Maximum | File Size |
---|---|---|---|---|
base::saveRDS | 98.36 | 93.09 | 103.24 | 30.9 MB |
fst::write_fst | 2.70 | 1.86 | 4.05 | 122.1 MB |
qs::qsave | 5.03 | 4.35 | 6.62 | 44.6 MB |