Read xls and xlsx files
read_excel()
calls excel_format()
to determine if path
is xls or xlsx,
based on the file extension and the file itself, in that order. Use
read_xls()
and read_xlsx()
directly if you know better and want to
prevent such guessing.
read_excel(path, sheet = NULL, range = NULL, col_names = TRUE, col_types = NULL, na = "", trim_ws = TRUE, skip = 0, n_max = Inf, guess_max = min(1000, n_max)) read_xls(path, sheet = NULL, range = NULL, col_names = TRUE, col_types = NULL, na = "", trim_ws = TRUE, skip = 0, n_max = Inf, guess_max = min(1000, n_max)) read_xlsx(path, sheet = NULL, range = NULL, col_names = TRUE, col_types = NULL, na = "", trim_ws = TRUE, skip = 0, n_max = Inf, guess_max = min(1000, n_max))
path | Path to the xls/xlsx file. |
---|---|
sheet | Sheet to read. Either a string (the name of a sheet), or an
integer (the position of the sheet). Ignored if the sheet is specified via
|
range | A cell range to read from, as described in cell-specification.
Includes typical Excel ranges like "B3:D87", possibly including the sheet
name like "Budget!B2:G14", and more. Interpreted strictly, even if the
range forces the inclusion of leading or trailing empty rows or columns.
Takes precedence over |
col_names |
|
col_types | Either |
na | Character vector of strings to interpret as missing values. By default, readxl treats blank cells as missing data. |
trim_ws | Should leading and trailing whitespace be trimmed? |
skip | Minimum number of rows to skip before reading anything, be it
column names or data. Leading empty rows are automatically skipped, so this
is a lower bound. Ignored if |
n_max | Maximum number of data rows to read. Trailing empty rows are
automatically skipped, so this is an upper bound on the number of rows in
the returned tibble. Ignored if |
guess_max | Maximum number of data rows to use for guessing column types. |
A tibble
cell-specification for more details on targetting cells with the
range
argument
#> # A tibble: 150 x 5 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> <dbl> <dbl> <dbl> <dbl> <chr> #> 1 5.10 3.50 1.40 0.200 setosa #> 2 4.90 3.00 1.40 0.200 setosa #> 3 4.70 3.20 1.30 0.200 setosa #> 4 4.60 3.10 1.50 0.200 setosa #> 5 5.00 3.60 1.40 0.200 setosa #> 6 5.40 3.90 1.70 0.400 setosa #> 7 4.60 3.40 1.40 0.300 setosa #> 8 5.00 3.40 1.50 0.200 setosa #> 9 4.40 2.90 1.40 0.200 setosa #> 10 4.90 3.10 1.50 0.100 setosa #> # ... with 140 more rows# Specify sheet either by position or by name read_excel(datasets, 2)#> # A tibble: 32 x 11 #> mpg cyl disp hp drat wt qsec vs am gear carb #> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> #> 1 21.0 6. 160. 110. 3.90 2.62 16.5 0. 1. 4. 4. #> 2 21.0 6. 160. 110. 3.90 2.88 17.0 0. 1. 4. 4. #> 3 22.8 4. 108. 93. 3.85 2.32 18.6 1. 1. 4. 1. #> 4 21.4 6. 258. 110. 3.08 3.22 19.4 1. 0. 3. 1. #> 5 18.7 8. 360. 175. 3.15 3.44 17.0 0. 0. 3. 2. #> 6 18.1 6. 225. 105. 2.76 3.46 20.2 1. 0. 3. 1. #> 7 14.3 8. 360. 245. 3.21 3.57 15.8 0. 0. 3. 4. #> 8 24.4 4. 147. 62. 3.69 3.19 20.0 1. 0. 4. 2. #> 9 22.8 4. 141. 95. 3.92 3.15 22.9 1. 0. 4. 2. #> 10 19.2 6. 168. 123. 3.92 3.44 18.3 1. 0. 4. 4. #> # ... with 22 more rowsread_excel(datasets, "mtcars")#> # A tibble: 32 x 11 #> mpg cyl disp hp drat wt qsec vs am gear carb #> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> #> 1 21.0 6. 160. 110. 3.90 2.62 16.5 0. 1. 4. 4. #> 2 21.0 6. 160. 110. 3.90 2.88 17.0 0. 1. 4. 4. #> 3 22.8 4. 108. 93. 3.85 2.32 18.6 1. 1. 4. 1. #> 4 21.4 6. 258. 110. 3.08 3.22 19.4 1. 0. 3. 1. #> 5 18.7 8. 360. 175. 3.15 3.44 17.0 0. 0. 3. 2. #> 6 18.1 6. 225. 105. 2.76 3.46 20.2 1. 0. 3. 1. #> 7 14.3 8. 360. 245. 3.21 3.57 15.8 0. 0. 3. 4. #> 8 24.4 4. 147. 62. 3.69 3.19 20.0 1. 0. 4. 2. #> 9 22.8 4. 141. 95. 3.92 3.15 22.9 1. 0. 4. 2. #> 10 19.2 6. 168. 123. 3.92 3.44 18.3 1. 0. 4. 4. #> # ... with 22 more rows# Skip rows and use default column names read_excel(datasets, skip = 148, col_names = FALSE)#> # A tibble: 3 x 5 #> X__1 X__2 X__3 X__4 X__5 #> <dbl> <dbl> <dbl> <dbl> <chr> #> 1 6.50 3.00 5.20 2.00 virginica #> 2 6.20 3.40 5.40 2.30 virginica #> 3 5.90 3.00 5.10 1.80 virginica# Recycle a single column type read_excel(datasets, col_types = "text")#> # A tibble: 150 x 5 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> <chr> <chr> <chr> <chr> <chr> #> 1 5.1 3.5 1.4 0.2 setosa #> 2 4.9 3 1.4 0.2 setosa #> 3 4.7 3.2 1.3 0.2 setosa #> 4 4.6 3.1 1.5 0.2 setosa #> 5 5 3.6 1.4 0.2 setosa #> 6 5.4 3.9 1.7 0.4 setosa #> 7 4.6 3.4 1.4 0.3 setosa #> 8 5 3.4 1.5 0.2 setosa #> 9 4.4 2.9 1.4 0.2 setosa #> 10 4.9 3.1 1.5 0.1 setosa #> # ... with 140 more rows# Specify some col_types and guess others read_excel(datasets, col_types = c("text", "guess", "numeric", "guess", "guess"))#> # A tibble: 150 x 5 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> <chr> <dbl> <dbl> <dbl> <chr> #> 1 5.1 3.50 1.40 0.200 setosa #> 2 4.9 3.00 1.40 0.200 setosa #> 3 4.7 3.20 1.30 0.200 setosa #> 4 4.6 3.10 1.50 0.200 setosa #> 5 5 3.60 1.40 0.200 setosa #> 6 5.4 3.90 1.70 0.400 setosa #> 7 4.6 3.40 1.40 0.300 setosa #> 8 5 3.40 1.50 0.200 setosa #> 9 4.4 2.90 1.40 0.200 setosa #> 10 4.9 3.10 1.50 0.100 setosa #> # ... with 140 more rows# Accomodate a column with disparate types via col_type = "list" df <- read_excel(readxl_example("clippy.xlsx"), col_types = c("text", "list")) df#> # A tibble: 4 x 2 #> name value #> <chr> <list> #> 1 Name <chr [1]> #> 2 Species <chr [1]> #> 3 Approx date of death <dttm [1]> #> 4 Weight in grams <dbl [1]>df$value#> [[1]] #> [1] "Clippy" #> #> [[2]] #> [1] "paperclip" #> #> [[3]] #> [1] "2007-01-01 UTC" #> #> [[4]] #> [1] 0.9 #>sapply(df$value, class)#> [[1]] #> [1] "character" #> #> [[2]] #> [1] "character" #> #> [[3]] #> [1] "POSIXct" "POSIXt" #> #> [[4]] #> [1] "numeric" #># Limit the number of data rows read read_excel(datasets, n_max = 3)#> # A tibble: 3 x 5 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> <dbl> <dbl> <dbl> <dbl> <chr> #> 1 5.10 3.50 1.40 0.200 setosa #> 2 4.90 3.00 1.40 0.200 setosa #> 3 4.70 3.20 1.30 0.200 setosa# Read from an Excel range using A1 or R1C1 notation read_excel(datasets, range = "C1:E7")#> # A tibble: 6 x 3 #> Petal.Length Petal.Width Species #> <dbl> <dbl> <chr> #> 1 1.40 0.200 setosa #> 2 1.40 0.200 setosa #> 3 1.30 0.200 setosa #> 4 1.50 0.200 setosa #> 5 1.40 0.200 setosa #> 6 1.70 0.400 setosaread_excel(datasets, range = "R1C2:R2C5")#> # A tibble: 1 x 4 #> Sepal.Width Petal.Length Petal.Width Species #> <dbl> <dbl> <dbl> <chr> #> 1 3.50 1.40 0.200 setosa# Specify the sheet as part of the range read_excel(datasets, range = "mtcars!B1:D5")#> # A tibble: 4 x 3 #> cyl disp hp #> <dbl> <dbl> <dbl> #> 1 6. 160. 110. #> 2 6. 160. 110. #> 3 4. 108. 93. #> 4 6. 258. 110.# Read only specific rows or columns read_excel(datasets, range = cell_rows(102:151), col_names = FALSE)#> # A tibble: 50 x 5 #> X__1 X__2 X__3 X__4 X__5 #> <dbl> <dbl> <dbl> <dbl> <chr> #> 1 6.30 3.30 6.00 2.50 virginica #> 2 5.80 2.70 5.10 1.90 virginica #> 3 7.10 3.00 5.90 2.10 virginica #> 4 6.30 2.90 5.60 1.80 virginica #> 5 6.50 3.00 5.80 2.20 virginica #> 6 7.60 3.00 6.60 2.10 virginica #> 7 4.90 2.50 4.50 1.70 virginica #> 8 7.30 2.90 6.30 1.80 virginica #> 9 6.70 2.50 5.80 1.80 virginica #> 10 7.20 3.60 6.10 2.50 virginica #> # ... with 40 more rows#> # A tibble: 150 x 3 #> Sepal.Width Petal.Length Petal.Width #> <dbl> <dbl> <dbl> #> 1 3.50 1.40 0.200 #> 2 3.00 1.40 0.200 #> 3 3.20 1.30 0.200 #> 4 3.10 1.50 0.200 #> 5 3.60 1.40 0.200 #> 6 3.90 1.70 0.400 #> 7 3.40 1.40 0.300 #> 8 3.40 1.50 0.200 #> 9 2.90 1.40 0.200 #> 10 3.10 1.50 0.100 #> # ... with 140 more rows#> [1] "Sepal.Length" "Sepal.Width" "Petal.Length" "Petal.Width" "Species"