The explore package offers a simplified way to use popular data sets or to create synthetic data for experimenting/teaching/training.
This data set comes with the palmerpenguins package. For each penguin it records the species, the island in the Palmer Archipelago, size measurements (flipper length, body mass, bill dimensions), and sex.
library(dplyr)
library(explore)
data <- use_data_penguins()
glimpse(data)
#> Rows: 344
#> Columns: 8
#> $ species           <fct> Adelie, Adelie, Adelie, Adelie, Adelie, Adelie, Adel…
#> $ island            <fct> Torgersen, Torgersen, Torgersen, Torgersen, Torgerse…
#> $ bill_length_mm    <dbl> 39.1, 39.5, 40.3, NA, 36.7, 39.3, 38.9, 39.2, 34.1, …
#> $ bill_depth_mm     <dbl> 18.7, 17.4, 18.0, NA, 19.3, 20.6, 17.8, 19.6, 18.1, …
#> $ flipper_length_mm <int> 181, 186, 195, NA, 193, 190, 181, 195, 193, 190, 186…
#> $ body_mass_g       <int> 3750, 3800, 3250, NA, 3450, 3650, 3625, 4675, 3475, …
#> $ sex               <fct> male, female, female, NA, female, male, female, male…
#> $ year              <int> 2007, 2007, 2007, 2007, 2007, 2007, 2007, 2007, 2007…
data <- use_data_penguins(short_names = TRUE)
glimpse(data)
#> Rows: 344
#> Columns: 8
#> $ species     <fct> Adelie, Adelie, Adelie, Adelie, Adelie, Adelie, Adelie, Ad…
#> $ island      <fct> Torgersen, Torgersen, Torgersen, Torgersen, Torgersen, Tor…
#> $ bill_len    <dbl> 39.1, 39.5, 40.3, NA, 36.7, 39.3, 38.9, 39.2, 34.1, 42.0, …
#> $ bill_dep    <dbl> 18.7, 17.4, 18.0, NA, 19.3, 20.6, 17.8, 19.6, 18.1, 20.2, …
#> $ flipper_len <int> 181, 186, 195, NA, 193, 190, 181, 195, 193, 190, 186, 180,…
#> $ body_mass   <int> 3750, 3800, 3250, NA, 3450, 3650, 3625, 4675, 3475, 4250, …
#> $ sex         <fct> male, female, female, NA, female, male, female, male, NA, …
#> $ year        <int> 2007, 2007, 2007, 2007, 2007, 2007, 2007, 2007, 2007, 2007…
This data set comes with the dplyr package. It contains data on 87 Star Wars characters.
data <- use_data_starwars()
glimpse(data)
#> Rows: 87
#> Columns: 14
#> $ name       <chr> "Luke Skywalker", "C-3PO", "R2-D2", "Darth Vader", "Leia Or…
#> $ height     <int> 172, 167, 96, 202, 150, 178, 165, 97, 183, 182, 188, 180, 2…
#> $ mass       <dbl> 77.0, 75.0, 32.0, 136.0, 49.0, 120.0, 75.0, 32.0, 84.0, 77.…
#> $ hair_color <chr> "blond", NA, NA, "none", "brown", "brown, grey", "brown", N…
#> $ skin_color <chr> "fair", "gold", "white, blue", "white", "light", "light", "…
#> $ eye_color  <chr> "blue", "yellow", "red", "yellow", "brown", "blue", "blue",…
#> $ birth_year <dbl> 19.0, 112.0, 33.0, 41.9, 19.0, 52.0, 47.0, NA, 24.0, 57.0, …
#> $ sex        <chr> "male", "none", "none", "male", "female", "male", "female",…
#> $ gender     <chr> "masculine", "masculine", "masculine", "masculine", "femini…
#> $ homeworld  <chr> "Tatooine", "Tatooine", "Naboo", "Tatooine", "Alderaan", "T…
#> $ species    <chr> "Human", "Droid", "Droid", "Human", "Human", "Human", "Huma…
#> $ films      <list> <"A New Hope", "The Empire Strikes Back", "Return of the J…
#> $ vehicles   <list> <"Snowspeeder", "Imperial Speeder Bike">, <>, <>, <>, "Imp…
#> $ starships  <list> <"X-wing", "Imperial shuttle">, <>, <>, "TIE Advanced x1",…
This data set comes with the ggplot2 package. It contains the prices and other attributes of almost 54,000 diamonds.
data <- use_data_diamonds()
glimpse(data)
#> Rows: 53,940
#> Columns: 10
#> $ carat   <dbl> 0.23, 0.21, 0.23, 0.29, 0.31, 0.24, 0.24, 0.26, 0.22, 0.23, 0.…
#> $ cut     <ord> Ideal, Premium, Good, Premium, Good, Very Good, Very Good, Ver…
#> $ color   <ord> E, E, E, I, J, J, I, H, E, H, J, J, F, J, E, E, I, J, J, J, I,…
#> $ clarity <ord> SI2, SI1, VS1, VS2, SI2, VVS2, VVS1, SI1, VS2, VS1, SI1, VS1, …
#> $ depth   <dbl> 61.5, 59.8, 56.9, 62.4, 63.3, 62.8, 62.3, 61.9, 65.1, 59.4, 64…
#> $ table   <dbl> 55, 61, 65, 58, 58, 57, 57, 55, 61, 61, 55, 56, 61, 54, 62, 58…
#> $ price   <int> 326, 326, 327, 334, 335, 336, 336, 337, 337, 338, 339, 340, 34…
#> $ x       <dbl> 3.95, 3.89, 4.05, 4.20, 4.34, 3.94, 3.95, 4.07, 3.87, 4.00, 4.…
#> $ y       <dbl> 3.98, 3.84, 4.07, 4.23, 4.35, 3.96, 3.98, 4.11, 3.78, 4.05, 4.…
#> $ z       <dbl> 2.43, 2.31, 2.31, 2.63, 2.75, 2.48, 2.47, 2.53, 2.49, 2.39, 2.…
This data set comes with base R. The data set gives the measurements in centimeters of the variables sepal length and width and petal length and width, respectively, for 50 flowers from each of 3 species of iris. The species are Iris setosa, versicolor, and virginica.
data <- use_data_iris()
glimpse(data)
#> Rows: 150
#> Columns: 5
#> $ Sepal.Length <dbl> 5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4, 4.9, 5.4, 4.…
#> $ Sepal.Width  <dbl> 3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1, 3.7, 3.…
#> $ Petal.Length <dbl> 1.4, 1.4, 1.3, 1.5, 1.4, 1.7, 1.4, 1.5, 1.4, 1.5, 1.5, 1.…
#> $ Petal.Width  <dbl> 0.2, 0.2, 0.2, 0.2, 0.2, 0.4, 0.3, 0.2, 0.2, 0.1, 0.2, 0.…
#> $ Species      <fct> setosa, setosa, setosa, setosa, setosa, setosa, setosa, s…
This data set comes with the ggplot2 package. It contains a subset of the fuel economy data that the EPA makes available on https://fueleconomy.gov/. It contains only models which had a new release every year between 1999 and 2008 - this was used as a proxy for the popularity of the car.
data <- use_data_mpg()
glimpse(data)
#> Rows: 234
#> Columns: 11
#> $ manufacturer <chr> "audi", "audi", "audi", "audi", "audi", "audi", "audi", "…
#> $ model        <chr> "a4", "a4", "a4", "a4", "a4", "a4", "a4", "a4 quattro", "…
#> $ displ        <dbl> 1.8, 1.8, 2.0, 2.0, 2.8, 2.8, 3.1, 1.8, 1.8, 2.0, 2.0, 2.…
#> $ year         <int> 1999, 1999, 2008, 2008, 1999, 1999, 2008, 1999, 1999, 200…
#> $ cyl          <int> 4, 4, 4, 4, 6, 6, 6, 4, 4, 4, 4, 6, 6, 6, 6, 6, 6, 8, 8, …
#> $ trans        <chr> "auto(l5)", "manual(m5)", "manual(m6)", "auto(av)", "auto…
#> $ drv          <chr> "f", "f", "f", "f", "f", "f", "f", "4", "4", "4", "4", "4…
#> $ cty          <int> 18, 21, 20, 21, 16, 18, 18, 18, 16, 20, 19, 15, 17, 17, 1…
#> $ hwy          <int> 29, 29, 31, 30, 26, 26, 27, 26, 25, 28, 27, 25, 25, 25, 2…
#> $ fl           <chr> "p", "p", "p", "p", "p", "p", "p", "p", "p", "p", "p", "p…
#> $ class        <chr> "compact", "compact", "compact", "compact", "compact", "c…
This data set comes with base R. The data was extracted from the 1974 Motor Trend US magazine, and comprises fuel consumption and 10 aspects of automobile design and performance for 32 automobiles (1973–74 models).
data <- use_data_mtcars()
glimpse(data)
#> Rows: 32
#> Columns: 11
#> $ mpg  <dbl> 21.0, 21.0, 22.8, 21.4, 18.7, 18.1, 14.3, 24.4, 22.8, 19.2, 17.8,…
#> $ cyl  <dbl> 6, 6, 4, 6, 8, 6, 8, 4, 4, 6, 6, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 8,…
#> $ disp <dbl> 160.0, 160.0, 108.0, 258.0, 360.0, 225.0, 360.0, 146.7, 140.8, 16…
#> $ hp   <dbl> 110, 110, 93, 110, 175, 105, 245, 62, 95, 123, 123, 180, 180, 180…
#> $ drat <dbl> 3.90, 3.90, 3.85, 3.08, 3.15, 2.76, 3.21, 3.69, 3.92, 3.92, 3.92,…
#> $ wt   <dbl> 2.620, 2.875, 2.320, 3.215, 3.440, 3.460, 3.570, 3.190, 3.150, 3.…
#> $ qsec <dbl> 16.46, 17.02, 18.61, 19.44, 17.02, 20.22, 15.84, 20.00, 22.90, 18…
#> $ vs   <dbl> 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,…
#> $ am   <dbl> 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0,…
#> $ gear <dbl> 4, 4, 4, 3, 3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 4, 4, 4, 3, 3,…
#> $ carb <dbl> 4, 4, 1, 1, 2, 1, 4, 2, 2, 4, 4, 3, 3, 3, 4, 4, 4, 1, 2, 1, 1, 2,…
This data set comes with base R. It describes the survival of passengers on the Titanic.
data <- use_data_titanic(count = FALSE)
glimpse(data)
#> Rows: 2,201
#> Columns: 4
#> $ Class    <chr> "3rd", "3rd", "3rd", "3rd", "3rd", "3rd", "3rd", "3rd", "3rd"…
#> $ Sex      <chr> "Male", "Male", "Male", "Male", "Male", "Male", "Male", "Male…
#> $ Age      <chr> "Child", "Child", "Child", "Child", "Child", "Child", "Child"…
#> $ Survived <chr> "No", "No", "No", "No", "No", "No", "No", "No", "No", "No", "…
data <- use_data_titanic(count = TRUE)
glimpse(data)
#> Rows: 32
#> Columns: 5
#> $ Class    <chr> "1st", "2nd", "3rd", "Crew", "1st", "2nd", "3rd", "Crew", "1s…
#> $ Sex      <chr> "Male", "Male", "Male", "Male", "Female", "Female", "Female",…
#> $ Age      <chr> "Child", "Child", "Child", "Child", "Child", "Child", "Child"…
#> $ Survived <chr> "No", "No", "No", "No", "No", "No", "No", "No", "No", "No", "…
#> $ n        <dbl> 0, 0, 35, 0, 0, 0, 17, 0, 118, 154, 387, 670, 4, 13, 89, 3, 5…
This data set is an incomplete collection of popular beers in Austria, Germany and Switzerland. The data were collected from various websites in 2023. Some of the collected data may be incorrect.
data <- use_data_beer()
glimpse(data)
#> Rows: 161
#> Columns: 11
#> $ name              <chr> "Puntigamer Maerzen", "Puntigamer PR0,0ST", "Puntiga…
#> $ brand             <chr> "Puntigamer", "Puntigamer", "Puntigamer", "Puntigame…
#> $ country           <chr> "Austria", "Austria", "Austria", "Austria", "Austria…
#> $ year              <dbl> 2023, 2023, 2023, 2023, 2023, 2023, 2023, 2023, 2023…
#> $ type              <chr> "Rest", "Alkoholfrei", "Rest", "Rest", "Rest", "Rest…
#> $ color_dark        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1…
#> $ alcohol_vol_pct   <dbl> 5.1, 0.0, 5.2, 6.0, 4.9, 5.2, 4.4, 0.5, 5.7, 5.3, 7.…
#> $ original_wort     <dbl> 11.5, 5.1, 12.1, 13.8, 11.5, 11.9, 11.1, 7.0, 13.2, …
#> $ energy_kcal_100ml <dbl> 40, 20, 43, 50, 42, 43, 42, 27, 48, 45, 58, 45, 43, …
#> $ carb_g_100ml      <dbl> 2.7, 4.4, 2.9, 3.6, 3.2, 3.0, 3.8, 5.7, 3.5, 3.3, 3.…
#> $ sugar_g_100ml     <dbl> 0.0, 1.2, 0.0, 0.0, 0.0, 0.0, 0.0, 2.7, 0.0, 0.0, 0.…
Artificial data that can be used for unit-testing or teaching.
data <- create_data_app(obs = 1000)
glimpse(data)
#> Rows: 1,000
#> Columns: 7
#> $ os           <chr> "Android", "iOS", "Android", "iOS", "Other", "Android", "…
#> $ free         <int> 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, …
#> $ downloads    <int> 5802, 5048, 4579, 3449, 2464, 11276, 4026, 6841, 10419, 5…
#> $ rating       <dbl> 4, 4, 3, 4, 1, 4, 5, 5, 4, 1, 1, 4, 4, 5, 5, 4, 3, 4, 2, …
#> $ type         <chr> "Kids", "Media", "Other", "Shopping", "Connect", "Learn",…
#> $ updates      <dbl> 63.00000, 58.00000, 62.00000, 44.00000, 24.00000, 75.0000…
#> $ screen_sizes <dbl> 3, 2, 3, 2, 1, 3, 1, 2, 2, 3, 1, 3, 2, 1, 3, 1, 4, 5, 3, …
data <- create_data_buy(obs = 1000)
glimpse(data)
#> Rows: 1,000
#> Columns: 13
#> $ period          <int> 202012, 202012, 202012, 202012, 202012, 202012, 202012…
#> $ buy             <int> 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, …
#> $ age             <int> 39, 57, 55, 66, 71, 44, 64, 51, 70, 44, 58, 47, 68, 71…
#> $ city_ind        <int> 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, …
#> $ female_ind      <int> 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, …
#> $ fixedvoice_ind  <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, …
#> $ fixeddata_ind   <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
#> $ fixedtv_ind     <int> 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, …
#> $ mobilevoice_ind <int> 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, …
#> $ mobiledata_prd  <chr> "NO", "NO", "MOBILE STICK", "NO", "BUSINESS", "BUSINES…
#> $ bbi_speed_ind   <int> 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, …
#> $ bbi_usg_gb      <int> 77, 49, 53, 44, 55, 93, 50, 64, 63, 87, 45, 45, 70, 79…
#> $ hh_single       <int> 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, …
data <- create_data_churn(obs = 1000)
glimpse(data)
#> Rows: 1,000
#> Columns: 9
#> $ price      <dbl> 29, 27, 29, 11, 18, 21, 19, 13, 29, 22, 13, 27, 17, 11, 16,…
#> $ type       <chr> "Premium", "Regular", "Premium", "Promo", "Promo", "Promo",…
#> $ usage      <dbl> 63.0, 39.0, 87.0, 29.0, 22.5, 8.0, 56.0, 94.5, 46.0, 76.0, …
#> $ shared     <int> 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1,…
#> $ device     <chr> "Computer", "Tablet", "Phone", "Tablet", "Computer", "Table…
#> $ newsletter <int> 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0,…
#> $ language   <chr> "sp", "sp", "sp", "sp", "en", "en", "fr", "en", "en", "de",…
#> $ duration   <int> 7, 47, 99, 33, 94, 17, 95, 92, 43, 16, 62, 14, 52, 20, 76, …
#> $ churn      <dbl> 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,…
data <- create_data_esoteric(obs = 1000)
glimpse(data)
#> Rows: 1,000
#> Columns: 6
#> $ starsign        <chr> "Leo", "Aquarius", "Virgo", "Pisces", "Aries", "Taurus…
#> $ chinese         <chr> "Dragon", "Monkey", "Tiger", "Pig", "Pig", "Horse", "D…
#> $ moon            <chr> "Waxing (+)", "Waxing (+)", "Waxing (+)", "Waning (-)"…
#> $ blood           <chr> "A+", "AB+", "0+", "0+", "A+", "0+", "B+", "0+", "0-",…
#> $ fingers_crossed <int> 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, …
#> $ success         <int> 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, …
data <- create_data_person(obs = 1000)
glimpse(data)
#> Rows: 1,000
#> Columns: 15
#> $ age               <int> 46, 94, 66, 29, 82, 57, 65, 58, 29, 40, 84, 72, 24, …
#> $ gender            <chr> "Female", "Female", "Male", "Male", "Female", "Femal…
#> $ eye_color         <chr> "Blue", "Green", "Brown", "Green", "Brown", "Brown",…
#> $ shoe_size         <dbl> 45.2, 37.0, 45.0, 45.0, 39.0, 38.2, 41.2, 46.0, 40.0…
#> $ iq                <dbl> 141, 71, 80, 74, 119, 95, 97, 135, 88, 140, 71, 126,…
#> $ education         <int> 66, 41, 49, 49, 25, 68, 87, 46, 78, 14, 65, 62, 68, …
#> $ income            <dbl> 132.0, 95.0, 18.0, 54.0, 70.0, 128.0, 128.5, 32.0, 8…
#> $ handset           <chr> "Apple", "Apple", "Apple", "Android", "Apple", "Andr…
#> $ pet               <chr> "No", "Cat", "Other", "No", "Dog", "No", "Cat", "Dog…
#> $ favorite_pizza    <chr> "Pepperoni", "Hawai", "Margaritha", "Carciofi", "Mar…
#> $ favorite_icecream <chr> "Lemon", "Strawberry", "Vanilla", "Vanilla", "Apple"…
#> $ likes_garlic      <int> 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0…
#> $ likes_sushi       <int> 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1…
#> $ likes_beatles     <int> 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0…
#> $ likes_beer        <int> 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1…
data <- create_data_random(obs = 1000)
glimpse(data)
#> Rows: 1,000
#> Columns: 12
#> $ id         <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, …
#> $ target_ind <int> 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0,…
#> $ var_1      <int> 27, 59, 16, 85, 85, 48, 77, 30, 7, 44, 46, 34, 19, 51, 2, 7…
#> $ var_2      <int> 16, 14, 15, 51, 49, 62, 45, 6, 1, 22, 85, 27, 60, 61, 99, 1…
#> $ var_3      <int> 21, 94, 38, 63, 18, 66, 73, 50, 87, 83, 98, 67, 64, 5, 19, …
#> $ var_4      <int> 30, 83, 59, 81, 29, 14, 89, 1, 57, 97, 27, 98, 4, 26, 26, 9…
#> $ var_5      <int> 25, 99, 72, 65, 24, 9, 30, 54, 78, 27, 32, 95, 49, 97, 85, …
#> $ var_6      <int> 44, 40, 37, 53, 7, 72, 24, 84, 100, 11, 49, 68, 82, 77, 43,…
#> $ var_7      <int> 93, 59, 8, 85, 3, 81, 39, 14, 67, 62, 45, 81, 87, 99, 40, 3…
#> $ var_8      <int> 58, 49, 74, 23, 75, 82, 10, 28, 2, 60, 99, 85, 59, 34, 65, …
#> $ var_9      <int> 80, 88, 24, 56, 90, 1, 16, 26, 77, 7, 90, 31, 89, 61, 46, 7…
#> $ var_10     <int> 31, 32, 87, 33, 13, 36, 93, 88, 82, 2, 63, 78, 72, 19, 58, …
data <- create_data_unfair(obs = 1000)
glimpse(data)
#> Rows: 1,000
#> Columns: 22
#> $ age         <int> 46, 94, 66, 29, 82, 57, 65, 58, 29, 40, 84, 72, 24, 87, 41…
#> $ gender      <chr> "Female", "Female", "Male", "Male", "Female", "Female", "F…
#> $ eye_color   <chr> "Blue", "Green", "Blue", "Blue", "Blue", "Brown", "Brown",…
#> $ shoe_size   <dbl> 45.2, 37.0, 45.0, 45.0, 39.0, 38.2, 41.2, 46.0, 40.0, 42.0…
#> $ iq          <dbl> 141, 71, 80, 74, 119, 95, 97, 135, 88, 140, 71, 126, 106, …
#> $ education   <int> 66, 41, 49, 49, 25, 68, 87, 46, 78, 14, 65, 62, 68, 16, 69…
#> $ income      <dbl> 132.0, 95.0, 18.0, 54.0, 70.0, 128.0, 128.5, 32.0, 82.0, 9…
#> $ handset     <chr> "Apple", "Apple", "Apple", "Android", "Apple", "Android", …
#> $ pet         <chr> "Other", "Cat", "Cat", "Dog", "Cat", "No", "Dog", "No", "N…
#> $ smoking     <int> 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0…
#> $ name_arabic <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0…
#> $ outfit      <chr> "Casual", "Casual", "Casual", "Alternative", "Elegant", "A…
#> $ glasses     <int> 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1…
#> $ tatoos      <int> 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0…
#> $ kids        <int> 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0…
#> $ bad_debt    <dbl> 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0…
#> $ credit_card <chr> "No", "Master", "Master", "No", "No", "Visa", "Visa", "Vis…
#> $ left_handed <int> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0…
#> $ skin_color  <chr> "White", "Brown", "White", "White", "White", "White", "Bla…
#> $ religion    <chr> "Christian", "No", "Christian", "No", "Christian", "No", "…
#> $ internet_gb <dbl> 0.000000, 60.609298, 260.437887, 55.199729, 0.000000, 179.…
#> $ target_ind  <int> 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1…
Create an empty data set and add random variables.
data <- create_data_empty(obs = 1000) %>%
  add_var_random_01("smoking", prob = c(0.8, 0.2)) %>%
  add_var_random_cat("gender", 
                     cat = c("female", "male", "diverse"), 
                     prob = c(0.45, 0.45, 0.1)) %>%
  add_var_random_dbl("internet_usage", min_val = 0, max_val = 1000) %>%
  add_var_random_int("age", min_val = 18, max_val = 100) %>%
  add_var_random_moon() %>%
  add_var_random_starsign()
glimpse(data)
#> Rows: 1,000
#> Columns: 6
#> $ smoking         <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, …
#> $ gender          <chr> "male", "male", "female", "male", "female", "female", …
#> $ internet_usage  <dbl> 923.7630, 979.0669, 773.8658, 697.6332, 470.4925, 609.…
#> $ age             <int> 84, 54, 44, 45, 60, 73, 60, 74, 62, 46, 81, 95, 58, 19…
#> $ random_moon     <chr> "Waxing (+)", "Waning (-)", "Waning (-)", "Waxing (+)"…
#> $ random_starsign <chr> "Saggitarius", "Saggitarius", "Libra", "Pisces", "Pisc…