# Fix the random numbers by setting the seed.
# This enables the analysis to be reproducible when random numbers are used.
set.seed(1234)

# Put 3/4 of the data into the training set
data_split <- initial_split(fludata, prop = 3 / 4)

# Create data frames for the two sets:
train_data <- training(data_split)
test_data <- testing(data_split)
Workflow creation and model fitting
Create a simple recipe and workflow that fit a logistic regression model to our categorical outcome of interest (Nausea).
# Recipe #1: Nausea predicted by all variables
flu_recipe <- recipe(Nausea ~ ., data = train_data)

# Model: logistic regression using the GLM engine
lr_mod <- logistic_reg() %>%
  set_engine("glm")

# Workflow: tying together model and recipe
flu_wflow <- workflow() %>%
  add_model(lr_mod) %>%
  add_recipe(flu_recipe)

# Printing workflow
flu_wflow

# Fitting the workflow to the training data; the later prediction steps
# use the resulting `flu_fit` object.
flu_fit <- flu_wflow %>%
  fit(data = train_data)
Use the trained workflow to predict with unseen test data
# Applying the fitted workflow to unseen test data
predict(flu_fit, test_data)
Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
prediction from a rank-deficient fit may be misleading
# A tibble: 183 × 1
.pred_class
<fct>
1 Yes
2 No
3 No
4 No
5 No
6 No
7 No
8 No
9 No
10 No
# … with 173 more rows
# Attach the model's predictions to the test data as extra columns
flu_aug <- flu_fit %>%
  augment(new_data = test_data)
Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
prediction from a rank-deficient fit may be misleading
Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
prediction from a rank-deficient fit may be misleading
# The data look like:
flu_aug
# A tibble: 183 × 35
SwollenLymph…¹ Chest…² Chill…³ Nasal…⁴ CoughYN Sneeze Fatigue Subje…⁵ Heada…⁶
<fct> <fct> <fct> <fct> <fct> <fct> <fct> <fct> <fct>
1 Yes Yes Yes Yes No Yes Yes Yes Yes
2 Yes No Yes No No No Yes Yes Yes
3 No No Yes No Yes Yes Yes Yes Yes
4 No Yes Yes Yes Yes Yes Yes Yes Yes
5 Yes Yes Yes Yes Yes No Yes Yes Yes
6 No Yes Yes Yes Yes Yes Yes Yes Yes
7 No No Yes No Yes No Yes Yes Yes
8 Yes Yes Yes Yes Yes Yes Yes Yes Yes
9 No Yes No No Yes Yes Yes No Yes
10 Yes Yes Yes Yes Yes Yes Yes Yes Yes
# … with 173 more rows, 26 more variables: Weakness <fct>, WeaknessYN <fct>,
# CoughIntensity <fct>, CoughYN2 <fct>, Myalgia <fct>, MyalgiaYN <fct>,
# RunnyNose <fct>, AbPain <fct>, ChestPain <fct>, Diarrhea <fct>,
# EyePn <fct>, Insomnia <fct>, ItchyEye <fct>, Nausea <fct>, EarPn <fct>,
# Hearing <fct>, Pharyngitis <fct>, Breathless <fct>, ToothPn <fct>,
# Vision <fct>, Vomit <fct>, Wheeze <fct>, BodyTemp <dbl>, .pred_class <fct>,
# .pred_No <dbl>, .pred_Yes <dbl>, and abbreviated variable names …
# Recipe #2: Body temperature predicted by all variables
flu_recipe2 <- recipe(BodyTemp ~ ., data = train_data)

# Model: linear regression using the "lm" engine
ln_mod <- linear_reg() %>%
  set_engine("lm")

# Workflow: tying together linear model and recipe 2
flu_wflow2 <- workflow() %>%
  add_model(ln_mod) %>%
  add_recipe(flu_recipe2)

# Printing workflow
flu_wflow2
══ Workflow ════════════════════════════════════════════════════════════════════
Preprocessor: Recipe
Model: linear_reg()
── Preprocessor ────────────────────────────────────────────────────────────────
0 Recipe Steps
── Model ───────────────────────────────────────────────────────────────────────
Linear Regression Model Specification (regression)
Computational engine: lm
# Fitting the model to the training dataset
flu_fit2 <- flu_wflow2 %>%
  fit(data = train_data)

# Looking at model output
flu_fit2 %>%
  extract_fit_parsnip() %>%
  tidy()
Warning in predict.lm(object = object$fit, newdata = new_data, type =
"response"): prediction from a rank-deficient fit may be misleading
# Attach the linear model's predictions (.pred) to the data.
# NOTE(review): `flu_aug2` is assumed to be built from test_data, the same
# way `flu_aug` is built for the logistic model -- confirm.
flu_aug2 <- augment(flu_fit2, test_data)

# Pull body temperatures and predictions
temps <- flu_aug2 %>%
  pull(BodyTemp)
pred <- flu_aug2 %>%
  pull(.pred)

# Run RMSE (observed values first, predictions second)
rmse_vec(temps, pred)
[1] 1.153632
Model with only Runny Nose as predictor
# Recipe for body temperature and runny nose
flu_recipe_btrn <- recipe(BodyTemp ~ RunnyNose, data = train_data)

# Modified workflow: same linear model, new recipe
flu_btrn_workflow <- workflow() %>%
  add_model(ln_mod) %>%
  add_recipe(flu_recipe_btrn)

# Fitting
flu_btrn_fit <- flu_btrn_workflow %>%
  fit(data = train_data)
# Check fit characteristics
flu_btrn_fit %>%
  extract_fit_parsnip() %>%
  tidy()