fitting

Author

Kelly Hatfield

library(here)
here() starts at /Users/kellymccormickhatfield/Documents/MADA 2023/kellyhatfield-MADA-portfolio
library(tidyverse)
── Attaching packages
───────────────────────────────────────
tidyverse 1.3.2 ──
✔ ggplot2 3.4.0      ✔ purrr   1.0.1 
✔ tibble  3.1.8      ✔ dplyr   1.0.10
✔ tidyr   1.3.0      ✔ stringr 1.5.0 
✔ readr   2.1.3      ✔ forcats 0.5.2 
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
library(tidymodels)
── Attaching packages ────────────────────────────────────── tidymodels 1.0.0 ──
✔ broom        1.0.2     ✔ rsample      1.1.1
✔ dials        1.1.0     ✔ tune         1.0.1
✔ infer        1.0.4     ✔ workflows    1.1.2
✔ modeldata    1.0.1     ✔ workflowsets 1.0.0
✔ parsnip      1.0.3     ✔ yardstick    1.1.0
✔ recipes      1.0.4     
── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
✖ scales::discard() masks purrr::discard()
✖ dplyr::filter()   masks stats::filter()
✖ recipes::fixed()  masks stringr::fixed()
✖ dplyr::lag()      masks stats::lag()
✖ yardstick::spec() masks readr::spec()
✖ recipes::step()   masks stats::step()
• Learn how to get started at https://www.tidymodels.org/start/
library(performance)

Attaching package: 'performance'

The following objects are masked from 'package:yardstick':

    mae, rmse
# Load the cleaned symptom data. The path is built from the project root with
# here::here() so the script does not depend on one user's home-directory
# layout.
CleanSymp <- readRDS(here::here("fluanalysis", "Data", "CleanSymp.Rds"))

# List the column names (ls() returns them in sorted order).
ls(CleanSymp)
 [1] "AbPain"            "BodyTemp"          "Breathless"       
 [4] "ChestCongestion"   "ChestPain"         "ChillsSweats"     
 [7] "CoughIntensity"    "CoughYN"           "CoughYN2"         
[10] "Diarrhea"          "EarPn"             "EyePn"            
[13] "Fatigue"           "Headache"          "Hearing"          
[16] "Insomnia"          "ItchyEye"          "Myalgia"          
[19] "MyalgiaYN"         "NasalCongestion"   "Nausea"           
[22] "Pharyngitis"       "RunnyNose"         "Sneeze"           
[25] "SubjectiveFever"   "SwollenLymphNodes" "ToothPn"          
[28] "Vision"            "Vomit"             "Weakness"         
[31] "WeaknessYN"        "Wheeze"           
# Per-column overview: level counts for the factor columns and a numeric
# summary for BodyTemp.
CleanSymp %>% summary()
 SwollenLymphNodes ChestCongestion ChillsSweats NasalCongestion CoughYN  
 No :418           No :323         No :130      No :167         No : 75  
 Yes:312           Yes:407         Yes:600      Yes:563         Yes:655  
                                                                         
                                                                         
                                                                         
                                                                         
 Sneeze    Fatigue   SubjectiveFever Headache      Weakness   WeaknessYN
 No :339   No : 64   No :230         No :115   None    : 49   No : 49   
 Yes:391   Yes:666   Yes:500         Yes:615   Mild    :223   Yes:681   
                                               Moderate:338             
                                               Severe  :120             
                                                                        
                                                                        
  CoughIntensity CoughYN2      Myalgia    MyalgiaYN RunnyNose AbPain   
 None    : 47    No : 47   None    : 79   No : 79   No :211   No :639  
 Mild    :154    Yes:683   Mild    :213   Yes:651   Yes:519   Yes: 91  
 Moderate:357              Moderate:325                                
 Severe  :172              Severe  :113                                
                                                                       
                                                                       
 ChestPain Diarrhea  EyePn     Insomnia  ItchyEye  Nausea    EarPn    
 No :497   No :631   No :617   No :315   No :551   No :475   No :568  
 Yes:233   Yes: 99   Yes:113   Yes:415   Yes:179   Yes:255   Yes:162  
                                                                      
                                                                      
                                                                      
                                                                      
 Hearing   Pharyngitis Breathless ToothPn   Vision    Vomit     Wheeze   
 No :700   No :119     No :436    No :565   No :711   No :652   No :510  
 Yes: 30   Yes:611     Yes:294    Yes:165   Yes: 19   Yes: 78   Yes:220  
                                                                         
                                                                         
                                                                         
                                                                         
    BodyTemp     
 Min.   : 97.20  
 1st Qu.: 98.20  
 Median : 98.50  
 Mean   : 98.94  
 3rd Qu.: 99.30  
 Max.   :103.10  

Modeling: BodyTemp predicted by RunnyNose

# Body temperature by runny-nose status: mean, first quartile, median, and
# third quartile within each RunnyNose level.
CleanSymp %>%
  group_by(RunnyNose) %>%
  summarize(
    mean = mean(BodyTemp),
    q1 = quantile(BodyTemp, 0.25),
    median = median(BodyTemp),
    q3 = quantile(BodyTemp, 0.75)
  )
# A tibble: 2 × 5
  RunnyNose  mean    q1 median    q3
  <fct>     <dbl> <dbl>  <dbl> <dbl>
1 No         99.1  98.2   98.8  99.6
2 Yes        98.9  98.2   98.5  99.2
# Boxplot of body temperature by runny-nose status with the raw observations
# overlaid as jittered, semi-transparent points.
ggplot(CleanSymp, aes(x = RunnyNose, y = BodyTemp)) +
  # Hide the boxplot's own outlier markers: every observation is already drawn
  # by geom_point() below, so leaving them on would plot outliers twice.
  geom_boxplot(fill = "grey92", outlier.shape = NA) +
  geom_point(
    size = 2,
    alpha = 0.15,
    # Fixed seed keeps the jitter reproducible between renders.
    position = position_jitter(seed = 1, width = 0.2)
  )

# Linear regression specification backed by the "lm" engine.
lm_mod <- set_engine(linear_reg(), "lm")

# Fit body temperature on runny-nose status alone.
fit_lm <- fit(lm_mod, BodyTemp ~ RunnyNose, data = CleanSymp)

# Coefficient table for the fitted model.
tidy(fit_lm)
# A tibble: 2 × 5
  term         estimate std.error statistic p.value
  <chr>           <dbl>     <dbl>     <dbl>   <dbl>
1 (Intercept)    99.1      0.0819   1210.   0      
2 RunnyNoseYes   -0.293    0.0971     -3.01 0.00268
# One-row table of model-level fit statistics for the RunnyNose-only linear fit.
fit_lm %>% glance()
# A tibble: 1 × 12
  r.squ…¹ adj.r…² sigma stati…³ p.value    df logLik   AIC   BIC devia…⁴ df.re…⁵
    <dbl>   <dbl> <dbl>   <dbl>   <dbl> <dbl>  <dbl> <dbl> <dbl>   <dbl>   <int>
1  0.0123  0.0110  1.19    9.08 0.00268     1 -1162. 2329. 2343.   1031.     728
# … with 1 more variable: nobs <int>, and abbreviated variable names
#   ¹​r.squared, ²​adj.r.squared, ³​statistic, ⁴​deviance, ⁵​df.residual

Modeling: Nausea predicted by RunnyNose

# Logistic regression specification. The engine is named explicitly ("glm" is
# parsnip's default for logistic_reg()) so this spec reads the same way as the
# linear specs in this file, which also set their engine.
lr_mod <- logistic_reg() %>%
  set_engine("glm")

# Fit nausea on runny-nose status alone.
fit_lr <- lr_mod %>%
  fit(Nausea ~ RunnyNose, data = CleanSymp)

# Coefficient table; estimates are on the log-odds scale.
tidy(fit_lr)
# A tibble: 2 × 5
  term         estimate std.error statistic    p.value
  <chr>           <dbl>     <dbl>     <dbl>      <dbl>
1 (Intercept)   -0.658      0.145    -4.53  0.00000589
2 RunnyNoseYes   0.0502     0.172     0.292 0.770     
# One-row table of model-level fit statistics for the RunnyNose-only logistic
# fit.
fit_lr %>% glance()
# A tibble: 1 × 8
  null.deviance df.null logLik   AIC   BIC deviance df.residual  nobs
          <dbl>   <int>  <dbl> <dbl> <dbl>    <dbl>       <int> <int>
1          945.     729  -472.  949.  958.     945.         728   730

Modeling: BodyTemp predicted by all variables

# Linear regression specification backed by the "lm" engine.
lm_mod2 <- set_engine(linear_reg(), "lm")

# Fit body temperature on every other column of CleanSymp.
# NOTE(review): the rendered results list 38 terms but only 35 estimated
# parameters (730 observations, 695 residual df). That is consistent with the
# WeaknessYN, CoughYN2 and MyalgiaYN indicators duplicating the "None" level of
# Weakness, CoughIntensity and Myalgia -- confirm, and consider dropping them.
fit_lm2 <- fit(lm_mod2, BodyTemp ~ ., data = CleanSymp)

# Coefficient table for the fitted model.
tidy(fit_lm2)
# A tibble: 38 × 5
   term                 estimate std.error statistic   p.value
   <chr>                   <dbl>     <dbl>     <dbl>     <dbl>
 1 (Intercept)           97.9       0.304   322.     0        
 2 SwollenLymphNodesYes  -0.165     0.0920   -1.80   0.0727   
 3 ChestCongestionYes     0.0873    0.0975    0.895  0.371    
 4 ChillsSweatsYes        0.201     0.127     1.58   0.114    
 5 NasalCongestionYes    -0.216     0.114    -1.90   0.0584   
 6 CoughYNYes             0.314     0.241     1.30   0.193    
 7 SneezeYes             -0.362     0.0983   -3.68   0.000249 
 8 FatigueYes             0.265     0.161     1.65   0.0996   
 9 SubjectiveFeverYes     0.437     0.103     4.22   0.0000271
10 HeadacheYes            0.0115    0.125     0.0913 0.927    
# … with 28 more rows
# One-row table of model-level fit statistics for the all-predictor linear fit.
fit_lm2 %>% glance()
# A tibble: 1 × 12
  r.squ…¹ adj.r…² sigma stati…³ p.value    df logLik   AIC   BIC devia…⁴ df.re…⁵
    <dbl>   <dbl> <dbl>   <dbl>   <dbl> <dbl>  <dbl> <dbl> <dbl>   <dbl>   <int>
1   0.129  0.0860  1.14    3.02 4.20e-8    34 -1116. 2304. 2469.    909.     695
# … with 1 more variable: nobs <int>, and abbreviated variable names
#   ¹​r.squared, ²​adj.r.squared, ³​statistic, ⁴​deviance, ⁵​df.residual

Modeling: Nausea predicted by all variables

# Logistic regression specification. The engine is named explicitly ("glm" is
# parsnip's default for logistic_reg()) so this spec reads the same way as the
# linear specs in this file, which also set their engine.
lr_mod2 <- logistic_reg() %>%
  set_engine("glm")

# Fit nausea on every other column of CleanSymp.
# NOTE(review): the rendered results list 38 terms but only 35 estimated
# parameters (730 observations, 695 residual df). That is consistent with the
# WeaknessYN, CoughYN2 and MyalgiaYN indicators duplicating the "None" level of
# Weakness, CoughIntensity and Myalgia -- confirm, and consider dropping them.
fit_lr2 <- lr_mod2 %>%
  fit(Nausea ~ ., data = CleanSymp)

# Coefficient table; estimates are on the log-odds scale.
tidy(fit_lr2)
# A tibble: 38 × 5
   term                 estimate std.error statistic p.value
   <chr>                   <dbl>     <dbl>     <dbl>   <dbl>
 1 (Intercept)             0.223     7.83     0.0285  0.977 
 2 SwollenLymphNodesYes   -0.251     0.196   -1.28    0.200 
 3 ChestCongestionYes      0.276     0.213    1.30    0.195 
 4 ChillsSweatsYes         0.274     0.288    0.952   0.341 
 5 NasalCongestionYes      0.426     0.255    1.67    0.0944
 6 CoughYNYes             -0.140     0.519   -0.271   0.787 
 7 SneezeYes               0.177     0.210    0.840   0.401 
 8 FatigueYes              0.229     0.372    0.616   0.538 
 9 SubjectiveFeverYes      0.278     0.225    1.23    0.218 
10 HeadacheYes             0.331     0.285    1.16    0.245 
# … with 28 more rows
# One-row table of model-level fit statistics for the all-predictor logistic
# fit.
fit_lr2 %>% glance()
# A tibble: 1 × 8
  null.deviance df.null logLik   AIC   BIC deviance df.residual  nobs
          <dbl>   <int>  <dbl> <dbl> <dbl>    <dbl>       <int> <int>
1          945.     729  -376.  821.  982.     751.         695   730