Palmer Penguins — three-species classification

Author

Aparna Pandey and Stephan Peischl

Overview

Here we treat species as a three-level outcome (Adelie, Chinstrap, Gentoo) using multinomial logistic regression (nnet::multinom) and a multiclass classification tree (rpart). This complements the binary Adelie-vs-Gentoo notebook (penguins-classification.Rmd). For metrics and multiclass intuition on the site, see Module 06.

See Palmer Penguins data card.

Prepare data

data("penguins", package = "palmerpenguins")
pg <- penguins |>
  tidyr::drop_na(species, bill_length_mm, bill_depth_mm, flipper_length_mm, body_mass_g, island, sex, year) |>
  mutate(
    species = droplevels(species),
    year = as.numeric(year)
  )

table(pg$species)


   Adelie Chinstrap    Gentoo 
      146        68       119

nrow(pg)

[1] 333

Pair plot (measurements + island, coloured by species)

GGally::ggpairs(
  pg,
  columns = c("bill_length_mm", "bill_depth_mm", "flipper_length_mm", "body_mass_g", "island"),
  aes(color = species, alpha = 0.25)
) +
  theme_minimal()

Train / test split (stratified on `species`)

set.seed(24)
split <- initial_split(pg, prop = 0.75, strata = species)
train <- training(split)
test <- testing(split)

Multinomial logistic regression

set.seed(1)
multi_fit <- nnet::multinom(
  species ~ bill_length_mm + bill_depth_mm + flipper_length_mm + body_mass_g + island + sex + year,
  data = train,
  trace = FALSE,
  MaxNWts = 5000
)
summary(multi_fit)

Call:
nnet::multinom(formula = species ~ bill_length_mm + bill_depth_mm + 
    flipper_length_mm + body_mass_g + island + sex + year, data = train, 
    trace = FALSE, MaxNWts = 5000)

Coefficients:
          (Intercept) bill_length_mm bill_depth_mm flipper_length_mm
Chinstrap  -0.1332473       23.38094     -31.56866         2.5471506
Gentoo      0.3359627       16.08307     -23.95942        -0.3325775
          body_mass_g islandDream islandTorgersen   sexmale       year
Chinstrap -0.08459162   114.22016        39.99244 -28.49904 -0.3629059
Gentoo     0.06840838   -40.97153       -67.58936 -44.32825 -0.2457162

Std. Errors:
           (Intercept) bill_length_mm bill_depth_mm flipper_length_mm
Chinstrap 0.0003721332     0.03345882   0.007958916         0.4629206
Gentoo    0.0354867677     1.84472663   0.734152531         7.4520501
          body_mass_g  islandDream islandTorgersen    sexmale       year
Chinstrap   0.3827169 0.0003875734    1.117192e-20 0.03542335  0.7402786
Gentoo    170.2851969 0.0354242593    4.030526e-25 0.03542630 71.2574316

Residual Deviance: 0.0001072089 
AIC: 36.00011

pred_multi <- predict(multi_fit, newdata = test)
tibble(truth = test$species, .pred_class = pred_multi) |>
  conf_mat(truth = truth, estimate = .pred_class)

           Truth
Prediction  Adelie Chinstrap Gentoo
  Adelie        37         1      0
  Chinstrap      0        16      0
  Gentoo         0         0     30

cm_obj <- conf_mat(
  tibble(truth = test$species, .pred_class = pred_multi),
  truth = truth,
  estimate = .pred_class
)
cm <- as.data.frame.table(cm_obj$table, stringsAsFactors = FALSE) |>
  dplyr::rename(Reference = Truth)
ggplot(cm, aes(Reference, Prediction, fill = Freq)) +
  geom_tile(color = "gray80") +
  geom_text(aes(label = Freq), color = "gray15") +
  scale_fill_gradient(low = "white", high = "steelblue") +
  theme_minimal() +
  labs(
    title = "Multinomial logit — test confusion (counts)",
    x = "True species", y = "Predicted species"
  )

Multiclass tree

tree_fit <- rpart(
  species ~ bill_length_mm + bill_depth_mm + flipper_length_mm + body_mass_g + island + sex + year,
  data = train,
  method = "class"
)
rpart.plot(tree_fit, type = 4, extra = 104, box.palette = "GnYlRd", main = "Three species (rpart, train)")

pred_t <- predict(tree_fit, test, type = "class") |> factor(levels = levels(test$species))
tibble(truth = test$species, .pred_class = pred_t) |>
  conf_mat(truth = truth, estimate = .pred_class)

           Truth
Prediction  Adelie Chinstrap Gentoo
  Adelie        35         1      0
  Chinstrap      1        13      0
  Gentoo         1         3     30

Takeaways

Chinstrap is often the hardest class (smaller n, overlap in measurement space) — inspect per-class metrics, not only overall accuracy.
Multiclass ROC and one-vs-rest calibration are natural Thursday extensions; here we stay with confusion matrices + trees for clarity.
Compare with the binary pipeline in _includes/day02-tidymodels-walkthrough.qmd (Adelie vs Gentoo slice).

--- title: "Palmer Penguins — three-species classification" author: "Aparna Pandey and Stephan Peischl" format: html: toc: true code-tools: true engine: knitr --- ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE) library(palmerpenguins) library(dplyr) library(ggplot2) library(GGally) library(nnet) library(rpart) library(rpart.plot) library(tidymodels) library(tidyr) theme_set(theme_classic()) ``` # Overview Here we treat **`species`** as a **three-level** outcome (Adelie, Chinstrap, Gentoo) using **multinomial logistic regression** (`nnet::multinom`) and a **multiclass classification tree** (`rpart`). This complements the **binary** Adelie-vs-Gentoo notebook (`penguins-classification.Rmd`). For **metrics and multiclass intuition** on the site, see [Module 06](../modules/module-06-evaluation-and-interpretability.qmd). See **[Palmer Penguins data card](../data/cards/palmer-penguins.qmd)**. ## Prepare data ```{r} data("penguins", package = "palmerpenguins") pg <- penguins |> tidyr::drop_na(species, bill_length_mm, bill_depth_mm, flipper_length_mm, body_mass_g, island, sex, year) |> mutate( species = droplevels(species), year = as.numeric(year) ) table(pg$species) nrow(pg) ``` ## Pair plot (measurements + island, coloured by species) ```{r fig.width=8.5, fig.height=5.5} GGally::ggpairs( pg, columns = c("bill_length_mm", "bill_depth_mm", "flipper_length_mm", "body_mass_g", "island"), aes(color = species, alpha = 0.25) ) + theme_minimal() ``` ## Train / test split (stratified on `species`) ```{r} set.seed(24) split <- initial_split(pg, prop = 0.75, strata = species) train <- training(split) test <- testing(split) ``` ## Multinomial logistic regression ```{r} set.seed(1) multi_fit <- nnet::multinom( species ~ bill_length_mm + bill_depth_mm + flipper_length_mm + body_mass_g + island + sex + year, data = train, trace = FALSE, MaxNWts = 5000 ) summary(multi_fit) ``` ```{r} pred_multi <- predict(multi_fit, newdata = test) tibble(truth = test$species, .pred_class = pred_multi) |> conf_mat(truth = truth, estimate = .pred_class) ``` ```{r fig.width=5.5, fig.height=4.5} cm_obj <- conf_mat( tibble(truth = test$species, .pred_class = pred_multi), truth = truth, estimate = .pred_class ) cm <- as.data.frame.table(cm_obj$table, stringsAsFactors = FALSE) |> dplyr::rename(Reference = Truth) ggplot(cm, aes(Reference, Prediction, fill = Freq)) + geom_tile(color = "gray80") + geom_text(aes(label = Freq), color = "gray15") + scale_fill_gradient(low = "white", high = "steelblue") + theme_minimal() + labs( title = "Multinomial logit — test confusion (counts)", x = "True species", y = "Predicted species" ) ``` ## Multiclass tree ```{r fig.width=9, fig.height=6} tree_fit <- rpart( species ~ bill_length_mm + bill_depth_mm + flipper_length_mm + body_mass_g + island + sex + year, data = train, method = "class" ) rpart.plot(tree_fit, type = 4, extra = 104, box.palette = "GnYlRd", main = "Three species (rpart, train)") ``` ```{r} pred_t <- predict(tree_fit, test, type = "class") |> factor(levels = levels(test$species)) tibble(truth = test$species, .pred_class = pred_t) |> conf_mat(truth = truth, estimate = .pred_class) ``` ## Takeaways - **Chinstrap** is often the hardest class (smaller *n*, overlap in measurement space) — inspect **per-class** metrics, not only overall accuracy. - Multiclass **ROC** and **one-vs-rest** calibration are natural Thursday extensions; here we stay with **confusion matrices** + trees for clarity. - Compare with the **binary** pipeline in `_includes/day02-tidymodels-walkthrough.qmd` (Adelie vs Gentoo slice).