Code
library(readxl)
library(openxlsx)
library(dplyr)
library(gtsummary)
library(gt)
library(kableExtra)
library(lubridate)
library(tidyverse)

1 Attrition Results

Code
setwd("C:/Users/Sowmiya.p/Documents/BMS-Markets/HANNA/HANNA")

#install.packages("readxl")
#install.packages("openxlsx")
library(readxl)
library(openxlsx)
library(dplyr)
library(gtsummary)
library(gt)
library(kableExtra)

hanna_dummy_data <- read_excel("C:/Users/Sowmiya.p/Documents/BMS-Markets/HANNA/HANNA/HANNA_DUMMY_DATA.xlsx")

#reading all the sheets from hanna_dummy_data
sheet_names <- readxl::excel_sheets("C:/Users/Sowmiya.p/Documents/BMS-Markets/HANNA/HANNA/HANNA_DUMMY_DATA.xlsx")

for (sheet in sheet_names) {
  sheet_data <- readxl::read_excel("C:/Users/Sowmiya.p/Documents/BMS-Markets/HANNA/HANNA/HANNA_DUMMY_DATA.xlsx", sheet = sheet, na = ".", skip = 1)
  df <- paste0(sheet, "_data")
  assign(df, sheet_data, envir = .GlobalEnv)
}

#reading INEX data
INEX_data <- read.csv("hanna csv files/INEX_new.csv")

#creating inclusion table by filtering all the variables
incl01 <- INEX_data %>% filter(incl01 == 1)
incl02 <- incl01 %>% filter(incl02 == 1)
incl03 <- incl02 %>% filter(incl03 == 1)
incl04 <- incl03 %>% filter(incl04 == 1)
inclusion_table <- incl04 %>% filter(incl05 == 1)

#creating exclusion table by filtering the variables
excl01 <- inclusion_table %>% filter(excl01 == 0)
excl02 <- excl01 %>% filter(excl02 == 0)
exclusion_table <- excl02 %>% filter( excl03 == 0)

#creating index table by Joining nivoinit and exclusion table
NIVOINIT_data <- read.csv("hanna csv files/nivoinit_new.csv")

index_table <- left_join(exclusion_table,NIVOINIT_data, by = "patid") %>% select(patid,nivotrtst)

#removing null values from the index table
index_table$nivotrtst[index_table$nivotrtst == ""] <- NA
index_table <- index_table[!is.na(index_table$nivotrtst),]

write.csv(index_table, file = "C:/Users/Sowmiya.p/Documents/BMS-Markets/HANNA/HANNA/index_table.csv", row.names = FALSE)

#total count of the INEX TABLE
tot_pat <- nrow(INEX_data)

#count of all variables
count_age_greater_18 <- nrow(incl01)
count_prog_pat_platinum_therapy <- nrow(incl02)
count_scchn_his_cytology <- nrow(incl03)
count_descision_to_start_nivo <- nrow(incl04)
count_consent_to_participate <- nrow(inclusion_table)
count_primary_can_treat <- nrow(excl01)
count_already_treated_nivo <- nrow(excl02)
count_ivstudy_scchn <- nrow(exclusion_table)
count_index_date <- length(index_table$nivotrtst)
count_idx_tab <- nrow(index_table)

#percentage of all variables
per_tot_pat <- paste0(round(tot_pat / tot_pat * 100, 2), "%")
per_age_greater_18 <- paste0(round(count_age_greater_18 / tot_pat * 100, 2), "%")
per_prog_pat_platinum_therapy <- paste0(round(count_prog_pat_platinum_therapy / tot_pat * 100, 2), "%")
per_scchn_his_cytology <- paste0(round(count_scchn_his_cytology / tot_pat * 100, 2), "%")
per_descision_to_start_nivo <- paste0(round(count_descision_to_start_nivo / tot_pat * 100, 2), "%")
per_consent_to_participate <- paste0(round(count_consent_to_participate / tot_pat * 100, 2), "%")
per_primary_can_treat <- paste0(round(count_primary_can_treat / tot_pat * 100, 2), "%")
per_already_treated_nivo <- paste0(round(count_already_treated_nivo / tot_pat * 100, 2), "%")
per_ivstudy_scchn <- paste0(round(count_ivstudy_scchn / tot_pat * 100, 2), "%")
per_index_date <- paste0(round(count_index_date / tot_pat * 100, 2), "%")
per_idx_tab <- paste0(round(count_idx_tab / tot_pat * 100, 2), "%")

# Create a data frame with the counts and percentages
Attrition_data <- data.frame(
  Objectives = c("Total Patients", "Inclusion Criteria", "Patients must be 18 or older when making treatment decisions",
              "Progression of patients on platinum-based therapy",
              "SCCHN diagnosis is confirmed through histology or cytology",
              "Decision to start using nivolumab for initial treatment of SCCHN made",
              "Informed consent to participate in the study",
              "Exclusion Criteria", 
              "Exclude patients diagnosed primarily with a cancer other than SCCHN",
              "Exclude patients who have already been treated with nivolumab and/or ipilimumab",
              "Exclude patients participating in an interventional study for advanced SCCHN",
              "Index Date",
              "Patients with Nivolumab initiation date",
              "Final"),
  
  N = c(tot_pat, "", count_age_greater_18, count_prog_pat_platinum_therapy, count_scchn_his_cytology,
        count_descision_to_start_nivo, count_consent_to_participate, "",
        count_primary_can_treat,
        count_already_treated_nivo, count_ivstudy_scchn, "", count_index_date, count_idx_tab),
  
  Percentage = c(per_tot_pat, "", per_age_greater_18, per_prog_pat_platinum_therapy, per_scchn_his_cytology,
                 per_descision_to_start_nivo, per_consent_to_participate, "",
                 per_primary_can_treat,
                 per_already_treated_nivo, per_ivstudy_scchn, "", per_index_date, per_idx_tab)
)

saveRDS(Attrition_data,"Results/attr_results.rds")
Code
library(kableExtra)
attr_results<-readRDS("Results/attr_results.rds")
style<-kable(attr_results, format = "html", align = "l", col.names = c("Objectives", "N", "Percentage"), escape = FALSE)
attr_results %>% kable("html") %>% kable_styling(full_width = FALSE) %>% row_spec(row = c(0,2,8,12,14), background = "lightgray", bold = TRUE)
Objectives N Percentage
Total Patients 385 100%
Inclusion Criteria
Patients must be 18 or older when making treatment decisions 385 100%
Progression of patients on platinum-based therapy 376 97.66%
SCCHN diagnosis is confirmed through histology or cytology 371 96.36%
Decision to start using nivolumab for initial treatment of SCCHN made 368 95.58%
Informed consent to participate in the study 368 95.58%
Exclusion Criteria
Exclude patients diagnosed primarily with a cancer other than SCCHN 367 95.32%
Exclude patients who have already been treated with nivolumab and/or ipilimumab 366 95.06%
Exclude patients participating in an interventional study for advanced SCCHN 366 95.06%
Index Date
Patients with Nivolumab initiation date 351 91.17%
Final 351 91.17%

2 Patient Characteristics

Code
#reading index table and demo table
index_table <- index_table
DEMO_data <- read.csv("hanna csv files/DEMO_new.csv")

#joining both the table and filtering the patients with baseline visit==1
index_demo_joined <- left_join(index_table, DEMO_data, by = "patid") %>% filter(visit==1) %>% select(patid,visit_date,brthyr,nivotrtst,sex,visit)

library(lubridate)

#extracting only the year in nivotrtst to get age
index_demo_joined$nivotrtst <- as.Date(index_demo_joined$nivotrtst, format = "%d/%m/%Y")
index_demo_joined$index_year <- year(index_demo_joined$nivotrtst)

index_demo_joined$brthyr <- as.numeric(index_demo_joined$brthyr)

#creating the age column
index_demo_joined$age <- index_demo_joined$index_year - index_demo_joined$brthyr

index_demo_joined$age_category <- ifelse(index_demo_joined$age < 70, "<70 yrs", "≥70 yrs")

#Age at nivolumab initiation
# Mean
mean_age <- round(mean(index_demo_joined$age),2)

# Standard Deviation
sd_age <- round(sd(index_demo_joined$age),2)

# Minimum
min_age <- min(index_demo_joined$age)

sorted_age <- sort(index_demo_joined$age)

# 1st Quartile
q1_age <- quantile(sorted_age, 0.25)

# Median
median_age <- median(sorted_age)

# 3rd Quartile
q3_age <- quantile(sorted_age, 0.75)

# Maximum
max_age <- max(index_demo_joined$age)

missing_count_age <- sum(is.na(index_demo_joined$age))

count_total_pat <- nrow(index_demo_joined)
per_total_pat <- paste0(round(count_total_pat / count_total_pat *100,2),"%")

count_age_above_70 <- index_demo_joined %>% filter(age_category == "≥70 yrs") %>% nrow()
count_age_below_70 <- index_demo_joined %>% filter(age_category == "<70 yrs") %>% nrow()
mis_count_age_cat <- sum(is.na(index_demo_joined$age_category))

per_age_above_70 <- paste0(round(count_age_above_70 / count_total_pat * 100,2), "%")
per_age_below_70 <- paste0(round(count_age_below_70 / count_total_pat * 100,2), "%")
per_mis_age_cat <- paste0(round(mis_count_age_cat / count_total_pat * 100,2), "%")

count_male <- index_demo_joined %>% filter(sex == 1) %>% nrow()
count_female <- index_demo_joined %>% filter(sex ==2) %>% nrow()
mis_count_sex_cat <- sum(is.na(index_demo_joined$gender_category))

per_count_male <- paste0(round(count_male / count_total_pat * 100,2), "%")
per_count_female <- paste0(round(count_female / count_total_pat * 100,2), "%")
per_mis_sex_cat <- paste0(round(mis_count_sex_cat / count_total_pat * 100,2), "%")

#left joining the anam table with the above table
ANAM_data <- read.csv("hanna csv files/ANAM_new.csv")
final_anam_join <- left_join(index_demo_joined, ANAM_data, by = "patid") %>% filter(visit.x==1)

#final_anam_join[final_anam_join == "."] <- NA

final_anam_join <- apply(final_anam_join, 2, function(x) replace(x, x == ".", NA))
final_anam_join <- as.data.frame(final_anam_join)

#count and percentage for smoking status
count_smok_his <- final_anam_join %>% filter(smokstat == 1) %>% nrow()
count_smok_no_his <- final_anam_join %>% filter(smokstat == 0) %>% nrow()
mis_count_smok_cat <- sum(is.na(final_anam_join$smokstat))

per_smok_his <- paste0(round(count_smok_his / count_total_pat * 100,2), "%")
per_smok_no_his <- paste0(round(count_smok_no_his / count_total_pat * 100,2), "%")
per_mis_smok_cat <- paste0(round(mis_count_smok_cat / count_total_pat * 100,2), "%")

#count and percentage for alcohol status
count_alco_status_yes <- final_anam_join %>% filter(alcohol == 1) %>% nrow()
count_alco_status_no <- final_anam_join %>% filter(alcohol == 0) %>% nrow()
count_alco_status_occasionally <- final_anam_join %>% filter(alcohol == 2) %>% nrow()
count_alco_status_no_longer <- final_anam_join %>% filter(alcohol == 3) %>% nrow()
mis_count_alco_status_cat <- sum(is.na(final_anam_join$alcohol))

per_alco_status_yes <- paste0(round(count_alco_status_yes / count_total_pat * 100,2), "%")
per_alco_status_no <- paste0(round(count_alco_status_no / count_total_pat * 100,2), "%")
per_alco_status_occasionally <- paste0(round(count_alco_status_occasionally / count_total_pat * 100,2), "%")
per_alco_status_no_longer <- paste0(round(count_alco_status_no_longer / count_total_pat * 100,2), "%")
per_mis_alco_status_cat <- paste0(round(mis_count_alco_status_cat / count_total_pat * 100,2), "%")

final_anam_join <- final_anam_join %>%
  mutate(alc_past = ifelse(alcpast == 99, NA, alcpast))

#count and percentage for alcohol past
count_alco_past_yes <- final_anam_join %>% filter(alcpast == 1) %>% nrow()
count_alco_past_no <- final_anam_join %>% filter(alcpast == 0) %>% nrow()
mis_count_alco_past_cat <- sum(is.na(final_anam_join$alc_past))

# Count missing values
missing_count <- sum(is.na(final_anam_join$column_name))
per_alco_past_yes <- paste0(round(count_alco_past_yes / count_total_pat * 100,2), "%")
per_alco_past_no<- paste0(round(count_alco_past_no / count_total_pat * 100,2), "%")
per_mis_alco_past_cat <- paste0(round(mis_count_alco_past_cat / count_total_pat * 100,2), "%")

#missing count and percentage for alcohol since
count_since_years <- sum(!is.na(final_anam_join$alcsincepst))
per_since_year <- paste0(round(count_since_years / count_total_pat * 100,2), "%")

#final_anam_join$alcsincepst[final_anam_join$alcsincepst == "0"] <- NA
#final_anam_join$alcuntil[final_anam_join$alcuntil == "0"] <- NA

final_anam_join$alcsincepst <- as.numeric(final_anam_join$alcsincepst)
mean_acl_since <- round(mean(final_anam_join$alcsincepst, na.rm = TRUE))


# Standard Deviation
sd_acl_since <- round(sd(final_anam_join$alcsincepst, na.rm = TRUE),2)

# Minimum
min_acl_since <- min(final_anam_join$alcsincepst, na.rm = TRUE)

sorted_acl_since <- sort(final_anam_join$alcsincepst)

# 1st Quartile
q1_acl_since <- quantile(sorted_acl_since, 0.25)

# Median
median_acl_since <- median(sorted_acl_since)

# 3rd Quartile
q3_acl_since <- quantile(sorted_acl_since, 0.75)

# Maximum
max_acl_since <- max(final_anam_join$alcsincepst, na.rm = TRUE)