Code
library(readxl)
library(openxlsx)
library(dplyr)
library(gtsummary)
library(gt)
library(kableExtra)
library(lubridate)
library(tidyverse)
library(readxl)
library(openxlsx)
library(dplyr)
library(gtsummary)
library(gt)
library(kableExtra)
library(lubridate)
library(tidyverse)
setwd("C:/Users/Sowmiya.p/Documents/BMS-Markets/HANNA/HANNA")
#install.packages("readxl")
#install.packages("openxlsx")
library(readxl)
library(openxlsx)
library(dplyr)
library(gtsummary)
library(gt)
library(kableExtra)
<- read_excel("C:/Users/Sowmiya.p/Documents/BMS-Markets/HANNA/HANNA/HANNA_DUMMY_DATA.xlsx")
hanna_dummy_data
#reading all the sheets from hanna_dummy_data
<- readxl::excel_sheets("C:/Users/Sowmiya.p/Documents/BMS-Markets/HANNA/HANNA/HANNA_DUMMY_DATA.xlsx")
sheet_names
for (sheet in sheet_names) {
<- readxl::read_excel("C:/Users/Sowmiya.p/Documents/BMS-Markets/HANNA/HANNA/HANNA_DUMMY_DATA.xlsx", sheet = sheet, na = ".", skip = 1)
sheet_data <- paste0(sheet, "_data")
df assign(df, sheet_data, envir = .GlobalEnv)
}
#reading INEX data
<- read.csv("hanna csv files/INEX_new.csv")
INEX_data
#creating inclusion table by filtering all the variables
<- INEX_data %>% filter(incl01 == 1)
incl01 <- incl01 %>% filter(incl02 == 1)
incl02 <- incl02 %>% filter(incl03 == 1)
incl03 <- incl03 %>% filter(incl04 == 1)
incl04 <- incl04 %>% filter(incl05 == 1)
inclusion_table
#creating exclusion table by filtering the variables
<- inclusion_table %>% filter(excl01 == 0)
excl01 <- excl01 %>% filter(excl02 == 0)
excl02 <- excl02 %>% filter( excl03 == 0)
exclusion_table
#creating index table by Joining nivoinit and exclusion table
<- read.csv("hanna csv files/nivoinit_new.csv")
NIVOINIT_data
<- left_join(exclusion_table,NIVOINIT_data, by = "patid") %>% select(patid,nivotrtst)
index_table
#removing null values from the index table
$nivotrtst[index_table$nivotrtst == ""] <- NA
index_table<- index_table[!is.na(index_table$nivotrtst),]
index_table
write.csv(index_table, file = "C:/Users/Sowmiya.p/Documents/BMS-Markets/HANNA/HANNA/index_table.csv", row.names = FALSE)
#total count of the INEX TABLE
<- nrow(INEX_data)
tot_pat
#count of all variables
<- nrow(incl01)
count_age_greater_18 <- nrow(incl02)
count_prog_pat_platinum_therapy <- nrow(incl03)
count_scchn_his_cytology <- nrow(incl04)
count_descision_to_start_nivo <- nrow(inclusion_table)
count_consent_to_participate <- nrow(excl01)
count_primary_can_treat <- nrow(excl02)
count_already_treated_nivo <- nrow(exclusion_table)
count_ivstudy_scchn <- length(index_table$nivotrtst)
count_index_date <- nrow(index_table)
count_idx_tab
#percentage of all variables
<- paste0(round(tot_pat / tot_pat * 100, 2), "%")
per_tot_pat <- paste0(round(count_age_greater_18 / tot_pat * 100, 2), "%")
per_age_greater_18 <- paste0(round(count_prog_pat_platinum_therapy / tot_pat * 100, 2), "%")
per_prog_pat_platinum_therapy <- paste0(round(count_scchn_his_cytology / tot_pat * 100, 2), "%")
per_scchn_his_cytology <- paste0(round(count_descision_to_start_nivo / tot_pat * 100, 2), "%")
per_descision_to_start_nivo <- paste0(round(count_consent_to_participate / tot_pat * 100, 2), "%")
per_consent_to_participate <- paste0(round(count_primary_can_treat / tot_pat * 100, 2), "%")
per_primary_can_treat <- paste0(round(count_already_treated_nivo / tot_pat * 100, 2), "%")
per_already_treated_nivo <- paste0(round(count_ivstudy_scchn / tot_pat * 100, 2), "%")
per_ivstudy_scchn <- paste0(round(count_index_date / tot_pat * 100, 2), "%")
per_index_date <- paste0(round(count_idx_tab / tot_pat * 100, 2), "%")
per_idx_tab
# Create a data frame with the counts and percentages
<- data.frame(
Attrition_data Objectives = c("Total Patients", "Inclusion Criteria", "Patients must be 18 or older when making treatment decisions",
"Progression of patients on platinum-based therapy",
"SCCHN diagnosis is confirmed through histology or cytology",
"Decision to start using nivolumab for initial treatment of SCCHN made",
"Informed consent to participate in the study",
"Exclusion Criteria",
"Exclude patients diagnosed primarily with a cancer other than SCCHN",
"Exclude patients who have already been treated with nivolumab and/or ipilimumab",
"Exclude patients participating in an interventional study for advanced SCCHN",
"Index Date",
"Patients with Nivolumab initiation date",
"Final"),
N = c(tot_pat, "", count_age_greater_18, count_prog_pat_platinum_therapy, count_scchn_his_cytology,
"",
count_descision_to_start_nivo, count_consent_to_participate,
count_primary_can_treat,"", count_index_date, count_idx_tab),
count_already_treated_nivo, count_ivstudy_scchn,
Percentage = c(per_tot_pat, "", per_age_greater_18, per_prog_pat_platinum_therapy, per_scchn_his_cytology,
"",
per_descision_to_start_nivo, per_consent_to_participate,
per_primary_can_treat,"", per_index_date, per_idx_tab)
per_already_treated_nivo, per_ivstudy_scchn,
)
saveRDS(Attrition_data,"Results/attr_results.rds")
library(kableExtra)
<-readRDS("Results/attr_results.rds")
attr_results<-kable(attr_results, format = "html", align = "l", col.names = c("Objectives", "N", "Percentage"), escape = FALSE)
style%>% kable("html") %>% kable_styling(full_width = FALSE) %>% row_spec(row = c(0,2,8,12,14), background = "lightgray", bold = TRUE) attr_results
Objectives | N | Percentage |
---|---|---|
Total Patients | 385 | 100% |
Inclusion Criteria | ||
Patients must be 18 or older when making treatment decisions | 385 | 100% |
Progression of patients on platinum-based therapy | 376 | 97.66% |
SCCHN diagnosis is confirmed through histology or cytology | 371 | 96.36% |
Decision to start using nivolumab for initial treatment of SCCHN made | 368 | 95.58% |
Informed consent to participate in the study | 368 | 95.58% |
Exclusion Criteria | ||
Exclude patients diagnosed primarily with a cancer other than SCCHN | 367 | 95.32% |
Exclude patients who have already been treated with nivolumab and/or ipilimumab | 366 | 95.06% |
Exclude patients participating in an interventional study for advanced SCCHN | 366 | 95.06% |
Index Date | ||
Patients with Nivolumab initiation date | 351 | 91.17% |
Final | 351 | 91.17% |
#reading index table and demo table
<- index_table
index_table <- read.csv("hanna csv files/DEMO_new.csv")
DEMO_data
#joining both the table and filtering the patients with baseline visit==1
<- left_join(index_table, DEMO_data, by = "patid") %>% filter(visit==1) %>% select(patid,visit_date,brthyr,nivotrtst,sex,visit)
index_demo_joined
library(lubridate)
#extracting only the year in nivotrtst to get age
$nivotrtst <- as.Date(index_demo_joined$nivotrtst, format = "%d/%m/%Y")
index_demo_joined$index_year <- year(index_demo_joined$nivotrtst)
index_demo_joined
$brthyr <- as.numeric(index_demo_joined$brthyr)
index_demo_joined
#creating the age column
$age <- index_demo_joined$index_year - index_demo_joined$brthyr
index_demo_joined
$age_category <- ifelse(index_demo_joined$age < 70, "<70 yrs", "≥70 yrs")
index_demo_joined
#Age at nivolumab initiation
# Mean
<- round(mean(index_demo_joined$age),2)
mean_age
# Standard Deviation
<- round(sd(index_demo_joined$age),2)
sd_age
# Minimum
<- min(index_demo_joined$age)
min_age
<- sort(index_demo_joined$age)
sorted_age
# 1st Quartile
<- quantile(sorted_age, 0.25)
q1_age
# Median
<- median(sorted_age)
median_age
# 3rd Quartile
<- quantile(sorted_age, 0.75)
q3_age
# Maximum
<- max(index_demo_joined$age)
max_age
<- sum(is.na(index_demo_joined$age))
missing_count_age
<- nrow(index_demo_joined)
count_total_pat <- paste0(round(count_total_pat / count_total_pat *100,2),"%")
per_total_pat
<- index_demo_joined %>% filter(age_category == "≥70 yrs") %>% nrow()
count_age_above_70 <- index_demo_joined %>% filter(age_category == "<70 yrs") %>% nrow()
count_age_below_70 <- sum(is.na(index_demo_joined$age_category))
mis_count_age_cat
<- paste0(round(count_age_above_70 / count_total_pat * 100,2), "%")
per_age_above_70 <- paste0(round(count_age_below_70 / count_total_pat * 100,2), "%")
per_age_below_70 <- paste0(round(mis_count_age_cat / count_total_pat * 100,2), "%")
per_mis_age_cat
<- index_demo_joined %>% filter(sex == 1) %>% nrow()
count_male <- index_demo_joined %>% filter(sex ==2) %>% nrow()
count_female <- sum(is.na(index_demo_joined$gender_category))
mis_count_sex_cat
<- paste0(round(count_male / count_total_pat * 100,2), "%")
per_count_male <- paste0(round(count_female / count_total_pat * 100,2), "%")
per_count_female <- paste0(round(mis_count_sex_cat / count_total_pat * 100,2), "%")
per_mis_sex_cat
#left joining the anam table with the above table
<- read.csv("hanna csv files/ANAM_new.csv")
ANAM_data <- left_join(index_demo_joined, ANAM_data, by = "patid") %>% filter(visit.x==1)
final_anam_join
#final_anam_join[final_anam_join == "."] <- NA
<- apply(final_anam_join, 2, function(x) replace(x, x == ".", NA))
final_anam_join <- as.data.frame(final_anam_join)
final_anam_join
#count and percentage for smoking status
<- final_anam_join %>% filter(smokstat == 1) %>% nrow()
count_smok_his <- final_anam_join %>% filter(smokstat == 0) %>% nrow()
count_smok_no_his <- sum(is.na(final_anam_join$smokstat))
mis_count_smok_cat
<- paste0(round(count_smok_his / count_total_pat * 100,2), "%")
per_smok_his <- paste0(round(count_smok_no_his / count_total_pat * 100,2), "%")
per_smok_no_his <- paste0(round(mis_count_smok_cat / count_total_pat * 100,2), "%")
per_mis_smok_cat
#count and percentage for alcohol status
<- final_anam_join %>% filter(alcohol == 1) %>% nrow()
count_alco_status_yes <- final_anam_join %>% filter(alcohol == 0) %>% nrow()
count_alco_status_no <- final_anam_join %>% filter(alcohol == 2) %>% nrow()
count_alco_status_occasionally <- final_anam_join %>% filter(alcohol == 3) %>% nrow()
count_alco_status_no_longer <- sum(is.na(final_anam_join$alcohol))
mis_count_alco_status_cat
<- paste0(round(count_alco_status_yes / count_total_pat * 100,2), "%")
per_alco_status_yes <- paste0(round(count_alco_status_no / count_total_pat * 100,2), "%")
per_alco_status_no <- paste0(round(count_alco_status_occasionally / count_total_pat * 100,2), "%")
per_alco_status_occasionally <- paste0(round(count_alco_status_no_longer / count_total_pat * 100,2), "%")
per_alco_status_no_longer <- paste0(round(mis_count_alco_status_cat / count_total_pat * 100,2), "%")
per_mis_alco_status_cat
<- final_anam_join %>%
final_anam_join mutate(alc_past = ifelse(alcpast == 99, NA, alcpast))
#count and percentage for alcohol past
<- final_anam_join %>% filter(alcpast == 1) %>% nrow()
count_alco_past_yes <- final_anam_join %>% filter(alcpast == 0) %>% nrow()
count_alco_past_no <- sum(is.na(final_anam_join$alc_past))
mis_count_alco_past_cat
# Count missing values
<- sum(is.na(final_anam_join$column_name))
missing_count <- paste0(round(count_alco_past_yes / count_total_pat * 100,2), "%")
per_alco_past_yes <- paste0(round(count_alco_past_no / count_total_pat * 100,2), "%")
per_alco_past_no<- paste0(round(mis_count_alco_past_cat / count_total_pat * 100,2), "%")
per_mis_alco_past_cat
#missing count and percentage for alcohol since
<- sum(!is.na(final_anam_join$alcsincepst))
count_since_years <- paste0(round(count_since_years / count_total_pat * 100,2), "%")
per_since_year
#final_anam_join$alcsincepst[final_anam_join$alcsincepst == "0"] <- NA
#final_anam_join$alcuntil[final_anam_join$alcuntil == "0"] <- NA
$alcsincepst <- as.numeric(final_anam_join$alcsincepst)
final_anam_join<- round(mean(final_anam_join$alcsincepst, na.rm = TRUE))
mean_acl_since
# Standard Deviation
<- round(sd(final_anam_join$alcsincepst, na.rm = TRUE),2)
sd_acl_since
# Minimum
<- min(final_anam_join$alcsincepst, na.rm = TRUE)
min_acl_since
<- sort(final_anam_join$alcsincepst)
sorted_acl_since
# 1st Quartile
<- quantile(sorted_acl_since, 0.25)
q1_acl_since
# Median
<- median(sorted_acl_since)
median_acl_since
# 3rd Quartile
<- quantile(sorted_acl_since, 0.75)
q3_acl_since
# Maximum
<- max(final_anam_join$alcsincepst, na.rm = TRUE)
max_acl_since