library(tidyverse)
library(janitor)
library(lubridate)
library(readxl)
library(tidycensus)
options(scipen=999)
knitr::opts_chunk$set(warning = FALSE, message=FALSE)
#knitr::opts_knit$set(root.dir = rprojroot::find_rstudio_root_file())
start_time <- Sys.time()
#Set working directory in console to find data files to knit
setwd('/Users/austinfast/Documents/GitHub/nursing-licenses/R/')
This document details NPR’s methodology for an investigation that aired March 9, 2022, on All Things Considered. Using data from 32 states, we found almost 1 in 10 nurses waited at least six months to get licensed in 2021, and 35% waited at least three months.
Two member station reporters produced localized versions and participated in a reporter roundtable on Morning Edition on March 10, 2022:
Full GitHub repo with code, raw data and methodology is at: https://github.com/austinfast/nursing-licenses
Click links below to examine the products created through this R script.
The following summary tables include only new RN and LPN licenses, who applied by:
On Sept. 23, 2021, NPR requested records for all licensed practical nurses and registered nurses who applied for licensure from 2019 to 2021 from 54 nursing boards, including every state and the District of Columbia. (California, Louisiana and West Virginia have separate RN and LPN boards.)
We asked for each nurse’s:
California and Virginia provided anonymized records, citing the nurses’ privacy, and Connecticut and Virginia couldn’t provide details on application type.
States responded as follows:
Thirty-three boards in 32 states granted NPR’s request:
Five boards provided partial data:
Two boards did not respond to repeated requests for records:
Ten boards denied NPR’s request:
Four boards wanted fees that NPR did not pay:
Some boards use different terms for the same idea. For example, licensed practical nurses can also be called licensed vocational nurses, and states refer to licensed nurses applying in a new state as “endorsement” or “reciprocity.” NPR standardized these terms among the 32 states’ records and combined them into one dataset.
The first states to respond to NPR’s request provided records through Sept. 23, 2021, so NPR removed all records after that date from subsequent states to standardize the timeframe. This resulted in a final dataset containing over 226,000 nurses issued new, permanent licenses in 2021.
NPR subtracted the application date from the license issue date to calculate each nurse’s processing time in days. We removed 77 nurses’ records showing an issue date earlier than their application date, apparently in error. NPR then grouped by state, license type and application type to find median processing times for each of the four major types and to count how many of the nurses’ processing times stretched longer than three months, six months, etc.
Arkansas doubled names, one temp permit and one permanent license.
state <- read_excel("../state-data/AR-Application Management for FOIA 09_23_2021.xlsx", skip=1) %>%
clean_names()
#Standardize fields
ar_state <- state %>%
filter (application_type != "Renewal") %>% #removes 200 renewals erroneously included with negative process times
#Format date fields
mutate (issue_date = as.Date(original_issuance_date),
application_date = as.Date(application_submit_date),
docs_date = as.Date(application_approval_date),
gather_time = docs_date - application_date,
docs_time = issue_date - docs_date,
month = substr(issue_date, 1,7),
year = year(issue_date),
lic_type = case_when(
#specify temporary vs permanent license applications
license_type=="RN" & license_permit=="Temporary" ~ "RN-Temp",
license_type=="LPN" & license_permit=="Temporary" ~ "LPN-Temp",
TRUE ~ license_type),
app_type = case_when (
application_type == "Initial - Exam" ~ "Exam",
application_type == "Retest" ~ "Exam-retest",
TRUE ~ application_type),
process_time = issue_date - application_date,
docs_time = issue_date - docs_date,
data_state = "AR",
year = as.character(year))
#Any mismatches?
ar_state %>% filter (as.Date(original_issuance_date) != issue_date)
## # A tibble: 0 × 22
## # … with 22 variables: last_name <chr>, first_name <chr>, license_type <chr>,
## # license_number <chr>, original_issuance_date <dttm>, license_permit <chr>,
## # application_type <chr>, application_submit_date <dttm>,
## # checklist_item_name <chr>, checklist_status <chr>,
## # application_approval_date <dttm>, issue_date <date>,
## # application_date <date>, docs_date <date>, gather_time <drtn>,
## # docs_time <drtn>, month <chr>, year <chr>, lic_type <chr>, …
ar_state %>% filter (as.Date(application_submit_date) != application_date)
## # A tibble: 0 × 22
## # … with 22 variables: last_name <chr>, first_name <chr>, license_type <chr>,
## # license_number <chr>, original_issuance_date <dttm>, license_permit <chr>,
## # application_type <chr>, application_submit_date <dttm>,
## # checklist_item_name <chr>, checklist_status <chr>,
## # application_approval_date <dttm>, issue_date <date>,
## # application_date <date>, docs_date <date>, gather_time <drtn>,
## # docs_time <drtn>, month <chr>, year <chr>, lic_type <chr>, …
#Remove unformatted date fields and unnecessary fields
ar_state <- ar_state %>%
select (-c(original_issuance_date, application_submit_date, application_approval_date, original_issuance_date, application_submit_date, license_type, application_type, checklist_item_name, checklist_status)) %>%
rename (duration = license_permit)
ar_state %>%
count(lic_type, app_type)
## # A tibble: 13 × 3
## lic_type app_type n
## <chr> <chr> <int>
## 1 LPN Endorsement 125
## 2 LPN Exam 1302
## 3 LPN Exam-retest 141
## 4 LPN-Temp Endorsement 65
## 5 LPN-Temp Exam 957
## 6 RN Endorsement 1040
## 7 RN Exam 2610
## 8 RN Exam-retest 488
## 9 RN International 2
## 10 RN-Temp Endorsement 598
## 11 RN-Temp Exam 1534
## 12 RN-Temp Exam-retest 2
## 13 RN-Temp International 1
#check for duplicate license numbers - AR uses same license number even if nurses have RN & LPN applications and temp/permanent
#For example, see # 120001
ar_state %>% filter (license_number=="120001")
## # A tibble: 3 × 15
## last_name first_name license_number duration issue_date application_date
## <chr> <chr> <chr> <chr> <date> <date>
## 1 BRIGGS MEGAN 120001 Permanent 2019-05-30 2019-05-03
## 2 BRIGGS MEGAN 120001 Temporary 2021-05-28 2021-03-30
## 3 BRIGGS MEGAN 120001 Permanent 2021-06-16 2021-03-30
## # … with 9 more variables: docs_date <date>, gather_time <drtn>,
## # docs_time <drtn>, month <chr>, year <chr>, lic_type <chr>, app_type <chr>,
## # process_time <drtn>, data_state <chr>
#These 14 people have duplicates - only difference is docs received date
dupe_numbers <- ar_state %>%
count (license_number, lic_type, duration, issue_date, application_date) %>%
filter (n > 1) %>%
distinct (license_number) %>%
pull()
ar_state %>%
filter (license_number %in% dupe_numbers) %>%
View
#Remove these duplicates (22 records)
ar_state <- ar_state %>%
distinct (license_number, lic_type, duration, issue_date, .keep_all=T)
California provided anonymized records with an entity_id to identify each individual. Each nurse has multiple rows, one for each stage of their application process. This code creates one row for each person, so we can calculate total processing time from the start of stage one until the license was issued.
VNs by exam progress through three stages called Exam Request, Exam Results and Initial. See MILLER, SYDNEY NICOLE for proof at https://search.dca.ca.gov/details/4301/VN/704727/97585d1c86321b5b232e49d5c7a65d38 She is the only VN license from AGUANGA, CA issued from 2019 through 2021. The Initial date approved matches her issuance date (May 23, 2019).
VNs by endorsement progress through two stages called Endorsement and Initial. See CERDA, CHRYSTAL E of Yermo, CA (entity number: 4016139) Initial date matches (May 21, 2019).
RNs by exam progress through two stages: Request for “RN Exam” and “Apprvd Exm App - Pendng Exm Pass Rslts.” See STEELE, ALEXIS ANNE LOUISE for proof at https://search.dca.ca.gov/details/4001/RN/95252157/31b623dd5e52fbc5ee0778f0edaa5cac Only RN from ALTAVILLE, CA issued 2019-2021. Her Apprvd Exm App - Pendng Exm Pass Rslts date approved matches her issuance date (July 8, 2021).
state_vn <- read_excel("../state-data/CA-BVNPT PRA Report 2022-01-04.xlsx", sheet=1) %>%
clean_names()
vns <- state_vn %>%
filter (license_type_long_name=="Vocational Nurse") %>%
mutate (approved_date = as.Date(approved_date),
received_date = as.Date(received_date)) %>%
select (-c(license_type_long_name, board_long_name, transaction_class, application_id)) %>%
group_by (entity_number) %>%
mutate (count = n())
#Most people have 3 records, 2 records, or 4 records
vns %>% ungroup() %>% count (count)
## # A tibble: 15 × 2
## count n
## * <int> <int>
## 1 1 8562
## 2 2 14638
## 3 3 45024
## 4 4 12552
## 5 5 4650
## 6 6 1680
## 7 7 637
## 8 8 288
## 9 9 162
## 10 10 40
## 11 11 66
## 12 14 28
## 13 15 15
## 14 17 17
## 15 19 19
#8524 unique test retakers of 88,000 records
retakes <- vns %>%
filter (transaction_description=="Exam Re-take") %>%
distinct (entity_number) %>%
pull (entity_number)
#565 interim permits
interim <- vns %>%
filter (transaction_description == "Issue Interim Permit" & application_status!="Withdrawn") %>%
pull (entity_number)
#Identify applicants with deficiencies
deficiencies <- vns %>%
filter (deficiency_y_n=="Y") %>%
distinct (entity_number) %>%
pull()
#Find only those that have been licensed = 20505 people
issued <- vns %>%
filter (transaction_description=="Initial" & application_status=="Approved") %>%
pull (entity_number)
#Add flag for re-testers, deficient applications, and those who applied/received interim permits
vns2 <- vns %>%
mutate (retest = if_else (entity_number %in% retakes, "retest", NA_character_),
interim = if_else (entity_number %in% interim, "interim", NA_character_),
deficient = if_else (entity_number %in% deficiencies, "deficient", "complete"),
licensed = if_else (entity_number %in% issued, "licensed", NA_character_))
#Remove exam re-takes as well as abandoned applications (expired); duplicate entries (withdrawn); expired applications (cancelled); and denied
vns3 <- vns2 %>%
filter (transaction_description != "Exam Re-take") %>% #remove 5000 rows of exam-retakes
filter (!(application_status %in% c("Expired", "Withdrawn", "Denied", "Canceled"))) %>% #remove 2000 rows of abandoned applications (expired); duplicate entries (withdrawn); expired applications (cancelled); and denied
arrange (entity_number, received_date, transaction_description) %>%
group_by (entity_number, transaction_description) %>%
slice_tail() %>% #Keeps only more recent instance if more than one Exam Results or Initial (removes 23 rows that had multiple "Exam Results" or "Exam Request")
group_by (entity_number) %>%
mutate (count_noretakes = n()) %>%
arrange (entity_number, received_date)
#What types of transactions are here? - 5 types
vns3 %>%
ungroup() %>%
count (transaction_description)
## # A tibble: 5 × 2
## transaction_description n
## * <chr> <int>
## 1 Endorsement 3010
## 2 Exam Request 24051
## 3 Exam Results 22262
## 4 Initial 20685
## 5 Issue Interim Permit 454
#Keep only first step of process (Exam Request/Endorsement) and final step (Initial)
vns4 <- vns3 %>%
filter (transaction_description %in% c("Exam Request", "Initial", "Endorsement"))
#How many applications remain open? -- 4521
vns4 %>% ungroup() %>% count (application_status, transaction_description)
## # A tibble: 6 × 3
## application_status transaction_description n
## <chr> <chr> <int>
## 1 Approved Endorsement 1811
## 2 Approved Exam Request 20911
## 3 Approved Initial 20503
## 4 Open Endorsement 1199
## 5 Open Exam Request 3140
## 6 Open Initial 182
#vns5 <- vns4 %>%
# pivot_wider (id_cols=c(entity_number, city, state, count_noretakes, retest, interim, deficient, licensed),
# names_from = c("transaction_description", "application_status"),
# values_from = c("received_date", "approved_date"))
#Pivot so each row represents an individual nurse
vns5 <- vns4 %>%
pivot_wider (id_cols=c(entity_number, city, state, count_noretakes, retest, interim, deficient, licensed),
names_from = c("transaction_description"),
values_from = c("received_date", "approved_date"))
#Each row typically is either Exam OR endorsement, so combine the received date columns into one and calculate a processing time column
vns6 <- vns5 %>%
mutate (received = coalesce (received_date_Endorsement, `received_date_Exam Request`),
process_time = if_else (is.na(licensed), as.Date("2022-01-04") - received, approved_date_Initial - received),
year = year(approved_date_Initial))
#3928 cannot process times, either pending or issued; 2898 of which only had Initial stage and most of which were issued 2019, so their Exam Results stage was likely before Jan. 1, 2019 and not sent from the California board. 149 issued in 2021 fit this category and couldn't calculate
no_process <- vns6 %>%
ungroup() %>%
filter (is.na(process_time) & count_noretakes>1) %>%
count (year)
#3264 endorsement individuals
endorse <- vns %>%
filter (transaction_description=="Endorsement") %>%
distinct (entity_number)
#27103 exam individuals
exam <- vns %>%
filter (transaction_description %in% c("Exam Request", "Exam Results"))%>%
distinct (entity_number)
#Only 21 people have both exam & endorsement
both_types <- inner_join (endorse, exam, by = "entity_number") %>%
pull()
#Create dataframe to assign the primary application type
app_types <- vns %>%
filter (entity_number %in% both_types) %>%
filter (application_status %in% c("Approved", "Open")) %>%
filter (transaction_description!="Initial") %>%
group_by (entity_number) %>%
mutate (count = n()) %>%
arrange (received_date) %>%
slice_tail () %>%
mutate (app_type = case_when (
str_detect (transaction_description, "Endorsement") ~ "Endorsement",
str_detect (transaction_description, "Exam") ~ "Exam")) %>%
select (entity_number, app_type)
exam_type <- exam %>%
filter (!entity_number %in% both_types) %>%
mutate (app_type = "Exam")
endorse_type <- endorse %>%
filter (!entity_number %in% both_types) %>%
mutate (app_type = "Endorsement")
app_types <- rbind (app_types, exam_type, endorse_type)
#Join application type with main dataframe
vns7 <- vns6 %>%
left_join (app_types, by = "entity_number")
#Standarize application type column
vns8 <- vns7 %>%
mutate (app_type = case_when (
retest == "retest" ~ "Exam-retest",
app_type=="Exam" & is.na(retest) ~ "Exam",
is.na(app_type) ~ "Unknown",
app_type=="Endorsement" ~ "Endorsement",
TRUE ~ "PROBLEM"))
#All the unknowns are from 2019, because they applied before Jan 1, 2019, and CA didn't provide their records
vns8 %>% ungroup() %>% count (year, app_type)
## # A tibble: 20 × 3
## year app_type n
## <dbl> <chr> <int>
## 1 2018 Unknown 1
## 2 2019 Endorsement 349
## 3 2019 Exam 3949
## 4 2019 Exam-retest 856
## 5 2019 Unknown 1729
## 6 2020 Endorsement 432
## 7 2020 Exam 4915
## 8 2020 Exam-retest 1177
## 9 2020 Unknown 35
## 10 2021 Endorsement 744
## 11 2021 Exam 4960
## 12 2021 Exam-retest 1325
## 13 2021 Unknown 1
## 14 2022 Endorsement 2
## 15 2022 Exam 21
## 16 2022 Exam-retest 7
## 17 NA Endorsement 1482
## 18 NA Exam 6604
## 19 NA Exam-retest 2396
## 20 NA Unknown 2
#Remove NA times and adjust year to pending for those still open
vns9 <- vns8 %>%
filter (process_time > -1) %>% #remove 3928 NA times
mutate (year = if_else (is.na(year), "Pending as of 01/04/22", as.character(year))) %>%
#Remove pending older than July 2020
filter (!(str_detect(year, "^Pending") & (received < as.Date("2020-07-01"))))
#How many from each year?
vns9 %>%
ungroup() %>%
count (year, app_type)
## # A tibble: 15 × 3
## year app_type n
## <chr> <chr> <int>
## 1 2019 Endorsement 348
## 2 2019 Exam 3427
## 3 2019 Exam-retest 215
## 4 2020 Endorsement 432
## 5 2020 Exam 4765
## 6 2020 Exam-retest 674
## 7 2021 Endorsement 744
## 8 2021 Exam 4913
## 9 2021 Exam-retest 1036
## 10 2022 Endorsement 2
## 11 2022 Exam 21
## 12 2022 Exam-retest 5
## 13 Pending as of 01/04/22 Endorsement 1313
## 14 Pending as of 01/04/22 Exam 5295
## 15 Pending as of 01/04/22 Exam-retest 1302
#Any unknowns left? They're all pulled out as NAs
unknowns <- vns9 %>%
filter (app_type == "Unknown") %>%
pull (entity_number)
unknowns_df <- vns %>%
filter (entity_number %in% unknowns)
#Clean up and standardize columns to add into full dataset
vns10 <- vns9 %>%
mutate (lic_type = "LPN") %>%
rename (issue_date = approved_date_Initial,
application_date = received,
first_name = entity_number) %>%
select (lic_type, app_type, first_name, city, state, application_date, issue_date, process_time, year, deficient)
#California vocational nurses' processing times
vns10 %>%
#group_by (year, app_type, deficient) %>%
group_by (app_type, year) %>%
summarize (mean = round(mean(process_time, na.rm=T)),
median = median (process_time),
count = n(),
over30days = sum(process_time > 30),
pct_over30days = over30days/count,
over60days = sum(process_time > 60),
pct_over60days = over60days/count,
over90days = sum(process_time > 90),
pct_over90days = over90days/count,
over120days = sum(process_time > 120),
pct_over120days = over120days/count,
over180days = sum(process_time > 180),
pct_over180days = over180days/count,
over1year = sum(process_time > 365),
pct_over1year = over1year/count ) %>%
filter (year %in% c("2021", "Pending"))
## # A tibble: 3 × 17
## # Groups: app_type [3]
## app_type year mean median count over30days pct_over30days over60days
## <chr> <chr> <drtn> <drtn> <int> <int> <dbl> <int>
## 1 Endorsement 2021 118 days 76.0 d… 744 598 0.804 456
## 2 Exam 2021 111 days 77.0 d… 4913 4630 0.942 3230
## 3 Exam-retest 2021 337 days 283.5 d… 1036 1034 0.998 1018
## # … with 9 more variables: pct_over60days <dbl>, over90days <int>,
## # pct_over90days <dbl>, over120days <int>, pct_over120days <dbl>,
## # over180days <int>, pct_over180days <dbl>, over1year <int>,
## # pct_over1year <dbl>
state_rn <- read_excel("../state-data/CA-BRN PRA Report 2022-01-04.xlsx", sheet=1) %>%
clean_names()
#Pull out only RNs
rns <- state_rn %>%
filter (license_type_long_name %in% c("Registered Nurse", "Temporary Registered Nurse License")) %>%
mutate (approved_date = as.Date(approved_date),
received_date = as.Date(received_date)) %>%
#filter (license_type_long_name %in% c("Registered Nurse")) %>% #Remove Temps for now
select (-c(license_type_long_name, board_long_name, transaction_class, application_id)) %>%
group_by (entity_number) %>%
mutate (count = n())
rns %>%
ungroup() %>%
count (transaction_description)
## # A tibble: 10 × 2
## transaction_description n
## * <chr> <int>
## 1 8 Year RN License Exam 113
## 2 8 Year RN License Exam Retake 24
## 3 Apprvd Exm App - Pendng Exm Pass Rslts 49650
## 4 Emergency RN Temp License 1144
## 5 Initial RN License Via Endorsement 67463
## 6 IP Request RETIRED 10
## 7 Reapply for RN Exam 19024
## 8 Request for RN Exam 52802
## 9 RN Request for Temporary License 20423
## 10 TL Request RETIRED 8
#Identify nurses who opted for temporary licenses
temp <- rns %>%
filter (transaction_description %in% c("RN Request for Temporary License", "Emergency RN Temp License")) %>%
pull (entity_number)
temp_df <- rns %>%
filter (entity_number %in% temp)
temp_df2 <- temp_df %>%
filter (!(transaction_description %in% c("RN Request for Temporary License", "Emergency RN Temp License")))
#Almost ALL temp permits go to endorsement applicants (only 116 of 22000+ are request for RN Exam)
temp_df2 %>%
ungroup() %>%
count (transaction_description)
## # A tibble: 6 × 2
## transaction_description n
## * <chr> <int>
## 1 Apprvd Exm App - Pendng Exm Pass Rslts 45
## 2 Initial RN License Via Endorsement 22022
## 3 IP Request RETIRED 1
## 4 Reapply for RN Exam 11
## 5 Request for RN Exam 116
## 6 TL Request RETIRED 3
#Identify licensees whose applications had deficiencies
deficiencies <- rns %>%
filter (deficiency_y_n=="Y") %>%
distinct (entity_number) %>%
pull()
#11955 unique test retakers of 210,000 records
retakes <- rns %>%
filter (transaction_description=="Reapply for RN Exam") %>%
distinct (entity_number) %>%
pull ()
#Reapply for RN Exam, comes after Pending Pass Results, but they update the approve date for pending pass results with issuance date
retakes_df <- rns %>%
filter (entity_number %in% retakes)
#Find only those that have been issued = 80688 people
issued <- rns %>%
filter (transaction_description %in% c("Apprvd Exm App - Pendng Exm Pass Rslts", "Initial RN License Via Endorsement") & application_status=="Approved")%>%
pull (entity_number)
#Add flag for re-testers, deficient applications, those who applied/received temporary permits, and those successfully licensed
rns2 <- rns %>%
mutate (retest = if_else (entity_number %in% retakes, "retest", NA_character_),
temp = if_else (entity_number %in% temp, "temp", NA_character_),
deficient = if_else (entity_number %in% deficiencies, "deficient", "complete"),
licensed = if_else (entity_number %in% issued, "licensed", NA_character_))
rns2 %>% ungroup() %>% count (transaction_description)
## # A tibble: 10 × 2
## transaction_description n
## * <chr> <int>
## 1 8 Year RN License Exam 113
## 2 8 Year RN License Exam Retake 24
## 3 Apprvd Exm App - Pendng Exm Pass Rslts 49650
## 4 Emergency RN Temp License 1144
## 5 Initial RN License Via Endorsement 67463
## 6 IP Request RETIRED 10
## 7 Reapply for RN Exam 19024
## 8 Request for RN Exam 52802
## 9 RN Request for Temporary License 20423
## 10 TL Request RETIRED 8
rns3 <- rns2 %>%
# filter (transaction_description != "Reapply for RN Exam") %>% #remove 20000 rows of exam-retakes
filter (is.na(retest)) %>%
filter (!(application_status %in% c("Expired", "Withdrawn", "Denied", "Canceled"))) %>% #remove 17000 rows of abandoned applications (expired); duplicate entries (withdrawn); expired applications (cancelled); and denied
mutate (count_noretakes = n())
#137 people have more than 2 records, including people who applied by exam, then succeeded by endorsement typically
rns3 %>% ungroup() %>% distinct (entity_number, .keep_all=T) %>% count (count_noretakes)
## # A tibble: 5 × 2
## count_noretakes n
## * <int> <int>
## 1 1 51352
## 2 2 54854
## 3 3 117
## 4 4 10
## 5 5 1
This code pulls out endorsement applicants, which only have one stage in CA’s records. It totals 65662 endorsement individuals (including expired, withdrawn, denied, canceled).
rn_endorse <- rns3 %>%
filter (transaction_description=="Initial RN License Via Endorsement") %>%
group_by (entity_number) %>%
mutate (count_noretakes = n())
rn_endorse %>% ungroup() %>% count (application_status)
## # A tibble: 2 × 2
## application_status n
## * <chr> <int>
## 1 Approved 39660
## 2 Open 18807
rn_endorse2 <- rn_endorse %>%
mutate (app_type = "Endorsement",
lic_type = "RN",
process_time = if_else (is.na(licensed), as.Date("2022-01-04") - received_date, approved_date - received_date),
year = year(approved_date)) %>%
#Standardize column names to join with endorsement records
rename (issue_date = approved_date,
application_date = received_date,
first_name = entity_number) %>%
select (lic_type, app_type, first_name, city, state, application_date, issue_date, process_time, year, deficient)
#5 people couldn't calculate, these are people licensed with duplicate entries (7522244, for example)
rn_endorse2 %>%
ungroup() %>%
filter (is.na(process_time))
## # A tibble: 4 × 10
## lic_type app_type first_name city state application_date issue_date
## <chr> <chr> <dbl> <chr> <chr> <date> <date>
## 1 RN Endorsement 7522244 CASA GRANDE AZ 2021-11-03 NA
## 2 RN Endorsement 7453194 MOORE SC 2021-05-18 NA
## 3 RN Endorsement 7326982 STEWART MS 2021-11-15 NA
## 4 RN Endorsement 7323631 MCDONOUGH GA 2021-03-03 NA
## # … with 3 more variables: process_time <drtn>, year <dbl>, deficient <chr>
This code pulls out endorsement applicants, which only have one stage in CA’s records. It totals 65662 endorsement individuals (including expired, withdrawn, denied, canceled).
rn_exam <- rns3 %>%
filter (transaction_description %in% c("Apprvd Exm App - Pendng Exm Pass Rslts", "Request for RN Exam")) %>%
mutate (count_noretakes = n()) %>%
arrange (entity_number, received_date, desc(transaction_description))
#45 have 3 entries
rn_exam %>% ungroup() %>% count (count_noretakes)
## # A tibble: 3 × 2
## count_noretakes n
## * <int> <int>
## 1 1 9605
## 2 2 75268
## 3 3 39
rn_exam2 <- rn_exam %>%
group_by (entity_number, transaction_description) %>%
slice_tail() %>% #Keeps only more recent instance if more than one Request for RN Exam or Pendng Exm Pass Rslts (removes 19 rows that had multiple "Exam Results" or "Exam Request")
group_by (entity_number) %>%
mutate (count_noretakes = n()) %>%
arrange (entity_number, received_date)
#How many remain open?
rn_exam2 %>% ungroup() %>% count (application_status, transaction_description)
## # A tibble: 4 × 3
## application_status transaction_description n
## <chr> <chr> <int>
## 1 Approved Apprvd Exm App - Pendng Exm Pass Rslts 37393
## 2 Approved Request for RN Exam 37802
## 3 Open Apprvd Exm App - Pendng Exm Pass Rslts 4223
## 4 Open Request for RN Exam 5478
rn_exam3 <- rn_exam2 %>%
pivot_wider (id_cols=c(entity_number, city, state, count_noretakes, retest, temp, deficient, licensed),
names_from = c("transaction_description"),
values_from = c("received_date", "approved_date"))
rn_exam4 <- rn_exam3 %>%
mutate (process_time = if_else (is.na(licensed), as.Date("2022-01-04") - `received_date_Request for RN Exam`, `approved_date_Apprvd Exm App - Pendng Exm Pass Rslts` - `received_date_Request for RN Exam`),
year = year(`approved_date_Apprvd Exm App - Pendng Exm Pass Rslts`))
#6061 cannot calculate because they only have 1 entry (missing Request for RN Exam), or they originally applied by exam and were approved by endorsement
nas <- rn_exam4 %>%
ungroup() %>%
filter (is.na(process_time))
nas %>%
count (count_noretakes)
## # A tibble: 2 × 2
## count_noretakes n
## * <int> <int>
## 1 1 3999
## 2 2 37
#Flag retests versus first-time exams
rn_exam5 <- rn_exam4 %>%
mutate (app_type = case_when (
retest == "retest" ~ "Exam-retest",
TRUE ~ "Exam"),
lic_type = "RN") %>%
#Standardize column names to join with endorsement records
rename (issue_date = `approved_date_Apprvd Exm App - Pendng Exm Pass Rslts`,
application_date = `received_date_Request for RN Exam`,
first_name = entity_number) %>%
select (lic_type, app_type, first_name, city, state, application_date, issue_date, process_time, year, deficient)
This code pulls out “Emergency RN Temp Licenses” issued since Jan. 29 to Nov. 3, 2021, and “RN Request for Temporary License” issued throughout whole period of 2019-2021.
rns_temp <- state_rn %>%
filter (license_type_long_name=="Temporary Registered Nurse License") %>%
mutate (approved_date = as.Date(approved_date),
received_date = as.Date(received_date)) %>%
select (-c(license_type_long_name, board_long_name, transaction_class, application_id)) %>%
group_by (entity_number) %>%
mutate (count = n())
rns_temp %>%
group_by (transaction_description) %>%
summarize (range = range (approved_date, na.rm=T))
## # A tibble: 4 × 2
## # Groups: transaction_description [2]
## transaction_description range
## <chr> <date>
## 1 Emergency RN Temp License 2021-01-29
## 2 Emergency RN Temp License 2021-11-03
## 3 RN Request for Temporary License 2019-01-04
## 4 RN Request for Temporary License 2022-01-04
rns_temp %>%
ungroup() %>%
count (application_status)
## # A tibble: 5 × 2
## application_status n
## * <chr> <int>
## 1 Approved 11430
## 2 Canceled 33
## 3 Expired 3477
## 4 Open 6557
## 5 Withdrawn 70
rns_temp2 <- rns_temp %>%
filter (!(application_status %in% c("Expired", "Withdrawn", "Denied", "Canceled"))) %>% #remove 2000 rows of abandoned applications (expired); duplicate entries (withdrawn); expired applications (cancelled); and denied
mutate (count2 = n(),
process_time = if_else (application_status=="Approved", approved_date - received_date, as.Date("2022-01-04") - received_date),
year = year (approved_date))
#No rows unable to calculate
rns_temp2 %>%
filter (is.na(process_time))
## # A tibble: 0 × 12
## # Groups: entity_number [0]
## # … with 12 variables: transaction_description <chr>, entity_number <dbl>,
## # application_status <chr>, received_date <date>, approved_date <date>,
## # deficiency_y_n <chr>, city <chr>, state <chr>, count <int>, count2 <int>,
## # process_time <drtn>, year <dbl>
rns_temp3 <- rns_temp2 %>%
mutate (lic_type = "RN-Temp",
app_type = case_when (
transaction_description=="Emergency RN Temp License" ~ "Emergency",
transaction_description=="RN Request for Temporary License" ~ "Application"),
deficient = if_else (deficiency_y_n=="Y", "deficient", "complete")) %>%
#Standardize column names to join with endorsement records
rename (issue_date = approved_date,
application_date = received_date,
first_name = entity_number) %>%
select (lic_type, app_type, first_name, city, state, application_date, issue_date, process_time, year, deficient)
rns_all <- rbind (rn_exam5, rn_endorse2, rns_temp3)
rns_all2 <- rns_all %>%
#Remove NA times and adjust year to pending for those still open
filter (process_time > -1) %>% #remove 2 negative times and 6108 NA times, mostly from 2019
mutate (year = if_else (is.na(year), "Pending as of 01/04/22", as.character(year))) %>%
#Remove 1767 pending apps older than July 2020
filter (!(str_detect(year, "^Pending") & (application_date < as.Date("2020-07-01"))))
ca_state <- rbind (rns_all2, vns10) %>%
mutate (data_state = "CA",
first_name = as.character(first_name)) %>%
rename (process_time2 = process_time) #to match other states
#Quick look at processing times by license/application type. Can also separate by deficiency status
ca_state %>%
group_by (lic_type, app_type, year) %>% #, deficient
filter (app_type != "Exam-retest" & lic_type %in% c("RN", "LPN", "RN-Temp", "LPN-Temp")) %>%
filter (!(str_detect(year, "^Pending") & (application_date < as.Date("2020-12-31")))) %>%
#group_by (year, lic_type, app_type) %>%
summarize (mean = round(mean(process_time2, na.rm=T)),
median = median (process_time2),
count = n(),
over30days = sum(process_time2 > 30),
pct_over30days = over30days/count,
over60days = sum(process_time2 > 60),
pct_over60days = over60days/count,
over90days = sum(process_time2 > 90),
pct_over90days = over90days/count,
over120days = sum(process_time2 > 120),
pct_over120days = over120days/count,
over180days = sum(process_time2 > 180),
pct_over180days = over180days/count,
over1year = sum(process_time2 > 365),
pct_over1year = over1year/count ) %>%
filter (year %in% c("2021", "Pending"))
## # A tibble: 6 × 18
## # Groups: lic_type, app_type [6]
## lic_type app_type year mean median count over30days pct_over30days
## <chr> <chr> <chr> <drtn> <drtn> <int> <int> <dbl>
## 1 LPN Endorsement 2021 118 days 76 days 744 598 0.804
## 2 LPN Exam 2021 111 days 77 days 4913 4630 0.942
## 3 RN Endorsement 2021 131 days 102 days 16181 15220 0.941
## 4 RN Exam 2021 116 days 95 days 12885 12736 0.988
## 5 RN-Temp Application 2021 39 days 30 days 4466 2223 0.498
## 6 RN-Temp Emergency 2021 0 days 0 days 1127 3 0.00266
## # … with 10 more variables: over60days <int>, pct_over60days <dbl>,
## # over90days <int>, pct_over90days <dbl>, over120days <int>,
## # pct_over120days <dbl>, over180days <int>, pct_over180days <dbl>,
## # over1year <int>, pct_over1year <dbl>
ca_state %>%
filter (lic_type=="LPN" & app_type=="Endorsement" & year=="2021") %>%
filter (process_time2 > 60) %>%
nrow()
## [1] 456
#How large are the pending nurse backlogs?
ca_state %>%
filter (app_type != "Exam-retest" & lic_type %in% c("RN", "RN-Temp")) %>%
filter (year == "Pending") %>%
filter (between(application_date, as.Date("2021-01-01"), as.Date("2021-10-06"))) %>%
ungroup () %>%
#mutate (month = month(application_date)) %>%
count (lic_type, app_type, deficient)
## # A tibble: 0 × 4
## # … with 4 variables: lic_type <chr>, app_type <chr>, deficient <chr>, n <int>
#REMOVE INTERMEDIARY FILES, leaving ar_state & ca_state
rm(list=setdiff(ls(), c("start_time", "ar_state", "ca_state")))
#How many total applications by license/app
ca_state %>%
filter (app_type != "Exam-retest" & lic_type %in% c("RN", "LPN", "RN-Temp", "LPN-Temp")) %>%
#Pending vs. actually licensed
filter (year %in% c("2021", "Pending")) %>%
#remove pending before Jan. 1, 2021
filter (!(str_detect(year, "^Pending") & (application_date < as.Date("2020-12-31")))) %>%
#remove pending from 2022
filter (!(str_detect(year, "^Pending") & (application_date > as.Date("2021-12-31")))) %>%
#filter ((str_detect(year, "^Pending") & between(application_date, as.Date("2021-01-01"), as.Date("2021-12-31")))) %>%
#filter (year=="2021" | (str_detect(year, "^Pending") & between(application_date, as.Date("2021-01-01"), as.Date("2021-12-31")))) %>%
ungroup() %>%
count (lic_type, app_type, year)
## # A tibble: 6 × 4
## lic_type app_type year n
## <chr> <chr> <chr> <int>
## 1 LPN Endorsement 2021 744
## 2 LPN Exam 2021 4913
## 3 RN Endorsement 2021 16181
## 4 RN Exam 2021 12885
## 5 RN-Temp Application 2021 4466
## 6 RN-Temp Emergency 2021 1127
State board combined renewal/new dates.
state <- read_excel("../state-data/CO-250-2021_RN_TRN_PN_TPN_Applicants.xlsx") %>% clean_names()
#Calculate processing time, taking into account renewals
co_state <- state %>%
filter (!is.na(last_name)) %>% #removes one blank row
filter (last_name != "22739 Items Found") %>% #removes summary row
mutate (issue_date = as.Date(effective_date),
application_date = as.Date(application_date),
initial_license_date = as.Date(initial_license_date),
effective_renewal_date = as.Date(effective_date),
expiration_date = as.Date(expiration_date),
month = substr(initial_license_date, 1,7),
year = year(initial_license_date),
app_year = year(application_date),
lic_type = case_when(
license_type=="Temporary - Registered Nurse" ~ "RN-Temp",
license_type=="Temporary - Licensed Practical Nurse" ~ "LPN-Temp",
license_type=="Registered Nurse" ~ "RN",
license_type=="Licensed Practical Nurse" ~ "LPN"),
app_type = case_when (
license_method == "Examination" ~ "Exam",
license_method == "Initial Temporary" ~ "Application",
is.na(license_method) ~ "Unknown",
TRUE ~ license_method)) %>%
mutate (license_number = as.character(license_number),
process_time = if_else (initial_license_date == issue_date,
issue_date - application_date,
initial_license_date - application_date)) %>%
#For those that have been renewed at some point, renewal date is in in issue_date column. Swap those out here and double check they calculate the same.
mutate (issue_date2 = if_else (initial_license_date == issue_date,
issue_date, initial_license_date),
process_time_test = issue_date2 - application_date,
#Calculate how much time those still pending have been waiting
diff = process_time_test - process_time)
#None are different
co_state %>% filter (diff != 0)
## # A tibble: 0 × 26
## # … with 26 variables: last_name <chr>, first_name <chr>, middle_name <chr>,
## # suffix <chr>, city <chr>, state <chr>, license_type <chr>,
## # license_type_prefix <chr>, license_number <chr>, application_date <date>,
## # initial_license_date <date>, effective_date <dttm>, expiration_date <date>,
## # status <chr>, license_method <chr>, issue_date <date>,
## # effective_renewal_date <date>, month <chr>, year <dbl>, app_year <dbl>,
## # lic_type <chr>, app_type <chr>, process_time <drtn>, issue_date2 <date>, …
#Check statuses for those that couldn't calculate process_time, so we can remove clearly inactive applications and calculate pending time for all others as process_time2 in next step
co_state %>%
filter (is.na(process_time)) %>%
count (status) %>%
arrange (desc(n))
## # A tibble: 13 × 2
## status n
## <chr> <int>
## 1 Application Incomplete 1084
## 2 Pending Examination 796
## 3 Application Withdrawn 303
## 4 Application Expired 253
## 5 Online Application Received 196
## 6 Program Area Review 32
## 7 SPECIALIST INITIAL REVIEW 24
## 8 Application Denied 18
## 9 Pending - Exam Eligible 18
## 10 Application Incomplete - Background Check Needed 13
## 11 Denied Licensure 3
## 12 Expired 2
## 13 <NA> 1
#Calculates how long those left pending have been pending as process_time2
co_state <- co_state %>%
mutate (process_time2 = if_else (is.na(process_time) & status %in% c("Application Incomplete", "Application Incomplete - Background Check Needed", "Online Application Received", "Pending - Exam Eligible", "Pending Examination", "Program Area Review", "SPECIALIST INITIAL REVIEW"),
as.Date("2021-09-27") - application_date,
process_time),
data_state = "CO",
#Mark those still pending
year = if_else (is.na(year), "Pending as of 09/27/21", as.character(year)))
#Any mismatches?
co_state %>% filter (as.Date(effective_renewal_date) != issue_date2)
## # A tibble: 9,384 × 28
## last_name first_name middle_name suffix city state license_type
## <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Cady Rachel <NA> <NA> Grand J… CO Registered Nu…
## 2 Avalos Marissa Gomez <NA> Fort Mo… CO Registered Nu…
## 3 Quigley Molly Rita <NA> Decatur IL Registered Nu…
## 4 Strnad Elvia Lorena <NA> Thornton CO Licensed Prac…
## 5 Hollingsworth Taylor Jean <NA> Denver CO Registered Nu…
## 6 Smith Karissa <NA> <NA> Parma OH Registered Nu…
## 7 Anderson Christi Renee <NA> Hotchki… CO Registered Nu…
## 8 Godard Shepherd Trisha <NA> <NA> Lakewood CO Registered Nu…
## 9 Pace Voltaire V <NA> Romulus MI Licensed Prac…
## 10 Barnes Jennifer Dawn Anderson <NA> Parker CO Registered Nu…
## # … with 9,374 more rows, and 21 more variables: license_type_prefix <chr>,
## # license_number <chr>, application_date <date>, initial_license_date <date>,
## # effective_date <dttm>, expiration_date <date>, status <chr>,
## # license_method <chr>, issue_date <date>, effective_renewal_date <date>,
## # month <chr>, year <chr>, app_year <dbl>, lic_type <chr>, app_type <chr>,
## # process_time <drtn>, issue_date2 <date>, process_time_test <drtn>,
## # diff <drtn>, process_time2 <drtn>, data_state <chr>
#9384 have different issue/initial license dates, meaning there's been renewals, but we don't know when those renewal applications were submitted.
co_state %>% filter (as.Date(application_date) != application_date)
## # A tibble: 0 × 28
## # … with 28 variables: last_name <chr>, first_name <chr>, middle_name <chr>,
## # suffix <chr>, city <chr>, state <chr>, license_type <chr>,
## # license_type_prefix <chr>, license_number <chr>, application_date <date>,
## # initial_license_date <date>, effective_date <dttm>, expiration_date <date>,
## # status <chr>, license_method <chr>, issue_date <date>,
## # effective_renewal_date <date>, month <chr>, year <chr>, app_year <dbl>,
## # lic_type <chr>, app_type <chr>, process_time <drtn>, issue_date2 <date>, …
#Remove unformatted date fields and unnecessary fields
co_state <- co_state %>%
select (-c(license_type, license_type_prefix, license_method, effective_date, issue_date, process_time_test, diff, initial_license_date)) %>%
rename (app_status = status,
issue_date = issue_date2)
co_state %>%
count(lic_type, app_type)
## # A tibble: 8 × 3
## lic_type app_type n
## <chr> <chr> <int>
## 1 LPN Endorsement 689
## 2 LPN Exam 1199
## 3 LPN Unknown 3
## 4 LPN-Temp Application 259
## 5 RN Endorsement 11469
## 6 RN Exam 8176
## 7 RN Unknown 12
## 8 RN-Temp Application 932
str(co_state)
## tibble [22,739 × 20] (S3: tbl_df/tbl/data.frame)
## $ last_name : chr [1:22739] "Cady" "Avalos" "Quigley" "Strnad" ...
## $ first_name : chr [1:22739] "Rachel" "Marissa" "Molly" "Elvia" ...
## $ middle_name : chr [1:22739] NA "Gomez" "Rita" "Lorena" ...
## $ suffix : chr [1:22739] NA NA NA NA ...
## $ city : chr [1:22739] "Grand Junction" "Fort Morgan" "Decatur" "Thornton" ...
## $ state : chr [1:22739] "CO" "CO" "IL" "CO" ...
## $ license_number : chr [1:22739] "1660223" "1660533" "1664398" "335854" ...
## $ application_date : Date[1:22739], format: "2019-01-09" "2019-02-21" ...
## $ expiration_date : Date[1:22739], format: "2023-09-30" "2023-09-30" ...
## $ app_status : chr [1:22739] "Active" "Active" "Active" "Active" ...
## $ effective_renewal_date: Date[1:22739], format: "2021-10-01" "2021-10-01" ...
## $ month : chr [1:22739] "2019-02" "2019-02" "2019-09" "2019-11" ...
## $ year : chr [1:22739] "2019" "2019" "2019" "2019" ...
## $ app_year : num [1:22739] 2019 2019 2019 2019 2019 ...
## $ lic_type : chr [1:22739] "RN" "RN" "RN" "LPN" ...
## $ app_type : chr [1:22739] "Exam" "Exam" "Exam" "Exam" ...
## $ process_time : 'difftime' num [1:22739] 35 7 16 31 ...
## ..- attr(*, "units")= chr "days"
## $ issue_date : Date[1:22739], format: "2019-02-13" "2019-02-28" ...
## $ process_time2 : 'difftime' num [1:22739] 35 7 16 31 ...
## ..- attr(*, "units")= chr "days"
## $ data_state : chr [1:22739] "CO" "CO" "CO" "CO" ...
#co_state %>%
# filter (issue_date != initial_license_date)
#co_state2 <- co_state %>%
# mutate (issue_date2 = if_else (initial_license_date == issue_date,
# issue_date, initial_license_date))
#Check for license number duplicates
str(state)
## tibble [22,741 × 15] (S3: tbl_df/tbl/data.frame)
## $ last_name : chr [1:22741] "Cady" "Avalos" "Quigley" "Strnad" ...
## $ first_name : chr [1:22741] "Rachel" "Marissa" "Molly" "Elvia" ...
## $ middle_name : chr [1:22741] NA "Gomez" "Rita" "Lorena" ...
## $ suffix : chr [1:22741] NA NA NA NA ...
## $ city : chr [1:22741] "Grand Junction" "Fort Morgan" "Decatur" "Thornton" ...
## $ state : chr [1:22741] "CO" "CO" "IL" "CO" ...
## $ license_type : chr [1:22741] "Registered Nurse" "Registered Nurse" "Registered Nurse" "Licensed Practical Nurse" ...
## $ license_type_prefix : chr [1:22741] "RN" "RN" "RN" "PN" ...
## $ license_number : num [1:22741] 1660223 1660533 1664398 335854 1662595 ...
## $ application_date : POSIXct[1:22741], format: "2019-01-09" "2019-02-21" ...
## $ initial_license_date: POSIXct[1:22741], format: "2019-02-13" "2019-02-28" ...
## $ effective_date : POSIXct[1:22741], format: "2021-10-01" "2021-10-01" ...
## $ expiration_date : POSIXct[1:22741], format: "2023-09-30" "2023-09-30" ...
## $ status : chr [1:22741] "Active" "Active" "Active" "Active" ...
## $ license_method : chr [1:22741] "Examination" "Examination" "Examination" "Examination" ...
#These 211 licenses have duplicates -
dupe_numbers <- co_state %>%
filter (!is.na(license_number)) %>% #pending apps will show up as NA
count (license_number) %>%
filter (n > 1) %>%
distinct (license_number) %>%
pull()
#Duplicate license numbers are all temporary licenses, so no removal necessary
co_state %>%
filter (license_number %in% dupe_numbers) %>%
count (lic_type, app_type)
## # A tibble: 2 × 3
## lic_type app_type n
## <chr> <chr> <int>
## 1 LPN-Temp Application 211
## 2 RN-Temp Application 211
# View
state <- read_excel("../state-data/CT-NPR FOI Austin Fast - RN, LPN - Update-clean.xlsx") %>%
clean_names() %>%
mutate (across(c(effective_date, grant_date, expiration_date, reinstate_date, application_date, graduation_date, date_of_birth), ~ymd(.x)))
#112196 records have no application date, almost exclusively licenses granted before 2010, as far back as 1923
state %>%
filter (is.na(application_date)) %>%
count (year(grant_date))
## # A tibble: 99 × 2
## `year(grant_date)` n
## * <dbl> <int>
## 1 1901 408
## 2 1923 1
## 3 1925 2
## 4 1926 3
## 5 1927 3
## 6 1928 3
## 7 1929 6
## 8 1930 7
## 9 1931 12
## 10 1932 7
## # … with 89 more rows
#Calculate processing time
ct_state <- state %>%
filter (!is.na(application_date)) %>% #keep only those with application dates -- others will not calculate anything
mutate (issue_date = as.Date(grant_date),
application_date = as.Date(application_date),
effective_renewal_date = as.Date(effective_date),
expiration_date = as.Date(expiration_date),
month = substr(issue_date, 1,7),
year = year(issue_date),
lic_type = case_when(
professional_title=="LPN" & application_type==10 ~ "Unknown", #2 LPN apps coded 10 (RN), marking as unknown
professional_title=="RN" ~ "RN",
application_type==10 ~ "RN",
professional_title=="LPN Temporary Permit" ~ "LPN-Temp",
professional_title=="LPN" ~ "LPN",
TRUE ~ "Unknown"), # 7 records without professional_title and application_type of "101"
app_type = "Unknown") %>%
mutate (year = replace_na (year, "NA")) %>%
#Remove 76,000 licenses issued before 2019
filter (year %in% c("2019", "2020", "2021", "NA")) %>%
mutate (process_time = issue_date - application_date)
#Check status for those that couldn't calculate process_time to find those actively pending, and set them to calculate pending time as process_time2
ct_state %>%
filter (is.na(process_time)) %>%
count (status_reason) %>%
arrange (desc(n))
## # A tibble: 21 × 2
## status_reason n
## <chr> <int>
## 1 APPLICATION NO LONGER VALID 7579
## 2 ONLINE APPLICATION 3704
## 3 APPLICATION FOR LICENSE PENDING 263
## 4 APPROVED FOR EXAM 180
## 5 NEW ONLINE APPLICATION 168
## 6 FEE REFUNDED 117
## 7 STRIKE APPLICANT 46
## 8 APPLICATION WITHDRAWN 41
## 9 ISSUED IN ERROR 33
## 10 LAPSED DUE TO NON-RENEWAL 20
## # … with 11 more rows
#Calculates how long those left pending have been pending as process_time2
ct_state <- ct_state %>%
mutate (process_time2 = if_else (is.na(process_time) & status_reason %in% c("ONLINE APPLICATION", "APPLICATION FOR LICENSE PENDING", "APPROVED FOR EXAM", "NEW ONLINE APPLICATION", "APPLICATION UNDER REVIEW", "ONLINE REINSTATEMENT", "EDUCATION REVIEW"),
as.Date("2021-11-04") - application_date,
process_time),
data_state = "CT",
year = if_else (year=="NA", "Pending as of 11/04/21", as.character(year)))
#Any mismatches?
ct_state %>% filter (as.Date(grant_date) != issue_date)
## # A tibble: 0 × 37
## # … with 37 variables: license_type <dbl>, license_number <chr>,
## # business_name <chr>, last_name <chr>, first_name <chr>,
## # middle_initial <chr>, address1 <chr>, address2 <chr>, city <chr>,
## # state <chr>, zip_code <chr>, county <chr>, email <chr>, phone <chr>,
## # fax_number <chr>, date_of_birth <date>, professional_title <chr>,
## # effective_date <date>, grant_date <date>, reinstate_date <date>,
## # expiration_date <date>, application_date <date>, application_type <dbl>, …
ct_state %>% filter (as.Date(application_date) != application_date)
## # A tibble: 0 × 37
## # … with 37 variables: license_type <dbl>, license_number <chr>,
## # business_name <chr>, last_name <chr>, first_name <chr>,
## # middle_initial <chr>, address1 <chr>, address2 <chr>, city <chr>,
## # state <chr>, zip_code <chr>, county <chr>, email <chr>, phone <chr>,
## # fax_number <chr>, date_of_birth <date>, professional_title <chr>,
## # effective_date <date>, grant_date <date>, reinstate_date <date>,
## # expiration_date <date>, application_date <date>, application_type <dbl>, …
#Remove unformatted date fields and unnecessary fields
ct_state <- ct_state %>%
select (-c(business_name, fax_number, professional_title, grant_date, reinstate_date, application_type, specialty, license_type, effective_date)) %>%
rename (middle_name = middle_initial,
address_line1 = address1,
address_line2 = address2,
app_status = status,
lic_status = status_reason)
ct_state %>%
count(lic_type, app_type)
## # A tibble: 4 × 3
## lic_type app_type n
## <chr> <chr> <int>
## 1 LPN Unknown 3461
## 2 LPN-Temp Unknown 276
## 3 RN Unknown 39114
## 4 Unknown Unknown 5
str(ct_state)
## tibble [42,856 × 28] (S3: tbl_df/tbl/data.frame)
## $ license_number : chr [1:42856] "179793" "175594" "174997" "158997" ...
## $ last_name : chr [1:42856] "JENNEY" "KING TROTMAN" "KING TROTMAN" "." ...
## $ first_name : chr [1:42856] "JUDITH" "KAY" "KAY" "GEETIKA" ...
## $ middle_name : chr [1:42856] "ANN" "PATRICIA" "PATRICIA" NA ...
## $ address_line1 : chr [1:42856] "7675 CROTON RD" "56 SCHOOL ST" "56 SCHOOL ST" "6 Bismarck St" ...
## $ address_line2 : chr [1:42856] NA NA NA "Apt# 12" ...
## $ city : chr [1:42856] "JOHNSTOWN" "MANCHESTER" "MANCHESTER" "Mattapan" ...
## $ state : chr [1:42856] "OH" "CT" "CT" "MA" ...
## $ zip_code : chr [1:42856] "43031-8192" "06040-6117" "06040-6117" "2126" ...
## $ county : chr [1:42856] "Licking" "Hartford" "Hartford" "New Haven" ...
## $ email : chr [1:42856] "judithjenney@hotmail.com" "lightcolours@hotmail.com" "lightcolours@hotmail.com" "geetikalatifi@gmail.com" ...
## $ phone : chr [1:42856] "NULL" "2035239858" "2035239858" "2035607652" ...
## $ date_of_birth : Date[1:42856], format: "1964-03-02" "1972-06-19" ...
## $ expiration_date : Date[1:42856], format: "2022-03-31" "2022-06-30" ...
## $ application_date : Date[1:42856], format: "2021-03-30" "2020-08-11" ...
## $ school_name : chr [1:42856] "NULL" "Barbados Community College" "NULL" "All India Institute of Medical Sciences" ...
## $ graduation_date : Date[1:42856], format: NA "2013-11-01" ...
## $ app_status : chr [1:42856] "ACTIVE" "ACTIVE" "INACTIVE" "ACTIVE" ...
## $ lic_status : chr [1:42856] "CURRENT" "CURRENT" "PERMIT EXPIRED" "RENEWAL APPLICATION SENT" ...
## $ issue_date : Date[1:42856], format: "2021-04-20" "2020-10-19" ...
## $ effective_renewal_date: Date[1:42856], format: "2021-04-20" "2021-07-01" ...
## $ month : chr [1:42856] "2021-04" "2020-10" "2020-09" "2019-03" ...
## $ year : chr [1:42856] "2021" "2020" "2020" "2019" ...
## $ lic_type : chr [1:42856] "RN" "RN" "RN" "RN" ...
## $ app_type : chr [1:42856] "Unknown" "Unknown" "Unknown" "Unknown" ...
## $ process_time : 'difftime' num [1:42856] 21 69 0 56 ...
## ..- attr(*, "units")= chr "days"
## $ process_time2 : 'difftime' num [1:42856] 21 69 0 56 ...
## ..- attr(*, "units")= chr "days"
## $ data_state : chr [1:42856] "CT" "CT" "CT" "CT" ...
#ct_state %>%
# filter (!(is.na(business_name)|business_name=="NULL"))
# filter (!is.na(fax_number))
#Check for license number duplicates
str(state)
## tibble [231,660 × 28] (S3: tbl_df/tbl/data.frame)
## $ license_type : num [1:231660] 10 10 10 10 10 10 10 10 10 10 ...
## $ license_number : chr [1:231660] "145583" "141441" "140325" "179793" ...
## $ business_name : chr [1:231660] "NULL" "NULL" "NULL" "NULL" ...
## $ last_name : chr [1:231660] "CHAPMAN" "CHAPMAN" "COBLE" "JENNEY" ...
## $ first_name : chr [1:231660] "EMI" "EMI" "LISA" "JUDITH" ...
## $ middle_initial : chr [1:231660] "JO" "JO" "RENE" "ANN" ...
## $ address1 : chr [1:231660] "2960 Edgebrooke drive" "2960 Edgebrooke drive" "5010 SAMET DR APT 3" "7675 CROTON RD" ...
## $ address2 : chr [1:231660] NA NA NA NA ...
## $ city : chr [1:231660] "Marion" "Marion" "HIGH POINT" "JOHNSTOWN" ...
## $ state : chr [1:231660] "IA" "IA" "NC" "OH" ...
## $ zip_code : chr [1:231660] "52302" "52302" "27265-1509" "43031-8192" ...
## $ county : chr [1:231660] "NULL" "NULL" "Guilford" "Licking" ...
## $ email : chr [1:231660] "echapman@encoreunlimited.com" "echapman@encoreunlimited.com" "lisa.coble@cvscaremark.com" "judithjenney@hotmail.com" ...
## $ phone : chr [1:231660] "3195606953" "3195606953" "3362179625" "NULL" ...
## $ fax_number : chr [1:231660] "NULL" "NULL" NA NA ...
## $ date_of_birth : Date[1:231660], format: "1971-02-09" "1971-02-09" ...
## $ professional_title: chr [1:231660] "RN" "RN" "RN" "RN" ...
## $ effective_date : Date[1:231660], format: "2017-10-10" "2017-04-05" ...
## $ grant_date : Date[1:231660], format: "2017-10-10" "2017-04-05" ...
## $ reinstate_date : Date[1:231660], format: NA NA ...
## $ expiration_date : Date[1:231660], format: "2018-02-28" "2017-08-02" ...
## $ application_date : Date[1:231660], format: "2017-03-30" "2017-04-05" ...
## $ application_type : num [1:231660] 10 10 10 10 10 10 10 10 10 10 ...
## $ school_name : chr [1:231660] "KIRKWOOD COMMUNITY COLLEGE" "NULL" "GUILFORD TECHNICAL CC" "NULL" ...
## $ graduation_date : Date[1:231660], format: "1996-05-01" NA ...
## $ status : chr [1:231660] "INACTIVE" "INACTIVE" "INACTIVE" "ACTIVE" ...
## $ status_reason : chr [1:231660] "LAPSED DUE TO NON-RENEWAL" "PERMIT EXPIRED" "LAPSED DUE TO NON-RENEWAL" "CURRENT" ...
## $ specialty : logi [1:231660] NA NA NA NA NA NA ...
#No duplicates (other than NA license_number)
dupe_numbers <- ct_state %>%
count (license_number) %>%
filter (n > 1)
#NA license_numbers are all pending, expired, inactive, etc.
ct_state %>%
filter (is.na(license_number)) %>%
count (lic_type, app_type, app_status)
## # A tibble: 12 × 4
## lic_type app_type app_status n
## <chr> <chr> <chr> <int>
## 1 LPN Unknown EXPIRED APPLICATION 916
## 2 LPN Unknown INACTIVE 9
## 3 LPN Unknown PENDING 759
## 4 LPN-Temp Unknown EXPIRED APPLICATION 87
## 5 LPN-Temp Unknown INACTIVE 3
## 6 LPN-Temp Unknown PENDING 7
## 7 RN Unknown ACTIVE 1
## 8 RN Unknown DENIED 14
## 9 RN Unknown EXPIRED APPLICATION 6701
## 10 RN Unknown INACTIVE 81
## 11 RN Unknown PENDING 3590
## 12 Unknown Unknown EXPIRED APPLICATION 3
#No duplicate removal necessary
fl18_19 <- read_excel("../state-data/FL/BSC_FY18-19_Annual_M1_Detail_1701_1702.xlsx") %>%
clean_names()
fl19_20 <- read_excel("../state-data/FL/BSC_FY19-20_Annual_M1_Detail_1701_1702.xlsx") %>%
clean_names()
fl20_21 <- read_excel("../state-data/FL/BSC_FY20-21_Annual_M1_Detail_1701__1702.xlsx") %>%
clean_names()
fl21_q1 <- read_excel("../state-data/FL/BSC_FY21-22_Q1_M1_Detail_1701_1702.xlsx") %>%
clean_names()
fl21_q2 <- read_excel("../state-data/FL/BSC_FY21-22_Q2_M1_Detail_1701_1702.xlsx") %>%
clean_names()
fl_state <- rbind (fl18_19, fl19_20, fl20_21, fl21_q1, fl21_q2) %>%
mutate (application_date = as.Date(format((app_dte), format="%Y-%m-%d")),
issue_date = ymd(lic_issue_dte),
fee_paid_date = ymd(fee_pd_dte),
month = substr(issue_date, 1,7),
year = as.character(year(issue_date)),
lic_type = case_when (
prof_cde == "1701" ~ "RN",
prof_cde == "1702" ~ "LPN"),
app_type = case_when (
str_detect (tran_desc, "Endorsement") ~ "Endorsement",
str_detect (tran_desc, "Re-Exam") ~ "Exam-retest",
tran_desc == "Exam Application" ~ "Exam",
#Might want to combine 12 Internet Exam Application + 164 military expedited apps
TRUE ~ tran_desc)) %>%
mutate (process_time2 = issue_date - application_date,
diff = process_time2 - days_to_issue#,
#docs_time = issue_date - fee_paid_date,
#diff2 = docs_time - days_to_issue
) %>%
filter (year!="2018") #remove those before Jan. 1, 2019
#Any negatives?
fl_state %>%
filter (process_time2 < 0) %>%
nrow()
## [1] 0
#Any NAs?
fl_state %>%
filter (is.na(process_time2)) %>%
nrow()
## [1] 0
fl_state %>%
count (lic_type, app_type)
## # A tibble: 11 × 3
## lic_type app_type n
## <chr> <chr> <int>
## 1 LPN Endorsement 5601
## 2 LPN Exam 7119
## 3 LPN Exam-retest 1175
## 4 LPN Initial Temporary Military 4
## 5 LPN Military Vet Expedited Initial Licensure 34
## 6 RN Endorsement 40696
## 7 RN Exam 36978
## 8 RN Exam-retest 6813
## 9 RN Initial Temporary Military - RN 12
## 10 RN Internet Exam Application 34
## 11 RN Military Vet Expedited Initial Licensure 284
fl_state %>%
count (prof_cde, tran_desc)
## # A tibble: 11 × 3
## prof_cde tran_desc n
## <chr> <chr> <int>
## 1 1701 Exam Application 36978
## 2 1701 Initial Temporary Military - RN 12
## 3 1701 Internet Exam Application 34
## 4 1701 Military Vet Expedited Initial Licensure 284
## 5 1701 Re-Exam Application 6813
## 6 1701 RN by Endorsement 40696
## 7 1702 Exam Application 7119
## 8 1702 Initial Temporary Military 4
## 9 1702 LPN by Endorsement 5601
## 10 1702 Military Vet Expedited Initial Licensure 34
## 11 1702 Re-Exam Application 1175
str(fl_state)
## tibble [98,750 × 20] (S3: tbl_df/tbl/data.frame)
## $ prof_cde : chr [1:98750] "1701" "1701" "1701" "1701" ...
## $ file_nbr : num [1:98750] 238745 241832 242093 248194 272557 ...
## $ lic_nbr : chr [1:98750] "9506182" "9505105" "9514022" "9502847" ...
## $ app_nbr : num [1:98750] 3587295 3582415 3578501 3190314 3613059 ...
## $ licensee_name : chr [1:98750] "Franklin, Ismay Elacen" "Schulze, Amber Brett" "Rodriguez-Pardo, Ehra Maria" "Brown, Angela A" ...
## $ tran_code : chr [1:98750] "1016" "1016" "1016" "1011" ...
## $ tran_desc : chr [1:98750] "RN by Endorsement" "RN by Endorsement" "RN by Endorsement" "Re-Exam Application" ...
## $ app_dte : POSIXct[1:98750], format: "2019-02-15 00:00:00" "2019-02-04 00:00:00" ...
## $ fee_pd_dte : POSIXct[1:98750], format: "2019-02-15" "2019-02-04" ...
## $ lic_issue_dte : POSIXct[1:98750], format: "2019-02-25" "2019-02-12" ...
## $ days_to_issue : num [1:98750] 10 8 151 335 22 15 9 107 9 2 ...
## $ application_date: Date[1:98750], format: "2019-02-15" "2019-02-04" ...
## $ issue_date : Date[1:98750], format: "2019-02-25" "2019-02-12" ...
## $ fee_paid_date : Date[1:98750], format: "2019-02-15" "2019-02-04" ...
## $ month : chr [1:98750] "2019-02" "2019-02" "2019-06" "2019-01" ...
## $ year : chr [1:98750] "2019" "2019" "2019" "2019" ...
## $ lic_type : chr [1:98750] "RN" "RN" "RN" "RN" ...
## $ app_type : chr [1:98750] "Endorsement" "Endorsement" "Endorsement" "Exam-retest" ...
## $ process_time2 : 'difftime' num [1:98750] 10 8 151 335 ...
## ..- attr(*, "units")= chr "days"
## $ diff : 'difftime' num [1:98750] 0 0 0 0 ...
## ..- attr(*, "units")= chr "days"
#Any mismatches?
fl_state %>% filter (as.Date(lic_issue_dte) != issue_date) %>% nrow()
## [1] 0
fl_state %>% filter (as.Date(app_dte) != application_date) %>% nrow()
## [1] 0
#2 duplicates - might be because they issued yearlong military temp license, then regular license
dupe_numbers <- fl_state %>%
count (lic_nbr) %>%
filter (n > 1)
#1 NA license_number
fl_state %>%
filter (is.na(lic_nbr))
## # A tibble: 1 × 20
## prof_cde file_nbr lic_nbr app_nbr licensee_name tran_code tran_desc
## <chr> <dbl> <chr> <dbl> <chr> <chr> <chr>
## 1 1702 214304 <NA> 1107257 Jackson, Jatasha 1016 LPN by Endorseme…
## # … with 13 more variables: app_dte <dttm>, fee_pd_dte <dttm>,
## # lic_issue_dte <dttm>, days_to_issue <dbl>, application_date <date>,
## # issue_date <date>, fee_paid_date <date>, month <chr>, year <chr>,
## # lic_type <chr>, app_type <chr>, process_time2 <drtn>, diff <drtn>
Match raw application and license issued datasets to re-create what Florida’s Department of Health provided.
#Import FY19-Q3 initial applications (Jan-March 2019)
fy18_19apps <- read_excel("../state-data/FL/FY18-19/AR_FY_18-19_Q3_Tbl_4_INITIALS_Col_1_INITIAL_APPLICATIONS_RECEIVED_Detail_1701_1702.xlsx") %>%
mutate (PROF_CDE = as.character (CLNT_CDE)) %>%
rename (CLNT_ID = AR_CLNT_ID,
#PROF_CDE = CLNT_CDE,
PROF_DESCRIPTION = CLNT_NME,
LICENSEE_NAME = KEY_NME,
APP_NBR = APPLC_ID,
APP_DTE = APPL_DTE,
TRAN_CODE = XACT_CDE,
TRAN_DESC = XACT_DESC) %>%
select (-c(AR_TABLE, AR_COLUMN, AR_MEASURE, SELECTED_PERIOD, XACT_CLS_CDE, CLNT_CDE))
#Import FY19-Q3 re-exam applications (Jan-March 2019)
fy18_19apps_re <- read_excel("../state-data/FL/FY18-19/AR_FY_18-19_Q3_Tbl_4_INITIALS_Col_2_INITIAL_RE-EXAM_APPLICATIONS_RECEIVED_Detail_1701_1702.xlsx") %>%
mutate (PROF_CDE = as.character (CLNT_CDE)) %>%
rename (CLNT_ID = AR_CLNT_ID,
#PROF_CDE = CLNT_CDE,
PROF_DESCRIPTION = CLNT_NME,
LICENSEE_NAME = KEY_NME,
APP_NBR = APPLC_ID,
APP_DTE = APPL_DTE,
TRAN_CODE = XACT_CDE,
TRAN_DESC = XACT_DESC) %>%
select (-c(AR_TABLE, AR_COLUMN, AR_MEASURE, SELECTED_PERIOD, XACT_CLS_CDE, CLNT_CDE))
#Import FY20 initial applications (July 2019-June 2020)
fy19_20apps <- read_excel("../state-data/FL/FY19-20/AR_FY_19-20_Tbl_4_INITIALS_Col_1_INITIAL_APPLICATIONS_RECEIVED_Detail_1701_1702.xlsx")
#Import FY20 re-exam applications (July 2019-June 2020)
fy19_20apps_re <- read_excel("../state-data/FL/FY19-20/AR_FY_19-20_Tbl_4_INITIALS_Col_2_INITIAL_RE-EXAM_APPLICATIONS_RECEIVED_Detail_1701_1702.xlsx")
#Compare columns to see what we can remove and rename in FY18 documents.
compare <- compare_df_cols(fy18_19apps, fy19_20apps)
#Import FY21 initial applications (July 2020-June 2021)
fy20_21apps <- read_excel("../state-data/FL/FY20-21/AR_FY_20-21_Tbl_4_INITIALS_Col_1_INITIAL_APPLICATIONS_RECEIVED_Detail_1701_1702.xlsx")
#Import FY20 re-exam applications (July 2020-June 2021)
fy20_21apps_re <- read_excel("../state-data/FL/FY20-21/AR_FY_20-21_Tbl_4_INITIALS_Col_2_INITIAL_RE-EXAM_APPLICATIONS_RECEIVED_Detail_1701_1702.xlsx")
#Import FY22-Q1 initial applications (July 2021-Sept 2021)
fy21_22apps_q1 <- read_excel("../state-data/FL/FY21-22Q1/AR_FY_21-22_Q1_Tbl_4_INITIALS_Col_1_INITIAL_APPLICATIONS_RECEIVED_Detail_1701_1702.xlsx")
#Import FY22-Q1 re-exam applications (July 2021-Sept 2021)
fy21_22apps_re_q1 <- read_excel("../state-data/FL/FY21-22Q1/AR_FY_21-22_Q1_Tbl_4_INITIALS_Col_2_INITIAL_RE-EXAM_APPLICATIONS_RECEIVED_Detail_1701_1702.xlsx")
#Import FY22-Q2 initial applications (Oct 2021-Dec 2021)
fy21_22apps_q2 <- read_excel("../state-data/FL/FY21-22Q2/AR_FY_21-22_Q2_Tbl_4_INITIALS_Col_1_INITIAL_APPLICATIONS_RECEIVED_Detail_1701_1702.xlsx")
#Import FY22-Q2 re-exam applications (Oct 2021-Dec 2021)
fy21_22apps_re_q2 <- read_excel("../state-data/FL/FY21-22Q2/AR_FY_21-22_Q2_Tbl_4_INITIALS_Col_2_INITIAL_RE-EXAM_APPLICATIONS_RECEIVED_Detail_1701_1702.xlsx")
#Import FY19-Q3 licenses issued (Jan-March 2019)
fy18_19iss <- read_excel("../state-data/FL/FY18-19/AR_FY_18-19_Q3_Tbl_4_INITIALS_Col_3_INITIAL_LICENSES_ISSUED_Detail_1701_1702.xlsx") %>%
mutate (PROF_CDE = as.character (CLNT_CDE)) %>%
rename (CLNT_ID = AR_CLNT_ID,
#PROF_CDE = CLNT_CDE,
PROF_DESCRIPTION = CLNT_NME,
LICENSEE_NAME = KEY_NME,
LIC_ISSUE_DTE = ORIG_DTE,
LIC_STAT_DESC = LIC_STA_DESC
#APP_NBR = APPLC_ID,
#APP_DTE = APPL_DTE,
#TRAN_CODE = XACT_CDE,
#TRAN_DESC = XACT_DESC
) %>%
select (-c(AR_TABLE, AR_COLUMN, AR_MEASURE, SELECTED_PERIOD, CLNT_CDE))
#FL DOH did not provide FY19 Q4 (April-June 2019), apparently in error
#Import FY20 licenses issued (July 2019-June 2020)
fy19_20iss <- read_excel("../state-data/FL/FY19-20/AR_FY_19-20_Tbl_4_INITIALS_Col_3_INITIAL_LICENSES_ISSUED_Detail_1701_1702.xlsx")
#Import FY21 licenses issued (July 2020-June 2021)
fy20_21iss <- read_excel("../state-data/FL/FY20-21/AR_FY_20-21_Tbl_4_INITIALS_Col_3_INITIAL_LICENSES_ISSUED_Detail_1701_1702.xlsx")
#Import FY22-Q1 licenses issued (July 2021-Sept 2021)
fy21_22iss_q1 <- read_excel("../state-data/FL/FY21-22Q1/AR_FY_21-22_Q1_Tbl_4_INITIALS_Col_3_INITIAL_LICENSES_ISSUED_Detail_1701_1702.xlsx")
#Import FY22-Q2 licenses issued (Oct 2021-Dec 2021)
fy21_22iss_q2 <- read_excel("../state-data/FL/FY21-22Q2/AR_FY_21-22_Q2_Tbl_4_INITIALS_Col_3_INITIAL_LICENSES_ISSUED_Detail_1701_1702.xlsx")
#Combine all licenses issued and keep only records that have unique file numbers, which is how FL DOH identifies an individual. This removes any people who were issued more than once just to verify the easy ones.
all_issued <- bind_rows (fy18_19iss, fy19_20iss, fy20_21iss, fy21_22iss_q1, fy21_22iss_q2) %>%
group_by (FILE_NBR) %>%
mutate (count = n())
all_issued <- all_issued %>%
filter ( count==1)
#Combine all applications
all_apps <- bind_rows (fy18_19apps, fy18_19apps_re, fy19_20apps, fy19_20apps_re, fy20_21apps, fy20_21apps_re, fy21_22apps_q1, fy21_22apps_re_q1, fy21_22apps_q2, fy21_22apps_re_q2) %>%
#remove withdrawn applications, which caused unnecessary duplications when joining
filter (!(APP_STATUS %in% c("WITHDRAWN", "Withdrawn"))) %>%
arrange (FILE_NBR, desc(APP_DTE)) %>%
mutate (LIC_NBR = as.character(LIC_NBR))
all_apps %>%
count (APP_STATUS)
## # A tibble: 4 × 2
## APP_STATUS n
## * <chr> <int>
## 1 Approved 6565
## 2 APPROVED 97007
## 3 OPEN 29247
## 4 TO BE DENIED 12
#13000 people have multiple apps, mostly re-exam apps
all_apps %>%
group_by (FILE_NBR) %>%
mutate (count = n()) %>%
filter (count >1) %>%
ungroup() %>%
count (TRAN_DESC) %>%
arrange (desc(n))
## # A tibble: 7 × 2
## TRAN_DESC n
## <chr> <int>
## 1 Re-Exam Application 18651
## 2 Exam Application 10731
## 3 RN by Endorsement 1630
## 4 LPN by Endorsement 170
## 5 Military Vet Expedited Initial Licensure 20
## 6 Initial Temporary Military - RN 6
## 7 Initial Temporary Military 1
#Keep only records that have unique file numbers, which is how FL DOH identifies an individual. This removes any people who have applied more than once just to verify the easy ones.
all_apps2 <- all_apps %>%
group_by (FILE_NBR) %>%
mutate (count = n()) %>%
filter ( count==1)
#Join issued dataframe with the application dataframe by the file number and calculate processing time.
test_join <- inner_join (all_issued, all_apps2, by="FILE_NBR") %>%
mutate (issue_date = as.Date (LIC_ISSUE_DTE),
application_date = as.Date(APP_DTE)) %>%
mutate (process_time = issue_date - application_date)
#Pare down columns
test_join2 <- test_join %>%
select (LICENSEE_NAME.x, FILE_NBR, issue_date, application_date, process_time, APP_STATUS )
#Keep only people who have one record only.
fl_state2 <- fl_state %>%
group_by (file_nbr) %>%
mutate (count = n()) %>%
filter ( count==1)
#Join by file number.
test_join3 <- left_join (test_join2, fl_state2, by=c("FILE_NBR"="file_nbr"))
#Only 188 processing times don't match, mostly Re-Exam, meaning I think we can trust FL Dept of Health joined correctly and it's fine to use their matched data.
test_join3 %>%
filter (process_time != process_time2) %>%
ungroup() %>%
count (tran_desc)
## # A tibble: 4 × 2
## tran_desc n
## * <chr> <int>
## 1 Exam Application 2
## 2 LPN by Endorsement 1
## 3 Re-Exam Application 168
## 4 RN by Endorsement 17
Which applications are still pending?
open_apps <- all_apps %>%
#filter (APP_STATUS=="OPEN") %>%
filter (APP_DTE > as.Date("2020-06-30")) %>%
rename (file_nbr = FILE_NBR,
tran_desc = TRAN_DESC) %>%
#distinct (FILE_NBR, .keep_all=T) %>%
anti_join (fl_state, by = c("file_nbr", "tran_desc"))
open_apps %>% count (APP_STATUS)
## # A tibble: 3 × 2
## APP_STATUS n
## * <chr> <int>
## 1 APPROVED 17329
## 2 OPEN 13800
## 3 TO BE DENIED 7
open_apps2 <- open_apps %>% filter (APP_STATUS!="OPEN")
#Almost all Exam apps and re-exam apps
open_apps2 %>% count (PROF_DESCRIPTION, tran_desc)
## # A tibble: 6 × 3
## PROF_DESCRIPTION tran_desc n
## <chr> <chr> <int>
## 1 Licensed Practical Nurse Exam Application 1374
## 2 Licensed Practical Nurse LPN by Endorsement 3
## 3 Licensed Practical Nurse Re-Exam Application 1321
## 4 Registered Nurse (RN) Exam Application 7596
## 5 Registered Nurse (RN) Re-Exam Application 7022
## 6 Registered Nurse (RN) RN by Endorsement 20
open_apps %>% count (tran_desc, APP_STATUS)
## # A tibble: 14 × 3
## tran_desc APP_STATUS n
## <chr> <chr> <int>
## 1 Exam Application APPROVED 8967
## 2 Exam Application OPEN 5775
## 3 Exam Application TO BE DENIED 3
## 4 Initial Temporary Military - RN OPEN 6
## 5 LPN by Endorsement APPROVED 2
## 6 LPN by Endorsement OPEN 927
## 7 LPN by Endorsement TO BE DENIED 1
## 8 Military Vet Expedited Initial Licensure OPEN 76
## 9 Re-Exam Application APPROVED 8342
## 10 Re-Exam Application OPEN 1061
## 11 Re-Exam Application TO BE DENIED 1
## 12 RN by Endorsement APPROVED 18
## 13 RN by Endorsement OPEN 5955
## 14 RN by Endorsement TO BE DENIED 2
#Look for names in these "Open apps" that have had licenses issued. Also copy over license type, application type and application date to filter out those who were issued licenses in other ways, so these are abandoned applications.
names <- open_apps %>%
mutate (LICENSEE_NAME = str_to_upper (LICENSEE_NAME)) %>%
pull ( LICENSEE_NAME)
trans <- open_apps %>%
pull ( tran_desc)
apps <- open_apps %>%
pull ( APP_DTE)
profs <- open_apps %>%
pull ( PROF_DESCRIPTION)
fl_state2 <- fl_state %>%
mutate (licensee_name = str_to_upper (licensee_name))
#Run through for loop
counter <- 0
output <- tibble()
for (i in seq_along(names)){
counter <- counter + 1
name <- names[i]
tran <- trans[i]
date <- apps[i]
prof <- profs[i]
step_df <- fl_state2 %>%
filter (licensee_name == name) %>%
mutate (APP_TRANS = tran,
APP_DATE = date,
PROF = prof)
output <- bind_rows (output, step_df)
#print (paste("Finished", counter, "of", length(names)))
}
output <- output %>%
rename (LICENSEE_NAME = licensee_name)
Combine dataframes of issued licenses with still-pending applications.
#Remove unformatted date fields and unnecessary fields on issued dataframe
fl_state <- fl_state %>%
select (-c(app_dte, fee_pd_dte, lic_issue_dte, prof_cde, file_nbr, app_nbr, tran_code, tran_desc, days_to_issue, diff)) %>%
rename (name = licensee_name,
license_number = lic_nbr) %>%
mutate (data_state="FL")
#Remove those people identified above to keep pending applications
open_apps2 <- open_apps %>%
anti_join (output, by="LICENSEE_NAME") %>%
select (-c(CLNT_ID, LIC_NBR, LIC_STA_DESC, ORIG_DTE, BTCH_RCPT_DTE, APPL_FEE_PAID, APPL_FEE_TOTAL, PERIOD)) %>%
clean_names() %>%
mutate (application_date = as.Date(app_dte),
#issue_date = ymd(lic_issue_dte),
fee_paid_date = ymd(feepd_dte),
#month = substr(issue_date, 1,7),
year = "Pending as of 12/31/21",
lic_type = case_when (
prof_cde == "1701 RN" ~ "RN",
prof_cde == "1702" ~ "LPN"),
app_type = case_when (
str_detect (tran_desc, "Endorsement") ~ "Endorsement",
str_detect (tran_desc, "Re-Exam") ~ "Exam-retest",
tran_desc == "Exam Application" ~ "Exam",
#Might want to combine 12 Internet Exam Application + 164 military expedited apps
TRUE ~ tran_desc)) %>%
mutate (process_time2 = as.Date("2021-12-31") - application_date,
data_state = "FL") %>%
rename (name = licensee_name) %>%
select (-c(app_dte, feepd_dte, prof_cde, file_nbr, app_nbr, tran_code, tran_desc, prof_description, applc_id, lic_id))
#How do I add this into fl_state?
compare <- compare_df_cols(fl_state, open_apps2)
#Combine issued licenses with still-pending applications
fl_state <- bind_rows (fl_state, open_apps2)
#Remove little used type from issued dataframe
fl_state <- fl_state %>%
filter (!str_detect(app_type, "^Initial Temporary")) %>% #Removes 16 Florida temps + 4 pending)
filter (app_type != "Internet Exam Application") #removes small number of this type
#Check to see where FL stands
fl_state %>%
group_by (lic_type, app_type, year) %>% #, deficient
filter (app_type != "Exam-retest" & lic_type %in% c("RN", "LPN", "RN-Temp", "LPN-Temp")) %>%
filter (!(str_detect(year, "^Pending") & (application_date < as.Date("2020-12-31")))) %>%
#group_by (year, lic_type, app_type) %>%
summarize (mean = round(mean(process_time2, na.rm=T)),
median = median (process_time2),
count = n(),
over30days = sum(process_time2 > 30),
pct_over30days = over30days/count,
over60days = sum(process_time2 > 60),
pct_over60days = over60days/count,
over90days = sum(process_time2 > 90),
pct_over90days = over90days/count,
over120days = sum(process_time2 > 120),
pct_over120days = over120days/count,
over180days = sum(process_time2 > 180),
pct_over180days = over180days/count,
over1year = sum(process_time2 > 365),
pct_over1year = over1year/count ) %>%
filter (year %in% c("2021", "Pending as of 12/31/21"))
## # A tibble: 12 × 18
## # Groups: lic_type, app_type [6]
## lic_type app_type year mean median count over30days pct_over30days
## <chr> <chr> <chr> <drt> <drtn> <int> <int> <dbl>
## 1 LPN Endorsement 2021 43 … 17.0… 2205 733 0.332
## 2 LPN Endorsement Pending… 173 … 170.0… 709 616 0.869
## 3 LPN Exam 2021 81 … 58.0… 2279 1913 0.839
## 4 LPN Exam Pending… 156 … 150.0… 1606 1315 0.819
## 5 LPN Military Vet … 2021 77 … 26.5… 12 6 0.5
## 6 LPN Military Vet … Pending… 152 … 181.0… 10 8 0.8
## 7 RN Endorsement 2021 39 … 14.0… 18445 5379 0.292
## 8 RN Endorsement Pending… 140 … 121.0… 4816 3939 0.818
## 9 RN Exam 2021 84 … 63.0… 12437 11347 0.912
## 10 RN Exam Pending… 128 … 101.0… 8147 6152 0.755
## 11 RN Military Vet … 2021 55 … 25.0… 87 40 0.460
## 12 RN Military Vet … Pending… 127 … 91.5… 40 29 0.725
## # … with 10 more variables: over60days <int>, pct_over60days <dbl>,
## # over90days <int>, pct_over90days <dbl>, over120days <int>,
## # pct_over120days <dbl>, over180days <int>, pct_over180days <dbl>,
## # over1year <int>, pct_over1year <dbl>
state <- read_excel("../state-data/HI-RN_LPN Applications Report-2021-11-19-18-32-54.xlsx", skip=9) %>%
clean_names() %>%
filter (application_received_date != "Total") %>%
select (-x2)
state %>% count (type,method_of_licensure)
## # A tibble: 19 × 3
## type method_of_licensure n
## <chr> <chr> <int>
## 1 ADDC - Additional Class E - By Exam 1
## 2 LICF - New License after Forfeiture E - By Exam 39
## 3 LICF - New License after Forfeiture R - By Reciprocity, Endorsement or… 391
## 4 LICF - New License after Forfeiture <NA> 3
## 5 LICN - New License E - By Exam 3917
## 6 LICN - New License R - By Reciprocity, Endorsement or… 11072
## 7 LICN - New License <NA> 1
## 8 REAC - Reactivation E - By Exam 13
## 9 REAC - Reactivation R - By Reciprocity, Endorsement or… 17
## 10 REAC - Reactivation <NA> 10
## 11 RNEW - Renewal 5 - CT Renewal - Active RME (Categ… 1
## 12 RNEW - Renewal 6 - CT Renewal - Active Firm; Pape… 2
## 13 RNEW - Renewal E - By Exam 10812
## 14 RNEW - Renewal M - Graduate Level Degree 1
## 15 RNEW - Renewal R - By Reciprocity, Endorsement or… 12454
## 16 RNEW - Renewal <NA> 2476
## 17 RSTR - Restore - No A/I Status E - By Exam 1
## 18 RSTR - Restore - No A/I Status <NA> 765
## 19 RSTRA - Restore Active R - By Reciprocity, Endorsement or… 2
state %>% filter (is.na(method_of_licensure))
## # A tibble: 3,255 × 9
## application_rece… applicant_name license_type status type current_license
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 4/23/2021 LEONA SOARES LPN HC - A… RNEW… LPN-5120-0
## 2 4/21/2021 RUTH T YANELLAS LPN HC - A… RNEW… LPN-5137-0
## 3 6/28/2021 KATHLEEN E CAD… LPN HC - A… RNEW… LPN-5145-0
## 4 5/24/2021 CATALINA C TANO LPN HC - A… RNEW… LPN-8619-0
## 5 6/14/2021 KEAHELAUMAKANIO… LPN HC - A… RNEW… LPN-8637-0
## 6 4/27/2021 MARGARET L MAK… LPN HC - A… RNEW… LPN-8647-0
## 7 6/4/2021 PATRICIA S CRA… LPN HC - A… RNEW… LPN-8652-0
## 8 4/21/2021 KEVIN H KUBOTA LPN HC - A… RNEW… LPN-9319-0
## 9 5/4/2021 DARLEEN K PALM… LPN HC - A… RNEW… LPN-9332-0
## 10 6/13/2021 HUESTRUS A RAM… LPN HC - A… RNEW… LPN-9349-0
## # … with 3,245 more rows, and 3 more variables: method_of_licensure <chr>,
## # current_license_orig_date_of_licensure <chr>, license_expiration_date <chr>
check <- state %>%
mutate (app_type = case_when (
str_detect(type, "New License") & str_detect(method_of_licensure, "Exam") ~ "Exam",
str_detect(type, "New License") & str_detect(method_of_licensure, "Endorsement") ~ "Endorsement",
str_detect(type, "Reactivation") & str_detect(method_of_licensure, "Exam") ~ "Reinstatement-Exam",
str_detect(type, "Reactivation") & str_detect(method_of_licensure, "Endorsement") ~ "Reinstatement-Endorsement",
str_detect(type, "Reactivation") & is.na(method_of_licensure) ~ "Reinstatement-Unknown",
type=="ADDC - Additional Class" ~ "Exam",
str_detect(type, "Renewal") & str_detect(method_of_licensure, "Exam") ~ "Renewal-Exam",
str_detect(type, "Renewal") & str_detect(method_of_licensure, "Endorsement") ~ "Renewal-Endorsement",
str_detect(type, "Renewal") & !(str_detect(method_of_licensure, "Exam")|str_detect(method_of_licensure, "Endorsement")) ~ "Renewal-Unknown",
str_detect(type, "Renewal") & is.na(method_of_licensure) ~ "Renewal-Unknown",
str_detect(type, "Restore") & str_detect(method_of_licensure, "Exam") ~ "Reinstatement-Exam",
str_detect(type, "Restore") & str_detect(method_of_licensure, "Endorsement") ~ "Reinstatement-Endorsement",
str_detect(type, "Restore") & is.na(method_of_licensure) ~ "Reinstatement-Unknown",
TRUE ~ "Unknown")) %>%
count (type,method_of_licensure, app_type)
#Calculate processing time
hi_state <- state %>%
mutate (application_date = mdy(application_received_date),
issue_date = mdy(current_license_orig_date_of_licensure),
expiration_date = mdy(license_expiration_date),
month = substr(issue_date, 1,7),
year = year(issue_date),
app_type = case_when (
str_detect(type, "New License") & str_detect(method_of_licensure, "Exam") ~ "Exam",
str_detect(type, "New License") & str_detect(method_of_licensure, "Endorsement") ~ "Endorsement",
str_detect(type, "Reactivation") & str_detect(method_of_licensure, "Exam") ~ "Reinstatement-Exam",
str_detect(type, "Reactivation") & str_detect(method_of_licensure, "Endorsement") ~ "Reinstatement-Endorsement",
str_detect(type, "Reactivation") & is.na(method_of_licensure) ~ "Reinstatement-Unknown",
type=="ADDC - Additional Class" ~ "Exam",
str_detect(type, "Renewal") & str_detect(method_of_licensure, "Exam") ~ "Renewal-Exam",
str_detect(type, "Renewal") & str_detect(method_of_licensure, "Endorsement") ~ "Renewal-Endorsement",
str_detect(type, "Renewal") & !(str_detect(method_of_licensure, "Exam")|str_detect(method_of_licensure, "Endorsement")) ~ "Renewal-Unknown",
str_detect(type, "Renewal") & is.na(method_of_licensure) ~ "Renewal-Unknown",
str_detect(type, "Restore") & str_detect(method_of_licensure, "Exam") ~ "Reinstatement-Exam",
str_detect(type, "Restore") & str_detect(method_of_licensure, "Endorsement") ~ "Reinstatement-Endorsement",
str_detect(type, "Restore") & is.na(method_of_licensure) ~ "Reinstatement-Unknown",
TRUE ~ "Unknown")) %>%
mutate (process_time = issue_date - application_date)
#Check status for those that couldn't calculate process_time to find those actively pending, and set them to calculate pending time as process_time2
hi_state %>%
filter (is.na(process_time)) %>%
count (status) %>%
arrange (desc(n))
## # A tibble: 20 × 2
## status n
## <chr> <int>
## 1 HC - Application Completed 6805
## 2 A3 - Deficiency Notice Sent 2469
## 3 X1 - Awaiting Exam 1000
## 4 A1 - Application Submitted, Awaiting Review 505
## 5 AW - Application Withdrawal 369
## 6 X3 - Must Take Remedial Course 267
## 7 L1 - Applications Submitted, Awaiting Notice 105
## 8 HD - Application Terminated 58
## 9 E1 - To Executive Officer Awaiting Review 53
## 10 E3 - Executive Officer Requests Add'l Information 16
## 11 B7 - Denial Letter Sent 13
## 12 BH - Board Deferral 13
## 13 L3 - Deficiency Notice Sent 11
## 14 HW - Application Withdrawn 6
## 15 XW - Withdrew From Exam 5
## 16 H1 - Awaiting Licensing 3
## 17 A2 - Deficient, Awaiting Notice 2
## 18 A5 - Rejected, Notice Sent 1
## 19 C7 - Denial Letter Sent 1
## 20 EW - Application Withdrawn After 1
#Calculates how long those left pending have been pending as process_time2
hi_state <- hi_state %>%
mutate (process_time2 = if_else (is.na(process_time) & status %in% c("A3 - Deficiency Notice Sent", "X1 - Awaiting Exam", "A1 - Application Submitted, Awaiting Review", "X3 - Must Take Remedial Course", "L1 - Applications Submitted, Awaiting Notice", "E1 - To Executive Officer Awaiting Review", "E3 - Executive Officer Requests Add'l Information", "L3 - Deficiency Notice Sent", "H1 - Awaiting Licensing", "A2 - Deficient, Awaiting Notice"),
as.Date("2021-11-19") - application_date,
process_time),#Calculates how long those left pending have been pending
data_state = "HI",
year = if_else (is.na(year), "Pending as of 11/19/21", as.character(year))) %>%
rename (lic_type = license_type)
#Any negatives? 26,825 - only 432 are not renewals or reinstatements
#160 after Feb 20, 2021 have negative times
hi_state %>%
filter (application_date > as.Date("2021-02-20")) %>%
filter (process_time < 0) %>%
filter (!(str_detect(app_type, "^Renewal")|str_detect(app_type, "^Reinstatement"))) %>%
count (app_type) %>%
arrange (desc(n))
## # A tibble: 3 × 2
## app_type n
## <chr> <int>
## 1 Endorsement 145
## 2 Exam 15
## 3 Unknown 1
#Any failed to calculate? 7266, almost all endorse/exam
hi_state %>%
#filter (application_date > as.Date("2021-02-20")) %>%
#filter (is.na(process_time2)) %>%
filter (!(str_detect(app_type, "^Renewal")|str_detect(app_type, "^Reinstatement"))) %>%
count (year)
## # A tibble: 47 × 2
## year n
## * <chr> <int>
## 1 1972 1
## 2 1973 1
## 3 1978 1
## 4 1980 3
## 5 1981 2
## 6 1982 1
## 7 1983 1
## 8 1984 4
## 9 1985 1
## 10 1986 3
## # … with 37 more rows
# count ( app_type) %>%
# arrange (desc(n))
#Only 3450 calculated properly...
hi_state %>%
filter (application_date > as.Date("2021-02-20")) %>%
filter (process_time2 > -1) %>%
count (type)
## # A tibble: 7 × 2
## type n
## * <chr> <int>
## 1 ADDC - Additional Class 1
## 2 LICF - New License after Forfeiture 8
## 3 LICN - New License 5068
## 4 REAC - Reactivation 1
## 5 RNEW - Renewal 139
## 6 RSTR - Restore - No A/I Status 1
## 7 RSTRA - Restore Active 1
# summarize (range = range(issue_date))
#15,424 records are endorsement/exam, but only 3450 calculated anything usable
hi_state %>%
filter (!(str_detect(app_type, "^Renewal")|str_detect(app_type, "^Reinstatement"))) %>%
summarize (range = range(issue_date, na.rm=T))
## # A tibble: 2 × 1
## range
## <date>
## 1 1972-11-02
## 2 2023-03-30
#All 41978 have an application date
state %>% filter (!is.na(application_received_date))
## # A tibble: 41,978 × 9
## application_rece… applicant_name license_type status type current_license
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 8/12/2021 Carolyn Talhe… RN HC - A… LICN … RN-102490-0
## 2 8/13/2021 Jason Cole Sem… RN HC - A… LICN … RN-102491-0
## 3 7/27/2021 Alexander Spr… RN HC - A… LICN … RN-102492-0
## 4 7/23/2021 DIANA R SCHEMB… RN HC - A… LICN … RN-102493-0
## 5 8/14/2021 Yessenia Annet… RN HC - A… LICN … RN-102494-0
## 6 8/15/2021 Victorio Razon… RN HC - A… LICN … RN-102495-0
## 7 8/15/2021 BARBIE JOLINE … RN HC - A… LICN … RN-102496-0
## 8 8/13/2021 Shandrika Pri… RN HC - A… LICN … RN-102497-0
## 9 7/27/2021 CELINE S TAPIA RN HC - A… LICN … RN-102498-0
## 10 8/13/2021 Bahareh Jones RN HC - A… LICN … RN-102499-0
## # … with 41,968 more rows, and 3 more variables: method_of_licensure <chr>,
## # current_license_orig_date_of_licensure <chr>, license_expiration_date <chr>
#Only 30275 have an issue date. Missing 11682 are virtually all new licenses.
state %>%
filter (is.na(current_license_orig_date_of_licensure)) %>%
count (type)
## # A tibble: 5 × 2
## type n
## * <chr> <int>
## 1 ADDC - Additional Class 1
## 2 LICF - New License after Forfeiture 13
## 3 LICN - New License 11682
## 4 RNEW - Renewal 5
## 5 RSTRA - Restore Active 2
#9332 records have issue date after Jan 1, 2019; 3305 are new licenses all issued in 2021
hi_state %>%
filter (issue_date >= as.Date("2019-01-01")) %>%
count (type)
## # A tibble: 5 × 2
## type n
## * <chr> <int>
## 1 LICF - New License after Forfeiture 7
## 2 LICN - New License 3305
## 3 REAC - Reactivation 2
## 4 RNEW - Renewal 5895
## 5 RSTR - Restore - No A/I Status 123
hi_state %>%
filter (application_date < as.Date("2020-01-01")) %>%
arrange (application_date)
## # A tibble: 5,150 × 18
## application_recei… applicant_name lic_type status type current_license
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 1/1/2019 <NA> RN A3 - Def… LICN … <NA>
## 2 1/1/2019 <NA> LPN A3 - Def… LICN … <NA>
## 3 1/2/2019 MARY P TIFFIN RN HC - App… LICN … <NA>
## 4 1/2/2019 JUSTINE A TALL… RN HC - App… LICN … <NA>
## 5 1/2/2019 KEVIN J PALUAY RN HC - App… LICN … <NA>
## 6 1/2/2019 CAMELLIA G GARN… RN HC - App… LICN … <NA>
## 7 1/2/2019 <NA> RN A3 - Def… LICN … <NA>
## 8 1/2/2019 <NA> RN HD - App… LICN … <NA>
## 9 1/2/2019 CHARDEY LEWIS RN HC - App… LICN … <NA>
## 10 1/2/2019 MEGAN L FROSHE… RN HC - App… LICN … <NA>
## # … with 5,140 more rows, and 12 more variables: method_of_licensure <chr>,
## # current_license_orig_date_of_licensure <chr>,
## # license_expiration_date <chr>, application_date <date>, issue_date <date>,
## # expiration_date <date>, month <chr>, year <chr>, app_type <chr>,
## # process_time <drtn>, process_time2 <drtn>, data_state <chr>
hi_state %>%
filter (application_date > as.Date("2021-02-21")) %>%
count (type)
## # A tibble: 7 × 2
## type n
## * <chr> <int>
## 1 ADDC - Additional Class 1
## 2 LICF - New License after Forfeiture 158
## 3 LICN - New License 5444
## 4 REAC - Reactivation 39
## 5 RNEW - Renewal 25640
## 6 RSTR - Restore - No A/I Status 359
## 7 RSTRA - Restore Active 2
avg_by_type_app <- hi_state %>%
ungroup() %>%
filter (process_time2 > -1) %>% #removes renewals/reinstatements, which all calculated wrong
filter (!is.na(process_time2)) %>%
filter (application_date > as.Date("2021-02-21")) %>% #System migration on Feb. 22, 2021 means all records before could be wrong
filter (!(str_detect(year, "^Pending") & (application_date < as.Date("2020-07-01")))) %>% #remove pending apps older than July 2020
group_by (lic_type, app_type, year) %>%
summarize (mean = round(mean(process_time2)),
median = median (process_time2),
count = n(),
over30days = sum(process_time2 > 30),
pct_over30days = over30days/count,
over60days = sum(process_time2 > 60),
pct_over60days = over60days/count,
over90days = sum(process_time2 > 90),
pct_over90days = over90days/count,
over120days = sum(process_time2 > 120),
pct_over120days = over120days/count,
over180days = sum(process_time2 > 180),
pct_over180days = over180days/count,
over1year = sum(process_time2 > 365),
pct_over1year = over1year/count)
hi_21 <- avg_by_type_app %>%
filter (year=="2021"|str_detect(year, "^Pending"))
#Any mismatches?
hi_state %>% filter (mdy(application_received_date) != application_date)
## # A tibble: 0 × 18
## # … with 18 variables: application_received_date <chr>, applicant_name <chr>,
## # lic_type <chr>, status <chr>, type <chr>, current_license <chr>,
## # method_of_licensure <chr>, current_license_orig_date_of_licensure <chr>,
## # license_expiration_date <chr>, application_date <date>, issue_date <date>,
## # expiration_date <date>, month <chr>, year <chr>, app_type <chr>,
## # process_time <drtn>, process_time2 <drtn>, data_state <chr>
#Check conversion of type/method to app_type column
hi_state %>%
count (lic_type, app_type, method_of_licensure, type)
## # A tibble: 27 × 5
## lic_type app_type method_of_licensure type n
## <chr> <chr> <chr> <chr> <int>
## 1 LPN Endorsement R - By Reciprocity, Endo… LICF - New Li… 30
## 2 LPN Endorsement R - By Reciprocity, Endo… LICN - New Li… 441
## 3 LPN Exam E - By Exam ADDC - Additi… 1
## 4 LPN Exam E - By Exam LICF - New Li… 6
## 5 LPN Exam E - By Exam LICN - New Li… 827
## 6 LPN Reinstatement-Unknown <NA> RSTR - Restor… 97
## 7 LPN Renewal-Endorsement R - By Reciprocity, Endo… RNEW - Renewal 487
## 8 LPN Renewal-Exam E - By Exam RNEW - Renewal 924
## 9 LPN Renewal-Unknown <NA> RNEW - Renewal 256
## 10 RN Endorsement R - By Reciprocity, Endo… LICF - New Li… 361
## # … with 17 more rows
#Remove unformatted date fields and unnecessary fields
hi_state <- hi_state %>%
filter (application_date > as.Date("2021-02-21")) %>% #System migration on Feb. 22, 2021 means all records before could be wrong
select (-c(application_received_date, current_license_orig_date_of_licensure, license_expiration_date, type, method_of_licensure)) %>%
rename (name = applicant_name,
license_number = current_license,
lic_status = status)
hi_state %>%
count (lic_type, app_type)
## # A tibble: 15 × 3
## lic_type app_type n
## <chr> <chr> <int>
## 1 LPN Endorsement 184
## 2 LPN Exam 195
## 3 LPN Reinstatement-Unknown 45
## 4 LPN Renewal-Endorsement 487
## 5 LPN Renewal-Exam 924
## 6 LPN Renewal-Unknown 238
## 7 RN Endorsement 4314
## 8 RN Exam 909
## 9 RN Reinstatement-Endorsement 19
## 10 RN Reinstatement-Exam 14
## 11 RN Reinstatement-Unknown 322
## 12 RN Renewal-Endorsement 11967
## 13 RN Renewal-Exam 9888
## 14 RN Renewal-Unknown 2136
## 15 RN Unknown 1
str(hi_state)
## tibble [31,643 × 13] (S3: tbl_df/tbl/data.frame)
## $ name : chr [1:31643] "Carolyn Talhelm" "Jason Cole Semchak" "Alexander Springer" "DIANA R SCHEMBRI" ...
## $ lic_type : chr [1:31643] "RN" "RN" "RN" "RN" ...
## $ lic_status : chr [1:31643] "HC - Application Completed" "HC - Application Completed" "HC - Application Completed" "HC - Application Completed" ...
## $ license_number : chr [1:31643] "RN-102490-0" "RN-102491-0" "RN-102492-0" "RN-102493-0" ...
## $ application_date: Date[1:31643], format: "2021-08-12" "2021-08-13" ...
## $ issue_date : Date[1:31643], format: "2021-08-12" "2021-08-13" ...
## $ expiration_date : Date[1:31643], format: "2023-06-30" "2023-06-30" ...
## $ month : chr [1:31643] "2021-08" "2021-08" "2021-09" "2021-09" ...
## $ year : chr [1:31643] "2021" "2021" "2021" "2021" ...
## $ app_type : chr [1:31643] "Endorsement" "Endorsement" "Endorsement" "Endorsement" ...
## $ process_time : 'difftime' num [1:31643] 0 0 58 62 ...
## ..- attr(*, "units")= chr "days"
## $ process_time2 : 'difftime' num [1:31643] 0 0 58 62 ...
## ..- attr(*, "units")= chr "days"
## $ data_state : chr [1:31643] "HI" "HI" "HI" "HI" ...
#Check for license number duplicates
str(state)
## tibble [41,978 × 9] (S3: tbl_df/tbl/data.frame)
## $ application_received_date : chr [1:41978] "8/12/2021" "8/13/2021" "7/27/2021" "7/23/2021" ...
## $ applicant_name : chr [1:41978] "Carolyn Talhelm" "Jason Cole Semchak" "Alexander Springer" "DIANA R SCHEMBRI" ...
## $ license_type : chr [1:41978] "RN" "RN" "RN" "RN" ...
## $ status : chr [1:41978] "HC - Application Completed" "HC - Application Completed" "HC - Application Completed" "HC - Application Completed" ...
## $ type : chr [1:41978] "LICN - New License" "LICN - New License" "LICN - New License" "LICN - New License" ...
## $ current_license : chr [1:41978] "RN-102490-0" "RN-102491-0" "RN-102492-0" "RN-102493-0" ...
## $ method_of_licensure : chr [1:41978] "R - By Reciprocity, Endorsement or Credentials" "R - By Reciprocity, Endorsement or Credentials" "R - By Reciprocity, Endorsement or Credentials" "R - By Reciprocity, Endorsement or Credentials" ...
## $ current_license_orig_date_of_licensure: chr [1:41978] "8/12/2021" "8/13/2021" "9/23/2021" "9/23/2021" ...
## $ license_expiration_date : chr [1:41978] "6/30/2023" "6/30/2023" "6/30/2023" "6/30/2023" ...
#376 duplicate license numbers; drops to 51 after removing negative process times
dupe_numbers <- hi_state %>%
filter (!is.na(license_number)) %>% #removes pending licenses
filter (process_time2 > -1) %>% #remove negative times, errors
count (license_number) %>%
filter (n > 1) %>%
distinct (license_number) %>%
pull()
#Pull records for these duplicated people -- they all have different application types, except RN-100636-0 has two renewal-unknowns
hi_state %>%
filter (license_number %in% dupe_numbers) %>%
count (license_number, app_type) %>%
filter (n > 1)
## # A tibble: 1 × 3
## license_number app_type n
## <chr> <chr> <int>
## 1 RN-100636-0 Renewal-Unknown 2
#No duplicate removal necessary
il1 <- read_excel("../state-data/IL-Responsive Documents/2019 Nurses Applied and Issued by Endorsement.xls.xlsx", skip=8) %>%
clean_names() %>%
mutate (application_type = "Endorsement",
app_year = 2019,
license_type = case_when (
str_detect(license_number, "^41") ~ "RN",
str_detect(license_number, "^43") ~ "LPN",
TRUE ~ "PROBLEM"))
il2 <- read_excel("../state-data/IL-Responsive Documents/2019 Nurses Applied and Issued by Exam.xls.xlsx", skip=8) %>%
clean_names() %>%
mutate (application_type = "Exam",
app_year = 2019,
license_type = case_when (
str_detect(license_number, "^41") ~ "RN",
str_detect(license_number, "^43") ~ "LPN",
TRUE ~ "PROBLEM"))
il3 <- read_excel("../state-data/IL-Responsive Documents/2020 Nurses Applied and Issued by Endorsement.xls.xlsx", skip=8) %>%
clean_names() %>%
mutate (application_type = "Endorsement",
app_year = 2020,
license_type = case_when (
str_detect(license_number, "^41") ~ "RN",
str_detect(license_number, "^43") ~ "LPN",
TRUE ~ "PROBLEM"))
il4 <- read_excel("../state-data/IL-Responsive Documents/2020 Nurses Applied and Issued by Exam.xls.xlsx", skip=8) %>%
clean_names() %>%
mutate (application_type = "Exam",
app_year = 2020,
license_type = case_when (
str_detect(license_number, "^41") ~ "RN",
str_detect(license_number, "^43") ~ "LPN",
TRUE ~ "PROBLEM"))
il5 <- read_excel("../state-data/IL-Responsive Documents/2021 Nurses Applied and Issued by Endorsement.xls.xlsx", skip=8) %>%
clean_names() %>%
mutate (application_type = "Endorsement",
app_year = 2021,
license_type = case_when (
str_detect(license_number, "^41") ~ "RN",
str_detect(license_number, "^43") ~ "LPN",
TRUE ~ "PROBLEM"))
il6 <- read_excel("../state-data/IL-Responsive Documents/2021 Nurses Applied and Issued by Exam.xls.xlsx", skip=8) %>%
clean_names() %>%
mutate (application_type = "Exam",
app_year = 2021,
license_type = case_when (
str_detect(license_number, "^41") ~ "RN",
str_detect(license_number, "^43") ~ "LPN",
TRUE ~ "PROBLEM"))
state <- rbind (il1, il2, il3, il4, il5, il6)
#How many are missing the type? -- 12 rows, summary and blanks
state %>% filter (license_type == "PROBLEM")
## # A tibble: 12 × 12
## name license_number application_date issue_date
## <chr> <dbl> <dttm> <dttm>
## 1 <NA> NA NA NA
## 2 6098 Items Found NA NA NA
## 3 <NA> NA NA NA
## 4 10501 Items Found NA NA NA
## 5 <NA> NA NA NA
## 6 5710 Items Found NA NA NA
## 7 <NA> NA NA NA
## 8 8864 Items Found NA NA NA
## 9 <NA> NA NA NA
## 10 4635 Items Found NA NA NA
## 11 <NA> NA NA NA
## 12 4923 Items Found NA NA NA
## # … with 8 more variables: effective_date <dttm>, expiration_date <dttm>,
## # cred_city <chr>, cred_state <chr>, cred_zip_code <chr>,
## # application_type <chr>, app_year <dbl>, license_type <chr>
#Standardize columns
il_state <- state %>%
filter (license_type != "PROBLEM") %>% #removes summary and blank rows
mutate (issue_date = as.Date(issue_date),
application_date = as.Date(application_date),
effective_renewal_date = as.Date(effective_date),
expiration_date = as.Date(expiration_date),
license_number = as.character(license_number),
month = substr(issue_date, 1,7),
year = as.character(year(issue_date))
) %>%
mutate (process_time = issue_date - application_date,
data_state = "IL")
#Remove unformatted date fields and unnecessary fields
il_state <- il_state %>%
select (-c(effective_date)) %>%
rename (city = cred_city,
state = cred_state,
zip_code = cred_zip_code,
app_type = application_type,
lic_type = license_type)
il_state %>%
count(lic_type, app_type)
## # A tibble: 4 × 3
## lic_type app_type n
## <chr> <chr> <int>
## 1 LPN Endorsement 782
## 2 LPN Exam 3152
## 3 RN Endorsement 15661
## 4 RN Exam 21136
str(il_state)
## tibble [40,731 × 16] (S3: tbl_df/tbl/data.frame)
## $ name : chr [1:40731] "Aaren Stankovsky" "Aaron Bradley Matthews" "Aaron Mangin" "Aaron Paul Gough" ...
## $ license_number : chr [1:40731] "41.478384" "41.476619" "41.487066" "41.482213" ...
## $ application_date : Date[1:40731], format: "2019-04-20" "2019-01-28" ...
## $ issue_date : Date[1:40731], format: "2019-04-22" "2019-03-08" ...
## $ expiration_date : Date[1:40731], format: "2022-05-31" "2022-05-31" ...
## $ city : chr [1:40731] "CHICAGO" "Saint Louis" "INDIANAPOLIS" "Celina" ...
## $ state : chr [1:40731] "IL" "MO" "IN" "TX" ...
## $ zip_code : chr [1:40731] "60657" "63141-7067" "46220-3330" "75009-6329" ...
## $ app_type : chr [1:40731] "Endorsement" "Endorsement" "Endorsement" "Endorsement" ...
## $ app_year : num [1:40731] 2019 2019 2019 2019 2019 ...
## $ lic_type : chr [1:40731] "RN" "RN" "RN" "RN" ...
## $ effective_renewal_date: Date[1:40731], format: "2020-05-04" "2020-03-16" ...
## $ month : chr [1:40731] "2019-04" "2019-03" "2019-10" "2019-07" ...
## $ year : chr [1:40731] "2019" "2019" "2019" "2019" ...
## $ process_time : 'difftime' num [1:40731] 2 39 21 47 ...
## ..- attr(*, "units")= chr "days"
## $ data_state : chr [1:40731] "IL" "IL" "IL" "IL" ...
#il_state %>%
# filter (!(is.na(business_name)|business_name=="NULL"))
# filter (!is.na(fax_number))
#Check for license number duplicates
str(state)
## tibble [40,743 × 12] (S3: tbl_df/tbl/data.frame)
## $ name : chr [1:40743] "Aaren Stankovsky" "Aaron Bradley Matthews" "Aaron Mangin" "Aaron Paul Gough" ...
## $ license_number : num [1:40743] 41.5 41.5 41.5 41.5 41.5 ...
## $ application_date: POSIXct[1:40743], format: "2019-04-20" "2019-01-28" ...
## $ issue_date : POSIXct[1:40743], format: "2019-04-22" "2019-03-08" ...
## $ effective_date : POSIXct[1:40743], format: "2020-05-04" "2020-03-16" ...
## $ expiration_date : POSIXct[1:40743], format: "2022-05-31" "2022-05-31" ...
## $ cred_city : chr [1:40743] "CHICAGO" "Saint Louis" "INDIANAPOLIS" "Celina" ...
## $ cred_state : chr [1:40743] "IL" "MO" "IN" "TX" ...
## $ cred_zip_code : chr [1:40743] "60657" "63141-7067" "46220-3330" "75009-6329" ...
## $ application_type: chr [1:40743] "Endorsement" "Endorsement" "Endorsement" "Endorsement" ...
## $ app_year : num [1:40743] 2019 2019 2019 2019 2019 ...
## $ license_type : chr [1:40743] "RN" "RN" "RN" "RN" ...
#No duplicates, so no removal necessary
dupe_numbers <- il_state %>%
count (license_number) %>%
filter (n > 1)
state <- read_excel("../state-data/IA-Records Request.xlsx") %>% clean_names()
#Calculate processing time
ia_state <- state %>%
mutate (application_date = mdy(app_recd_date),
docs_date = mdy(doc_recd_date),
issue_date = mdy(org_issue),
effective_renewal_date = mdy(last_renewal),
approved_to_test_date = mdy(approved_to_test_date),
gather_time = docs_date - application_date,
docs_time = issue_date - docs_date,
expiration_date = mdy(exp_date),
month = substr(issue_date, 1,7),
year = if_else (app_type %in% c("LPN Renewal", "RN Renewal", "LPN Reactivation", "RN Reactivaton"), as.character(year(effective_renewal_date)), as.character(year(issue_date)))) %>%
mutate (process_time = if_else (app_type %in% c("LPN Renewal", "RN Renewal", "LPN Reactivation", "RN Reactivaton"),
effective_renewal_date - application_date, #calculates for ^^ these four types
issue_date - application_date)) #calculates for everything else that's been issued
#Check statuses for those that couldn't calculate process_time, so we can remove clearly inactive applications and calculate pending time for all others as process_time2 in next step -- all appear to be actively pending
ia_state %>%
filter (is.na(process_time)) %>%
count (lic_status) %>%
arrange (desc(n))
## # A tibble: 15 × 2
## lic_status n
## <chr> <int>
## 1 Pending Review 758
## 2 Approved to Test 428
## 3 Pending Re-exam 178
## 4 Pending Reactivation 137
## 5 Pending Approval 124
## 6 Partial Return 64
## 7 Waiver Denied 20
## 8 Pending Fee Waiver 17
## 9 New 6
## 10 Pending Board Review 5
## 11 Pending Renewal 3
## 12 Expired Temporary 2
## 13 Flagged 1
## 14 Hold 1
## 15 Ready Issue 1
ia_state <- ia_state %>%
#Calculates how long those left pending have been pending as process_time2
mutate (process_time2 = if_else (is.na(process_time), as.Date("2021-10-20") - application_date, process_time)) %>%
separate (app_type, into=c("lic_type", "app_type"), sep=" ") %>%
mutate (app_type = case_when (
app_type == "Re-Exam" ~ "Exam-retest",
app_type %in% c("Reactivaton", "Reactivation") & basis_of_lic=="Examination" ~ "Reinstatement-Exam",
app_type %in% c("Reactivaton", "Reactivation") & basis_of_lic=="Endorsement" ~ "Reinstatement-Endorsement",
app_type == "Renewal" & basis_of_lic=="Examination" ~ "Renewal-Exam",
app_type == "Renewal" & basis_of_lic=="Endorsement" ~ "Renewal-Endorsement",
TRUE ~ app_type),
data_state = "IA",
year = if_else (is.na(year), "Pending as of 10/20/21", as.character(year)))
#Any mismatches?
ia_state %>% filter (mdy(org_issue) != issue_date)
## # A tibble: 0 × 28
## # … with 28 variables: f_name <chr>, m_name <chr>, l_name <chr>, city <chr>,
## # state <chr>, duration <chr>, basis_of_lic <chr>, lic_type <chr>,
## # app_type <chr>, app_recd_date <chr>, doc_recd_date <chr>, lic_status <chr>,
## # org_issue <chr>, last_renewal <chr>, exp_date <chr>,
## # approved_to_test_date <date>, application_date <date>, docs_date <date>,
## # issue_date <date>, effective_renewal_date <date>, gather_time <drtn>,
## # docs_time <drtn>, expiration_date <date>, month <chr>, year <chr>, …
ia_state %>% filter (mdy(app_recd_date) != application_date)
## # A tibble: 0 × 28
## # … with 28 variables: f_name <chr>, m_name <chr>, l_name <chr>, city <chr>,
## # state <chr>, duration <chr>, basis_of_lic <chr>, lic_type <chr>,
## # app_type <chr>, app_recd_date <chr>, doc_recd_date <chr>, lic_status <chr>,
## # org_issue <chr>, last_renewal <chr>, exp_date <chr>,
## # approved_to_test_date <date>, application_date <date>, docs_date <date>,
## # issue_date <date>, effective_renewal_date <date>, gather_time <drtn>,
## # docs_time <drtn>, expiration_date <date>, month <chr>, year <chr>, …
#Remove unformatted date fields and unnecessary fields
ia_state <- ia_state %>%
select (-c(basis_of_lic, app_recd_date, doc_recd_date, org_issue, last_renewal, approved_to_test_date, duration, exp_date)) %>%
rename (first_name = f_name,
middle_name = m_name,
last_name = l_name)
ia_state %>%
count(lic_type, app_type)
## # A tibble: 14 × 3
## lic_type app_type n
## <chr> <chr> <int>
## 1 LPN Endorsement 343
## 2 LPN Exam 1988
## 3 LPN Exam-retest 134
## 4 LPN Reinstatement-Endorsement 66
## 5 LPN Reinstatement-Exam 250
## 6 LPN Renewal-Endorsement 824
## 7 LPN Renewal-Exam 5957
## 8 RN Endorsement 3902
## 9 RN Exam 5670
## 10 RN Exam-retest 886
## 11 RN Reinstatement-Endorsement 388
## 12 RN Reinstatement-Exam 866
## 13 RN Renewal-Endorsement 9026
## 14 RN Renewal-Exam 36418
str(ia_state)
## tibble [66,718 × 20] (S3: tbl_df/tbl/data.frame)
## $ first_name : chr [1:66718] "OLIVIA" "KIMBERLEE" "REGINA" "JENNIE" ...
## $ middle_name : chr [1:66718] "CATHERINE" "A" NA "SUZANNE" ...
## $ last_name : chr [1:66718] "BRINEGAR" "WALKER" "SEDORE" "NUEHRING" ...
## $ city : chr [1:66718] "CENTERVILLE" "OTTUMWA" "BLOOMFIELD" "WEBSTER CITY" ...
## $ state : chr [1:66718] "IA" "IA" "IA" "IA" ...
## $ lic_type : chr [1:66718] "RN" "RN" "RN" "RN" ...
## $ app_type : chr [1:66718] "Renewal-Exam" "Renewal-Exam" "Renewal-Exam" "Renewal-Exam" ...
## $ lic_status : chr [1:66718] "Active" "Active" "Active" "Active" ...
## $ application_date : Date[1:66718], format: "2019-01-01" "2019-01-01" ...
## $ docs_date : Date[1:66718], format: NA NA ...
## $ issue_date : Date[1:66718], format: "2007-11-21" "1983-08-01" ...
## $ effective_renewal_date: Date[1:66718], format: "2019-01-01" "2019-01-01" ...
## $ gather_time : 'difftime' num [1:66718] NA NA NA NA ...
## ..- attr(*, "units")= chr "days"
## $ docs_time : 'difftime' num [1:66718] NA NA NA NA ...
## ..- attr(*, "units")= chr "days"
## $ expiration_date : Date[1:66718], format: "2022-02-15" "2022-02-15" ...
## $ month : chr [1:66718] "2007-11" "1983-08" "1998-07" "1998-08" ...
## $ year : chr [1:66718] "2019" "2019" "2019" "2019" ...
## $ process_time : 'difftime' num [1:66718] 0 0 0 0 ...
## ..- attr(*, "units")= chr "days"
## $ process_time2 : 'difftime' num [1:66718] 0 0 0 0 ...
## ..- attr(*, "units")= chr "days"
## $ data_state : chr [1:66718] "IA" "IA" "IA" "IA" ...
#Check for license number duplicates -- license number not included, using name, city and license type instead
str(ia_state)
## tibble [66,718 × 20] (S3: tbl_df/tbl/data.frame)
## $ first_name : chr [1:66718] "OLIVIA" "KIMBERLEE" "REGINA" "JENNIE" ...
## $ middle_name : chr [1:66718] "CATHERINE" "A" NA "SUZANNE" ...
## $ last_name : chr [1:66718] "BRINEGAR" "WALKER" "SEDORE" "NUEHRING" ...
## $ city : chr [1:66718] "CENTERVILLE" "OTTUMWA" "BLOOMFIELD" "WEBSTER CITY" ...
## $ state : chr [1:66718] "IA" "IA" "IA" "IA" ...
## $ lic_type : chr [1:66718] "RN" "RN" "RN" "RN" ...
## $ app_type : chr [1:66718] "Renewal-Exam" "Renewal-Exam" "Renewal-Exam" "Renewal-Exam" ...
## $ lic_status : chr [1:66718] "Active" "Active" "Active" "Active" ...
## $ application_date : Date[1:66718], format: "2019-01-01" "2019-01-01" ...
## $ docs_date : Date[1:66718], format: NA NA ...
## $ issue_date : Date[1:66718], format: "2007-11-21" "1983-08-01" ...
## $ effective_renewal_date: Date[1:66718], format: "2019-01-01" "2019-01-01" ...
## $ gather_time : 'difftime' num [1:66718] NA NA NA NA ...
## ..- attr(*, "units")= chr "days"
## $ docs_time : 'difftime' num [1:66718] NA NA NA NA ...
## ..- attr(*, "units")= chr "days"
## $ expiration_date : Date[1:66718], format: "2022-02-15" "2022-02-15" ...
## $ month : chr [1:66718] "2007-11" "1983-08" "1998-07" "1998-08" ...
## $ year : chr [1:66718] "2019" "2019" "2019" "2019" ...
## $ process_time : 'difftime' num [1:66718] 0 0 0 0 ...
## ..- attr(*, "units")= chr "days"
## $ process_time2 : 'difftime' num [1:66718] 0 0 0 0 ...
## ..- attr(*, "units")= chr "days"
## $ data_state : chr [1:66718] "IA" "IA" "IA" "IA" ...
#These 908 records have duplicates -
dupe_numbers <- ia_state %>%
# filter (!is.na(license_number)) %>% #pending apps will show up as NA
group_by (first_name, middle_name, last_name, city, lic_type, app_type) %>%
mutate (n = n()) %>%
filter (n > 1) %>%
arrange (last_name, first_name, middle_name, city, lic_type, app_type)
#Most are exam-retests
dupe_numbers %>% ungroup() %>% count (app_type) %>% arrange (desc(n))
## # A tibble: 6 × 2
## app_type n
## <chr> <int>
## 1 Exam-retest 434
## 2 Exam 362
## 3 Endorsement 98
## 4 Renewal-Exam 8
## 5 Reinstatement-Exam 4
## 6 Renewal-Endorsement 2
#417 have the same processing time, meaning they're true duplicates
#Only four people have differing dates/process times, only one is a pending endorsement, others are reinstatements
dupe_numbers2 <- dupe_numbers %>%
ungroup() %>%
group_by (first_name, middle_name, last_name, city, lic_type, app_type, process_time2) %>%
mutate (count = n()) %>%
# filter (count == 1) #
filter (count > 1) #to see those with repeating process_time2
#Duplicate license numbers are all temporary licenses, so no removal necessary
ia_state2 <- ia_state %>%
distinct (first_name, middle_name, last_name, city, lic_type, app_type, process_time2, .keep_all=T)
# filter (license_number %in% dupe_numbers) %>%
# count (lic_type, app_type)
# View
#Every person has a match after removing duplicates, so should be OK
anti_join (ia_state, ia_state2, by = c("first_name", "middle_name", "last_name", "city", "lic_type", "app_type", "process_time2")) #%>% View
## # A tibble: 0 × 20
## # … with 20 variables: first_name <chr>, middle_name <chr>, last_name <chr>,
## # city <chr>, state <chr>, lic_type <chr>, app_type <chr>, lic_status <chr>,
## # application_date <date>, docs_date <date>, issue_date <date>,
## # effective_renewal_date <date>, gather_time <drtn>, docs_time <drtn>,
## # expiration_date <date>, month <chr>, year <chr>, process_time <drtn>,
## # process_time2 <drtn>, data_state <chr>
#For example, Heather Lynn ALTEMEIER of West Des Moines, IA has duplicate records in ia_state, but not in ia_state2
ia_state %>% filter (first_name=="HEATHER" & last_name=="ALTEMEIER" & city=="WEST DES MOINES")
## # A tibble: 2 × 20
## first_name middle_name last_name city state lic_type app_type lic_status
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 HEATHER LYNN ALTEMEIER WEST DES … IA RN Exam Active
## 2 HEATHER LYNN ALTEMEIER WEST DES … IA RN Exam Active
## # … with 12 more variables: application_date <date>, docs_date <date>,
## # issue_date <date>, effective_renewal_date <date>, gather_time <drtn>,
## # docs_time <drtn>, expiration_date <date>, month <chr>, year <chr>,
## # process_time <drtn>, process_time2 <drtn>, data_state <chr>
ia_state2 %>% filter (first_name=="HEATHER" & last_name=="ALTEMEIER" & city=="WEST DES MOINES")
## # A tibble: 1 × 20
## first_name middle_name last_name city state lic_type app_type lic_status
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 HEATHER LYNN ALTEMEIER WEST DES … IA RN Exam Active
## # … with 12 more variables: application_date <date>, docs_date <date>,
## # issue_date <date>, effective_renewal_date <date>, gather_time <drtn>,
## # docs_time <drtn>, expiration_date <date>, month <chr>, year <chr>,
## # process_time <drtn>, process_time2 <drtn>, data_state <chr>
#Rename to join to full dataset
ia_state <- ia_state2
rm(ia_state2)
Import Indiana Includes Exam, Endorsement. Only includes pending/denied reinstatements (232) & renewals (14). Includes 3959 temp licenses. Includes pending
state <- read_excel("../state-data/IN-RN and LPN Applications Received 1.1.19-10.6.21.xls",
skip=1,
col_types = c("text", "text", "text", "date", "text", "date", "text","date", "text","date")) %>%
clean_names() %>%
mutate (lic_type = case_when (
between(row_number(), 2, 3319) ~ "LPN",
between(row_number(), 3322, 30754) ~ "RN",
between(row_number(), 30757, 31162) ~ "LPN-Temp",
between(row_number(), 31165, 34717) ~ "RN-Temp",
TRUE ~ "PROBLEM")) #leaves 10 rows of subtotals and total as problem to be filtered out
state %>% filter (lic_type=="PROBLEM") #10 rows of subtotals and total as problem to be filtered out
## # A tibble: 10 × 11
## name city state app_received method issue_date license_no
## <chr> <chr> <chr> <dttm> <chr> <dttm> <chr>
## 1 Licensed Practical Nur… <NA> <NA> NA <NA> NA <NA>
## 2 Licensed Practical Nur… 3318 <NA> NA <NA> NA <NA>
## 3 Registered Nurse <NA> <NA> NA <NA> NA <NA>
## 4 Registered Nurse 27433 <NA> NA <NA> NA <NA>
## 5 Temporary LPN Permit <NA> <NA> NA <NA> NA <NA>
## 6 Temporary LPN Permit 406 <NA> NA <NA> NA <NA>
## 7 Temporary RN Permit <NA> <NA> NA <NA> NA <NA>
## 8 Temporary RN Permit 3553 <NA> NA <NA> NA <NA>
## 9 GRAND TOTAL 34710 <NA> NA <NA> NA <NA>
## 10 Page -1 of 1 <NA> <NA> NA <NA> NA <NA>
## # … with 4 more variables: expiration_date <dttm>, status <chr>,
## # last_renewed <dttm>, lic_type <chr>
#Calculate processing time
in_state <- state %>%
filter (lic_type != "PROBLEM") %>%
mutate (application_date = as.Date(app_received),
issue_date = as.Date(issue_date),
effective_renewal_date = as.Date(last_renewed),
expiration_date = as.Date(expiration_date),
month = substr(issue_date, 1,7),
year = as.character(year(issue_date)),
lic_type = case_when (
lic_type == "Temp-LPN" ~ "LPN-Temp",
lic_type == "Temp-RN" ~ "RN-Temp",
TRUE ~ lic_type),
app_type = case_when (
method == "Application" ~ "Application",
method == "Examination" ~ "Exam",
TRUE ~ method)) %>%
mutate (process_time = issue_date - application_date)
#Check statuses for those that couldn't calculate process_time, so we can remove clearly inactive applications and calculate pending time for all others as process_time2 in next step -- all appear to be actively pending
in_state %>%
filter (is.na(process_time)) %>%
count (status) %>%
arrange (desc(n))
## # A tibble: 6 × 2
## status n
## <chr> <int>
## 1 Pending Application 5832
## 2 Application Denied 2263
## 3 Abandoned Application 863
## 4 Reinstatement Pending 212
## 5 Withdrawn Application 196
## 6 Superseded 72
#Calculates how long those left pending have been pending as process_time2
in_state <- in_state %>%
mutate (process_time2 = if_else (is.na(process_time) & status %in% c("Pending Application", "Reinstatement Pending"),
as.Date("2021-10-07") - application_date,
process_time),
data_state = "IN", #Calculates how long those left pending have been pending
year = if_else (is.na(year), "Pending as of 10/07/21", as.character(year)))
#Any mismatches?
in_state %>% filter (as.Date(issue_date) != issue_date)
## # A tibble: 0 × 19
## # … with 19 variables: name <chr>, city <chr>, state <chr>,
## # app_received <dttm>, method <chr>, issue_date <date>, license_no <chr>,
## # expiration_date <date>, status <chr>, last_renewed <dttm>, lic_type <chr>,
## # application_date <date>, effective_renewal_date <date>, month <chr>,
## # year <chr>, app_type <chr>, process_time <drtn>, process_time2 <drtn>,
## # data_state <chr>
in_state %>% filter (as.Date(app_received) != application_date)
## # A tibble: 0 × 19
## # … with 19 variables: name <chr>, city <chr>, state <chr>,
## # app_received <dttm>, method <chr>, issue_date <date>, license_no <chr>,
## # expiration_date <date>, status <chr>, last_renewed <dttm>, lic_type <chr>,
## # application_date <date>, effective_renewal_date <date>, month <chr>,
## # year <chr>, app_type <chr>, process_time <drtn>, process_time2 <drtn>,
## # data_state <chr>
#Remove unformatted date fields and unnecessary fields
in_state <- in_state %>%
select (-c(app_received, last_renewed, method)) %>%
rename (license_number = license_no,
app_status = status)
in_state %>%
#count(lic_type, method, app_type)
count (lic_type, app_type)
## # A tibble: 9 × 3
## lic_type app_type n
## <chr> <chr> <int>
## 1 LPN Endorsement 1389
## 2 LPN Exam 1832
## 3 LPN Reinstatement 97
## 4 LPN-Temp Application 406
## 5 RN Endorsement 13548
## 6 RN Exam 13736
## 7 RN Reinstatement 135
## 8 RN Renewal 14
## 9 RN-Temp Application 3553
str(in_state)
## tibble [34,710 × 16] (S3: tbl_df/tbl/data.frame)
## $ name : chr [1:34710] "Bolden, Marcus Levar" "Willis, Sophronia Avant" "Dornseif, Monika A" "Grant, Mari C." ...
## $ city : chr [1:34710] "Chicago" "Saint Louis" "Fort Wayne" "Lansing" ...
## $ state : chr [1:34710] "IL" "MO" "IN" "IL" ...
## $ issue_date : Date[1:34710], format: NA NA ...
## $ license_number : chr [1:34710] NA NA NA NA ...
## $ expiration_date : Date[1:34710], format: NA NA ...
## $ app_status : chr [1:34710] "Abandoned Application" "Abandoned Application" "Pending Application" "Abandoned Application" ...
## $ lic_type : chr [1:34710] "LPN" "LPN" "LPN" "LPN" ...
## $ application_date : Date[1:34710], format: "2019-03-08" "2019-03-11" ...
## $ effective_renewal_date: Date[1:34710], format: NA NA ...
## $ month : chr [1:34710] NA NA NA NA ...
## $ year : chr [1:34710] "Pending as of 10/07/21" "Pending as of 10/07/21" "Pending as of 10/07/21" "Pending as of 10/07/21" ...
## $ app_type : chr [1:34710] "Endorsement" "Endorsement" "Exam" "Endorsement" ...
## $ process_time : 'difftime' num [1:34710] NA NA NA NA ...
## ..- attr(*, "units")= chr "days"
## $ process_time2 : 'difftime' num [1:34710] NA NA 940 NA ...
## ..- attr(*, "units")= chr "days"
## $ data_state : chr [1:34710] "IN" "IN" "IN" "IN" ...
#Check for license number duplicates
str(state)
## tibble [34,720 × 11] (S3: tbl_df/tbl/data.frame)
## $ name : chr [1:34720] "Licensed Practical Nurse" "Bolden, Marcus Levar" "Willis, Sophronia Avant" "Dornseif, Monika A" ...
## $ city : chr [1:34720] NA "Chicago" "Saint Louis" "Fort Wayne" ...
## $ state : chr [1:34720] NA "IL" "MO" "IN" ...
## $ app_received : POSIXct[1:34720], format: NA "2019-03-08" ...
## $ method : chr [1:34720] NA "Endorsement" "Endorsement" "Examination" ...
## $ issue_date : POSIXct[1:34720], format: NA NA ...
## $ license_no : chr [1:34720] NA NA NA NA ...
## $ expiration_date: POSIXct[1:34720], format: NA NA ...
## $ status : chr [1:34720] NA "Abandoned Application" "Abandoned Application" "Pending Application" ...
## $ last_renewed : POSIXct[1:34720], format: NA NA ...
## $ lic_type : chr [1:34720] "PROBLEM" "LPN" "LPN" "LPN" ...
#1 license is duplicated
dupe_numbers <- in_state %>%
filter (!is.na(license_number)) %>% #pending apps will show up as NA
count (license_number) %>%
filter (n > 1) %>%
distinct (license_number) %>%
pull()
#Only one license number is duplicated. First application was denied and reinstatement pending, so no removal necessary
in_state %>%
filter (license_number %in% dupe_numbers)
## # A tibble: 2 × 16
## name city state issue_date license_number expiration_date app_status
## <chr> <chr> <chr> <date> <chr> <date> <chr>
## 1 Tolson, K… Knox IN NA 27049201A NA Application …
## 2 Tolson, K… Knox IN NA 27049201A NA Reinstatemen…
## # … with 9 more variables: lic_type <chr>, application_date <date>,
## # effective_renewal_date <date>, month <chr>, year <chr>, app_type <chr>,
## # process_time <drtn>, process_time2 <drtn>, data_state <chr>
#Second version from KY
state <- read_excel("../state-data/KY/KY-NPR_ORR_20211109.xlsx",
col_types = c("text", "text", "text", "text", "text", "text", "text", "text", "text", "text", "text", "text", "text", "text", "text", "text", "text", "text", "text", "date", "text", "date", "date", "date", "text", "date", "date", "date")
) %>%
clean_names() %>%
mutate (ky_licensure_date = mdy(ky_licensure_date),
ky_license_expiration_date = mdy(ky_license_expiration_date))
#Earliest and latest license issuance? (Jan 3, 2019-Nov. 9, 2021)
state %>%
summarize( range (ky_licensure_date))
## # A tibble: 2 × 1
## `range(ky_licensure_date)`
## <date>
## 1 2019-01-03
## 2 2021-11-09
#Earliest and latest application (Jan 1, 2019-Nov. 9, 2021)
state %>%
summarize( range (application_received_on))
## # A tibble: 2 × 1
## `range(application_received_on)`
## <dttm>
## 1 2019-01-01 00:00:00
## 2 2021-11-09 00:00:00
#How many of each type exist? Exam, endorsement, reinstatement (148)
state %>%
count (license_type, application_type) %>%
arrange (desc(n))
## # A tibble: 6 × 3
## license_type application_type n
## <chr> <chr> <int>
## 1 RN Examination 9028
## 2 RN Endorsement 4707
## 3 LPN Examination 1863
## 4 LPN Endorsement 544
## 5 RN Reinstatement 128
## 6 LPN Reinstatement 20
#3000 examples where a provisional license was issued before an application received
#150 cases where the licensure_date is before the application received date
state %>%
filter (ky_licensure_date < application_received_on)
## # A tibble: 150 × 28
## first_name last_name middle_name maiden_name address_line1 address_line2
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 AARON QUINN KELTON <NA> 295 HEMLOCK LANE <NA>
## 2 AARON MCCOY <NA> <NA> 11509 STAFFORDSBU… <NA>
## 3 ABIGAIL DIAMOND FAITH <NA> 2615 S 9TH ST <NA>
## 4 ALEAH GRAHAM DAWN CLARK 1328 MILLDALE ROAD <NA>
## 5 ALEXIS TAMESIS JOHN <NA> 52 VALLEY VIEW RO… <NA>
## 6 ALICE JONES DIANA HOPKINS 1587 KY HWY 3004 <NA>
## 7 ALLISON HELLMANN <NA> <NA> 840 EASTERN PARKW… <NA>
## 8 ALLISON HENN MARIE BRODERICK 341 MARBLE CLIFF … <NA>
## 9 ALLISON KEOWN BROOKE PAYNE 4401 STATE ROUTE … <NA>
## 10 AMANDA JEWELL ELAINE JEWELL 3348 HANNA AVENUE <NA>
## # … with 140 more rows, and 22 more variables: city <chr>, state <chr>,
## # zip <chr>, county_of_residence <chr>, ethnic_group <chr>,
## # highest_education_level <chr>, prelicensure_education_preparation <chr>,
## # license_type <chr>, state_of_original_license <chr>,
## # ky_license_number <chr>, ky_licensure_date <date>,
## # ky_license_expiration_date <date>, application_type <chr>,
## # application_received_on <dttm>, provisional_license_number <chr>, …
#Add count
state2 <- state %>%
filter (application_type != "Reinstatement") %>% #reinstatements will not come out correctly so remove them
group_by (ky_license_number) %>%
mutate (count = n())
#Calculate for people who applied more than once because their times will be off.
#Use process time from first application and process_time_prov from first application
dupes <- state2 %>%
filter (count > 1) %>%
arrange (last_name, first_name, application_received_on) %>%
mutate (issue_date = as.Date(ky_licensure_date),
expiration_date = as.Date(ky_license_expiration_date),
application_date = as.Date(application_received_on),
provisional_issue_date = as.Date(provisional_issue_date),
temporary_issue_date = as.Date(temporary_issue_date),
month = substr(issue_date, 1,7),
year = as.character(year(issue_date)),
app_year = year (application_date)) %>%
mutate (process_time = issue_date - application_date,
process_time_prov = provisional_issue_date - application_date,
process_time_temp = temporary_issue_date - application_date)
#Keep only first application date, leaves no negative process times
#Few negative provisional license times, all from early 2019, meaning initial application was likely in 2018 and not included here. Could pull from other document for 10 licenses affected
dupes2 <- dupes %>%
slice_head (n=1)
#Compare all the various applications with their process times, looks like provisional licenses were always issued on first application
dupes_count <- dupes %>%
group_by (ky_license_number, count) %>%
mutate (lic_app = paste0(license_type, "-", application_type)) %>%
summarise(Type = toString(lic_app),
App_Dates = toString(application_received_on),
Issue_Dates = toString(ky_licensure_date),
Issue_time = toString(process_time),
Prov_date = toString(provisional_issue_date),
Prov_time = toString(process_time_prov)
) %>%
separate(Type, into = paste0("Type", 1:9), sep = ", ", fill = "right", extra = "drop") %>%
separate(App_Dates, into = paste0("App", 1:9), sep = ", ", fill = "right", extra = "drop") %>%
separate(Issue_Dates, into = paste0("Iss", 1:9), sep = ", ", fill = "right", extra = "drop") %>%
separate(Issue_time, into = paste0("Time", 1:9), sep = ", ", fill = "right", extra = "drop") %>%
separate(Prov_date, into = paste0("Prov", 1:9), sep = ", ", fill = "right", extra = "drop") %>%
separate(Prov_time, into = paste0("PTime", 1:9), sep = ", ", fill = "right", extra = "drop") %>%
ungroup()
#Clean up, keeping only first application for those with multiple
dupes <- state2 %>%
filter (count > 1) %>%
arrange (last_name, first_name, application_received_on) %>%
slice_head (n=1)
#Only 1856 temporary licenses given out, from uncleaned data
state %>%
filter (!is.na(temporary_issue_date))
## # A tibble: 1,856 × 28
## first_name last_name middle_name maiden_name address_line1 address_line2
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 AARON MCCOY <NA> <NA> 11509 STAFFORD… <NA>
## 2 AARON MCCOY <NA> <NA> 11509 STAFFORD… <NA>
## 3 ABAGAIL LUCAS MARIE <NA> 6705 KERN DRIVE <NA>
## 4 ABBEY MORENO-GAFT WINTER <NA> 860 SUMMERVILL… <NA>
## 5 ABHILASHA NEUMAIER SAMUEL RAGINI CHAND 501 INDEPENDEN… <NA>
## 6 ABHILASHA NEUMAIER SAMUEL RAGINI CHAND 501 INDEPENDEN… <NA>
## 7 ABIGAEL ESTER VICTORIA ESTER 229 FRONT STRE… <NA>
## 8 ABIGAIL CHUA ASHLEY <NA> 3310 CARDIFF A… APT 103
## 9 ABIGAIL SHIRLEY LEE <NA> 106 GREENVIEW … <NA>
## 10 ADA MALONE C <NA> PO BOX 2865 <NA>
## # … with 1,846 more rows, and 22 more variables: city <chr>, state <chr>,
## # zip <chr>, county_of_residence <chr>, ethnic_group <chr>,
## # highest_education_level <chr>, prelicensure_education_preparation <chr>,
## # license_type <chr>, state_of_original_license <chr>,
## # ky_license_number <chr>, ky_licensure_date <date>,
## # ky_license_expiration_date <date>, application_type <chr>,
## # application_received_on <dttm>, provisional_license_number <chr>, …
#Calculate processing time
state3 <- state2 %>%
filter (count == 1) %>% #keep only people in there once
rbind (dupes) %>% #add correct record for those with multiples back in
ungroup() %>%
mutate (issue_date = as.Date(ky_licensure_date),
application_date = as.Date(application_received_on),
expiration_date = as.Date(ky_license_expiration_date),
provisional_issue_date = as.Date(provisional_issue_date),
temporary_issue_date = as.Date(temporary_issue_date),
month = substr(issue_date, 1,7),
year = as.character(year(issue_date)),
app_year = year (application_date),
app_type = case_when (
application_type == "Examination" ~ "Exam",
application_type == "Endorsement" ~ "Endorsement") ) %>%
mutate (process_time = issue_date - application_date,
process_time_prov = provisional_issue_date - application_date,
process_time_temp = temporary_issue_date - application_date)
#How many license times come out negative? 150, 148 are reinstatements
state3 %>%
filter (process_time < 0 ) %>%
#arrange (process_time)
count (application_type)
## # A tibble: 0 × 2
## # … with 2 variables: application_type <chr>, n <int>
#filter (application_type != "Reinstatement")
#How many license times don't calculate? #no errors
state3 %>%
filter (is.na(process_time))#%>%
## # A tibble: 0 × 39
## # … with 39 variables: first_name <chr>, last_name <chr>, middle_name <chr>,
## # maiden_name <chr>, address_line1 <chr>, address_line2 <chr>, city <chr>,
## # state <chr>, zip <chr>, county_of_residence <chr>, ethnic_group <chr>,
## # highest_education_level <chr>, prelicensure_education_preparation <chr>,
## # license_type <chr>, state_of_original_license <chr>,
## # ky_license_number <chr>, ky_licensure_date <date>,
## # ky_license_expiration_date <date>, application_type <chr>, …
#arrange (process_time)
#count (application_type)
#How many prov license times come out negative? 30, after removing duplicates. 1241, almost all are examination before removing dupes
state3 %>%
filter (process_time_prov < 0 ) %>%
count (application_type)
## # A tibble: 1 × 2
## application_type n
## * <chr> <int>
## 1 Examination 30
#How many prov license times don't calculate? #5502, mostly endorsement
state3 %>%
ungroup() %>%
filter (is.na(process_time_prov)) %>%
count (application_type)
## # A tibble: 2 × 2
## application_type n
## * <chr> <int>
## 1 Endorsement 5160
## 2 Examination 342
#How many temp license times come out negative? 10
state3 %>%
filter (process_time_temp < 0 )
## # A tibble: 0 × 39
## # … with 39 variables: first_name <chr>, last_name <chr>, middle_name <chr>,
## # maiden_name <chr>, address_line1 <chr>, address_line2 <chr>, city <chr>,
## # state <chr>, zip <chr>, county_of_residence <chr>, ethnic_group <chr>,
## # highest_education_level <chr>, prelicensure_education_preparation <chr>,
## # license_type <chr>, state_of_original_license <chr>,
## # ky_license_number <chr>, ky_licensure_date <date>,
## # ky_license_expiration_date <date>, application_type <chr>, …
#How many temp license times don't calculate? #13000, mostly exam
state2 %>%
ungroup () %>%
filter (is.na(temporary_issue_date)) %>%
count (application_type)
## # A tibble: 2 × 2
## application_type n
## * <chr> <int>
## 1 Endorsement 3502
## 2 Examination 10863
#Create dataframe of only provisional licenses
ky_prov <- state3 %>%
filter (!is.na(process_time_prov)) %>% #remove NA times
filter (process_time_prov > -1) %>% #remove 10 negative times
mutate (lic_type = paste0(license_type, "-Provisional")) %>%
#Keep only details for provisional license issuance and rename
select (-c(process_time, process_time_temp, ky_license_number, ky_licensure_date, ky_license_expiration_date, temporary_license_number, temporary_issue_date, temporary_expiration_date, temporary_void_date)) %>%
rename (process_time = process_time_prov,
ky_license_number = provisional_license_number,
ky_licensure_date = provisional_issue_date,
ky_license_expiration_date = provisional_expiration_date)
#Any provisional void dates different from expiration date? NONE
ky_prov %>%
filter (ky_license_expiration_date != provisional_void_date)
## # A tibble: 0 × 31
## # … with 31 variables: first_name <chr>, last_name <chr>, middle_name <chr>,
## # maiden_name <chr>, address_line1 <chr>, address_line2 <chr>, city <chr>,
## # state <chr>, zip <chr>, county_of_residence <chr>, ethnic_group <chr>,
## # highest_education_level <chr>, prelicensure_education_preparation <chr>,
## # license_type <chr>, state_of_original_license <chr>,
## # application_type <chr>, application_received_on <dttm>,
## # ky_license_number <chr>, ky_licensure_date <date>, …
#Create dataframe of only temporary licenses
ky_temp <- state3 %>%
filter (!is.na(process_time_temp)) %>% #remove NA times
filter (process_time_temp > -1) %>% #remove 10 negative times
mutate (lic_type = paste0(license_type, "-Temp")) %>%
select (-c(process_time, process_time_prov, ky_license_number, ky_licensure_date, ky_license_expiration_date, provisional_license_number, provisional_issue_date, provisional_expiration_date, provisional_void_date)) %>%
rename (process_time = process_time_temp,
ky_license_number = temporary_license_number,
ky_licensure_date = temporary_issue_date ,
ky_license_expiration_date = temporary_expiration_date )
#Any temp void dates different from exporation date? NONE
ky_temp %>%
filter (ky_license_expiration_date != temporary_void_date)
## # A tibble: 0 × 31
## # … with 31 variables: first_name <chr>, last_name <chr>, middle_name <chr>,
## # maiden_name <chr>, address_line1 <chr>, address_line2 <chr>, city <chr>,
## # state <chr>, zip <chr>, county_of_residence <chr>, ethnic_group <chr>,
## # highest_education_level <chr>, prelicensure_education_preparation <chr>,
## # license_type <chr>, state_of_original_license <chr>,
## # application_type <chr>, application_received_on <dttm>,
## # ky_license_number <chr>, ky_licensure_date <date>, …
state4 <- state3 %>%
select (-c(process_time_prov, process_time_temp, provisional_license_number, provisional_issue_date, provisional_expiration_date, provisional_void_date, temporary_license_number, temporary_issue_date, temporary_expiration_date, temporary_void_date)) %>%
mutate (lic_type = license_type)
#Current data has one row if a person got a permanent, provisional and temporary license. This create new one row per person and their multiple license types.
ky_state <- bind_rows (state4, ky_prov, ky_temp) %>%
select (-c(temporary_void_date, provisional_void_date)) #These are same as expiration dates, so remove
#Check for missing data
ky_state %>%
filter (is.na(lic_type))
## # A tibble: 0 × 30
## # … with 30 variables: first_name <chr>, last_name <chr>, middle_name <chr>,
## # maiden_name <chr>, address_line1 <chr>, address_line2 <chr>, city <chr>,
## # state <chr>, zip <chr>, county_of_residence <chr>, ethnic_group <chr>,
## # highest_education_level <chr>, prelicensure_education_preparation <chr>,
## # license_type <chr>, state_of_original_license <chr>,
## # ky_license_number <chr>, ky_licensure_date <date>,
## # ky_license_expiration_date <dttm>, application_type <chr>, …
ky_state %>%
filter (is.na(license_type))
## # A tibble: 0 × 30
## # … with 30 variables: first_name <chr>, last_name <chr>, middle_name <chr>,
## # maiden_name <chr>, address_line1 <chr>, address_line2 <chr>, city <chr>,
## # state <chr>, zip <chr>, county_of_residence <chr>, ethnic_group <chr>,
## # highest_education_level <chr>, prelicensure_education_preparation <chr>,
## # license_type <chr>, state_of_original_license <chr>,
## # ky_license_number <chr>, ky_licensure_date <date>,
## # ky_license_expiration_date <dttm>, application_type <chr>, …
ky_state %>%
filter (is.na(ky_licensure_date))
## # A tibble: 0 × 30
## # … with 30 variables: first_name <chr>, last_name <chr>, middle_name <chr>,
## # maiden_name <chr>, address_line1 <chr>, address_line2 <chr>, city <chr>,
## # state <chr>, zip <chr>, county_of_residence <chr>, ethnic_group <chr>,
## # highest_education_level <chr>, prelicensure_education_preparation <chr>,
## # license_type <chr>, state_of_original_license <chr>,
## # ky_license_number <chr>, ky_licensure_date <date>,
## # ky_license_expiration_date <dttm>, application_type <chr>, …
ky_state %>%
filter (is.na(application_date))
## # A tibble: 0 × 30
## # … with 30 variables: first_name <chr>, last_name <chr>, middle_name <chr>,
## # maiden_name <chr>, address_line1 <chr>, address_line2 <chr>, city <chr>,
## # state <chr>, zip <chr>, county_of_residence <chr>, ethnic_group <chr>,
## # highest_education_level <chr>, prelicensure_education_preparation <chr>,
## # license_type <chr>, state_of_original_license <chr>,
## # ky_license_number <chr>, ky_licensure_date <date>,
## # ky_license_expiration_date <dttm>, application_type <chr>, …
#Any mismatches?
ky_state %>% filter (as.Date(application_received_on) != application_date)
## # A tibble: 0 × 30
## # … with 30 variables: first_name <chr>, last_name <chr>, middle_name <chr>,
## # maiden_name <chr>, address_line1 <chr>, address_line2 <chr>, city <chr>,
## # state <chr>, zip <chr>, county_of_residence <chr>, ethnic_group <chr>,
## # highest_education_level <chr>, prelicensure_education_preparation <chr>,
## # license_type <chr>, state_of_original_license <chr>,
## # ky_license_number <chr>, ky_licensure_date <date>,
## # ky_license_expiration_date <dttm>, application_type <chr>, …
#Remove unformatted date fields and unnecessary fields
ky_state <- ky_state %>%
mutate (data_state = "KY") %>%
select (-c(license_type, application_received_on, ky_licensure_date, ky_license_expiration_date, application_type, count)) %>%
rename (license_number = ky_license_number,
zip_code = zip,
county = county_of_residence,
ethnicity = ethnic_group)
ky_state %>%
count(lic_type, app_type)
## # A tibble: 12 × 3
## lic_type app_type n
## <chr> <chr> <int>
## 1 LPN Endorsement 541
## 2 LPN Exam 1715
## 3 LPN-Provisional Endorsement 2
## 4 LPN-Provisional Exam 1639
## 5 LPN-Temp Endorsement 173
## 6 LPN-Temp Exam 7
## 7 RN Endorsement 4623
## 8 RN Exam 7926
## 9 RN-Provisional Endorsement 2
## 10 RN-Provisional Exam 7630
## 11 RN-Temp Endorsement 1536
## 12 RN-Temp Exam 16
str(ky_state)
## tibble [25,810 × 25] (S3: tbl_df/tbl/data.frame)
## $ first_name : chr [1:25810] "AALAIAH" "AARON" "AARON" "AARON" ...
## $ last_name : chr [1:25810] "ALDRIDGE" "COLE" "GUSTIN" "HUTCHISON" ...
## $ middle_name : chr [1:25810] "KENDAL" "BRANDT" "MATTHEW" "KYLE" ...
## $ maiden_name : chr [1:25810] NA NA NA NA ...
## $ address_line1 : chr [1:25810] "121 BREIGHTON CIRCLE" "74 RACHEL WAY" "612 SOUTH 6TH STREET" "3570 OLYMPIA RD" ...
## $ address_line2 : chr [1:25810] "APT 925" NA NA NA ...
## $ city : chr [1:25810] "SHELBYVILLE" "LANCASTER" "IRONTON" "LEXINGTON" ...
## $ state : chr [1:25810] "KY" "KY" "OH" "KY" ...
## $ zip_code : chr [1:25810] "40065" "40444" "45638" "40517" ...
## $ county : chr [1:25810] "SHELBY" "GARRARD" "OUT OF STATE" "FAYETTE" ...
## $ ethnicity : chr [1:25810] "CAUCASIAN" "CAUCASIAN" "CAUCASIAN" "CAUCASIAN" ...
## $ highest_education_level : chr [1:25810] "ASSOCIATE DEGREE/NURSING" "ASSOCIATE DEGREE/NURSING" "DIPLOMA NURSING (RN)" "BACCALAUREATE/NURSING" ...
## $ prelicensure_education_preparation: chr [1:25810] "RN-ASSOCIATE DEGREE" "RN-ASSOCIATE DEGREE" "RN-DIPLOMA" "RN-BACCALAUREATE OR HIGHER" ...
## $ state_of_original_license : chr [1:25810] "KY" "KY" "KY" "KY" ...
## $ license_number : chr [1:25810] "1165750" "1171701" "1169353" "1167630" ...
## $ issue_date : Date[1:25810], format: "2020-01-10" "2021-03-30" ...
## $ application_date : Date[1:25810], format: "2019-10-22" "2020-11-17" ...
## $ expiration_date : Date[1:25810], format: "2022-10-31" "2022-10-31" ...
## $ month : chr [1:25810] "2020-01" "2021-03" "2020-09" "2020-06" ...
## $ year : chr [1:25810] "2020" "2021" "2020" "2020" ...
## $ app_year : num [1:25810] 2019 2020 2020 2020 2019 ...
## $ app_type : chr [1:25810] "Exam" "Exam" "Exam" "Exam" ...
## $ process_time : 'difftime' num [1:25810] 80 133 28 99 ...
## ..- attr(*, "units")= chr "days"
## $ lic_type : chr [1:25810] "RN" "RN" "RN" "RN" ...
## $ data_state : chr [1:25810] "KY" "KY" "KY" "KY" ...
#Check for license number duplicates
str(state)
## tibble [16,290 × 28] (S3: tbl_df/tbl/data.frame)
## $ first_name : chr [1:16290] "AALAIAH" "AALIYAH" "AALIYAH" "AARON" ...
## $ last_name : chr [1:16290] "ALDRIDGE" "ELMORE" "ELMORE" "COLE" ...
## $ middle_name : chr [1:16290] "KENDAL" "SUSAN" "SUSAN" "BRANDT" ...
## $ maiden_name : chr [1:16290] NA NA NA NA ...
## $ address_line1 : chr [1:16290] "121 BREIGHTON CIRCLE" "3612 W WARWICK DRIVE" "3612 W WARWICK DRIVE" "74 RACHEL WAY" ...
## $ address_line2 : chr [1:16290] "APT 925" NA NA NA ...
## $ city : chr [1:16290] "SHELBYVILLE" "PEORIA" "PEORIA" "LANCASTER" ...
## $ state : chr [1:16290] "KY" "IL" "IL" "KY" ...
## $ zip : chr [1:16290] "40065" "61615" "61615" "40444" ...
## $ county_of_residence : chr [1:16290] "SHELBY" "OUT OF STATE" "OUT OF STATE" "GARRARD" ...
## $ ethnic_group : chr [1:16290] "CAUCASIAN" "AFRICAN AMERICAN" "AFRICAN AMERICAN" "CAUCASIAN" ...
## $ highest_education_level : chr [1:16290] "ASSOCIATE DEGREE/NURSING" "BACCALAUREATE/NURSING" "BACCALAUREATE/NURSING" "ASSOCIATE DEGREE/NURSING" ...
## $ prelicensure_education_preparation: chr [1:16290] "RN-ASSOCIATE DEGREE" "RN-BACCALAUREATE OR HIGHER" "RN-BACCALAUREATE OR HIGHER" "RN-ASSOCIATE DEGREE" ...
## $ license_type : chr [1:16290] "RN" "RN" "RN" "RN" ...
## $ state_of_original_license : chr [1:16290] "KY" "KY" "KY" "KY" ...
## $ ky_license_number : chr [1:16290] "1165750" "1171921" "1171921" "1171701" ...
## $ ky_licensure_date : Date[1:16290], format: "2020-01-10" "2021-05-02" ...
## $ ky_license_expiration_date : Date[1:16290], format: "2022-10-31" "2022-10-31" ...
## $ application_type : chr [1:16290] "Examination" "Examination" "Examination" "Examination" ...
## $ application_received_on : POSIXct[1:16290], format: "2019-10-22" "2020-11-23" ...
## $ provisional_license_number : chr [1:16290] "20193094" "20203649" "20203649" "20210396" ...
## $ provisional_issue_date : POSIXct[1:16290], format: "2019-12-18" "2020-12-16" ...
## $ provisional_expiration_date : POSIXct[1:16290], format: "2020-01-10" "2021-02-21" ...
## $ provisional_void_date : POSIXct[1:16290], format: NA "2021-02-21" ...
## $ temporary_license_number : chr [1:16290] NA NA NA NA ...
## $ temporary_issue_date : POSIXct[1:16290], format: NA NA ...
## $ temporary_expiration_date : POSIXct[1:16290], format: NA NA ...
## $ temporary_void_date : POSIXct[1:16290], format: NA NA ...
#No license numbers are duplicated. No removal necessary.
dupe_numbers <- ky_state %>%
#filter (!is.na(ky_license_number)) %>% #pending apps will show up as NA
group_by (license_number) %>%
mutate (n = n()) %>%
filter (n > 1)
state <- read_excel("../state-data/LA-NPR Public Records Request.LSBPNE.xlsx",
col_types = c("date", "text", "text", "text", "text", "text", "text", "text", "text", "text", "date", "date")) %>%
clean_names()
#All renewals have negative processing times -- they provided original application, not renewal application data
la_state <- state %>%
filter (!str_detect (application_type, "Renewal")) %>%
#combine 59 miscategorized rows
mutate (application_type = if_else (str_detect(application_type, "^Use this application"), "LPN Initial by Exam Retest Application", application_type)) %>%
separate (application_type, into=c("lic_type", "app_type"), sep="\\s", extra = "merge") %>%
mutate (app_type = case_when (
app_type == "Endorsement Application" ~ "Endorsement",
app_type == "Initial by Exam Application" ~ "Exam",
app_type == "Initial by Exam Retest Application" ~ "Exam-retest")) %>%
mutate (application_date = as.Date(application_submitted_date),
issue_date = as.Date(issue_date),
expiration_date = as.Date(expiration_date),
month = substr(issue_date, 1,7),
year = as.character(year(issue_date))) %>%
mutate (process_time = issue_date - application_date)
#Check status for those that couldn't calculate process_time to find those actively pending, and set them to calculate pending time as process_time2
la_state %>%
filter (is.na(process_time)) %>%
count (application_status) %>%
arrange (desc(n))
## # A tibble: 12 × 2
## application_status n
## <chr> <int>
## 1 Void 316
## 2 Processing 284
## 3 Incomplete 184
## 4 Stopped 156
## 5 Approved-Retest 152
## 6 Retest 112
## 7 Expired 106
## 8 Closed 34
## 9 Administrative Denial 13
## 10 Denied 3
## 11 Ready to issue 1
## 12 Withdrawn 1
#Calculates how long those left pending have been pending as process_time2
la_state <- la_state %>%
mutate (process_time2 = if_else (is.na(process_time) & application_status %in% c("Approved-Retest", "Incomplete", "Processing", "Ready to issue", "Retest"),
as.Date("2021-10-06") - application_date,
process_time),#Calculates how long those left pending have been pending
data_state = "LA",
year = if_else (is.na(year), "Pending as of 10/06/21", as.character(year)))
#Any mismatches?
la_state %>% filter (as.Date(application_submitted_date) != application_date)
## # A tibble: 0 × 19
## # … with 19 variables: application_submitted_date <dttm>, lic_type <chr>,
## # app_type <chr>, application_status <chr>, license_status <chr>,
## # first_name <chr>, middle_name <chr>, last_name <chr>, city <chr>,
## # state_province <chr>, duration <chr>, issue_date <date>,
## # expiration_date <date>, application_date <date>, month <chr>, year <chr>,
## # process_time <drtn>, process_time2 <drtn>, data_state <chr>
#Remove unformatted date fields and unnecessary fields
la_state <- la_state %>%
select (-c(application_submitted_date)) %>%
rename (state = state_province,
app_status = application_status,
lic_status = license_status)
la_state %>%
count(lic_type, app_type)
## # A tibble: 3 × 3
## lic_type app_type n
## <chr> <chr> <int>
## 1 LPN Endorsement 745
## 2 LPN Exam 2737
## 3 LPN Exam-retest 662
str(la_state)
## tibble [4,144 × 18] (S3: tbl_df/tbl/data.frame)
## $ lic_type : chr [1:4144] "LPN" "LPN" "LPN" "LPN" ...
## $ app_type : chr [1:4144] "Endorsement" "Exam" "Endorsement" "Exam" ...
## $ app_status : chr [1:4144] "Expired" "Approved" "Expired" "Approved" ...
## $ lic_status : chr [1:4144] "NULL" "Inactive - Delinquent" "NULL" "Active" ...
## $ first_name : chr [1:4144] "KAYLEIGH" "SEMONE" "EMMA" "MEGAN" ...
## $ middle_name : chr [1:4144] "MARY" "RENEE" "KATHERINE" "MARIE" ...
## $ last_name : chr [1:4144] "COTTO" "REED" "LADELFA" "BROWN" ...
## $ city : chr [1:4144] "BERLIN" "NATCHEZ" "PALMYRA" "WEST MONROE" ...
## $ state : chr [1:4144] "CT" "MS" "NY" "LA" ...
## $ duration : chr [1:4144] "Permanent" "Permanent" "Permanent" "Permanent" ...
## $ issue_date : Date[1:4144], format: NA "2019-07-09" ...
## $ expiration_date : Date[1:4144], format: NA "2020-01-31" ...
## $ application_date: Date[1:4144], format: "2019-06-03" "2019-05-20" ...
## $ month : chr [1:4144] NA "2019-07" NA "2019-07" ...
## $ year : chr [1:4144] "Pending as of 10/06/21" "2019" "Pending as of 10/06/21" "2019" ...
## $ process_time : 'difftime' num [1:4144] NA 50 NA 44 ...
## ..- attr(*, "units")= chr "days"
## $ process_time2 : 'difftime' num [1:4144] NA 50 NA 44 ...
## ..- attr(*, "units")= chr "days"
## $ data_state : chr [1:4144] "LA" "LA" "LA" "LA" ...
#Check for license number duplicates
str(state)
## tibble [46,998 × 12] (S3: tbl_df/tbl/data.frame)
## $ application_submitted_date: POSIXct[1:46998], format: "2019-06-03 05:00:00" "2019-05-20 05:00:00" ...
## $ application_type : chr [1:46998] "LPN Endorsement Application" "LPN Initial by Exam Application" "LPN Endorsement Application" "LPN Initial by Exam Application" ...
## $ application_status : chr [1:46998] "Expired" "Approved" "Expired" "Approved" ...
## $ license_status : chr [1:46998] "NULL" "Inactive - Delinquent" "NULL" "Active" ...
## $ first_name : chr [1:46998] "KAYLEIGH" "SEMONE" "EMMA" "MEGAN" ...
## $ middle_name : chr [1:46998] "MARY" "RENEE" "KATHERINE" "MARIE" ...
## $ last_name : chr [1:46998] "COTTO" "REED" "LADELFA" "BROWN" ...
## $ city : chr [1:46998] "BERLIN" "NATCHEZ" "PALMYRA" "WEST MONROE" ...
## $ state_province : chr [1:46998] "CT" "MS" "NY" "LA" ...
## $ duration : chr [1:46998] "Permanent" "Permanent" "Permanent" "Permanent" ...
## $ issue_date : POSIXct[1:46998], format: NA "2019-07-09" ...
## $ expiration_date : POSIXct[1:46998], format: NA "2020-01-31" ...
dupe_numbers <- la_state %>%
filter (!is.na(process_time2)) %>% #pending apps will show up as NA
group_by (first_name, middle_name, last_name, city, lic_type, app_type) %>%
mutate (n = n()) %>%
filter (n > 1) %>%
arrange (last_name, first_name, middle_name, city, lic_type, app_type)
dupe_numbers %>% ungroup() %>% count (app_type) %>% arrange (desc(n))
## # A tibble: 3 × 2
## app_type n
## <chr> <int>
## 1 Exam 363
## 2 Exam-retest 223
## 3 Endorsement 184
#57 have the same processing time, meaning they're true duplicates
dupe_numbers2 <- dupe_numbers %>%
ungroup() %>%
group_by (first_name, middle_name, last_name, city, lic_type, app_type, process_time2) %>%
mutate (count = n()) %>%
# filter (count == 1) #
filter (count > 1) #to see those with repeating process_time2
#Duplicate license numbers are all temporary licenses, so no removal necessary
la_state2 <- la_state %>%
distinct (first_name, middle_name, last_name, city, lic_type, app_type, process_time2, .keep_all=T)
#Every person has a match after removing duplicates, so should be OK
anti_join (la_state, la_state2, by = c("first_name", "middle_name", "last_name", "city", "lic_type", "app_type", "process_time2")) #%>%
## # A tibble: 0 × 18
## # … with 18 variables: lic_type <chr>, app_type <chr>, app_status <chr>,
## # lic_status <chr>, first_name <chr>, middle_name <chr>, last_name <chr>,
## # city <chr>, state <chr>, duration <chr>, issue_date <date>,
## # expiration_date <date>, application_date <date>, month <chr>, year <chr>,
## # process_time <drtn>, process_time2 <drtn>, data_state <chr>
#View
#For example, Heather Lynn ALTEMEIER of West Des Moines, IA has duplicate records in ia_state, but not in ia_state2
la_state %>% filter (first_name=="EDITH" & last_name=="CAMPBELL" & city=="Abbeville")
## # A tibble: 4 × 18
## lic_type app_type app_status lic_status first_name middle_name last_name city
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 LPN Exam Approved Inactive EDITH MERIE KNEA… CAMPBELL Abbe…
## 2 LPN Exam Approved Active EDITH MERIE KNEA… CAMPBELL Abbe…
## 3 LPN Exam Approved Inactive EDITH MERIE KNEA… CAMPBELL Abbe…
## 4 LPN Exam Approved Active EDITH MERIE KNEA… CAMPBELL Abbe…
## # … with 10 more variables: state <chr>, duration <chr>, issue_date <date>,
## # expiration_date <date>, application_date <date>, month <chr>, year <chr>,
## # process_time <drtn>, process_time2 <drtn>, data_state <chr>
la_state2 %>% filter (first_name=="EDITH" & last_name=="CAMPBELL" & city=="Abbeville")
## # A tibble: 2 × 18
## lic_type app_type app_status lic_status first_name middle_name last_name city
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 LPN Exam Approved Inactive EDITH MERIE KNEA… CAMPBELL Abbe…
## 2 LPN Exam Approved Active EDITH MERIE KNEA… CAMPBELL Abbe…
## # … with 10 more variables: state <chr>, duration <chr>, issue_date <date>,
## # expiration_date <date>, application_date <date>, month <chr>, year <chr>,
## # process_time <drtn>, process_time2 <drtn>, data_state <chr>
#Rename to join to full dataset
la_state <- la_state2
rm(la_state2)
state <- read_excel("../state-data/MA/Copy of Application Status Report - Nursing 9.23.21.xlsx") %>% clean_names()
str(state)
## tibble [53,843 × 6] (S3: tbl_df/tbl/data.frame)
## $ license_type : chr [1:53843] "Certified Nurse Midwife -- CNM" "Certified Nurse Midwife -- CNM" "Certified Nurse Midwife -- CNM" "Certified Nurse Midwife -- CNM" ...
## $ full_name : chr [1:53843] "Julie Patel" "Jessica Marie Bridge" "Amy Elizabeth DeSorgher" "Lisa Anne Kelley" ...
## $ addr_line_4 : chr [1:53843] "Milford MA 01757" "Weymouth MA 02190" "Myerstown PA 17067" "Holden MA 01520" ...
## $ app_date : POSIXct[1:53843], format: "2019-01-17" "2019-01-28" ...
## $ license_status: chr [1:53843] "Current" "Current" "Current" "Current" ...
## $ obtained_by : chr [1:53843] "Application" "Application" "Application" "Application" ...
#Calculate processing time
state2 <- state %>%
mutate (application_date = ymd(app_date),
app_month = substr(app_date, 1,7),
app_year = year(application_date)) %>%
rename (license_type_name = license_type)
state2 %>%
count (license_status)
## # A tibble: 17 × 2
## license_status n
## * <chr> <int>
## 1 Current 39694
## 2 Current - pending SSN 26
## 3 Current Covid-19 4
## 4 Deceased 6
## 5 Deleted 2
## 6 Expired 2102
## 7 Expired - SSN Affidavit 17
## 8 Inactive 16
## 9 Non-Disciplinary Restriction 2
## 10 Null and Void 2521
## 11 Pending 8267
## 12 Probation 1
## 13 Revoked 1
## 14 Revoked; Expired 1
## 15 Suspension 1
## 16 Voluntary Surrender; Expired 1
## 17 Withdrawn 1181
#Import license data downloaded from https://madph.mylicense.com/verification/Search.aspx on Oct. 19, 2021.
ma_rns <- read_csv ("../state-data/MA/MA_RN_data_101921_date.csv",
col_types = cols(.default = "c", expiration_date = "D", issue_date = "D"))
ma_rns_tmp <- read_csv ("../state-data/MA/MA_RNtemp_data_101921.csv",
col_types = cols(.default = "c", expiration_date = "D", issue_date = "D"))
ma_lpns <- read_csv ("../state-data/MA/MA_LPN_data_101921.csv",
col_types = cols(.default = "c", expiration_date = "D", issue_date = "D"))
ma_lpns_tmp <- read_csv ("../state-data/MA/MA_LPNtemp_data_101921.csv",
col_types = cols(.default = "c", expiration_date = "D", issue_date = "D"))
ma_nurses <- rbind (ma_rns, ma_rns_tmp, ma_lpns, ma_lpns_tmp) %>%
filter (issue_date > as.Date("2018-12-31") | is.na(issue_date)) %>% #keep only licenses issued 2019-present
filter (expiration_date > as.Date("2018-12-31") ) # keep those that didn't expire before Jan. 1, 2019
#How many are missing issue dates? >> only 75 current don't know issue date and 1 COVID-19
ma_nurses %>% filter (is.na(issue_date)) %>%
count (license_status_name) %>%
arrange (desc(n))
## # A tibble: 2 × 2
## license_status_name n
## <chr> <int>
## 1 Current 75
## 2 Expired 26
#How many LPNs/RNs applied? 34,000 RNs, 10000 Temp RNs, 3600 LPNs, 1359 Temp LPNs
state3 <- state2 %>%
filter (license_type_name %in% c("Licensed Practical Nurse", "LPN Temporary", "Registered Nurse", "RN Temporary")) %>%
filter (license_status != "Withdrawn") %>% #remove withdrawn applications
mutate (full_name = case_when ( #clean up name mismatches between provided application date spreadsheet and downloaded nurse roster
full_name == "Kaitlin Elizabeth Quirk" ~ "Kaitlin Elizabeth Quirk-Maxwell",
full_name == "Helen Suzanne Cawley" ~ "Eleni Suzanne Cawley",
full_name == "La¿Kisha Bennings" ~ "La¬øKisha Bennings",
full_name == "Stacy LeAnn Symonds" ~ "Stacy LeAnn Sylez",
full_name == "Jennifer LaChance" ~ "Jennifer L LaChance",
full_name == "Janette Robinson" ~ "Janette Marie Robinson",
full_name == "Lilibet E Pleitez Lones" ~ "Lilibet E. Ponce Pleitez",
full_name == "Candis D’Anne Calvert" ~ "Candis D‚ÄôAnne Calvert",
full_name == "Lilibet E Pleitez Lones" ~ "Lilibet E. Ponce Pleitez",
full_name == "Sara Ann Quinn" ~ "Sara Quinn-Black",
full_name == "Tye’sha Keirra Glover" ~ "Tye‚Äôsha Keirra Glover",
full_name == "Elaine Loretta Geigenmiller" ~ "Elaine Loretta Zamora",
full_name == "Ka’Juanica Donise Ervin" ~ "Ka‚ÄôJuanica Donise Ervin",
full_name == "Amanda Nache¿ Spates" ~ "Amanda Nache¬ø Spates",
full_name == "Brooke Littlefield" ~ "Brooke Ashley Littlefield",
full_name == "DEON Boris MALONEY" ~ "Deon Boris Maloney",
full_name == "Desirea Schmidt" ~ "Desirea Susanne Schmidt",
full_name == "claudia patricia Betancourt" ~ "Claudia Patricia Betancourt",
full_name == "Samantha Genova" ~ "Samantha Jacqlyn Genova",
full_name == "Jordin Alexa LeBouf" ~ "Jordin Lore",
full_name == "Anny Karolyne Lellys de Abrantes" ~ "Anny Karolyne Abrantes",
full_name == "Karleen Shanie Shorette" ~ "Karleen Shania Shorette",
full_name == "Pierre Garcès Carrenard" ~ "Pierre Garc√®s Carrenard",
full_name == "Stepahnie Agnes Onoh" ~ "Stephanie Agnes Onoh",
full_name == "Chistibel Nyanta" ~ "Christabel Nyanta",
full_name == "Rebe Dawn Toothaker" ~ "Rebecca Dawn Toothaker",
full_name == "Jaclyn Chavez" ~ "Jaclyn Jeanette Chavez",
full_name == "Elissa Crum" ~ "Elissa Surran Crum",
full_name == "Carley McQuinn" ~ "Carley Rayne McQuinn",
full_name == "kelly Jean Bond" ~ "Kelly Jean Bond",
full_name == "Mary Elise O’Sullivan" ~ "Mary Elise O‚ÄôSullivan",
full_name == "Ebony T Jenkins" ~ "Ebony Tyece Jenkins",
full_name == "Jesús J Rivera Rodríguez" ~ "Jes√∫s J Rivera Rodr√≠guez",
full_name == "Shelby Martocchio" ~ "Shelby Lauer", #name change, same city, timeframe
full_name == "Bethany Anne Borders-Whitehead" ~ "Bethany Bjork", #name change, same city, timeframe
TRUE ~ full_name
)) %>%
filter (!(full_name =="Sean Michael Burnett" & application_date=="2020-05-01")) %>% #filter out second temp RN license causing join issues
filter (!(full_name =="Joy Comasua Serrano" & application_date=="2021-06-17")) %>% #filter out second temp RN license causing join issues
filter (!(full_name =="Adrianna Baptista" & application_date=="2021-02-09")) %>% #filter out second temp RN license causing join issues
filter (!(full_name =="Michelle Andrea Smith" & license_status=="Null and Void")) %>% #filter out second temp RN license causing join issues
arrange (full_name, application_date)
#Remove pending applications from applicant list, otherwise it will screw up join later
ma_pending <- state3 %>%
filter (license_status == "Pending")
state3 <- state3 %>%
filter (license_status != "Pending")
#4 names won't match
#ma_nurses2b %>%
# filter (license_status=="Current" & is.na(year)) %>%
# arrange (app_date)
#Status of these licenses? << 7624 pending; 36111 current; 2021 expired
state3 %>%
count (obtained_by)
## # A tibble: 4 × 2
## obtained_by n
## * <chr> <int>
## 1 Archive Record 1
## 2 COVID-19 Emergency Order 11000
## 3 Examination 13646
## 4 Reciprocity 15952
repeats <- state3 %>%
filter (license_status != "Withdrawn") %>%
group_by (full_name, addr_line_4) %>%
mutate (count = n()) %>%
arrange (desc(count), full_name)
#Join app data with issuance data
ma_nurses2 <- state3 %>%
inner_join (ma_nurses, by = c("full_name", "license_type_name", "addr_line_4")) %>%
mutate (process_time = issue_date - application_date)
#Previous attempts
#ma_nurses2a <- state3 %>%
# full_join (ma_nurses, by = c("full_name", "license_type_name")) %>%
# mutate (process_time = issue_date - application_date) %>%
# group_by (full_name, license_type_name) %>%
# mutate (count = n()) %>%
# filter (!(count>1 & app_year != year (issue_date))) #remove duplicates whose app_year doesn't match issue_year, added by joining errors
two_people_same_name <- ma_nurses %>%
count (full_name, license_type_name) %>%
filter (n > 1)
#Dealing with two app dates in same year, add row id and only join first one as best guess
#https://community.rstudio.com/t/dplyr-joins-dealing-with-multiple-matches-duplicates-in-key-column/14528
print(Sys.time())
## [1] "2022-03-17 17:45:43 MST"
ma_nurses2c <- left_join ( #can't join by addr_line_4 (11,000 changes between application dataset and licensee database)
state3 %>% group_by(full_name, license_type_name) %>% mutate(id = row_number()),
ma_nurses %>% group_by(full_name, license_type_name) %>% mutate(id = row_number()),
by = c("full_name", "license_type_name", "id") ) %>%
#Manually fix duplicate names where two people in different cities have been mixed up
mutate (issue_date = case_when (
full_name == "Jessica Lynne Harwood" ~ as.Date("2021-08-31"),
full_name == "Gregory Desir" ~ as.Date("2019-04-12"), #following fixed by arranging state3 by full_name, issue_date
#full_name == "Erin Elizabeth Quigley" & addr_line_4.x == "Shrewsbury MA 01545" ~ as.Date("2021-07-07"),
#full_name == "Erin Elizabeth Quigley" & addr_line_4.x == "Lebanon NH 03766" ~ as.Date("2019-11-01"),
#full_name == "Elizabeth McNamara" & addr_line_4.x == "Lima OH 45804" & license_type_name=="Registered Nurse" ~ as.Date("2020-05-29"),
#full_name == "Elizabeth McNamara" & addr_line_4.x == "Saint Paul MN 55125" & license_type_name=="Registered Nurse" ~ as.Date("2020-05-28"),
#full_name == "Jessica Lynn Brown" & addr_line_4.x == "Brighton MA 02135" & license_type_name=="Registered Nurse" ~ as.Date("2021-07-20"),
#full_name == "Jessica Lynn Brown" & addr_line_4.x == "Marion SC 29571" & license_type_name=="Registered Nurse" ~ as.Date("2021-09-08"),
#full_name == "Stephanie Edwards" & addr_line_4.x == "Deland FL 32724" ~ as.Date("2021-09-17"),
#full_name == "Stephanie Edwards" & addr_line_4.x == "Umatilla FL 32784" ~ as.Date("2021-04-21"),
TRUE ~ issue_date),
year = year(issue_date),
process_time = issue_date - application_date) %>%
group_by (full_name, license_type_name) %>%
mutate (count = n())
print(Sys.time())
## [1] "2022-03-17 17:46:28 MST"
#checking for applications for two people with the same name -- need to manually check these people
double_check_these <- inner_join (two_people_same_name, state3, by = "full_name") %>%
distinct (full_name, addr_line_4, .keep_all=T) %>%
pull(full_name)
#Find two people with same name, non-matching addresses, likely matched backwards to fix manually above -- look for addr_line_4 switched
ma_nurses2c %>%
ungroup() %>%
filter (full_name %in% double_check_these) %>%
arrange (full_name) %>%
# filter (addr_line_4.x != addr_line_4.y) %>%
select (1:3, addr_line_4.y, license_status, obtained_by, application_date, issue_date, process_time)
## # A tibble: 70 × 9
## license_type_name full_name addr_line_4.x addr_line_4.y license_status
## <chr> <chr> <chr> <chr> <chr>
## 1 RN Temporary Amira Aljamal ALLEN PARK MI … <NA> Current
## 2 RN Temporary Amira Aljamal Allen Park MI … <NA> Current
## 3 RN Temporary Angelita O U… Edinburg TX 7… <NA> Current
## 4 RN Temporary Angelita O U… Edinburg TX 7… <NA> Current
## 5 Registered Nurse Ashley Eliza… Plymouth MA 0… Plymouth MA … Current
## 6 Registered Nurse Ashley Eliza… Bradford ME 0… Bradford ME … Current
## 7 RN Temporary Ashley Smith Newburyport MA… <NA> Current
## 8 RN Temporary Ashley Smith Lynwood IL 60… <NA> Current
## 9 LPN Temporary Caryn M Punt… Waterbury CT … <NA> Current
## 10 LPN Temporary Caryn M Punt… Waterbury CT … <NA> Current
## # … with 60 more rows, and 4 more variables: obtained_by <chr>,
## # application_date <date>, issue_date <date>, process_time <drtn>
#check for duplicates - should keep only those with multiple application dates
ma_nurses2c %>%
count (full_name, license_type_name) %>%
arrange (desc(n))
## # A tibble: 40,568 × 3
## # Groups: full_name, license_type_name [40,568]
## full_name license_type_name n
## <chr> <chr> <int>
## 1 Amira Aljamal RN Temporary 2
## 2 Angelita O Umbay RN Temporary 2
## 3 Ashley Elizabeth Smith Registered Nurse 2
## 4 Ashley Smith RN Temporary 2
## 5 Benjamin Nicholas Jacey Registered Nurse 2
## 6 Caryn M Punter LPN Temporary 2
## 7 Danielle Marie Jusseaume Registered Nurse 2
## 8 Elizabeth McNamara Registered Nurse 2
## 9 Emily Rose Matthews Registered Nurse 2
## 10 Erin Elizabeth Quigley Registered Nurse 2
## # … with 40,558 more rows
#4 currently licensed applicants don't match?!
ma_nurses2c %>%
filter (license_status=="Current" & is.na(year)) %>%
arrange (application_date)
## # A tibble: 1 × 24
## # Groups: full_name, license_type_name [1]
## license_type_name full_name addr_line_4.x app_date license_status
## <chr> <chr> <chr> <dttm> <chr>
## 1 Registered Nurse Benjamin N… Arlington MA… 2021-04-26 00:00:00 Current
## # … with 19 more variables: obtained_by <chr>, application_date <date>,
## # app_month <chr>, app_year <dbl>, id <int>, license_no <chr>,
## # license_status_name <chr>, expiration_date <date>, addr_line_1 <chr>,
## # addr_line_2 <chr>, addr_line_4.y <chr>, addr_city <chr>, addr_county <chr>,
## # addr_state <chr>, addr_zipcode <chr>, issue_date <date>, year <dbl>,
## # process_time <drtn>, count <int>
#Join pending cases back in and calculate how long they've been pending
ma_nurses3 <- ma_nurses2c %>%
bind_rows (ma_pending) %>%
mutate (addr_line_4.x = coalesce (addr_line_4.x, addr_line_4)) %>%
select (-addr_line_4) %>%
rename (addr_line_4 = addr_line_4.x) %>%
mutate (process_time2 = if_else (is.na(process_time) & license_status %in% c("Pending"),
as.Date("2021-10-15") - application_date,
process_time),
lic_type = case_when (
license_type_name == "Licensed Practical Nurse" ~ "LPN",
license_type_name == "LPN Temporary" ~ "LPN-Temp",
license_type_name == "Registered Nurse" ~ "RN",
license_type_name == "RN Temporary" ~ "RN-Temp"),
app_type = case_when (
obtained_by == "Reciprocity" ~ "Endorsement",
obtained_by == "Examination" ~ "Exam",
obtained_by == "COVID-19 Emergency Order" ~ "Application",
TRUE ~ obtained_by))
ma_state <- ma_nurses3 %>%
ungroup()
#114 null and void, temporary licenses where applicant has also received permanent license
mismatches <- ma_nurses3 %>%
filter (is.na(process_time2)) %>%
arrange (application_date)
mismatches %>% ungroup() %>% count (license_type_name, license_status)
## # A tibble: 6 × 3
## license_type_name license_status n
## <chr> <chr> <int>
## 1 LPN Temporary Null and Void 7
## 2 Registered Nurse Current 1
## 3 Registered Nurse Deleted 1
## 4 Registered Nurse Expired 8
## 5 RN Temporary Deceased 1
## 6 RN Temporary Null and Void 103
#pulls out all mismatches' names from join to confirm almost all had multiple applications, but only one issue date for permanent license included in MA data
check <- semi_join (ma_nurses3, mismatches, by = c("full_name")) %>%
arrange (full_name) %>%
select (1:6, issue_date)
#11 exceptions with only one row -- these just didn't match nurse database, some expired decades ago, others just are missing.
check %>%
ungroup() %>%
count (full_name) %>%
filter (n != 2)
## # A tibble: 11 × 2
## full_name n
## <chr> <int>
## 1 Barbara Ruth Kass 1
## 2 Carolyn Miller Cerco 1
## 3 Daphcar Depaliste 1
## 4 Elizabeth Alice Maxson 1
## 5 Lynne Patricia Jensen 1
## 6 Marguerite A Goulet 1
## 7 Mary Louise Clarkin Simmons 1
## 8 mercedes mary stapleton 1
## 9 Nance Elizabeth Eaton 1
## 10 Nancy Jane Burton 1
## 11 Sue Tree 1
address_check <- ma_nurses3 %>%
filter (addr_line_4 != addr_line_4.y)
#These two people have identical names, but obviously got switched: Elizabeth McNamara, Erin Elizabeth Quigley, Jessica Lynn Brown
address_check %>%
group_by (full_name) %>%
mutate (n = n()) %>%
filter (n > 1) %>%
arrange (full_name)
## # A tibble: 0 × 28
## # Groups: full_name [0]
## # … with 28 variables: license_type_name <chr>, full_name <chr>,
## # addr_line_4 <chr>, app_date <dttm>, license_status <chr>,
## # obtained_by <chr>, application_date <date>, app_month <chr>,
## # app_year <dbl>, id <int>, license_no <chr>, license_status_name <chr>,
## # expiration_date <date>, addr_line_1 <chr>, addr_line_2 <chr>,
## # addr_line_4.y <chr>, addr_city <chr>, addr_county <chr>, addr_state <chr>,
## # addr_zipcode <chr>, issue_date <date>, year <dbl>, process_time <drtn>, …
#Look for missing processing times? 9463 entries
#4985 active licenses << those who applied before Jan. 1, 2019 not included in application dataset, and will be missing, so 2019's data will be off.
no_process_time <- ma_nurses3 %>%
ungroup() %>%
filter (is.na(process_time2)) %>%
count (license_type_name, license_status_name, license_status, year) %>%
arrange (desc(n))
#check types of applications
ma_nurses3 %>%
ungroup() %>%
count (obtained_by)
## # A tibble: 4 × 2
## obtained_by n
## * <chr> <int>
## 1 Archive Record 1
## 2 COVID-19 Emergency Order 11293
## 3 Examination 16041
## 4 Reciprocity 20888
#Remove unformatted date fields and unnecessary fields
ma_state <- ma_state %>%
#Combine addr_line_4 with addr_city, addr_state, addr_zipcode fields
mutate (city = if_else (is.na(addr_city),
str_remove_all(addr_line_4, " ([A-Z]{2}) +\\d{5}.*"),
addr_city),
state = if_else (is.na(addr_state),
str_extract(addr_line_4, "[A-Z]{2}"),
addr_state),
zip_code = if_else (is.na(addr_zipcode),
str_extract(addr_line_4, "[0-9]{5}"),
addr_zipcode)) %>%
mutate (zip_code = str_pad(zip_code, 5, pad="0"),
year = if_else (is.na(year), "Pending as of 10/15/21", as.character(year)),
data_state = "MA") %>%
select (-c(app_date, addr_line_4, addr_city, addr_state, addr_zipcode, id, count, license_type_name, obtained_by, addr_line_4.y)) %>%
rename (app_status = license_status,
license_number = license_no,
name = full_name,
lic_status = license_status_name,
address_line1 = addr_line_1,
address_line2 = addr_line_2,
county = addr_county)
ma_state %>%
count(lic_type, app_type)
## # A tibble: 7 × 3
## lic_type app_type n
## <chr> <chr> <int>
## 1 LPN Endorsement 1105
## 2 LPN Exam 2402
## 3 LPN-Temp Application 1311
## 4 RN Archive Record 1
## 5 RN Endorsement 19783
## 6 RN Exam 13639
## 7 RN-Temp Application 9982
#ma_state %>%
# count(lic_status, lic_status_scrape) %>%
# arrange (n)
ma_state %>%
count (year)
## # A tibble: 4 × 2
## year n
## * <chr> <int>
## 1 2019 8689
## 2 2020 16627
## 3 2021 15162
## 4 Pending as of 10/15/21 7745
str(ma_state)
## tibble [48,223 × 21] (S3: tbl_df/tbl/data.frame)
## $ name : chr [1:48223] "A Scout Dellamia" "Aaisha Khaled Abdallah" "Aakanksha Pravin Surti" "Aaliyah Felder-Fitzpatrick" ...
## $ app_status : chr [1:48223] "Current" "Current" "Current" "Current" ...
## $ application_date: Date[1:48223], format: "2021-01-04" "2021-08-02" ...
## $ app_month : chr [1:48223] "2021-01" "2021-08" "2021-07" "2020-06" ...
## $ app_year : num [1:48223] 2021 2021 2021 2020 2021 ...
## $ license_number : chr [1:48223] "RN-TEMP4678" "RN2355935" "RN2356353" "RN2347473" ...
## $ lic_status : chr [1:48223] "Current" "Current" "Current" "Current" ...
## $ expiration_date : Date[1:48223], format: "2021-12-31" "2022-08-19" ...
## $ address_line1 : chr [1:48223] NA "6924 Hartwell St" "4711 N Hermitage Ave" "10 Pomona Drive" ...
## $ address_line2 : chr [1:48223] NA NA NA NA ...
## $ county : chr [1:48223] NA "Wayne" "Cook" "Plymouth" ...
## $ issue_date : Date[1:48223], format: "2021-01-06" "2021-08-12" ...
## $ year : chr [1:48223] "2021" "2021" "2021" "2020" ...
## $ process_time : 'difftime' num [1:48223] 2 10 33 109 ...
## ..- attr(*, "units")= chr "days"
## $ process_time2 : 'difftime' num [1:48223] 2 10 33 109 ...
## ..- attr(*, "units")= chr "days"
## $ lic_type : chr [1:48223] "RN-Temp" "RN" "RN" "RN" ...
## $ app_type : chr [1:48223] "Application" "Endorsement" "Endorsement" "Exam" ...
## $ city : chr [1:48223] "Littleton" "Dearborn" "Chicago" "Brockton" ...
## $ state : chr [1:48223] "CO" "MI" "IL" "MA" ...
## $ zip_code : chr [1:48223] "80128" "48126" "60640" "02302" ...
## $ data_state : chr [1:48223] "MA" "MA" "MA" "MA" ...
#Check for license number duplicates
#No license numbers are duplicated. No removal necessary.
dupe_numbers <- ma_state %>%
filter (!is.na(license_number)) %>% #pending apps will show up as NA
group_by (license_number) %>%
mutate (n = n()) %>%
filter (n > 1)
state <- read_excel("../state-data/ME/Days to Licensure Merge.xlsx",
#col_types = c("date", "text", "text", "text", "text", "text", "text", "text", "text", "text", "date", "date")
) %>%
clean_names()
#Calculate processing time
state2 <- state %>%
mutate (application_date = as.Date(pend_date_app_submission),
issue_date = as.Date(lic_date),
month = substr(issue_date, 1,7),
year = as.character(year(issue_date))) %>%
mutate (lic_type = if_else (str_detect (license, "^RN"), "RN", "LPN"),
process_time = issue_date - application_date,
app_type = case_when (
origin == "ENDORSEMENT" ~ "Endorsement",
origin == "EXAMINATION" ~ "Exam")) %>%
filter (!is.na(app_type)) #remove 1 "NOT APPLICABLE" exception
#Pull in Maine's pending nurses scraped from https://www.pfr.maine.gov/ALMSOnline/ALMSQuery/SearchIndividual.aspx#validationSummary:
pending_me <- read_csv ("/Users/austinfast/Documents/GitHub/Maine-nursing-licenses/outputs/Pending SCRAPES/Pending_nurses_all_2021-10-23.csv") %>%
filter (app_date <= as.Date("2021-10-21")) %>%
filter (!(name %in% c("LORI ANN HOWARD", "ALICIA VIRGINIA CAREY", "JESSICA GERTRUDE CAMPBELL", "ELIZABETH PATRICIA COREY", "CAMILLE RALPHENE DUMOIT"))) %>% #remove those approved Oct. 21 and no longer pending
select (name, status, address, type, app_status, app_date, gender, phone) %>%
mutate (pend_date_app_submission = app_date,
lic_type = if_else (str_detect (type, "^RN"), "RN", "LPN"),
app_type = if_else (str_detect (type, "Reciprocity"), "Endorsement", "Exam"),
process_time = as.Date("2021-10-21") - app_date,
year = "Pending as of 10/21/21") %>%
rename (application_date = app_date,
lic_stat = status,
origin = type)
me_state <- bind_rows(state2, pending_me) %>%
mutate (data_state = "ME")
#Any mismatches?
me_state %>% filter (as.Date(pend_date_app_submission) != application_date)
## # A tibble: 0 × 21
## # … with 21 variables: board <dbl>, board_name <chr>, license <chr>,
## # name <chr>, pend_date_app_submission <dttm>, lic_date <dttm>,
## # lic_stat <chr>, origin <chr>, days <dbl>, application_date <date>,
## # issue_date <date>, month <chr>, year <chr>, lic_type <chr>,
## # process_time <drtn>, app_type <chr>, address <chr>, app_status <chr>,
## # gender <chr>, phone <chr>, data_state <chr>
#Remove unformatted date fields and unnecessary fields
me_state <- me_state %>%
mutate (city = str_to_sentence(str_remove_all(address, ", ([A-Z]{2}) +\\d{5}.*")),
state = str_extract(address, " [A-Z]{2} "),
zip_code = str_extract(address, "[0-9]{5}"),
lic_status = case_when (
lic_stat == "A" ~ "Active",
lic_stat == "D" ~ "Denied",
lic_stat == "I" ~ "Inactive",
lic_stat == "T" ~ "Terminated",
lic_stat == "W" ~ "Withdrawn",
lic_stat == "X" ~ "X") ) %>%
select (-c(board, board_name, pend_date_app_submission, lic_date, origin, days, app_status, address, lic_stat)) %>%
rename (license_number = license)
me_state %>%
count(lic_type, app_type, year)
## # A tibble: 12 × 4
## lic_type app_type year n
## <chr> <chr> <chr> <int>
## 1 LPN Endorsement 2020 54
## 2 LPN Endorsement 2021 71
## 3 LPN Endorsement Pending as of 10/21/21 103
## 4 LPN Exam 2020 16
## 5 LPN Exam 2021 33
## 6 LPN Exam Pending as of 10/21/21 11
## 7 RN Endorsement 2020 1010
## 8 RN Endorsement 2021 1186
## 9 RN Endorsement Pending as of 10/21/21 807
## 10 RN Exam 2020 787
## 11 RN Exam 2021 889
## 12 RN Exam Pending as of 10/21/21 89
str(me_state)
## tibble [5,056 × 16] (S3: tbl_df/tbl/data.frame)
## $ license_number : chr [1:5056] "RN75869" "RN75867" "RN75865" "RN75864" ...
## $ name : chr [1:5056] "ASHLEY ELIZABETH MASON" "KELSEA RAELYNN KUVAJA" "LILLIE MARIE LAROCHELLE" "JORDAN JAMES ROSS" ...
## $ application_date: Date[1:5056], format: "2020-01-02" "2020-01-02" ...
## $ issue_date : Date[1:5056], format: "2020-01-27" "2020-02-01" ...
## $ month : chr [1:5056] "2020-01" "2020-02" "2020-02" "2020-02" ...
## $ year : chr [1:5056] "2020" "2020" "2020" "2020" ...
## $ lic_type : chr [1:5056] "RN" "RN" "RN" "RN" ...
## $ process_time : 'difftime' num [1:5056] 25 30 36 54 ...
## ..- attr(*, "units")= chr "days"
## $ app_type : chr [1:5056] "Exam" "Exam" "Exam" "Endorsement" ...
## $ gender : chr [1:5056] NA NA NA NA ...
## $ phone : chr [1:5056] NA NA NA NA ...
## $ data_state : chr [1:5056] "ME" "ME" "ME" "ME" ...
## $ city : chr [1:5056] NA NA NA NA ...
## $ state : chr [1:5056] NA NA NA NA ...
## $ zip_code : chr [1:5056] NA NA NA NA ...
## $ lic_status : chr [1:5056] "Active" "Active" "Active" "Active" ...
#Check for license number duplicates
#4 license numbers are duplicated.
dupe_numbers <- me_state %>%
filter (!is.na(license_number)) %>% #pending apps will show up as NA
group_by (license_number) %>%
mutate (n = n()) %>%
filter (n > 1) %>%
arrange (license_number, application_date)
dupe_numbers <- dupe_numbers %>% pull(license_number)
#Pull out correct row of duplicated lines
me_state2 <- me_state %>%
filter (license_number %in% dupe_numbers) %>%
group_by (license_number) %>%
slice_head()
#Remove all rows of these duplicated lines, and then add back in correct ones
me_state3 <- me_state %>%
filter (!(license_number %in% dupe_numbers)) %>%
rbind (me_state2)
#check correct records remain
me_state3 %>% filter (license_number %in% dupe_numbers)
## # A tibble: 4 × 16
## license_number name application_date issue_date month year lic_type
## <chr> <chr> <date> <date> <chr> <chr> <chr>
## 1 RN42797 JAMES M. BRO… 2021-03-11 2021-03-22 2021-… 2021 RN
## 2 RN75969 BARBARA LYNN… 2020-01-20 2020-02-18 2020-… 2020 RN
## 3 RN76083 PAULA LOUISE… 2020-02-13 2021-02-11 2021-… 2021 RN
## 4 RN80176 EMILY PAIGE … 2021-07-23 2021-08-02 2021-… 2021 RN
## # … with 9 more variables: process_time <drtn>, app_type <chr>, gender <chr>,
## # phone <chr>, data_state <chr>, city <chr>, state <chr>, zip_code <chr>,
## # lic_status <chr>
me_state <- me_state3
rm (me_state3, me_state2, pending_me)
state <- read_csv ("../state-data/MI-PROD__simple_saved.csv",
col_types = cols(.default = "c", LICENSE_INITIAL_ISSUE_DATE = "D", LICENSE_EXP_DATE = "D", APPLICATION_RENEWAL_RELICENSURE_FILE_DATE = "D")) %>%
clean_names ()
#Calculate processing time
mi_state <- state %>%
mutate (application_date = as.Date(application_renewal_relicensure_file_date),
issue_date = as.Date(license_initial_issue_date),
expiration_date = as.Date(license_exp_date),
month = substr(issue_date, 1,7),
year = as.character(year(issue_date))) %>%
mutate (process_time = issue_date - application_date)
#Check status for those that couldn't calculate process_time to find those actively pending, and set them to calculate pending time as process_time2
mi_state %>%
filter (is.na(process_time)) %>%
count (application_renewal_relicensure_status) %>%
arrange (desc(n))
## # A tibble: 7 × 2
## application_renewal_relicensure_status n
## <chr> <int>
## 1 In Review 3676
## 2 Withdrawn 1051
## 3 Authorized to Take Exam 994
## 4 Additional Info Required 145
## 5 Not Eligible for Exam 23
## 6 Application Denied 2
## 7 License Issued 1
#Calculates how long those left pending have been pending as process_time2
mi_state <- mi_state %>%
mutate (process_time2 = if_else (is.na(process_time) & application_renewal_relicensure_status %in% c("In Review", "Authorized to Take Exam", "Additional Info Required", "License Issued"),
as.Date("2021-10-20") - application_date,
process_time)) %>%
mutate (year = if_else (is.na(year), "Pending as of 10/20/21", as.character(year))) %>%
mutate (app_type = case_when (
license_obtained_by == "Examination" ~ "Exam",
license_obtained_by=="Relicensure ~" ~ "Reinstatement-Unknown",
license_obtained_by=="Relicensure ~ Examination" ~ "Reinstatement-Exam",
license_obtained_by=="Relicensure ~ Endorsement" ~ "Reinstatement-Endorsement",
license_obtained_by=="Renewal ~" ~ "Renewal-Unknown",
license_obtained_by=="Renewal ~ Examination" ~ "Renewal-Exam",
license_obtained_by=="Renewal ~ Endorsement" ~ "Renewal-Endorsement",
is.na(license_obtained_by) ~ "Unknown",
TRUE ~ license_obtained_by),
lic_type = case_when (
license_type == "Licensed Practical Nurse License" ~ "LPN",
license_type == "Registered Nurse License" & is.na(license_obtained_by) ~ "RN-Temp",
license_type == "Registered Nurse License" ~ "RN",
license_type == "Registered Nurse Temporary License" ~ "RN-Temp",
str_detect(application_renewal_relicensure_type, "^Registered Nurse") ~ "RN",
str_detect(application_renewal_relicensure_type, "^Licensed Practical Nurse Application") ~ "LPN"),
data_state = "MI")
#Any mismatches?
mi_state %>% filter (as.Date(application_renewal_relicensure_file_date) != application_date)
## # A tibble: 0 × 27
## # … with 27 variables: topic <chr>, first_name <chr>, middle_name <chr>,
## # last_name <chr>, name_suffix <chr>, city <chr>, state <chr>,
## # license_id <chr>, license_type <chr>, license_initial_issue_date <date>,
## # license_exp_date <date>, license_status <chr>, license_obtained_by <chr>,
## # application_renewal_relicensure_rec_id <chr>,
## # application_renewal_relicensure_file_date <date>,
## # application_renewal_relicensure_type <chr>, …
#Can I tell what type of application those pending are? No
mi_state %>%
ungroup() %>%
filter (is.na(license_obtained_by))
## # A tibble: 8,583 × 27
## topic first_name middle_name last_name name_suffix city state license_id
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 1. Apps… JoAnn <NA> Rome-Sess… Rome-Sessi Cinc… OH <NA>
## 2 1. Apps… Kimberly Ann Mc Clure <NA> New … MI <NA>
## 3 1. Apps… Lauren E Wynalda <NA> Wyom… MI <NA>
## 4 1. Apps… Janell Louise Jones <NA> Berr… MI <NA>
## 5 1. Apps… Tammy Marie Cheatham-… <NA> Monr… MI <NA>
## 6 1. Apps… LaShonda Michelle Bynum <NA> Tayl… MI <NA>
## 7 1. Apps… William Thomas Poet <NA> Manc… MI <NA>
## 8 1. Apps… Shiny <NA> Joseph <NA> Sali… Wilt… <NA>
## 9 1. Apps… zecel mae gonzaga baylon <NA> Ster… MI <NA>
## 10 1. Apps… Stevi Danielle Dominguez <NA> Hurl… NM <NA>
## # … with 8,573 more rows, and 19 more variables: license_type <chr>,
## # license_initial_issue_date <date>, license_exp_date <date>,
## # license_status <chr>, license_obtained_by <chr>,
## # application_renewal_relicensure_rec_id <chr>,
## # application_renewal_relicensure_file_date <date>,
## # application_renewal_relicensure_type <chr>,
## # application_renewal_relicensure_status <chr>, application_date <date>, …
#Remove unformatted date fields and unnecessary fields
mi_state <- mi_state %>%
select (-c(application_renewal_relicensure_file_date, license_initial_issue_date, license_obtained_by, license_exp_date, application_renewal_relicensure_rec_id, application_renewal_relicensure_type, topic, license_type)) %>%
rename (suffix = name_suffix,
license_number = license_id,
lic_status = license_status,
app_status = application_renewal_relicensure_status) %>%
#Fix suffix irregularities in Michigan
mutate (suffix = str_replace_all (suffix, "\\.", NA_character_)) %>%
mutate (suffix = str_replace_all (suffix, "\\`", NA_character_)) %>%
mutate (suffix = str_replace_all (suffix, "03/13/1974", NA_character_)) %>%
mutate (suffix = if_else (str_to_lower(suffix)==str_to_lower(last_name) & data_state=="MI", NA_character_, suffix))
mi_state %>%
count(lic_type, app_type)
## # A tibble: 19 × 3
## lic_type app_type n
## <chr> <chr> <int>
## 1 LPN Endorsement 700
## 2 LPN Exam 1901
## 3 LPN Reinstatement-Endorsement 159
## 4 LPN Reinstatement-Exam 435
## 5 LPN Reinstatement-Unknown 1
## 6 LPN Renewal-Endorsement 3737
## 7 LPN Renewal-Exam 22402
## 8 LPN Renewal-Unknown 69
## 9 LPN Unknown 863
## 10 RN Endorsement 9276
## 11 RN Exam 10284
## 12 RN Reinstatement-Endorsement 1208
## 13 RN Reinstatement-Exam 1720
## 14 RN Reinstatement-Unknown 13
## 15 RN Renewal-Endorsement 31400
## 16 RN Renewal-Exam 168533
## 17 RN Renewal-Unknown 869
## 18 RN Unknown 5040
## 19 RN-Temp Unknown 2680
str(mi_state)
## tibble [261,290 × 19] (S3: tbl_df/tbl/data.frame)
## $ first_name : chr [1:261290] "JoAnn" "Kimberly" "Lauren" "Janell" ...
## $ middle_name : chr [1:261290] NA "Ann" "E" "Louise" ...
## $ last_name : chr [1:261290] "Rome-Session" "Mc Clure" "Wynalda" "Jones" ...
## $ suffix : chr [1:261290] "Rome-Sessi" NA NA NA ...
## $ city : chr [1:261290] "Cincinnati" "New Baltimore" "Wyoming" "Berrien Springs" ...
## $ state : chr [1:261290] "OH" "MI" "MI" "MI" ...
## $ license_number : chr [1:261290] NA NA NA NA ...
## $ lic_status : chr [1:261290] NA NA NA NA ...
## $ app_status : chr [1:261290] "In Review" "Withdrawn" "Withdrawn" "Withdrawn" ...
## $ application_date: Date[1:261290], format: "2019-01-01" "2019-01-02" ...
## $ issue_date : Date[1:261290], format: NA NA ...
## $ expiration_date : Date[1:261290], format: NA NA ...
## $ month : chr [1:261290] NA NA NA NA ...
## $ year : chr [1:261290] "Pending as of 10/20/21" "Pending as of 10/20/21" "Pending as of 10/20/21" "Pending as of 10/20/21" ...
## $ process_time : 'difftime' num [1:261290] NA NA NA NA ...
## ..- attr(*, "units")= chr "days"
## $ process_time2 : 'difftime' num [1:261290] 1023 NA NA NA ...
## ..- attr(*, "units")= chr "days"
## $ app_type : chr [1:261290] "Unknown" "Unknown" "Unknown" "Unknown" ...
## $ lic_type : chr [1:261290] "RN" "RN" "RN" "RN" ...
## $ data_state : chr [1:261290] "MI" "MI" "MI" "MI" ...
#Check for license number duplicates
#4 license numbers are duplicated.
dupe_numbers <- mi_state %>%
filter (!is.na(license_number)) %>% #pending apps will show up as NA
group_by (license_number) %>%
mutate (n = n()) %>%
filter (n > 1) %>%
arrange (license_number, application_date)
#Lots of duplicate numbers, but most are renewals and reinstatements, which all have negative process times anyway
dupe_numbers %>%
filter (process_time2 > -1) %>%
ungroup() %>%
count (lic_type, app_type) %>%
arrange (desc(n))
## # A tibble: 7 × 3
## lic_type app_type n
## <chr> <chr> <int>
## 1 RN Exam 358
## 2 RN Endorsement 311
## 3 RN-Temp Unknown 201
## 4 LPN Exam 118
## 5 LPN Endorsement 9
## 6 LPN Unknown 5
## 7 RN Renewal-Exam 1
dupe_numbers %>% filter (process_time2 > -1)
## # A tibble: 1,003 × 20
## # Groups: license_number [1,002]
## first_name middle_name last_name suffix city state license_number lic_status
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Shacoya <NA> Jackson <NA> Warr… MI 4703120085 Active
## 2 Ashley D Oredsen <NA> Lesl… MI 4703120092 Active
## 3 Crystal Joy Spraggins <NA> Detr… MI 4703120268 Active
## 4 Jeriesha <NA> Brown <NA> Detr… MI 4703120402 Active
## 5 Diane <NA> Gordon <NA> Detr… MI 4703120568 Active
## 6 Vanessa A Delgado <NA> Stur… MI 4703120595 Active
## 7 Charmaniq… Nicole Darnell <NA> Detr… MI 4703120676 Active
## 8 Amezeuki <NA> Aduwa <NA> warr… MI 4703120784 Active
## 9 Tobias LeMar Meux <NA> Nobl… IN 4703120806 Active
## 10 DeMelchiz… DeMille O'… Israel <NA> Jasp… AL 4703120832 Active
## # … with 993 more rows, and 12 more variables: app_status <chr>,
## # application_date <date>, issue_date <date>, expiration_date <date>,
## # month <chr>, year <chr>, process_time <drtn>, process_time2 <drtn>,
## # app_type <chr>, lic_type <chr>, data_state <chr>, n <int>
dupe_numbers2 <- dupe_numbers %>%
filter (process_time2 > -1) %>% #Keep only positive process times
mutate (count = n()) %>% #recount
filter (count > 1) #Find true duplicates, those with more than one exam/endorsement
#Keeps only one person, and one record is renewal, so no duplicate removal necessary.
sheets <- excel_sheets(path = "../state-data/MN-npr_data_request_v2.xlsx")
state <- tibble()
for (i in sheets){
#i <- "Q1 21"
step_df <-read_excel("../state-data/MN-npr_data_request_v2.xlsx",
sheet = i) %>%
clean_names() %>%
mutate (source = i)
#mutate (entry_date = as_date(entry_date),
# last_item_complete_date = as_date(last_item_complete_date),
# issue_date = as_date(issue_date)#,
#sheet = i
# )
state <- bind_rows (state, step_df)
}
#Renewals - app date is almost always same as renewal date
state %>% filter (source=="Renewals") %>%
filter (as.Date(application_date) != as.Date(renewal_date))
## # A tibble: 2,540 × 15
## last_name first_name middle_name city state application_date source
## <chr> <chr> <chr> <chr> <chr> <dttm> <chr>
## 1 Aaseth Amber Lee ELLENDA… MN 1999-07-19 00:00:00 Renew…
## 2 Abagi Francis Agboegbulem Woodbury MN 2019-02-19 13:32:00 Renew…
## 3 Abdibudul Hodan Khalif EDEN PR… MN 2017-05-01 11:32:00 Renew…
## 4 Abebe Adey <NA> Brookly… MN 1999-07-26 00:00:00 Renew…
## 5 Abernathy-L… Jessica Markel Selbyvi… DE 2020-03-03 09:31:09 Renew…
## 6 Abiaziem Caroline Onyege MAPLEWO… MN 2019-10-22 09:54:00 Renew…
## 7 Abiaziem Caroline Onyege MAPLEWO… MN 2021-10-15 09:37:04 Renew…
## 8 Abou-Zeid Molly Kate Saint P… MN 2019-07-26 14:05:00 Renew…
## 9 Abraham Jinu Plamannil Cottage… MN 2021-03-15 10:14:50 Renew…
## 10 Adamich Michele Mooney MINNETO… MN 2021-02-23 08:30:41 Renew…
## # … with 2,530 more rows, and 8 more variables: permit_issue_date <dttm>,
## # permit_expire_date <dttm>, issue_date <dttm>, application_type <chr>,
## # expire_date <dttm>, renewal_date <dttm>, license_type <chr>,
## # rereg_appl_date <dttm>
#None with rereg_appl also have application_date field
state %>%
filter (!is.na(rereg_appl_date) & !is.na(application_date))
## # A tibble: 0 × 15
## # … with 15 variables: last_name <chr>, first_name <chr>, middle_name <chr>,
## # city <chr>, state <chr>, application_date <dttm>, source <chr>,
## # permit_issue_date <dttm>, permit_expire_date <dttm>, issue_date <dttm>,
## # application_type <chr>, expire_date <dttm>, renewal_date <dttm>,
## # license_type <chr>, rereg_appl_date <dttm>
#None with renewal date also have issue_date field
state %>%
filter (!is.na(issue_date) & !is.na(renewal_date))
## # A tibble: 0 × 15
## # … with 15 variables: last_name <chr>, first_name <chr>, middle_name <chr>,
## # city <chr>, state <chr>, application_date <dttm>, source <chr>,
## # permit_issue_date <dttm>, permit_expire_date <dttm>, issue_date <dttm>,
## # application_type <chr>, expire_date <dttm>, renewal_date <dttm>,
## # license_type <chr>, rereg_appl_date <dttm>
state2 <- state %>%
mutate (issue_date = as.Date(coalesce(permit_issue_date, issue_date, renewal_date)),
expiration_date = as.Date(coalesce(permit_expire_date, expire_date)),
application_date = as.Date(coalesce(rereg_appl_date, application_date)),
license_type = coalesce(application_type, license_type))
#Only applicants have no license_type
state2 %>% filter (is.na (license_type)) %>% count (source)
## # A tibble: 2 × 2
## source n
## * <chr> <int>
## 1 EndorsementApplicants 7337
## 2 ExamApplicants 2808
repeats <- state2 %>%
#filter (str_detect(source, "Applicant")) %>%
group_by (first_name, middle_name, last_name, city, state) %>%
mutate (count = n()) %>%
filter (count > 1) %>%
arrange (desc(count), last_name, first_name)
state3 <- state2 %>%
mutate (lic_type = case_when (
source=="EndorsementApplicants" & !is.na(issue_date) ~ "Unknown-Temp",
#source=="ExamApplicants" ~ "Unknown-Exam",
#source=="EndorsementApplicants" ~ "Unknown-Endorsement",
license_type=="LP" ~ "LPN",
license_type=="RN" ~ "RN",
TRUE ~ "Unknown"),
app_type = case_when (
str_detect(source, "Exam") ~ "Exam",
str_detect(source, "Endorse") ~ "Endorsement",
str_detect(source, "Renewal") ~ "Renewal",
str_detect(source, "Reregistrations") ~ "Reinstatement"),
process_time = issue_date - application_date)
#Any negatives? 1588 -- all renewals and temp permits
state3 %>%
filter (process_time < 0) %>%
count (lic_type, app_type)
## # A tibble: 3 × 3
## lic_type app_type n
## <chr> <chr> <int>
## 1 LPN Renewal 370
## 2 RN Renewal 928
## 3 Unknown-Temp Endorsement 290
#Only the unknowns (from applicant sheets failed to calculate, meaning these are the pending files)
state3 %>%
filter (is.na(process_time)) %>%
count (lic_type, app_type)
## # A tibble: 2 × 3
## lic_type app_type n
## <chr> <chr> <int>
## 1 Unknown Endorsement 2146
## 2 Unknown Exam 2808
state3 %>% count (lic_type, app_type)
## # A tibble: 11 × 3
## lic_type app_type n
## <chr> <chr> <int>
## 1 LPN Endorsement 828
## 2 LPN Exam 2377
## 3 LPN Reinstatement 905
## 4 LPN Renewal 25315
## 5 RN Endorsement 11609
## 6 RN Exam 10703
## 7 RN Reinstatement 4112
## 8 RN Renewal 150675
## 9 Unknown Endorsement 2146
## 10 Unknown Exam 2808
## 11 Unknown-Temp Endorsement 5191
mn_state <- state3 %>%
mutate (month = substr(issue_date, 1,7),
year = as.character(year(issue_date))) %>%
mutate (year = if_else (is.na(year), "Pending as of 11/23/21", as.character(year)),
data_state = "MN")
#Remove unformatted date fields and unnecessary fields
mn_state <- mn_state %>%
select (-c(permit_issue_date, renewal_date, permit_expire_date, rereg_appl_date, application_type, expire_date, license_type, source))
mn_state %>%
#count(lic_type, renewal_app, app_type) #check for mistakes with renewal_app field
count(lic_type, app_type, year)
## # A tibble: 45 × 4
## lic_type app_type year n
## <chr> <chr> <chr> <int>
## 1 LPN Endorsement 2019 310
## 2 LPN Endorsement 2020 239
## 3 LPN Endorsement 2021 279
## 4 LPN Exam 2019 676
## 5 LPN Exam 2020 883
## 6 LPN Exam 2021 818
## 7 LPN Reinstatement 2019 332
## 8 LPN Reinstatement 2020 307
## 9 LPN Reinstatement 2021 266
## 10 LPN Renewal 2019 8969
## # … with 35 more rows
str(mn_state)
## tibble [216,669 × 14] (S3: tbl_df/tbl/data.frame)
## $ last_name : chr [1:216669] "Aanderud" "Abad" "Abbi" "Abdi" ...
## $ first_name : chr [1:216669] "Kayla" "Rachellyn" "Nimo" "Asha" ...
## $ middle_name : chr [1:216669] "Marguerite" "Mae P" "Mohammed" "Mohamed" ...
## $ city : chr [1:216669] "Mankato" "Scarborough" "MINNEAPOLIS" "ST.LOUIS PARK" ...
## $ state : chr [1:216669] "MN" "ON" "MN" "MN" ...
## $ application_date: Date[1:216669], format: "2021-04-06" "2021-09-15" ...
## $ issue_date : Date[1:216669], format: NA NA ...
## $ expiration_date : Date[1:216669], format: NA NA ...
## $ lic_type : chr [1:216669] "Unknown" "Unknown" "Unknown" "Unknown" ...
## $ app_type : chr [1:216669] "Exam" "Exam" "Exam" "Exam" ...
## $ process_time : 'difftime' num [1:216669] NA NA NA NA ...
## ..- attr(*, "units")= chr "days"
## $ month : chr [1:216669] NA NA NA NA ...
## $ year : chr [1:216669] "Pending as of 11/23/21" "Pending as of 11/23/21" "Pending as of 11/23/21" "Pending as of 11/23/21" ...
## $ data_state : chr [1:216669] "MN" "MN" "MN" "MN" ...
#Check for license number duplicates
#4 license numbers are duplicated.
dupe_numbers <- mn_state %>%
filter (!is.na(process_time)) %>% #pending apps will show up as NA
group_by (first_name, middle_name, last_name, city, lic_type, app_type) %>%
mutate (n = n()) %>%
filter (n > 1) %>%
arrange (last_name, first_name, middle_name, city, lic_type, app_type)
#Lots of duplicate numbers, but most are renewals and reinstatements, which all have negative process times anyway
dupe_numbers %>%
filter (process_time > -1) %>%
ungroup() %>%
count (lic_type, app_type) %>%
arrange (desc(n))
## # A tibble: 5 × 3
## lic_type app_type n
## <chr> <chr> <int>
## 1 RN Renewal 80880
## 2 LPN Renewal 12539
## 3 RN Reinstatement 119
## 4 Unknown-Temp Endorsement 93
## 5 LPN Reinstatement 38
dupe_numbers %>% filter (process_time > -1)
## # A tibble: 93,669 × 15
## # Groups: first_name, middle_name, last_name, city, lic_type, app_type
## # [47,147]
## last_name first_name middle_name city state application_date issue_date
## <chr> <chr> <chr> <chr> <chr> <date> <date>
## 1 Aabel Samantha Jean Garfield MN 2019-01-29 2019-01-29
## 2 Aabel Samantha Jean Garfield MN 2021-02-16 2021-02-16
## 3 Aadland Elyse Ann HUGO MN 2019-11-25 2019-11-25
## 4 Aadland Elyse Ann HUGO MN 2021-11-03 2021-11-03
## 5 Aadland Kristina Elizabeth BLOOMINGT… MN 2019-11-01 2019-11-01
## 6 Aadland Kristina Elizabeth BLOOMINGT… MN 2021-11-13 2021-11-13
## 7 Aagaard Lindsay Claire Woodbury MN 2019-09-28 2019-09-28
## 8 Aagaard Lindsay Claire Woodbury MN 2021-09-30 2021-09-30
## 9 Aagard Magdeline Celia Minneapol… MN 2019-11-04 2019-11-04
## 10 Aagard Magdeline Celia Minneapol… MN 2021-11-05 2021-11-05
## # … with 93,659 more rows, and 8 more variables: expiration_date <date>,
## # lic_type <chr>, app_type <chr>, process_time <drtn>, month <chr>,
## # year <chr>, data_state <chr>, n <int>
#34 duplicate license_number records
dupe_numbers2 <- dupe_numbers %>%
filter (!(app_type %in% c("Renewal", "Reinstatement"))) %>% #Remove renewals/reinstatements, which will obviously have repeating license numbers
filter (process_time > -1 ) %>% #Keep only positive process times
mutate (count = n()) %>% #recount
filter (count > 1) %>% #Find true duplicates, those with more than one exam/endorsement
arrange (last_name, first_name, middle_name, city, lic_type, app_type, application_date)
#Only Unknown-Temp has duplicates, many of which have one very long process time and one very short. Will keep first application date.
#remove 17 duplicates
dupe_numbers2 <- dupe_numbers2 %>%
slice_tail(n=1)
#Remove 17 wrong rows from mn_state
mn_state2 <- mn_state %>%
anti_join (dupe_numbers2, by = c("last_name", "first_name", "middle_name", "city", "lic_type", "app_type", "process_time"))
nrow(mn_state2) - nrow(mn_state)
## [1] -18
#1 row has two records with same process time, and both get removed. This re-adds it
missing <- anti_join (mn_state, mn_state2) %>%
filter(row_number()==2)
mn_state <- mn_state2 %>%
rbind (missing)
rm(mn_state2, dupe_numbers2, dupe_numbers, missing, repeats)
state <-read_excel("../state-data/MS-NPR Request-correct2.xlsx", skip=2) %>%
clean_names()
str(state)
## tibble [64,030 × 12] (S3: tbl_df/tbl/data.frame)
## $ name : chr [1:64030] "Anneitta Holliman Banks" "Trinita Mary Eddington" "Donna M. Mae Miller Roberts" "Sybil Womack Turnbull" ...
## $ residence_city : chr [1:64030] "Gulfport" "Jackson" "Pascagoula" "Mize" ...
## $ residence_state : chr [1:64030] "MS" "MS" "MS" "MS" ...
## $ license_type : chr [1:64030] "Licensed Practical Nurse" "Registered Nurse" "Licensed Practical Nurse" "Registered Nurse" ...
## $ perm_temp : chr [1:64030] "Permanent" "Permanent" "Permanent" "Permanent" ...
## $ initial_license_method: chr [1:64030] "Exam" "Exam" "Exam" "Exam" ...
## $ renewal_app : chr [1:64030] "YES" "YES" "YES" "YES" ...
## $ app_submit_date : POSIXct[1:64030], format: "2019-12-14 12:33:33" "2020-11-13 11:58:24" ...
## $ all_docs_rec_date : POSIXct[1:64030], format: "2019-12-14 13:00:02" "2020-11-13 13:00:01" ...
## $ renewal_date : POSIXct[1:64030], format: "2019-12-14" "2020-11-13" ...
## $ initial_issue_date : POSIXct[1:64030], format: "1955-03-16" "1957-07-06" ...
## $ expiration_date : POSIXct[1:64030], format: "2021-12-31" "2022-12-31" ...
state %>%
count (renewal_app)
## # A tibble: 2 × 2
## renewal_app n
## * <chr> <int>
## 1 NO 4747
## 2 YES 59283
ms_state <- state %>%
mutate (application_date = as_date(app_submit_date),
docs_date = as_date(all_docs_rec_date),
issue_date = as_date(initial_issue_date),
effective_renewal_date = as_date(renewal_date),
expiration_date = as_date(expiration_date),
gather_time = docs_date - application_date,
docs_time = issue_date - docs_date,
month = if_else (renewal_app == "YES",
substr(effective_renewal_date, 1,7),
substr(issue_date, 1,7)),
year = if_else (renewal_app == "YES",
as.character(year(effective_renewal_date)),
as.character(year(issue_date))),
process_time = if_else (renewal_app == "YES",
(effective_renewal_date - application_date), #<< use this for Renewal LPN, Renewal RN
(issue_date - application_date) ),
lic_type = case_when(
license_type=="Licensed Practical Nurse" ~ "LPN",
license_type=="Registered Nurse" ~ "RN"),
app_type = case_when (
renewal_app=="YES" ~ paste0("Renewal-", initial_license_method),
TRUE ~ initial_license_method), #<< use this for new applications
data_state = "MS")
#Any mismatches?
ms_state %>% filter (as.Date(app_submit_date) != application_date)
## # A tibble: 0 × 24
## # … with 24 variables: name <chr>, residence_city <chr>, residence_state <chr>,
## # license_type <chr>, perm_temp <chr>, initial_license_method <chr>,
## # renewal_app <chr>, app_submit_date <dttm>, all_docs_rec_date <dttm>,
## # renewal_date <dttm>, initial_issue_date <dttm>, expiration_date <date>,
## # application_date <date>, docs_date <date>, issue_date <date>,
## # effective_renewal_date <date>, gather_time <drtn>, docs_time <drtn>,
## # month <chr>, year <chr>, process_time <drtn>, lic_type <chr>, …
ms_state %>% filter (as.Date(renewal_date) != effective_renewal_date)
## # A tibble: 0 × 24
## # … with 24 variables: name <chr>, residence_city <chr>, residence_state <chr>,
## # license_type <chr>, perm_temp <chr>, initial_license_method <chr>,
## # renewal_app <chr>, app_submit_date <dttm>, all_docs_rec_date <dttm>,
## # renewal_date <dttm>, initial_issue_date <dttm>, expiration_date <date>,
## # application_date <date>, docs_date <date>, issue_date <date>,
## # effective_renewal_date <date>, gather_time <drtn>, docs_time <drtn>,
## # month <chr>, year <chr>, process_time <drtn>, lic_type <chr>, …
#Remove unformatted date fields and unnecessary fields
ms_state <- ms_state %>%
select (-c(license_type, initial_license_method, renewal_app, app_submit_date, all_docs_rec_date, initial_issue_date, renewal_date)) %>%
rename (city = residence_city,
state = residence_state,
duration = perm_temp)
ms_state %>%
#count(lic_type, renewal_app, app_type) #check for mistakes with renewal_app field
count(lic_type, app_type)
## # A tibble: 8 × 3
## lic_type app_type n
## <chr> <chr> <int>
## 1 LPN Endorsement 172
## 2 LPN Exam 1392
## 3 LPN Renewal-Endorsement 1601
## 4 LPN Renewal-Exam 10855
## 5 RN Endorsement 939
## 6 RN Exam 2244
## 7 RN Renewal-Endorsement 8561
## 8 RN Renewal-Exam 38266
str(ms_state)
## tibble [64,030 × 17] (S3: tbl_df/tbl/data.frame)
## $ name : chr [1:64030] "Anneitta Holliman Banks" "Trinita Mary Eddington" "Donna M. Mae Miller Roberts" "Sybil Womack Turnbull" ...
## $ city : chr [1:64030] "Gulfport" "Jackson" "Pascagoula" "Mize" ...
## $ state : chr [1:64030] "MS" "MS" "MS" "MS" ...
## $ duration : chr [1:64030] "Permanent" "Permanent" "Permanent" "Permanent" ...
## $ expiration_date : Date[1:64030], format: "2021-12-31" "2022-12-31" ...
## $ application_date : Date[1:64030], format: "2019-12-14" "2020-11-13" ...
## $ docs_date : Date[1:64030], format: "2019-12-14" "2020-11-13" ...
## $ issue_date : Date[1:64030], format: "1955-03-16" "1957-07-06" ...
## $ effective_renewal_date: Date[1:64030], format: "2019-12-14" "2020-11-13" ...
## $ gather_time : 'difftime' num [1:64030] 0 0 0 0 ...
## ..- attr(*, "units")= chr "days"
## $ docs_time : 'difftime' num [1:64030] -23649 -23141 -22264 -22065 ...
## ..- attr(*, "units")= chr "days"
## $ month : chr [1:64030] "2019-12" "2020-11" "2019-12" "2020-12" ...
## $ year : chr [1:64030] "2019" "2020" "2019" "2020" ...
## $ process_time : 'difftime' num [1:64030] 0 0 0 0 ...
## ..- attr(*, "units")= chr "days"
## $ lic_type : chr [1:64030] "LPN" "RN" "LPN" "RN" ...
## $ app_type : chr [1:64030] "Renewal-Exam" "Renewal-Exam" "Renewal-Exam" "Renewal-Exam" ...
## $ data_state : chr [1:64030] "MS" "MS" "MS" "MS" ...
#Six records have duplicates -- all are renewals with negative processing times, so no removal necessary.
dupe_numbers <- ms_state %>%
# filter (!is.na(license_number)) %>% #pending apps will show up as NA
group_by (name, city, lic_type, app_type) %>%
mutate (n = n()) %>%
filter (n > 1) %>%
arrange (name, city, lic_type, app_type)
#Import LPN files showing licenses issued
mt_lpn1 <- read_excel ("../state-data/MT/MT-LPN Licenses - Exam.xlsx") %>%
clean_names() %>%
mutate (app_type = "Exam",
lic_type = "LPN")
mt_lpn2 <- read_excel ("../state-data/MT/MT-LPN Licenses - Credential.xlsx") %>%
clean_names() %>%
mutate (app_type = "Endorsement",
lic_type = "LPN")
mt_lpn_lic <- rbind (mt_lpn1, mt_lpn2) %>%
rename (license_number_lic = license_number,
lic_status = record_status,
issue_date = issued_date) %>%
select (-c(lic_type, state, country_region))
#Import LPN files showing applications
mt_lpn3 <- read_excel ("../state-data/MT/MT-LPN Applications - Exam .xlsx") %>%
clean_names() %>%
mutate (app_type = "Exam",
lic_type = "LPN")
mt_lpn4 <- read_excel ("../state-data/MT/MT-LPN Applications - Credential .xlsx") %>%
clean_names() %>%
mutate (app_type = "Endorsement",
lic_type = "LPN")
mt_lpn_app <- rbind (mt_lpn3, mt_lpn4) %>%
rename (application_date = opened_date,
license_number_app = license_number,
app_status = record_status)
#Join applications to licenses issued by name, city and app_type
lpn_bind <- full_join (mt_lpn_app, mt_lpn_lic, by = c("first_name", "last_name", "city", "app_type")) %>%
mutate (application_date = as.Date(application_date),
issue_date = as.Date(issue_date),
expiration_date = as.Date(expiration_date),
month = substr(issue_date, 1,7),
year = as.character(year(issue_date))) %>%
mutate (process_time = issue_date - application_date)
#Check status for those that couldn't calculate process_time to find those actively pending, and set them to calculate pending time as process_time2
lpn_bind %>%
filter (is.na(process_time)) %>%
count (app_status) %>%
arrange (desc(n))
## # A tibble: 7 × 2
## app_status n
## <chr> <int>
## 1 Additional Info Requested 34
## 2 <NA> 26
## 3 Awaiting Test Results 6
## 4 Initial Review 3
## 5 License Issued 2
## 6 Awaiting Board Meeting 1
## 7 Temporary License Issued 1
#Calculates how long those left pending have been pending as process_time2
lpn_bind <- lpn_bind %>%
mutate (process_time2 = if_else (is.na(process_time) & app_status %in% c("Additional Info Requested", "Awaiting Board Meeting", "Awaiting Service", "Awaiting Test Results", "Initial Review", "License Issued"),
as.Date("2021-11-19") - application_date,
process_time),
year = if_else (is.na(year), "Pending as of 11/19/21", as.character(year)))
mt_lpn_app %>%
count (app_status)
## # A tibble: 7 × 2
## app_status n
## * <chr> <int>
## 1 Additional Info Requested 35
## 2 Awaiting Board Meeting 1
## 3 Awaiting Test Results 6
## 4 Closed - License Issued 197
## 5 Initial Review 3
## 6 License Issued 117
## 7 Temporary License Issued 1
#26 issued licenses are not in application data, mostly issued early 2019, likely applied before Jan 1, 2019 and not sent to NPR by board.
anti_join (lpn_bind, mt_lpn_app, by = c("first_name", "last_name", "city", "app_type")) %>%
count (lic_status)
## # A tibble: 3 × 2
## lic_status n
## * <chr> <int>
## 1 Active 20
## 2 Expired 4
## 3 Terminated 2
#summarize (range = range(issue_date))
#47 applications are not in licensure data, almost all those pending.
anti_join (lpn_bind, mt_lpn_lic, by = c("first_name", "last_name", "city", "app_type")) %>%
count (app_status)
## # A tibble: 6 × 2
## app_status n
## * <chr> <int>
## 1 Additional Info Requested 34
## 2 Awaiting Board Meeting 1
## 3 Awaiting Test Results 6
## 4 Initial Review 3
## 5 License Issued 2
## 6 Temporary License Issued 1
#Import RN files showing licenses issued
mt_rn1 <- read_excel ("../state-data/MT/MT-RN Licenses - Exam.xlsx") %>%
clean_names() %>%
mutate (app_type = "Exam",
lic_type = "RN")
mt_rn2 <- read_excel ("../state-data/MT/MT-RN Licenses - Credential.xlsx") %>%
clean_names() %>%
mutate (app_type = "Endorsement",
lic_type = "RN")
mt_rn_lic <- rbind (mt_rn1, mt_rn2) %>%
rename (license_number_lic = license_number,
lic_status = record_status,
issue_date = issued_date) %>%
select (-c(lic_type, state, country_region))
#Import RN files showing applications
mt_rn3 <- read_excel ("../state-data/MT/MT-RN Applications - Exam.xlsx") %>%
clean_names() %>%
mutate (app_type = "Exam",
lic_type = "RN")
mt_rn4 <- read_excel ("../state-data/MT/MT-RN Applications - Credential .xlsx") %>%
clean_names() %>%
mutate (app_type = "Endorsement",
lic_type = "RN") %>%
select(-c(x9, x10))
mt_rn_app <- rbind (mt_rn3, mt_rn4) %>%
rename (application_date = opened_date,
license_number_app = license_number,
app_status = record_status)
#Only two people with doubled applications
mt_rn_app2 <- mt_rn_app %>%
group_by (first_name, last_name, city) %>%
mutate (count = n())
#Join applications to licenses issued by name, city and app_type
rn_bind <- full_join (mt_rn_app, mt_rn_lic, by = c("first_name", "last_name", "city", "app_type")) %>%
mutate (application_date = as.Date(application_date),
issue_date = as.Date(issue_date),
expiration_date = as.Date(expiration_date),
month = substr(issue_date, 1,7),
year = as.character(year(issue_date))) %>%
mutate (process_time = issue_date - application_date)
#Check status for those that couldn't calculate process_time to find those actively pending, and set them to calculate pending time as process_time2
rn_bind %>%
filter (is.na(process_time)) %>%
count (app_status) %>%
arrange (desc(n))
## # A tibble: 10 × 2
## app_status n
## <chr> <int>
## 1 Additional Info Requested 791
## 2 Awaiting Test Results 417
## 3 <NA> 301
## 4 Initial Review 42
## 5 Closed - License Issued 3
## 6 Denied Discipline - To Legal 2
## 7 Awaiting Board Meeting 1
## 8 Awaiting Service 1
## 9 Denied 1
## 10 License Issued 1
#Calculates how long those left pending have been pending as process_time2
rn_bind <- rn_bind %>%
mutate (process_time2 = if_else (is.na(process_time) & app_status %in% c("Additional Info Requested", "Awaiting Board Meeting", "Awaiting Service", "Awaiting Test Results", "Initial Review", "License Issued"),
as.Date("2021-11-19") - application_date,
process_time),
year = if_else (is.na(year), "Pending as of 11/19/21", as.character(year)))
mt_rn_app %>%
count (app_status)
## # A tibble: 11 × 2
## app_status n
## * <chr> <int>
## 1 Additional Info Requested 793
## 2 Approved 1
## 3 Awaiting Board Meeting 1
## 4 Awaiting Service 1
## 5 Awaiting Test Results 417
## 6 Closed - CMR 3
## 7 Closed - License Issued 3831
## 8 Denied 1
## 9 Denied Discipline - To Legal 2
## 10 Initial Review 42
## 11 License Issued 1639
#301 issued licenses are not in application data, mostly issued early 2019, likely applied before Jan 1, 2019 and not included from board.
anti_join (rn_bind, mt_rn_app, by = c("first_name", "last_name", "city", "app_type")) %>%
count (lic_status)
## # A tibble: 4 × 2
## lic_status n
## * <chr> <int>
## 1 Active 249
## 2 Closed 8
## 3 Expired 39
## 4 Terminated 5
#summarize (range = range(issued_date))
#1259 applications are not in licensure data, almost all those pending.
anti_join (rn_bind, mt_rn_lic, by = c("first_name", "last_name", "city", "app_type")) %>%
count (app_status)
## # A tibble: 9 × 2
## app_status n
## * <chr> <int>
## 1 Additional Info Requested 791
## 2 Awaiting Board Meeting 1
## 3 Awaiting Service 1
## 4 Awaiting Test Results 417
## 5 Closed - License Issued 3
## 6 Denied 1
## 7 Denied Discipline - To Legal 2
## 8 Initial Review 42
## 9 License Issued 1
#Bind RN/LPN dataframes
mt_state <- rbind (rn_bind, lpn_bind) %>%
select (-c(license_number_app, country_region)) %>%
rename (license_number = license_number_lic) %>%
mutate (data_state = "MT",
#extract lic_type from license number for those issued early 2019 without app records
lic_type = if_else (is.na(lic_type), substr(license_number, 5,6), lic_type)) %>%
mutate (lic_type = if_else (lic_type=="LP", "LPN", lic_type))
#Calculate averages
averages <- mt_state %>%
ungroup() %>%
filter (process_time2 > -1) %>% #removes 7 rows below 0, errors
filter (!is.na(process_time2)) %>%
filter (!(str_detect(year, "^Pending") & (application_date < as.Date("2020-07-01")))) %>% #remove pending apps older than July 2020
group_by (lic_type, app_type, month) %>%
summarize (mean = round(mean(process_time2)),
median = median (process_time2),
count = n(),
over30days = sum(process_time2 > 30),
pct_over30days = over30days/count,
over60days = sum(process_time2 > 60),
pct_over60days = over60days/count,
over90days = sum(process_time2 > 90),
pct_over90days = over90days/count,
over120days = sum(process_time2 > 120),
pct_over120days = over120days/count,
over180days = sum(process_time2 > 180),
pct_over180days = over180days/count,
over1year = sum(process_time2 > 365),
pct_over1year = over1year/count)
rn_exam21 <- mt_state %>%
filter (lic_type=="RN" & year=="2021")
#Graph
rn_exam21 %>%
count (process_time2, app_type) %>%
ggplot(aes(x = process_time2, y = n, fill = app_type)) +
facet_wrap (~app_type) +
geom_bar(stat='identity') +
#geom_vline (xintercept = (30), linetype = "dashed", colour = "red") +
labs(x="Count of days",
y="Count of applicants",
title="Montana nurse licenses issued in 2021",
subtitle="Days from application submission to license issuance",
caption="Source: XXX")
averages %>%
filter (lic_type == "RN") %>%
filter (str_detect(month, "2020")|str_detect(month, "2021")) %>%
ggplot(aes(x = month, y = median, group=app_type)) +
#facet_wrap (~app_type) +
geom_line() +
geom_smooth(method=lm) +
#geom_vline (xintercept = (30), linetype = "dashed", colour = "red") +
labs(x="Count of days",
y="Count of applicants",
title="Montana nurse licenses issued in 2021",
subtitle="Days from application submission to license issuance",
caption="Source: XXX")
mt_state %>%
count (lic_type, app_type)
## # A tibble: 4 × 3
## lic_type app_type n
## <chr> <chr> <int>
## 1 LPN Endorsement 225
## 2 LPN Exam 161
## 3 RN Endorsement 4344
## 4 RN Exam 2688
str(mt_state)
## tibble [7,418 × 17] (S3: tbl_df/tbl/data.frame)
## $ first_name : chr [1:7418] "MECAILA" "CRYSTAL" "KATHERINE" "MCKENAH" ...
## $ last_name : chr [1:7418] "MARTIN" "GADDO" "QUESENBERRY" "BLUME" ...
## $ application_date: Date[1:7418], format: "2019-03-25" "2019-03-26" ...
## $ app_status : chr [1:7418] "License Issued" "License Issued" "License Issued" "License Issued" ...
## $ city : chr [1:7418] "VIDA" "BOZEMAN" "BILLINGS" "ANACONDA" ...
## $ state : chr [1:7418] "MT" "MT" "MT" "MT" ...
## $ app_type : chr [1:7418] "Exam" "Exam" "Exam" "Exam" ...
## $ lic_type : chr [1:7418] "RN" "RN" "RN" "RN" ...
## $ license_number : chr [1:7418] "NUR-RN-LIC-145544" "NUR-RN-LIC-145131" "NUR-RN-LIC-145467" "NUR-RN-LIC-145440" ...
## $ issue_date : Date[1:7418], format: "2019-06-26" "2019-05-30" ...
## $ lic_status : chr [1:7418] "Active" "Active" "Active" "Active" ...
## $ expiration_date : Date[1:7418], format: "2022-12-31" "2022-12-31" ...
## $ month : chr [1:7418] "2019-06" "2019-05" "2019-06" "2019-06" ...
## $ year : chr [1:7418] "2019" "2019" "2019" "2019" ...
## $ process_time : 'difftime' num [1:7418] 93 65 86 85 ...
## ..- attr(*, "units")= chr "days"
## $ process_time2 : 'difftime' num [1:7418] 93 65 86 85 ...
## ..- attr(*, "units")= chr "days"
## $ data_state : chr [1:7418] "MT" "MT" "MT" "MT" ...
#No duplicate numbers
dupe_numbers <- mt_state %>%
filter (!is.na(license_number)) %>% #pending apps will show up as NA
count (license_number) %>%
filter (n > 1)
state <- read_excel("../state-data/NE-NPR Nursing Data Request-092821.xlsx") %>% clean_names()
#Calculate processing time
ne_state <- state %>%
mutate (issue_date = as.Date(issue_date),
application_date = as.Date(app_received_date),
expiration_date = as.Date(expiration_date),
effective_renewal_date = as.Date(date_last_renewal),
month = substr(issue_date, 1,7),
year = year(issue_date),
lic_type = case_when(
license_type %in% c("Licensed Practical Nurse", "LPN-Compact Privilege") ~ "LPN",
license_type %in% c("Registered Nurse", "RN-Compact Privilege") ~ "RN",
license_type=="Temporary Licensed Practical Nurse" ~ "LPN-Temp",
license_type=="Registered Nurse - Temporary" ~ "RN-Temp",
license_type=="LPN - Refresher Temporary" ~ "LPN-Refresher Temporary",
license_type=="RN - Refresher Temporary" ~ "RN-Refresher Temporary",
license_type=="LPN Provisional Temporary" ~ "LPN-Provisional",
license_type=="RN Provisional Temporary" ~ "RN-Provisional",
license_type=="Registered Nurse - Temporary" ~ "RN-Temp",
TRUE ~ license_type),
app_type = case_when (
str_detect (license_type, "Provisional ") ~ "Application",
str_detect (obtained_by, "Compact State License") ~ "SSL to MSL",
# str_detect (lic_type, "Refresher Temporary") ~ "All",
str_detect (obtained_by, "Reinstate") ~ "Reinstatement",
TRUE ~ obtained_by )) %>%
mutate (process_time = issue_date - application_date)
#Check status for those that couldn't calculate process_time to find those actively pending, and set them to calculate pending time as process_time2
ne_state %>%
filter (is.na(process_time)) %>%
count (status) %>%
arrange (desc(n))
## # A tibble: 6 × 2
## status n
## <chr> <int>
## 1 Denied 811
## 2 Pending 317
## 3 Application File Closed 227
## 4 Withdrawn 72
## 5 Reinstatement Pending 28
## 6 Null and Void 6
#Calculates how long those left pending have been pending as process_time2
ne_state <- ne_state %>%
mutate (process_time2 = if_else (is.na(process_time) & status %in% c("Reinstatement Pending", "Pending"),
as.Date("2021-09-23") - application_date,
process_time)) %>%
mutate (year = if_else (is.na(year), "Pending as of 09/23/21", as.character(year)),
data_state = "NE")
#Any mismatches?
ne_state %>% filter (as.Date(app_received_date) != application_date)
## # A tibble: 0 × 22
## # … with 22 variables: last_name <chr>, middle_name <chr>, first_name <chr>,
## # addr_city <chr>, addr_state <chr>, profession_name <chr>,
## # license_type <chr>, obtained_by <chr>, status <chr>,
## # app_received_date <dttm>, issue_date <date>, date_last_renewal <dttm>,
## # expiration_date <date>, application_date <date>,
## # effective_renewal_date <date>, month <chr>, year <chr>, lic_type <chr>,
## # app_type <chr>, process_time <drtn>, process_time2 <drtn>, …
ne_state %>% filter (as.Date(date_last_renewal) != effective_renewal_date)
## # A tibble: 0 × 22
## # … with 22 variables: last_name <chr>, middle_name <chr>, first_name <chr>,
## # addr_city <chr>, addr_state <chr>, profession_name <chr>,
## # license_type <chr>, obtained_by <chr>, status <chr>,
## # app_received_date <dttm>, issue_date <date>, date_last_renewal <dttm>,
## # expiration_date <date>, application_date <date>,
## # effective_renewal_date <date>, month <chr>, year <chr>, lic_type <chr>,
## # app_type <chr>, process_time <drtn>, process_time2 <drtn>, …
#Remove unformatted date fields and unnecessary fields
ne_state <- ne_state %>%
select (-c(profession_name, license_type, app_received_date, date_last_renewal, obtained_by)) %>%
rename (city = addr_city,
state = addr_state,
lic_status = status)
ne_state %>%
#count(lic_type, renewal_app, app_type) #check for mistakes with renewal_app field
count(lic_type, app_type)
## # A tibble: 16 × 3
## lic_type app_type n
## <chr> <chr> <int>
## 1 LPN Endorsement 157
## 2 LPN Exam 1009
## 3 LPN Reinstatement 28
## 4 LPN SSL to MSL 1
## 5 LPN-Provisional Application 474
## 6 LPN-Refresher Temporary Application 13
## 7 LPN-Temp Endorsement 59
## 8 LPN-Temp Exam 28
## 9 RN Endorsement 2350
## 10 RN Exam 4430
## 11 RN Reinstatement 40
## 12 RN SSL to MSL 9
## 13 RN-Provisional Application 2014
## 14 RN-Refresher Temporary Application 66
## 15 RN-Temp Endorsement 388
## 16 RN-Temp Exam 189
str(ne_state)
## tibble [11,255 × 17] (S3: tbl_df/tbl/data.frame)
## $ last_name : chr [1:11255] "Dodds" "Briggs" "West" "Hayes" ...
## $ middle_name : chr [1:11255] "Anne" "Bomfim" "Brooke" "Brooke" ...
## $ first_name : chr [1:11255] "Marissa" "Patricia" "Alishia" "Whitney" ...
## $ city : chr [1:11255] "South Sioux City" "East Aurora" "Swansea" "Grand Island" ...
## $ state : chr [1:11255] "NE" "NY" "IL" "NE" ...
## $ lic_status : chr [1:11255] "Active" "Application File Closed" "Active" "Active" ...
## $ issue_date : Date[1:11255], format: "2019-03-13" NA ...
## $ expiration_date : Date[1:11255], format: "2021-10-31" NA ...
## $ application_date : Date[1:11255], format: "2019-01-02" "2019-01-02" ...
## $ effective_renewal_date: Date[1:11255], format: "2019-10-31" NA ...
## $ month : chr [1:11255] "2019-03" NA "2019-01" "2019-02" ...
## $ year : chr [1:11255] "2019" "Pending as of 09/23/21" "2019" "2019" ...
## $ lic_type : chr [1:11255] "LPN" "RN" "RN" "LPN" ...
## $ app_type : chr [1:11255] "Exam" "Endorsement" "Endorsement" "Exam" ...
## $ process_time : 'difftime' num [1:11255] 70 NA 22 57 ...
## ..- attr(*, "units")= chr "days"
## $ process_time2 : 'difftime' num [1:11255] 70 NA 22 57 ...
## ..- attr(*, "units")= chr "days"
## $ data_state : chr [1:11255] "NE" "NE" "NE" "NE" ...
#Check for license number duplicates -- license number not included, using name, city and license type instead
str(ne_state)
## tibble [11,255 × 17] (S3: tbl_df/tbl/data.frame)
## $ last_name : chr [1:11255] "Dodds" "Briggs" "West" "Hayes" ...
## $ middle_name : chr [1:11255] "Anne" "Bomfim" "Brooke" "Brooke" ...
## $ first_name : chr [1:11255] "Marissa" "Patricia" "Alishia" "Whitney" ...
## $ city : chr [1:11255] "South Sioux City" "East Aurora" "Swansea" "Grand Island" ...
## $ state : chr [1:11255] "NE" "NY" "IL" "NE" ...
## $ lic_status : chr [1:11255] "Active" "Application File Closed" "Active" "Active" ...
## $ issue_date : Date[1:11255], format: "2019-03-13" NA ...
## $ expiration_date : Date[1:11255], format: "2021-10-31" NA ...
## $ application_date : Date[1:11255], format: "2019-01-02" "2019-01-02" ...
## $ effective_renewal_date: Date[1:11255], format: "2019-10-31" NA ...
## $ month : chr [1:11255] "2019-03" NA "2019-01" "2019-02" ...
## $ year : chr [1:11255] "2019" "Pending as of 09/23/21" "2019" "2019" ...
## $ lic_type : chr [1:11255] "LPN" "RN" "RN" "LPN" ...
## $ app_type : chr [1:11255] "Exam" "Endorsement" "Endorsement" "Exam" ...
## $ process_time : 'difftime' num [1:11255] 70 NA 22 57 ...
## ..- attr(*, "units")= chr "days"
## $ process_time2 : 'difftime' num [1:11255] 70 NA 22 57 ...
## ..- attr(*, "units")= chr "days"
## $ data_state : chr [1:11255] "NE" "NE" "NE" "NE" ...
#These 21 records have duplicates -
dupe_numbers <- ne_state %>%
filter (!is.na(process_time2)) %>% #pending apps will show up as NA
group_by (first_name, middle_name, last_name, city, lic_type, app_type) %>%
mutate (n = n()) %>%
filter (n > 1) %>%
arrange (last_name, first_name, middle_name, city, lic_type, app_type)
#All are temporary/provisional/refresher temporary licenses, so likely they were given multiple times to nurses. No removal necessary.
dupe_numbers %>% ungroup() %>% count (lic_type, app_type) %>% arrange (desc(n))
## # A tibble: 3 × 3
## lic_type app_type n
## <chr> <chr> <int>
## 1 RN-Refresher Temporary Application 11
## 2 RN-Temp Exam 6
## 3 RN-Provisional Application 4
state <- read_csv("../state-data/NH-initial-apps.csv")%>%
clean_names()
#Calculate processing time
nh_state <- state %>%
mutate (issue_date = as.Date(issue_date),
application_date = as.Date(application_received_date),
month = substr(issue_date, 1,7),
year = year(issue_date)) %>%
mutate (process_time = issue_date - application_date)
#Check status for those that couldn't calculate process_time to find those actively pending, and set them to calculate pending time as process_time2
nh_state %>%
filter (is.na(process_time)) %>%
count (license_status) %>%
arrange (desc(n))
## # A tibble: 9 × 2
## license_status n
## <chr> <int>
## 1 Withdrawn 789
## 2 Pending 760
## 3 Null and Void 166
## 4 Deleted 71
## 5 Reinstatement Pending 7
## 6 Denied 2
## 7 Active 1
## 8 Inactive 1
## 9 NULL 1
#Calculates how long those left pending have been pending as process_time2
nh_state <- nh_state %>%
mutate (process_time2 = if_else (is.na(process_time) & license_status %in% c("Active", "Pending", "Reinstatement Pending"),
as.Date("2021-11-18") - application_date,
process_time)) %>%
mutate (lic_type = case_when(
license_type=="Licensed Practical Nurse" ~ "LPN",
license_type=="Registered Nurse" ~ "RN",
license_type=="Temporary LPN" ~ "LPN-Temp",
license_type=="Temporary RN" ~ "RN-Temp",
TRUE ~ license_type),
app_type = case_when (
str_detect (lic_type, "Temp") ~ "All",
str_detect (obtained_by, "Endorsement") ~ "Endorsement",
str_detect (obtained_by, "Examination") ~ "Exam",
str_detect (obtained_by, "Reinstatement") ~ "Reinstatement",
obtained_by %in% c("Comparable Education", "Re-entry - Refresher Course", "Application") ~ "Other",
TRUE ~ obtained_by),
year = if_else (is.na(year), "Pending as of 11/18/21", as.character(year)),
data_state = "NH")
#Any mismatches?
nh_state %>% filter (as.Date(application_received_date) != application_date)
## # A tibble: 0 × 16
## # … with 16 variables: name <chr>, license_type <chr>, obtained_by <chr>,
## # license_status <chr>, application_received_date <date>,
## # checklist_complete_date <date>, issue_date <date>, expiration_date <date>,
## # application_date <date>, month <chr>, year <chr>, process_time <drtn>,
## # process_time2 <drtn>, lic_type <chr>, app_type <chr>, data_state <chr>
#Remove unformatted date fields and unnecessary fields
nh_state <- nh_state %>%
select (-c(obtained_by, license_type, application_received_date)) %>%
rename (docs_date = checklist_complete_date,
lic_status = license_status)
str(nh_state)
## tibble [12,843 × 13] (S3: tbl_df/tbl/data.frame)
## $ name : chr [1:12843] "Foistner, Laurie J." "PAEY, ROBIN JEAN" "HARRINGTON, SANDRA MARIE" "HARRINGTON, SANDRA MARIE" ...
## $ lic_status : chr [1:12843] "Null and Void" "Deleted" "Active" "Active" ...
## $ docs_date : Date[1:12843], format: "2019-10-04" NA ...
## $ issue_date : Date[1:12843], format: "2019-10-04" NA ...
## $ expiration_date : Date[1:12843], format: "2020-02-01" NA ...
## $ application_date: Date[1:12843], format: "2019-09-19" "2020-10-17" ...
## $ month : chr [1:12843] "2019-10" NA "2019-07" "2019-07" ...
## $ year : chr [1:12843] "2019" "Pending as of 11/18/21" "2019" "2019" ...
## $ process_time : 'difftime' num [1:12843] 15 NA 35 35 ...
## ..- attr(*, "units")= chr "days"
## $ process_time2 : 'difftime' num [1:12843] 15 NA 35 35 ...
## ..- attr(*, "units")= chr "days"
## $ lic_type : chr [1:12843] "RN-Temp" "RN" "RN" "RN" ...
## $ app_type : chr [1:12843] "All" "Exam" "Exam" "Exam" ...
## $ data_state : chr [1:12843] "NH" "NH" "NH" "NH" ...
#2830 duplicated names
dupe_numbers <- nh_state %>%
count (name, lic_type, app_type) %>%
filter (n > 1) %>%
pull (name)
dupe_records <- nh_state %>%
filter (name %in% dupe_numbers)
nh_state2 <- nh_state %>%
distinct (name, lic_type, app_type, process_time2, .keep_all=T) #Remove duplicates with exact same name, license/application type and processing time
#Removed 2915 rows -- too many?
nrow(nh_state)-nrow(nh_state2)
## [1] 2915
#reduced to 9 duplicated names
dupe_numbers2 <- nh_state2 %>%
filter (!is.na(process_time2)) %>% #removes those that didn't calculate anyway
count (name, lic_type, app_type) %>%
filter (n > 1) %>%
pull (name)
#All of the remaining duplicates have one active and one pending, so no further removal necessary
dupe_numbers3 <- nh_state2 %>%
filter (name %in% dupe_numbers2)
#Compare how many duplicates were removed from each year
nh_state %>%
filter (year > "2018") %>%
#count(lic_type, renewal_app, app_type) #check for mistakes with renewal_app field
count(year, lic_type, app_type)# %>%
## # A tibble: 32 × 4
## year lic_type app_type n
## <chr> <chr> <chr> <int>
## 1 2019 LPN Endorsement 266
## 2 2019 LPN Exam 232
## 3 2019 LPN-Temp All 72
## 4 2019 RN Endorsement 2781
## 5 2019 RN Exam 1211
## 6 2019 RN-Temp All 451
## 7 2020 LPN Endorsement 103
## 8 2020 LPN Exam 182
## 9 2020 LPN Other 1
## 10 2020 LPN-Temp All 95
## # … with 22 more rows
# View()
nh_state2 %>%
filter (year > "2018") %>%
#count(lic_type, renewal_app, app_type) #check for mistakes with renewal_app field
count(year, lic_type, app_type) #%>%
## # A tibble: 32 × 4
## year lic_type app_type n
## <chr> <chr> <chr> <int>
## 1 2019 LPN Endorsement 137
## 2 2019 LPN Exam 121
## 3 2019 LPN-Temp All 68
## 4 2019 RN Endorsement 1452
## 5 2019 RN Exam 631
## 6 2019 RN-Temp All 443
## 7 2020 LPN Endorsement 87
## 8 2020 LPN Exam 158
## 9 2020 LPN Other 1
## 10 2020 LPN-Temp All 81
## # … with 22 more rows
# View()
#Rename and remove dataframes
nh_state <- nh_state2
rm(nh_state2, dupe_numbers3, dedupe, dupe_records, dupe_numbers, dupe_numbers2)
#Import RN files
nj1 <- read_csv ("../state-data/NJ-Responsive Documents/clean/NPR - RPN NEW APPLICANTS SINCE 2019 100521.csv") %>% clean_names()
nj2 <- read_csv("../state-data/NJ-Responsive Documents/clean/NPR - RPN GRAD LICENSEES 100521.csv") %>% clean_names()
nj3 <- read_csv("../state-data/NJ-Responsive Documents/clean/NPR - RPN PENDING REINSTATEMENT APPLICANTS SINCE 2019 100521.csv") %>% clean_names()
nj4 <- read_csv("../state-data/NJ-Responsive Documents/clean/NPR - RPN COMPLETED REINSTATEMENT APPLICANTS SINCE 2019 100521.csv") %>% clean_names()
#Import LPN files
nj_lpn1 <- read_csv ("../state-data/NJ-Responsive Documents/clean/NPR - LPN NEW APPLICANTS SINCE 2019 100521.csv") %>% clean_names()
nj_lpn2 <- read_csv("../state-data/NJ-Responsive Documents/clean/NPR - LPN GRAD LICENSEES 100521.csv") %>% clean_names()
nj_lpn3 <- read_csv("../state-data/NJ-Responsive Documents/clean/NPR - LPN PENDING REINSTATEMENT APPLICANTS SINCE 2019 100521.csv") %>% clean_names()
nj_lpn4 <- read_csv("../state-data/NJ-Responsive Documents/clean/NPR - LPN COMPLETED REINSTATEMENT APPLICANTS SINCE 2019 100521.csv") %>% clean_names()
nj_lpn5 <- read_csv("../state-data/NJ-Responsive Documents/clean/NPR - LPN RENEWALS SINCE 2019 100521.csv",
col_types = cols(.default = "c", CREATE_DATE = "D", EXPIRATION_DATE = "D", DATE_THIS_STATUS = "D", DATE_THIS_STATUS = "D",RENEWAL_DATE = "D")) %>% clean_names()
#Combine exam, endorsement, foreign applicant licenses
nj1 <- rbind (nj1, nj_lpn1)
#Combine grad licenses
nj2 <- rbind (nj2, nj_lpn2) %>%
rename (app_received_date = create_date) #rename one column to rbind later
#Combine pending reinstatement applications
nj3 <- rbind (nj3, nj_lpn3) %>%
rename (app_received_date = reinst_app_received_date) #rename one column to rbind later
#Combine approved reinstatement applications
nj4 <- rbind (nj4, nj_lpn4) %>%
select (-issue_date) %>%
mutate (issue_date = date_this_status,
obtained_by = "Reinstatement") %>%
rename (app_received_date = entry_date) %>% #rename two columns to rbind later
filter (license_status == "Active") #Can only be sure active licenses are correct, otherwise the status change may have been later. Removes ~160 expired, etc.
#Which rows don't have issue dates? -- Only Deleted, Denied, Pending (6891), Withdrawn
nj1 %>%
filter (is.na(issue_date)) %>%
count (license_status) #license_type, obtained_by,
## # A tibble: 4 × 2
## license_status n
## * <chr> <int>
## 1 Deleted 138
## 2 Denied 16
## 3 Pending 6891
## 4 Withdrawn 52
#Add all types into one dataframe and calculate processing times
nj_state <- nj1 %>%
rbind (nj2) %>% #add graduate licenses
rbind (nj3) %>% #add pending reinstatements
rbind (nj4) %>% #add completed reinstatements
filter (issue_date > as_date ("2018-12-31") | is.na(issue_date)) %>% #remove few mistakes issued before Jan. 1, 2019
rename (application_date = app_received_date,
effective_renewal_date = date_this_status) %>%
mutate (month = substr(issue_date, 1,7),
year = year(issue_date)) %>%
mutate (obtained_by = if_else (obtained_by == "Reactivation", "Reinstatement", obtained_by), #combines pending reactivations/pending reinstatements
process_time = issue_date - application_date)
#Check status for those that couldn't calculate process_time to find those actively pending, and set them to calculate pending time as process_time2
nj_state %>%
filter (is.na(process_time)) %>%
count (license_status) %>%
arrange (desc(n))
## # A tibble: 5 × 2
## license_status n
## <chr> <int>
## 1 Pending 6917
## 2 Reinstatement Pending 404
## 3 Deleted 170
## 4 Withdrawn 84
## 5 Denied 63
#Calculates how long those left pending have been pending as process_time2
nj_state <- nj_state %>%
mutate (process_time2 = if_else (is.na(process_time) & license_status %in% c("Reinstatement Pending", "Pending"),
as.Date("2021-10-05") - application_date,
process_time)) %>%
mutate (lic_type = case_when (
license_type %in% c("Registered Prof. Nurse", "RN Reinstatement") ~ "RN",
license_type %in% c("Licensed Practical Nurse", "LPN Reinstatement") ~ "LPN",
license_type == "LPN Graduate License" ~ "LPN-Provisional",
license_type == "RPN Graduate License" ~ "RN-Provisional"),
year = if_else (is.na(year), "Pending as of 10/05/21", as.character(year)),
app_type = case_when (
obtained_by == "Examination" ~ "Exam",
#str_detect (lic_type, "^Grad") ~ "All",
TRUE ~ obtained_by),
data_state = "NJ")
#Remove unformatted date fields and unnecessary fields
nj_state <- nj_state %>%
select (-c(license_type, obtained_by)) %>%
rename (city = addr_city,
state = addr_state,
license_number = license_no,
lic_status = license_status)
nj_state %>%
#count(lic_type, renewal_app, app_type) #check for mistakes with renewal_app field
count(lic_type, app_type)
## # A tibble: 10 × 3
## lic_type app_type n
## <chr> <chr> <int>
## 1 LPN Endorsement 837
## 2 LPN Exam 3815
## 3 LPN Foreign Applicant 7
## 4 LPN Reinstatement 356
## 5 LPN-Provisional Application 294
## 6 RN Endorsement 16344
## 7 RN Exam 12376
## 8 RN Foreign Applicant 127
## 9 RN Reinstatement 2148
## 10 RN-Provisional Application 1240
str(nj_state)
## tibble [37,544 × 18] (S3: tbl_df/tbl/data.frame)
## $ first_name : chr [1:37544] "MaryKate" "Shannon" "Diana" "Christine" ...
## $ middle_name : chr [1:37544] NA NA "H." "Marie" ...
## $ last_name : chr [1:37544] "Ryan" "Carpino" "Pelkaus" "Caton" ...
## $ city : chr [1:37544] "Sparta" "Maple Shade" "Jackson" "Abington" ...
## $ state : chr [1:37544] "NJ" "NJ" "NJ" "PA" ...
## $ license_number : chr [1:37544] "26NR20650900" "26NR20651800" "26NR20654400" "26NR20654700" ...
## $ application_date : Date[1:37544], format: "2019-01-03" "2019-01-02" ...
## $ issue_date : Date[1:37544], format: "2019-01-22" "2019-01-22" ...
## $ expiration_date : Date[1:37544], format: "2023-05-31" "2023-05-31" ...
## $ lic_status : chr [1:37544] "Active" "Active" "Active" "Active" ...
## $ effective_renewal_date: Date[1:37544], format: "2019-01-22" "2019-01-22" ...
## $ month : chr [1:37544] "2019-01" "2019-01" "2019-01" "2019-01" ...
## $ year : chr [1:37544] "2019" "2019" "2019" "2019" ...
## $ process_time : 'difftime' num [1:37544] 19 20 23 22 ...
## ..- attr(*, "units")= chr "days"
## $ process_time2 : 'difftime' num [1:37544] 19 20 23 22 ...
## ..- attr(*, "units")= chr "days"
## $ lic_type : chr [1:37544] "RN" "RN" "RN" "RN" ...
## $ app_type : chr [1:37544] "Endorsement" "Endorsement" "Endorsement" "Endorsement" ...
## $ data_state : chr [1:37544] "NJ" "NJ" "NJ" "NJ" ...
#These 39 licenses have duplicates -
dupe_numbers <- nj_state %>%
filter (!is.na(license_number)) %>% #pending apps will show up as NA
count (license_number) %>%
filter (n > 1) %>%
distinct (license_number) %>%
pull()
#Duplicate license numbers are mostly reinstatements
nj_state %>%
filter (license_number %in% dupe_numbers) %>%
count (lic_type, app_type)
## # A tibble: 6 × 3
## lic_type app_type n
## <chr> <chr> <int>
## 1 LPN Endorsement 1
## 2 LPN Exam 3
## 3 LPN Reinstatement 12
## 4 RN Endorsement 10
## 5 RN Exam 2
## 6 RN Reinstatement 55
#Remove reinstatements and rerun and no duplicates are found, so no removal necessary
dupe_numbers <- nj_state %>%
filter (!is.na(license_number)) %>% #pending apps will show up as NA
filter (app_type != "Reinstatement") %>%
count (license_number) %>%
filter (n > 1) %>%
distinct (license_number) %>%
pull()
state <- read_csv("../state-data/NM-11.03.2021-clean.csv") %>%
clean_names()
#Calculate processing time
nm_state <- state %>%
mutate (application_date = as.Date(date_of_initial_application_submission),
issue_date = as.Date(application_to_license_date),
#issue_date = as.Date(license_issue_date),
#effective_renewal_date = as.Date(application_to_license_date), #app2license_date
#Compared this to NM Board page and it matches "effective date" on Nursys
month = substr(issue_date, 1,7),
year = year(issue_date)) %>%
mutate (process_time = issue_date - application_date
#process_time2 = issue_date - app2license_date,
#change = process_time2 - process_time
) #calculates for everything else that's been issued
#Check status for those that couldn't calculate process_time to find those actively pending, and set them to calculate pending time as process_time2
nm_state %>%
filter (is.na(process_time)) %>%
count (license_application_status_11_03_2021) %>%
arrange (desc(n))
## # A tibble: 11 × 2
## license_application_status_11_03_2021 n
## <chr> <int>
## 1 Expired 2594
## 2 Denied 2042
## 3 Processing 1239
## 4 Denied for Cause 487
## 5 Administrative Denial 53
## 6 Void 36
## 7 On Hold 18
## 8 Withdrawn 12
## 9 Approved 7
## 10 Ready to issue 3
## 11 Closed 2
#Calculates how long those left pending have been pending as process_time2
nm_state <- nm_state %>%
mutate (process_time2 = if_else (is.na(process_time) & license_application_status_11_03_2021 %in% c("Processing", "On Hold", "Approved", "Ready to issue"),
as.Date("2021-11-03") - application_date,
process_time), #Calculates how long those left pending have been pending
lic_type = case_when (
license_duration=="FUll" & license_type=="Licensed Practical Nurse" ~ paste0("LPN"),
license_duration=="GradPermit" & license_type=="Licensed Practical Nurse" ~ paste0("LPN-Provisional"),
license_duration!="FUll" & license_type=="Licensed Practical Nurse" ~ paste0("LPN-", license_duration),
license_duration=="FUll" & license_type=="Registered Nurse" ~ paste0("RN"),
license_duration=="GradPermit" & license_type=="Registered Nurse" ~ paste0("RN-Provisional"),
license_duration!="FUll" & license_type=="Registered Nurse" ~ paste0("RN-", license_duration) )) %>%
mutate (app_type = case_when (
str_detect (lic_type, "Provisional") ~ "Application",
application_type == "Initial - Exam" ~ "Exam",
application_type == "Retest" ~ "Exam-retest",
TRUE ~ application_type),
year = if_else (is.na(year), "Pending as of 11/03/21", as.character(year)),
data_state = "NM")
#Any mismatches?
nm_state %>% filter (as.Date(date_of_initial_application_submission) != application_date)
## # A tibble: 0 × 22
## # … with 22 variables: name_of_applicant_first_middle_last <chr>,
## # city_of_residence <chr>, state_of_residence <chr>, license_type <chr>,
## # license_duration <chr>, application_type <chr>,
## # date_of_initial_application_submission <date>,
## # all_required_application_documents_received <chr>,
## # license_application_status_11_03_2021 <chr>,
## # application_to_license_date <date>, application_expiration_date <date>, …
#nm_state %>% filter (as.Date(application_to_license_date) != effective_renewal_date)
#Remove unformatted date fields and unnecessary fields
nm_state <- nm_state %>%
select (-c(license_type, application_type, all_required_application_documents_received, application_to_license_date, license_issue_date, date_of_initial_application_submission)) %>%
rename (name = name_of_applicant_first_middle_last,
city = city_of_residence,
state = state_of_residence,
duration = license_duration,
app_status = license_application_status_11_03_2021,
app_expiration_date = application_expiration_date,
expiration_date = license_expiration_date)
nm_state %>%
#count(lic_type, application_type, app_type) #check for mistakes with renewal_app field
count(lic_type, app_type)
## # A tibble: 19 × 3
## lic_type app_type n
## <chr> <chr> <int>
## 1 LPN Endorsement 296
## 2 LPN Exam 461
## 3 LPN Exam-retest 69
## 4 LPN International 76
## 5 LPN Reinstatement 231
## 6 LPN Renewal 2700
## 7 LPN-Provisional Application 30
## 8 LPN-Temp Endorsement 33
## 9 LPN-Temp Exam 3
## 10 RN Endorsement 4208
## 11 RN Exam 2893
## 12 RN Exam-retest 1494
## 13 RN International 5094
## 14 RN Reinstatement 1774
## 15 RN Renewal 36812
## 16 RN-Provisional Application 515
## 17 RN-Temp Endorsement 337
## 18 RN-Temp Exam 6
## 19 RN-Temp International 1
str(nm_state)
## tibble [57,033 × 16] (S3: tbl_df/tbl/data.frame)
## $ name : chr [1:57033] "Jaclyn, Nicole, Sugrue" "Nathan, Denson, Savage-Lee" "JANET, DIANE, TORRES" "PANISARA, , LIMTHAMROS" ...
## $ city : chr [1:57033] "Wilmington" "Carlisle" "Rio Rancho" "Riverside" ...
## $ state : chr [1:57033] "MA" "PA" "NM" "CA" ...
## $ duration : chr [1:57033] "FUll" "FUll" "FUll" "FUll" ...
## $ app_status : chr [1:57033] "Approved" "Approved" "Approved" "Approved" ...
## $ app_expiration_date: Date[1:57033], format: "2019-07-01" "2019-07-01" ...
## $ expiration_date : Date[1:57033], format: "2020-05-31" "2020-06-30" ...
## $ application_date : Date[1:57033], format: "2019-01-01" "2019-01-01" ...
## $ issue_date : Date[1:57033], format: "2019-01-14" "2019-02-20" ...
## $ month : chr [1:57033] "2019-01" "2019-02" "2019-01" "2019-03" ...
## $ year : chr [1:57033] "2019" "2019" "2019" "2019" ...
## $ process_time : 'difftime' num [1:57033] 13 50 0 76 ...
## ..- attr(*, "units")= chr "days"
## $ process_time2 : 'difftime' num [1:57033] 13 50 0 76 ...
## ..- attr(*, "units")= chr "days"
## $ lic_type : chr [1:57033] "RN" "RN" "RN" "RN" ...
## $ app_type : chr [1:57033] "Endorsement" "Endorsement" "Renewal" "Endorsement" ...
## $ data_state : chr [1:57033] "NM" "NM" "NM" "NM" ...
#12,000+ duplicates, but mostly renewals/reinstatements
dupe_numbers <- nm_state %>%
filter (!(app_type %in% c("Reinstatement", "Renewal"))) %>% #reduces dupes to 1201 names
filter (!is.na(process_time2)) %>% #pending apps will show up as NA - reduces to 48 names
count (name, lic_type, app_type) %>%
filter (n > 1) %>%
distinct (name) %>%
pull()
dupe_records <- nm_state %>%
filter (name %in% dupe_numbers) %>%
filter (!is.na(process_time)) #%>% #pending apps will show up as NA - reduces to 48 names
#Small number are real duplicates, but most of these have two issue dates and the same application date or vice-versa. I cannot determine which one is the real one, and it's less than 100 records, so I'll leave as is.
dupe_records %>%
distinct (name, lic_type, app_type, application_date, issue_date, .keep_all=T) %>%
arrange (name) #%>%
## # A tibble: 93 × 16
## name city state duration app_status app_expiration_… expiration_date
## <chr> <chr> <chr> <chr> <chr> <date> <date>
## 1 AIMIEDE, P… Las C… NM FUll Approved 2021-08-19 2022-12-31
## 2 AIMIEDE, P… Las C… NM FUll Approved 2021-08-19 2022-12-31
## 3 AMANDA, LO… Albuq… NM FUll Approved 2020-08-10 2022-03-31
## 4 AMANDA, LO… Albuq… NM FUll Approved 2020-08-10 2022-03-31
## 5 ANDREA, E.… Cedar… NM FUll Approved 2022-02-24 2022-12-31
## 6 ANDREA, E.… Cedar… NM FUll Approved 2022-02-24 2022-12-31
## 7 ASHLEY, , … Bonai… GA FUll Approved 2021-06-11 2021-10-26
## 8 ASHLEY, , … Bonai… GA FUll Approved 2021-06-11 2021-10-26
## 9 BENJAMIN, … Santa… NM FUll Approved 2022-01-22 2023-02-28
## 10 BENJAMIN, … Santa… NM FUll Approved 2022-01-22 2023-02-28
## # … with 83 more rows, and 9 more variables: application_date <date>,
## # issue_date <date>, month <chr>, year <chr>, process_time <drtn>,
## # process_time2 <drtn>, lic_type <chr>, app_type <chr>, data_state <chr>
# view
state <- read_excel("../state-data/NC-NPRApplicationDataV2.xlsx") %>%
clean_names()
#No records have both initial exam and initial endorsement, so can combine them into issue_date, which is empty for all files in V1. V2 has issue date column filled in.
state %>%
filter (!is.na(initial_exam) & !is.na(initial_endorsement))
## # A tibble: 0 × 18
## # … with 18 variables: first_name <chr>, middle_name <chr>, last_name <chr>,
## # addr_city <chr>, addr_state <chr>, license_type <chr>,
## # license_duration <chr>, app_type <chr>, initial_exam <dttm>,
## # initial_endorsement <dttm>, last_renewal <dttm>, last_reactivation <lgl>,
## # last_reinstatment <dttm>, app_sumbit_date <dttm>, doc_received <lgl>,
## # license_status <chr>, issue_date <dttm>, expiration_date <dttm>
state %>%
filter (!is.na(issue_date))
## # A tibble: 459,634 × 18
## first_name middle_name last_name addr_city addr_state license_type
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Rachel Tilley Haynie Pilot Mountain NC RN
## 2 Shannon Marie Boughton Hillsborough NC RN
## 3 Molly Strickland Reece Lewisville, NC RN
## 4 Rachel Marie Norris Lincolnton NC RN
## 5 Marilyn Fern Martin Black Mountain NC RN
## 6 Denisse Feliciano Fisher Lillington NC RN
## 7 Taffey <NA> McClure-Boone Westerlo NY RN
## 8 Susan Eileen Kordek Cornelius NC RN
## 9 Betty Sikes King Rockingham NC RN
## 10 Kathy Laciel Leon LAS VEGAS NV RN
## # … with 459,624 more rows, and 12 more variables: license_duration <chr>,
## # app_type <chr>, initial_exam <dttm>, initial_endorsement <dttm>,
## # last_renewal <dttm>, last_reactivation <lgl>, last_reinstatment <dttm>,
## # app_sumbit_date <dttm>, doc_received <lgl>, license_status <chr>,
## # issue_date <dttm>, expiration_date <dttm>
#Temp licenses don't calculate
nc_temp <- state %>%
filter (app_type == "Temporary Request")
state2 <- state %>%
mutate (issue_date2 = as.Date(coalesce(initial_exam, initial_endorsement, issue_date)),
application_date = as.Date(format(app_sumbit_date, "%Y-%m-%d")),
issue_date = as.Date(format(issue_date, "%Y-%m-%d")), #original
last_renewal = as.Date(format(last_renewal, "%Y-%m-%d"))
#Use below for V1 data
#last_renewal=as.Date(as.numeric(last_renewal), origin = "1899-12-30")
) #%>% #Converts last renewal to date format
# Only 44 don't match when comparing initial_exam/initial_endorsement to issue_date column, including 20 exams
mismatch <- state2 %>%
filter (issue_date != issue_date2)
mismatch2 <- mismatch %>%
mutate (issue_date2 = case_when (
app_type %in% c("Renewal", "Competency") ~ coalesce(last_renewal,as.Date(format(last_reinstatment, "%Y-%m-%d"))),
app_type == "Exam" ~ issue_date,
app_type == "Reinstatement" ~ as.Date(format(last_reinstatment, "%Y-%m-%d"))))
mismatch2 %>%
filter (issue_date == issue_date2)
## # A tibble: 20 × 20
## first_name middle_name last_name addr_city addr_state license_type
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Folashade Adenike Lojede Rolesville NC RN
## 2 Baleigh Aneta Barker Morehead City NC RN
## 3 Alexa Marie Futrell Tacoma WA RN
## 4 April Diane Dattler Miami FL RN
## 5 Diandra Denise Molson Bunnlevel NC RN
## 6 Alick <NA> Kioroglo Matthews NC RN
## 7 Sally <NA> Leviner Fuquay Varina NC RN
## 8 Riley Kate Robinson Marion NC RN
## 9 Kierra Alanna Garrett Catawba NC RN
## 10 Keondra Diajah Brown Charlotte NC RN
## 11 Sabrina Janine Crowder Winston Salem NC RN
## 12 La'mia Jacole Baldwin Durham NC RN
## 13 Claudia Marie Franklin Stokesdale NC RN
## 14 Dorothy <NA> Ofosu Durham NC RN
## 15 Candice Nichole Wiley Charlotte NC RN
## 16 Melissa Rose Hackley Clayton NC RN
## 17 Randi <NA> Stallings Smithfield NC LPN
## 18 Alicia Lanette Sutton Greenville NC RN
## 19 Alicia Lanette Sutton Greenville NC RN
## 20 Melissa Rose Hackley Clayton NC RN
## # … with 14 more variables: license_duration <chr>, app_type <chr>,
## # initial_exam <dttm>, initial_endorsement <dttm>, last_renewal <date>,
## # last_reactivation <lgl>, last_reinstatment <dttm>, app_sumbit_date <dttm>,
## # doc_received <lgl>, license_status <chr>, issue_date <date>,
## # expiration_date <dttm>, issue_date2 <date>, application_date <date>
state2a <- state2 %>%
anti_join (mismatch) %>% #remove those whose issue_date doesn't match and replace with corrected dates
rbind (mismatch2)
# Check Diandra Denise Molson to see they've matched
#Issue_date and issue_date2 don't match
mismatch %>%
filter (first_name=="Diandra" & middle_name=="Denise" & last_name=="Molson") %>%
select (8:20)
## # A tibble: 2 × 13
## app_type initial_exam initial_endorsement last_renewal last_reactivati…
## <chr> <dttm> <dttm> <date> <lgl>
## 1 Exam 2020-01-13 00:00:00 NA 2021-06-22 NA
## 2 Renewal 2020-01-13 00:00:00 NA 2021-06-22 NA
## # … with 8 more variables: last_reinstatment <dttm>, app_sumbit_date <dttm>,
## # doc_received <lgl>, license_status <chr>, issue_date <date>,
## # expiration_date <dttm>, issue_date2 <date>, application_date <date>
#Issue_date and issue_date2 match up here
state2a %>%
filter (first_name=="Diandra" & middle_name=="Denise" & last_name=="Molson") %>%
select (8:20)
## # A tibble: 2 × 13
## app_type initial_exam initial_endorsement last_renewal last_reactivati…
## <chr> <dttm> <dttm> <date> <lgl>
## 1 Exam 2020-01-13 00:00:00 NA 2021-06-22 NA
## 2 Renewal 2020-01-13 00:00:00 NA 2021-06-22 NA
## # … with 8 more variables: last_reinstatment <dttm>, app_sumbit_date <dttm>,
## # doc_received <lgl>, license_status <chr>, issue_date <date>,
## # expiration_date <dttm>, issue_date2 <date>, application_date <date>
#Renewals are messy because they all have the most recent renewal's issue date instead of each renewal's issue date, so not going to include.
state3 <- state2a %>%
mutate (process_time = case_when (
app_type %in% c("Exam", "Endorsement", "Temporary Request") ~ issue_date2 - application_date,
app_type %in% c("Competency", "Renewal") ~ last_renewal - application_date))
#How many have negative processing times?
negatives <- state3 %>%
filter (process_time < 0)
#All negatives are competency or renewals
negatives %>% count (app_type)
## # A tibble: 2 × 2
## app_type n
## * <chr> <int>
## 1 Competency 3294
## 2 Renewal 470
#Pull out only Exam/Endorsement. It appears this is how nurses are first licensed, and then later renewals are entered as competency or renewal. See below for example
jones <- state2a %>% filter (first_name=="Kimberly" & middle_name=="Ann" & last_name=="Jones")
#Pull out permanent licenses by exam and endorsement only. Will do temporary licenses separately
state3 <- state2a %>%
filter (app_type %in% c("Exam", "Endorsement")) %>%
mutate (month = substr(issue_date, 1,7),
year = year(issue_date),
#Temps don't have issue date, so they're not even here, but keeping in case NC returns them
lic_type = case_when (
license_duration=="Temporary" ~ paste0(license_type, "-Temp"),
TRUE ~ license_type),
process_time = issue_date - application_date)
#How many of each type are here?
state3 %>% count (lic_type, app_type, license_duration, year)
## # A tibble: 16 × 5
## lic_type app_type license_duration year n
## <chr> <chr> <chr> <dbl> <int>
## 1 LPN Endorsement Permanent 2019 488
## 2 LPN Endorsement Permanent 2020 566
## 3 LPN Endorsement Permanent 2021 666
## 4 LPN Endorsement Permanent 2022 77
## 5 LPN Exam Permanent 2019 906
## 6 LPN Exam Permanent 2020 986
## 7 LPN Exam Permanent 2021 1081
## 8 LPN Exam Permanent 2022 114
## 9 RN Endorsement Permanent 2019 3948
## 10 RN Endorsement Permanent 2020 4473
## 11 RN Endorsement Permanent 2021 6001
## 12 RN Endorsement Permanent 2022 756
## 13 RN Exam Permanent 2019 4139
## 14 RN Exam Permanent 2020 5607
## 15 RN Exam Permanent 2021 6268
## 16 RN Exam Permanent 2022 754
#Any negatives? none
state3 %>%
filter (process_time < 0 )
## # A tibble: 0 × 24
## # … with 24 variables: first_name <chr>, middle_name <chr>, last_name <chr>,
## # addr_city <chr>, addr_state <chr>, license_type <chr>,
## # license_duration <chr>, app_type <chr>, initial_exam <dttm>,
## # initial_endorsement <dttm>, last_renewal <date>, last_reactivation <lgl>,
## # last_reinstatment <dttm>, app_sumbit_date <dttm>, doc_received <lgl>,
## # license_status <chr>, issue_date <date>, expiration_date <dttm>,
## # issue_date2 <date>, application_date <date>, month <chr>, year <dbl>, …
#Any failed? none
state3 %>%
filter (is.na(process_time))
## # A tibble: 0 × 24
## # … with 24 variables: first_name <chr>, middle_name <chr>, last_name <chr>,
## # addr_city <chr>, addr_state <chr>, license_type <chr>,
## # license_duration <chr>, app_type <chr>, initial_exam <dttm>,
## # initial_endorsement <dttm>, last_renewal <date>, last_reactivation <lgl>,
## # last_reinstatment <dttm>, app_sumbit_date <dttm>, doc_received <lgl>,
## # license_status <chr>, issue_date <date>, expiration_date <dttm>,
## # issue_date2 <date>, application_date <date>, month <chr>, year <dbl>, …
#Any mismatches?
state3 %>% filter (as.Date(format(app_sumbit_date, "%Y-%m-%d")) != application_date)
## # A tibble: 0 × 24
## # … with 24 variables: first_name <chr>, middle_name <chr>, last_name <chr>,
## # addr_city <chr>, addr_state <chr>, license_type <chr>,
## # license_duration <chr>, app_type <chr>, initial_exam <dttm>,
## # initial_endorsement <dttm>, last_renewal <date>, last_reactivation <lgl>,
## # last_reinstatment <dttm>, app_sumbit_date <dttm>, doc_received <lgl>,
## # license_status <chr>, issue_date <date>, expiration_date <dttm>,
## # issue_date2 <date>, application_date <date>, month <chr>, year <dbl>, …
#Some nurses applied multiple times. There are 34,202 individuals - 2,026 have more than 1 record, meaning they re-tested or submitted multiple endorsement applications >> 1746 are exams + 280 are endorsement
state3 %>% distinct (first_name, middle_name, last_name, addr_city)
## # A tibble: 34,202 × 4
## first_name middle_name last_name addr_city
## <chr> <chr> <chr> <chr>
## 1 Rachnaben Tejashkumar Patel Spindale
## 2 Jessica <NA> Perez Monroe
## 3 Liza Marie Tank Raleigh
## 4 Joanne M Kuplicki Shallotte
## 5 Valerie Marie Ledgerwood Littleton
## 6 Bettie Shenia Coleman Bladenboro
## 7 Julianne Janelle Lopez Greensboro
## 8 Alicen Mary Gillis Wake Forest
## 9 Lorrie Ann Compton Nashville
## 10 Afiya <NA> Felix Fort Washington
## # … with 34,192 more rows
nc_multiples <- state3 %>%
group_by (first_name, middle_name, last_name, addr_city) %>%
mutate (count=n()) %>%
arrange (desc(count), last_name, first_name, addr_city, application_date)
#Are any of the same people's names listed under multiple cities? No
nc_multiples %>%
ungroup() %>%
distinct (first_name, middle_name, last_name, addr_city, .keep_all=T) %>%
filter (count > 1) %>%
count (first_name, middle_name, last_name) %>%
filter (n > 1)
## # A tibble: 0 × 4
## # … with 4 variables: first_name <chr>, middle_name <chr>, last_name <chr>,
## # n <int>
#How many people have multiple records? 2026
nc_multiples %>%
ungroup() %>%
distinct (first_name, middle_name, last_name, addr_city, .keep_all=T) %>%
filter (count > 1) %>%
count (app_type)
## # A tibble: 2 × 2
## app_type n
## * <chr> <int>
## 1 Endorsement 280
## 2 Exam 1746
nc_multiples %>%
ungroup() %>%
filter (count > 1) %>%
group_by (first_name, middle_name, last_name, addr_city) %>%
mutate (count = n()) %>%
distinct (first_name, middle_name, last_name, addr_city, .keep_all=T) %>%
filter (count > 1)
## # A tibble: 2,026 × 25
## # Groups: first_name, middle_name, last_name, addr_city [2,026]
## first_name middle_name last_name addr_city addr_state license_type
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Charles I Esonenjo Riverview FL RN
## 2 Myriame <NA> Alexandre Octave Naples FL RN
## 3 Alberto Luis Costales-Rodriguez Greensboro NC RN
## 4 Jovan <NA> Walker Miami FL RN
## 5 Angela <NA> McCafferty Seminole FL RN
## 6 Lorine <NA> Suah Trenton NJ RN
## 7 Saraphina <NA> Casseus Orlando FL RN
## 8 Cristina <NA> Cebotari Hickory NC RN
## 9 Zoe Extha Karpee East Flat … NC RN
## 10 Nicole Emily Richiez Cameron NC RN
## # … with 2,016 more rows, and 19 more variables: license_duration <chr>,
## # app_type <chr>, initial_exam <dttm>, initial_endorsement <dttm>,
## # last_renewal <date>, last_reactivation <lgl>, last_reinstatment <dttm>,
## # app_sumbit_date <dttm>, doc_received <lgl>, license_status <chr>,
## # issue_date <date>, expiration_date <dttm>, issue_date2 <date>,
## # application_date <date>, month <chr>, year <dbl>, lic_type <chr>,
## # process_time <drtn>, count <int>
#We'll assume the latest application is the one that eventually got approved, so this keeps the final application's process_time
nc_multiples2 <- nc_multiples %>%
filter (count > 1) %>%
slice_tail()
#Create dataframe with all 1-record people (32176 people)
nc_multiples1 <- nc_multiples %>%
filter (count==1)
#Combine 1-record people with only 1 record from the 2026 multiple-record people
state4 <- rbind(nc_multiples1, nc_multiples2) %>%
select (-count)
#Now add in temporary requests -- many of these are repeats of people from the permanent license records above, so adding this separately
#Pull out temporary licenses only
state_temp <- state2a %>%
filter (license_duration=="Temporary") %>%
mutate (month = substr(issue_date, 1,7),
year = year(issue_date),
#Temps don't have issue date, so they're not even here, but keeping in case NC returns them
lic_type = case_when (
license_duration=="Temporary" ~ paste0(license_type, "-Temp"),
TRUE ~ license_type),
process_time = issue_date - application_date)
#How many of each type are here?
state_temp %>% count (lic_type, app_type, license_duration, year)
## # A tibble: 8 × 5
## lic_type app_type license_duration year n
## <chr> <chr> <chr> <dbl> <int>
## 1 LPN-Temp Temporary Request Temporary 2019 235
## 2 LPN-Temp Temporary Request Temporary 2020 275
## 3 LPN-Temp Temporary Request Temporary 2021 302
## 4 LPN-Temp Temporary Request Temporary 2022 25
## 5 RN-Temp Temporary Request Temporary 2019 1773
## 6 RN-Temp Temporary Request Temporary 2020 1813
## 7 RN-Temp Temporary Request Temporary 2021 2652
## 8 RN-Temp Temporary Request Temporary 2022 256
#Any negatives? none
state_temp %>%
filter (process_time < 0 )
## # A tibble: 0 × 24
## # … with 24 variables: first_name <chr>, middle_name <chr>, last_name <chr>,
## # addr_city <chr>, addr_state <chr>, license_type <chr>,
## # license_duration <chr>, app_type <chr>, initial_exam <dttm>,
## # initial_endorsement <dttm>, last_renewal <date>, last_reactivation <lgl>,
## # last_reinstatment <dttm>, app_sumbit_date <dttm>, doc_received <lgl>,
## # license_status <chr>, issue_date <date>, expiration_date <dttm>,
## # issue_date2 <date>, application_date <date>, month <chr>, year <dbl>, …
#Any failed? none
state_temp %>%
filter (is.na(process_time))
## # A tibble: 0 × 24
## # … with 24 variables: first_name <chr>, middle_name <chr>, last_name <chr>,
## # addr_city <chr>, addr_state <chr>, license_type <chr>,
## # license_duration <chr>, app_type <chr>, initial_exam <dttm>,
## # initial_endorsement <dttm>, last_renewal <date>, last_reactivation <lgl>,
## # last_reinstatment <dttm>, app_sumbit_date <dttm>, doc_received <lgl>,
## # license_status <chr>, issue_date <date>, expiration_date <dttm>,
## # issue_date2 <date>, application_date <date>, month <chr>, year <dbl>, …
#Any people have multiple records? Just two people who got both temp LPN and temp RN licenses
state_temp %>%
group_by (first_name, middle_name, last_name, addr_city) %>%
mutate (count=n()) %>%
filter (count > 1) %>%
arrange (desc(count), last_name, first_name, addr_city, application_date)
## # A tibble: 4 × 25
## # Groups: first_name, middle_name, last_name, addr_city [2]
## first_name middle_name last_name addr_city addr_state license_type
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Portia Laniece Harris Charlotte NC LPN
## 2 Portia Laniece Harris Charlotte NC RN
## 3 Leah Lynn Thompson Wilmington NC LPN
## 4 Leah Lynn Thompson Wilmington NC RN
## # … with 19 more variables: license_duration <chr>, app_type <chr>,
## # initial_exam <dttm>, initial_endorsement <dttm>, last_renewal <date>,
## # last_reactivation <lgl>, last_reinstatment <dttm>, app_sumbit_date <dttm>,
## # doc_received <lgl>, license_status <chr>, issue_date <date>,
## # expiration_date <dttm>, issue_date2 <date>, application_date <date>,
## # month <chr>, year <dbl>, lic_type <chr>, process_time <drtn>, count <int>
#Any mismatches?
state_temp %>% filter (as.Date(format(app_sumbit_date, "%Y-%m-%d")) != application_date)
## # A tibble: 0 × 24
## # … with 24 variables: first_name <chr>, middle_name <chr>, last_name <chr>,
## # addr_city <chr>, addr_state <chr>, license_type <chr>,
## # license_duration <chr>, app_type <chr>, initial_exam <dttm>,
## # initial_endorsement <dttm>, last_renewal <date>, last_reactivation <lgl>,
## # last_reinstatment <dttm>, app_sumbit_date <dttm>, doc_received <lgl>,
## # license_status <chr>, issue_date <date>, expiration_date <dttm>,
## # issue_date2 <date>, application_date <date>, month <chr>, year <dbl>, …
#Virtually all temporary licenses go to endorsement candidates
state4 %>%
semi_join (state_temp, by = c("first_name", "middle_name", "last_name", "addr_city", "addr_state")) %>% #keeps all exam/endorsement candidates also found in temp dataframe
ungroup() %>%
count (app_type)
## # A tibble: 2 × 2
## app_type n
## * <chr> <int>
## 1 Endorsement 6738
## 2 Exam 3
#Join permanent exam/endorsement licenses with temporary licenses issued
state5 <- state4 %>%
rbind (state_temp)
#Remove unformatted date fields and unnecessary fields
nc_state <- state5 %>%
mutate (expiration_date = as.Date(expiration_date),
data_state = "NC",
year = as.character(year)) %>%
select (-c(initial_exam, initial_endorsement, last_renewal, last_reactivation, last_reinstatment, app_sumbit_date, doc_received, license_type)) %>%
rename (duration = license_duration,
lic_status = license_status,
city = addr_city,
state = addr_state) %>%
ungroup()
nc_state %>%
count(lic_type, app_type)
## # A tibble: 6 × 3
## lic_type app_type n
## <chr> <chr> <int>
## 1 LPN Endorsement 1737
## 2 LPN Exam 2661
## 3 LPN-Temp Temporary Request 837
## 4 RN Endorsement 14955
## 5 RN Exam 14849
## 6 RN-Temp Temporary Request 6494
rm (state2, state3, negatives, jones, nc_multiples2, nc_multiples1, nc_multiples)
glimpse(nc_state)
## Rows: 41,533
## Columns: 17
## $ first_name <chr> "Ameenah", "Ernesto", "Rossanna", "Nichole", "Rosalyn…
## $ middle_name <chr> "A.", NA, NA, "Catherine", "Grace", NA, "D.", NA, "M"…
## $ last_name <chr> "Aaram", "Aba-a", "Abad", "Abair", "Abankwah", "Abare…
## $ city <chr> "Carrboro", "Fayetteville", "Brooklyn", "Hardwick", "…
## $ state <chr> "NC", "NC", "NY", "MA", "NC", "NC", "NC", "VT", "NC",…
## $ duration <chr> "Permanent", "Permanent", "Permanent", "Permanent", "…
## $ app_type <chr> "Exam", "Endorsement", "Endorsement", "Endorsement", …
## $ lic_status <chr> "Active", "Active", "Active", "Active", "Active", "Ac…
## $ issue_date <date> 2021-03-05, 2021-07-01, 2021-09-02, 2020-02-14, 2020…
## $ expiration_date <date> 2022-04-30, 2022-12-31, 2023-03-31, 2023-11-30, 2024…
## $ issue_date2 <date> 2021-03-05, 2021-07-01, 2021-09-02, 2020-02-14, 2020…
## $ application_date <date> 2020-09-23, 2020-10-21, 2021-06-24, 2019-08-08, 2020…
## $ month <chr> "2021-03", "2021-07", "2021-09", "2020-02", "2020-07"…
## $ year <chr> "2021", "2021", "2021", "2020", "2020", "2021", "2019…
## $ lic_type <chr> "LPN", "RN", "RN", "RN", "LPN", "RN", "LPN", "RN", "R…
## $ process_time <drtn> 163 days, 253 days, 70 days, 190 days, 32 days, 35 d…
## $ data_state <chr> "NC", "NC", "NC", "NC", "NC", "NC", "NC", "NC", "NC",…
#Zero duplicates, so no removal necessary
dupe_numbers <- nc_state %>%
# filter (!is.na(license_number)) %>% #pending apps will show up as NA
group_by (first_name, middle_name, last_name, city, lic_type, app_type) %>%
mutate (n = n()) %>%
filter (n > 1) %>%
arrange (last_name, first_name, middle_name, city, lic_type, app_type)
state <- read_csv("../state-data/OH-DAS Report - 9_28_2021-clean.csv",
col_types = cols(.default = "c", `App Submitted Date` = "D", `Issue Date` = "D", `Expiration Date` = "D")) %>%
clean_names()
#Calculate processing time
oh_state <- state %>%
mutate (application_date = as.Date(app_submitted_date),
issue_date = as.Date(issue_date),
month = substr(issue_date, 1,7),
year = year(issue_date),
lic_type = case_when (
type == "Licensed Practical Nurse - Temporary (IP)" ~ "LPN-Temp",
type == "Registered Nurse - Temporary (IR)" ~ "RN-Temp",
type == "Licensed Practical Nurse (LPN)" ~ "LPN",
type == "Registered Nurse (RN)" ~ "RN"),
app_type = case_when (
str_detect(type, "Temporary") ~ "Endorsement",
licensed_by == "Reciprocity" ~ "Endorsement",
licensed_by == "Examination" ~ "Exam",
TRUE ~ "Unknown")) %>%
mutate (process_time = issue_date - application_date) #calculates for everything else that's been issued
#Check status for those that couldn't calculate process_time to find those actively pending, and set them to calculate pending time as process_time2
oh_state %>%
filter (is.na(process_time)) %>%
count (status) %>%
arrange (desc(n))
## # A tibble: 4 × 2
## status n
## <chr> <int>
## 1 In Review 12076
## 2 Closed 7808
## 3 Submitted 2396
## 4 Inactive 43
#Calculates how long those left pending have been pending as process_time2
oh_state <- oh_state %>%
mutate (process_time2 = if_else (is.na(process_time) & status %in% c("In Review", "Submitted"),
as.Date("2021-09-28") - application_date,
process_time)) %>% #Calculates how long those left pending have been pending
mutate (year = if_else (is.na(year), "Pending as of 09/28/21", as.character(year)),
data_state = "OH")
#Any mismatches?
oh_state %>% filter (as.Date(app_submitted_date) != application_date)
## # A tibble: 0 × 18
## # … with 18 variables: applicant_full_name <chr>, mailing_address_city <chr>,
## # mailing_address_state <chr>, type <chr>, licensed_by <chr>,
## # license_number <chr>, app_submitted_date <date>, status <chr>,
## # issue_date <date>, expiration_date <date>, application_date <date>,
## # month <chr>, year <chr>, lic_type <chr>, app_type <chr>,
## # process_time <drtn>, process_time2 <drtn>, data_state <chr>
#Remove unformatted date fields and unnecessary fields
oh_state <- oh_state %>%
select (-c(type, licensed_by, app_submitted_date)) %>%
rename (name = applicant_full_name,
city = mailing_address_city,
state = mailing_address_state,
app_status = status)
oh_state %>%
#count(lic_type, licensed_by, app_type) #check for mistakes with renewal_app field
count(lic_type, app_type)
## # A tibble: 7 × 3
## lic_type app_type n
## <chr> <chr> <int>
## 1 LPN Endorsement 1698
## 2 LPN Exam 12557
## 3 LPN-Temp Endorsement 781
## 4 RN Endorsement 17347
## 5 RN Exam 36523
## 6 RN Unknown 1
## 7 RN-Temp Endorsement 10560
str(oh_state)
## tibble [79,467 × 15] (S3: tbl_df/tbl/data.frame)
## $ name : chr [1:79467] "Kristin Elizabeth Beall" "Takira Louise Collins" "Briane DaVite McKeithen" "Christina Marie Young" ...
## $ city : chr [1:79467] "North Little Rock" "Cincinnati" "Boardman" "AUBREY" ...
## $ state : chr [1:79467] "AR" "OH" "OH" "TX" ...
## $ license_number : chr [1:79467] "APP-000498881" "APP-000389633" "APP-000362611" "APP-000498886" ...
## $ app_status : chr [1:79467] "Submitted" "Submitted" "Submitted" "Submitted" ...
## $ issue_date : Date[1:79467], format: NA NA ...
## $ expiration_date : Date[1:79467], format: NA NA ...
## $ application_date: Date[1:79467], format: "2021-08-28" "2021-08-10" ...
## $ month : chr [1:79467] NA NA NA NA ...
## $ year : chr [1:79467] "Pending as of 09/28/21" "Pending as of 09/28/21" "Pending as of 09/28/21" "Pending as of 09/28/21" ...
## $ lic_type : chr [1:79467] "RN" "LPN" "LPN" "RN" ...
## $ app_type : chr [1:79467] "Endorsement" "Endorsement" "Exam" "Endorsement" ...
## $ process_time : 'difftime' num [1:79467] NA NA NA NA ...
## ..- attr(*, "units")= chr "days"
## $ process_time2 : 'difftime' num [1:79467] 31 49 13 51 ...
## ..- attr(*, "units")= chr "days"
## $ data_state : chr [1:79467] "OH" "OH" "OH" "OH" ...
#Zero duplicates - no removal necessary
dupe_numbers <- oh_state %>%
count (license_number) %>%
filter (n > 1) %>%
distinct (license_number) %>%
pull()
state <- read_csv ("../state-data/OK-Copy of NPR Final Data-clean4.csv",
col_types = cols(.default = "c", `Application submit Date` = "D", `Documents recieved` = "D", IssuanceDate = "D", ExpirationDate = "D") ) %>%
clean_names()
state %>% count (license_type_name, license_duration, application_type)
## # A tibble: 25 × 4
## license_type_name license_duration application_type n
## <chr> <chr> <chr> <int>
## 1 LPN NULL Endorsement 53
## 2 LPN NULL Initial - Exam 318
## 3 LPN NULL Other 1
## 4 LPN NULL Retest 109
## 5 LPN NULL Temporary 10
## 6 LPN Permanent Endorsement 86
## 7 LPN Permanent Inactive 59
## 8 LPN Permanent Initial - Exam 688
## 9 LPN Permanent Other 125
## 10 LPN Permanent Reinstatement 822
## # … with 15 more rows
#Calculate processing time
ok_state <- state %>%
mutate (application_date = as.Date(application_submit_date),
issue_date = as.Date(issuance_date),
docs_date = as.Date(documents_recieved),
gather_time = docs_date - application_date,
docs_time = issue_date - docs_date,
month = substr(issue_date, 1,7),
year = year(issue_date),
app_type = case_when (
application_type == "Retest" ~ "Exam-retest",
application_type == "Initial - Exam" ~ "Exam",
application_type == "Other" ~ "SSL to MSL",
application_type == "Temporary" ~ "Application",
TRUE ~ application_type)) %>%
mutate (lic_type = case_when (
application_type=="Temporary" ~ paste0(license_type_name, "-Temp"),
TRUE ~ license_type_name)) %>%
mutate (process_time = issue_date - application_date)
#Check status for those that couldn't calculate process_time to find those actively pending, and set them to calculate pending time as process_time2
ok_state %>%
filter (is.na(process_time)) %>%
count (application_status) %>%
arrange (desc(n))
## # A tibble: 13 × 2
## application_status n
## <chr> <int>
## 1 Processing 1164
## 2 Approved-Retest 330
## 3 Retest 301
## 4 Void 271
## 5 Denied 40
## 6 Ready to issue 27
## 7 Expired 25
## 8 Closed 22
## 9 Ready to Renew 6
## 10 On Hold 3
## 11 Administrative Denial 1
## 12 Approved 1
## 13 Withdrawn 1
#Calculates how long those left pending have been pending as process_time2
ok_state <- ok_state %>%
mutate (process_time2 = if_else (is.na(process_time) & application_status %in% c("Processing", "Approved-Retest", "Retest", "Ready to issue", "Ready to Renew", "On Hold", "Approved"),
as.Date("2021-10-20") - application_date,
process_time)) %>% #Calculates how long those left pending have been pending
mutate (year = if_else (is.na(year), "Pending as of 10/20/21", as.character(year)),
data_state = "OK")
#Any mismatches?
ok_state %>% filter (as.Date(application_submit_date) != application_date)
## # A tibble: 0 × 25
## # … with 25 variables: first_name <chr>, middle_name <chr>, last_name <chr>,
## # city <chr>, state_province <chr>, license_type_name <chr>,
## # license_duration <chr>, application_type <chr>,
## # application_submit_date <date>, documents_recieved <date>,
## # application_status <chr>, issuance_date <date>, expiration_date <date>,
## # application_date <date>, issue_date <date>, docs_date <date>,
## # gather_time <drtn>, docs_time <drtn>, month <chr>, year <chr>, …
#Remove unformatted date fields and unnecessary fields
ok_state <- ok_state %>%
select (-c(application_type, application_submit_date, issuance_date, documents_recieved)) %>%
rename (state = state_province,
duration = license_duration,
app_status = application_status)
ok_state %>%
count(lic_type, app_type)
## # A tibble: 16 × 3
## lic_type app_type n
## <chr> <chr> <int>
## 1 LPN Endorsement 139
## 2 LPN Exam 1006
## 3 LPN Exam-retest 187
## 4 LPN Inactive 59
## 5 LPN Reinstatement 822
## 6 LPN Renewal 11028
## 7 LPN SSL to MSL 126
## 8 LPN-Temp Application 10
## 9 RN Endorsement 988
## 10 RN Exam 2064
## 11 RN Exam-retest 551
## 12 RN Inactive 50
## 13 RN Reinstatement 504
## 14 RN Renewal 9112
## 15 RN SSL to MSL 1356
## 16 RN-Temp Application 58
str(ok_state)
## tibble [28,060 × 21] (S3: tbl_df/tbl/data.frame)
## $ first_name : chr [1:28060] "TERESA" "DEBORAH" "SANDRA" "ROBERT" ...
## $ middle_name : chr [1:28060] "YOUNGBLOOD" "RENE" "KAY" "COLIN" ...
## $ last_name : chr [1:28060] "OLDHAM" "MCCOY" "KING" "WOOD" ...
## $ city : chr [1:28060] "CHOCTAW" "TULSA" "OKLAHOMA CITY" "FT SUPPLY" ...
## $ state : chr [1:28060] "OK" "OK" "OK" "OK" ...
## $ license_type_name: chr [1:28060] "RN" "RN" "RN" "RN" ...
## $ duration : chr [1:28060] "Permanent" "Permanent" "Permanent" "Permanent" ...
## $ app_status : chr [1:28060] "Approved" "Approved" "Approved" "Approved" ...
## $ expiration_date : Date[1:28060], format: "2022-10-31" "2022-10-31" ...
## $ application_date : Date[1:28060], format: "2020-10-01" "2020-10-01" ...
## $ issue_date : Date[1:28060], format: "2020-11-01" "2020-11-01" ...
## $ docs_date : Date[1:28060], format: "2020-11-01" "2020-11-01" ...
## $ gather_time : 'difftime' num [1:28060] 31 31 31 31 ...
## ..- attr(*, "units")= chr "days"
## $ docs_time : 'difftime' num [1:28060] 0 0 0 0 ...
## ..- attr(*, "units")= chr "days"
## $ month : chr [1:28060] "2020-11" "2020-11" "2020-11" "2020-11" ...
## $ year : chr [1:28060] "2020" "2020" "2020" "2020" ...
## $ app_type : chr [1:28060] "Renewal" "Renewal" "Renewal" "Renewal" ...
## $ lic_type : chr [1:28060] "RN" "RN" "RN" "RN" ...
## $ process_time : 'difftime' num [1:28060] 31 31 31 31 ...
## ..- attr(*, "units")= chr "days"
## $ process_time2 : 'difftime' num [1:28060] 31 31 31 31 ...
## ..- attr(*, "units")= chr "days"
## $ data_state : chr [1:28060] "OK" "OK" "OK" "OK" ...
#These 360 records have duplicates -
dupe_numbers <- ok_state %>%
filter (!is.na(process_time)) %>% #removes pending/null duration
group_by (first_name, middle_name, last_name, city, lic_type, app_type) %>%
mutate (n = n()) %>%
filter (n > 1) %>%
arrange (last_name, first_name, middle_name, city, lic_type, app_type)
#Leaves only renewal, no removal necessary
dupe_numbers %>% ungroup() %>% count (app_type) %>% arrange (desc(n))
## # A tibble: 1 × 2
## app_type n
## <chr> <int>
## 1 Renewal 360
state <- read_excel("../state-data/OR-NPR chart 10-1-21.xlsx") %>% clean_names()
state2 <- state %>%
arrange (last_name, middle_name, first_name, city, state, application_type, desc(apps_after_1_1_19)) %>%
group_by (last_name, middle_name, first_name, city, state, application_type) %>%
mutate (count = n(),
id = row_number()) #adds row numbers for each grouping so I can keep only most recent renewal
#20,169 renewal applications doubled with wrong date
state2 %>%
filter (count > 1 & str_detect(application_type, "^Renewal")) %>%
arrange (last_name, middle_name, first_name, city, state, application_type, desc(apps_after_1_1_19))
## # A tibble: 40,339 × 16
## # Groups: last_name, middle_name, first_name, city, state, application_type
## # [20,169]
## last_name middle_name first_name city state license_type application_type
## <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 A Marie Kelly Eugene OR LPN Renewal LPN - By…
## 2 A Marie Kelly Eugene OR LPN Renewal LPN - By…
## 3 Aaker Katherine Jessica Portla… OR RN Renewal RN - By …
## 4 Aaker Katherine Jessica Portla… OR RN Renewal RN - By …
## 5 Aamaas <NA> Theresa Milwau… OR RN Renewal RN - By …
## 6 Aamaas <NA> Theresa Milwau… OR RN Renewal RN - By …
## 7 Aamodt Lynn Cheryl Forest… OR RN Renewal RN - By …
## 8 Aamodt Lynn Cheryl Forest… OR RN Renewal RN - By …
## 9 Aamodt Michelle Hannah Hubbard OR RN Renewal RN - By …
## 10 Aamodt Michelle Hannah Hubbard OR RN Renewal RN - By …
## # … with 40,329 more rows, and 9 more variables:
## # initial_application_submitted <dttm>,
## # initial_required_documents_received <dttm>, license_status <chr>,
## # license_issue_date <dttm>, renewal_date <dttm>,
## # license_expiration_date <dttm>, apps_after_1_1_19 <dttm>, count <int>,
## # id <int>
# ungroup() %>%
# count (application_type) %>%
# arrange (desc(n))
state2 %>%
filter (count > 1 & str_detect(application_type, "^Renewal")) %>%
group_by (last_name, middle_name, first_name, application_type, renewal_date) %>%
mutate (unique_renewal_dates = unique(renewal_date))
## # A tibble: 40,339 × 17
## # Groups: last_name, middle_name, first_name, application_type, renewal_date
## # [20,170]
## last_name middle_name first_name city state license_type application_type
## <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 A Marie Kelly Eugene OR LPN Renewal LPN - By…
## 2 A Marie Kelly Eugene OR LPN Renewal LPN - By…
## 3 Aaker Katherine Jessica Portla… OR RN Renewal RN - By …
## 4 Aaker Katherine Jessica Portla… OR RN Renewal RN - By …
## 5 Aamaas <NA> Theresa Milwau… OR RN Renewal RN - By …
## 6 Aamaas <NA> Theresa Milwau… OR RN Renewal RN - By …
## 7 Aamodt Lynn Cheryl Forest… OR RN Renewal RN - By …
## 8 Aamodt Lynn Cheryl Forest… OR RN Renewal RN - By …
## 9 Aamodt Michelle Hannah Hubbard OR RN Renewal RN - By …
## 10 Aamodt Michelle Hannah Hubbard OR RN Renewal RN - By …
## # … with 40,329 more rows, and 10 more variables:
## # initial_application_submitted <dttm>,
## # initial_required_documents_received <dttm>, license_status <chr>,
## # license_issue_date <dttm>, renewal_date <dttm>,
## # license_expiration_date <dttm>, apps_after_1_1_19 <dttm>, count <int>,
## # id <int>, unique_renewal_dates <dttm>
state2 %>%
filter (count > 1 & str_detect(application_type, "^Renewal")) %>%
group_by (last_name, middle_name, first_name, city, state, application_type) %>%
distinct (renewal_date, .keep_all=T) %>%
count () %>%
arrange (desc(n))
## # A tibble: 20,169 × 7
## # Groups: last_name, middle_name, first_name, city, state, application_type
## # [20,169]
## last_name middle_name first_name city state application_type n
## <chr> <chr> <chr> <chr> <chr> <chr> <int>
## 1 Wilson Ann Julie Pendleton OR Renewal RN - By Ap… 2
## 2 A Marie Kelly Eugene OR Renewal LPN - By A… 1
## 3 Aaker Katherine Jessica Portland OR Renewal RN - By Ap… 1
## 4 Aamaas <NA> Theresa Milwaukie OR Renewal RN - By Ap… 1
## 5 Aamodt Lynn Cheryl Forest Grove OR Renewal RN - By Ap… 1
## 6 Aamodt Michelle Hannah Hubbard OR Renewal RN - By Ap… 1
## 7 Aaron Grace Charlotte Lebanon OR Renewal RN - By Ap… 1
## 8 Aaron James Hunt Thomas Keizer OR Renewal RN - By Ap… 1
## 9 Aarts Nicole Danielle Palm Desert CA Renewal RN - By Ap… 1
## 10 Aase Packard Victoria Kelso WA Renewal RN - By Ap… 1
## # … with 20,159 more rows
state2 <- state2 %>%
filter (!(str_detect(application_type, "^Renewal") & count > 1 & id > 1)) #removes incorrect, earlier renewal rows
#New dataframe should have 90443 rows if these 20169 wrong renewals are removed
nrow (state) - 20169
## [1] 90443
#Check to see what dates initial licenses and their renewals have
state2 %>%
ungroup() %>%
count (last_name, first_name, middle_name, city, state) %>%
arrange (desc(n))
## # A tibble: 82,343 × 6
## last_name first_name middle_name city state n
## <chr> <chr> <chr> <chr> <chr> <int>
## 1 Ambroson Diana Jean Eugene OR 3
## 2 Amelung Alexandra <NA> Eugene OR 3
## 3 Andrews Saoirse <NA> Bend OR 3
## 4 Arnett Eleanor Julia Silverton OR 3
## 5 Arteaga Samantha Arielle Portland OR 3
## 6 Basilin Kimberly Renee Salem OR 3
## 7 Bass Alyssa Ann Marie Salem OR 3
## 8 Baszler Chelsea Marie Springfield OR 3
## 9 Becker Alexandra Maria Beaverton OR 3
## 10 Behnke Daisy <NA> Gresham OR 3
## # … with 82,333 more rows
#Check Ambroson Diana to see they're the same
#Calculate processing time
or_state <- state2 %>%
ungroup() %>%
#Coalesce initial application and renewals into one column since they're repeated
mutate (application_date = if_else (str_detect(application_type, "^Renewal")|str_detect(application_type, "^Reactivation"),
as.Date(apps_after_1_1_19),
as.Date(initial_application_submitted)),
docs_date = as.Date(initial_required_documents_received),
issue_date = if_else (str_detect(application_type, "^Renewal")|str_detect(application_type, "^Reactivation"),
as.Date(renewal_date),
as.Date(license_issue_date)),
#renewal_date = as.Date(renewal_date),
license_expiration_date = as.Date(license_expiration_date),
#apps_after_1_1_19 = as.Date(apps_after_1_1_19),
gather_time = docs_date - application_date,
docs_time = issue_date - docs_date,
month = substr(issue_date, 1,7),
year = as.character(year(issue_date)),
#month = if_else (str_detect(application_type, "^Renewal")|str_detect(application_type, "^Reactivation"),
# substr(renewal_date, 1,7),
# substr(initial_application_submitted, 1,7)),
#year = if_else (str_detect(application_type, "^Renewal")|str_detect(application_type, "^Reactivation"),
# year(renewal_date),
# year(initial_application_submitted)),
app_type = case_when (
str_detect(application_type, "Endorsement - Foreign Educated") ~ "Endorsement (foreign)",
str_detect(application_type, "Endorsement$") ~ "Endorsement (US)",
str_detect(application_type, "Examination$") ~ "Exam (US)",
str_detect(application_type, "Exam - Foreign Educated") ~ "Exam (foreign)",
str_detect(application_type, "^Reactivation") ~ "Reinstatement",
str_detect(application_type, "^Renewal") ~ "Renewal",
TRUE ~ "UNKNOWN")) %>%
rename (lic_type = license_type) %>%
mutate (process_time = #if_else (str_detect(application_type, "^Renewal")|str_detect(application_type, "^Reactivation"),
# (renewal_date - apps_after_1_1_19), #<< use this for Renewal LPN, Renewal RN
# (license_issue_date - initial_application_submitted) )
issue_date - application_date,
data_state = "OR") #<< use this for new applications
#Any mismatches? Should only be renewals/reinstatements
or_state %>% filter (as.Date(initial_application_submitted) != application_date) %>%
count (application_type)
## # A tibble: 4 × 2
## application_type n
## * <chr> <int>
## 1 Reactivation LPN - By Reactivation / Reinstatement 159
## 2 Reactivation RN - By Reactivation / Reinstatement 1688
## 3 Renewal LPN - By Application 4608
## 4 Renewal RN - By Application 47349
or_state %>% filter (as.Date(issue_date) != license_issue_date) %>%
count (application_type)
## # A tibble: 4 × 2
## application_type n
## * <chr> <int>
## 1 Reactivation LPN - By Reactivation / Reinstatement 180
## 2 Reactivation RN - By Reactivation / Reinstatement 1965
## 3 Renewal LPN - By Application 5265
## 4 Renewal RN - By Application 60906
#Remove unformatted date fields and unnecessary fields
or_state <- or_state %>%
select (-c(application_type, initial_application_submitted, count, id, initial_application_submitted, renewal_date, apps_after_1_1_19, initial_required_documents_received, license_issue_date)) %>%
rename (expiration_date = license_expiration_date,
lic_status = license_status)
or_state %>%
#count(lic_type, application_type, app_type) #check for mistakes with renewal_app field
count(lic_type, app_type)
## # A tibble: 12 × 3
## lic_type app_type n
## <chr> <chr> <int>
## 1 LPN Endorsement (foreign) 5
## 2 LPN Endorsement (US) 709
## 3 LPN Exam (foreign) 2
## 4 LPN Exam (US) 826
## 5 LPN Reinstatement 191
## 6 LPN Renewal 5268
## 7 RN Endorsement (foreign) 245
## 8 RN Endorsement (US) 15455
## 9 RN Exam (foreign) 79
## 10 RN Exam (US) 4604
## 11 RN Reinstatement 2123
## 12 RN Renewal 60935
glimpse(or_state)
## Rows: 90,442
## Columns: 18
## $ last_name <chr> "A", "Aaberg", "Aaker", "Aalseth", "Aamaas", "Aamaas"…
## $ middle_name <chr> "Marie", "Carolyn", "Katherine", "Michelle", "Jakob",…
## $ first_name <chr> "Kelly", "Aunaliese", "Jessica", "Erin", "Ola", "Ther…
## $ city <chr> "Eugene", "Newberg", "Portland", "Vancouver", "Milwau…
## $ state <chr> "OR", "OR", "OR", "WA", "OR", "OR", "OR", "OR", "IN",…
## $ lic_type <chr> "LPN", "RN", "RN", "RN", "LPN", "RN", "RN", "RN", "RN…
## $ lic_status <chr> "Active", "Active", "Active", "Active", "Expired", "A…
## $ expiration_date <date> 2023-01-10, 2023-03-25, 2023-02-20, 2022-12-15, 2017…
## $ application_date <date> 2020-12-26, 2021-02-16, 2020-12-01, 2020-09-17, 2019…
## $ docs_date <date> 2016-05-31, 2021-05-20, 2012-05-20, 2018-10-03, 2012…
## $ issue_date <date> 2020-12-29, 2021-05-20, 2020-12-03, 2020-09-18, 2015…
## $ gather_time <drtn> -1670 days, 93 days, -3117 days, -715 days, -2661 da…
## $ docs_time <drtn> 1673 days, 0 days, 3119 days, 716 days, 943 days, 31…
## $ month <chr> "2020-12", "2021-05", "2020-12", "2020-09", "2015-03"…
## $ year <chr> "2020", "2021", "2020", "2020", "2015", "2021", "2021…
## $ app_type <chr> "Renewal", "Exam (US)", "Renewal", "Renewal", "Reinst…
## $ process_time <drtn> 3 days, 93 days, 2 days, 1 days, -1718 days, 2 days,…
## $ data_state <chr> "OR", "OR", "OR", "OR", "OR", "OR", "OR", "OR", "OR",…
#These 52 records have duplicates -
dupe_numbers <- or_state %>%
filter (!is.na(process_time)) %>% #removes pending
group_by (first_name, middle_name, last_name, city, lic_type, app_type) %>%
mutate (n = n()) %>%
filter (n > 1) %>%
arrange (last_name, first_name, middle_name, city, lic_type, app_type)
#Leaves only reinstatement, no removal necessary
dupe_numbers %>% ungroup() %>% count (app_type) %>% arrange (desc(n))
## # A tibble: 1 × 2
## app_type n
## <chr> <int>
## 1 Reinstatement 52
sheets1 <- excel_sheets(path = "../state-data/PA/PA-RN APPLICATIONS.xlsx")
sheets2 <- excel_sheets(path = "../state-data/PA/PA-LPN APPLICATIONS.xlsx")
state <- tibble()
for (i in sheets1){
#i <- "Q1 21"
step_df <-read_excel("../state-data/PA/PA-RN APPLICATIONS.xlsx", sheet = i) %>%
clean_names()
state <- rbind (state, step_df)
}
for (i in sheets2){
#i <- "Q1 21"
step_df <-read_excel("../state-data/PA/PA-LPN APPLICATIONS.xlsx", sheet = i) %>%
clean_names()
state <- rbind (state, step_df)
}
#pa_rn <- read_excel("../state-data/PA/PA-RN APPLICATIONS.xlsx") %>% clean_names()
#pa_rn_renew <- read_excel("../state-data/PA/PA-RN RENEWALS.xlsx") %>% clean_names()
#pa_lpn <- read_excel("../state-data/PA/PA-LPN APPLICATIONS.xlsx") %>% clean_names()
#pa_lpn_renew <- read_excel("../state-data/PA/PA-LPN RENEWALS.xlsx") %>% clean_names()
str(state)
## tibble [83,888 × 12] (S3: tbl_df/tbl/data.frame)
## $ profession : chr [1:83888] "Nursing" "Nursing" "Nursing" "Nursing" ...
## $ application_license_type: chr [1:83888] "Graduate Registered Nurse Permit" "Registered Nurse" "Registered Nurse" "Registered Nurse" ...
## $ application_type : chr [1:83888] "Initial" "Initial" "Initial" "Initial" ...
## $ obtained_by : chr [1:83888] "Application" "Endorsement" "Endorsement" "Endorsement" ...
## $ full_name : chr [1:83888] "MARIA BROWNFIELD" "ANNA WALLACE" "BROOKE ALETHA ARNOLD" "CELESTE M TRIPICIAN" ...
## $ city : chr [1:83888] "Carnegie" "WILMINGTON" "CHARDON" "LOXAHATCHEE" ...
## $ state : chr [1:83888] "PENNSYLVANIA" "Delaware" "OHIO" "FLORIDA" ...
## $ submitted_date : POSIXct[1:83888], format: "2019-01-01" "2019-01-01" ...
## $ application_status : chr [1:83888] "License Issued" "License Issued" "License Issued" "Expired" ...
## $ license_type_issued : chr [1:83888] "Graduate Registered Nurse Permit" "Registered Nurse" "Registered Nurse" "Registered Nurse Temporary Permit" ...
## $ license_issue_date : POSIXct[1:83888], format: "2019-01-23" "2019-02-13" ...
## $ license_expiration_date : POSIXct[1:83888], format: "2020-01-23" "2022-04-30" ...
#Looking for various types
#application_license_type: grad LPN; grad RN; LPN; RN; & very few of these: LPN-Temp; RN-Temp
#application_type: above threshold (202); below threshold (753); Initial; reactivation
state %>%
#filter (application_type %in% c("Below Threshold", "Above Threshold")) %>%
count (application_license_type, application_type) #, application_type, obtained_by
## # A tibble: 13 × 3
## application_license_type application_type n
## <chr> <chr> <int>
## 1 Graduate Practical Nurse Permit Initial 1112
## 2 Graduate Registered Nurse Permit Initial 3059
## 3 Practical Nurse Above Threshold 173
## 4 Practical Nurse Below Threshold 2358
## 5 Practical Nurse Initial 10548
## 6 Practical Nurse ReActivation 341
## 7 Practical Nurse Temporary Permit Initial 52
## 8 Registered Nurse Above Threshold 1394
## 9 Registered Nurse Below Threshold 6334
## 10 Registered Nurse Initial 57257
## 11 Registered Nurse ReActivation 953
## 12 Registered Nurse Temporary Permit Initial 285
## 13 Volunteer Registered Nurse Initial 22
state %>%
summarize (submission = range(submitted_date),
issuance = range(license_issue_date, na.rm=T))
## # A tibble: 2 × 2
## submission issuance
## <dttm> <dttm>
## 1 2019-01-01 00:00:00 1953-12-31 00:00:00
## 2 2021-09-25 00:00:00 2021-09-24 00:00:00
#Calculate processing time
pa_state <- state %>%
ungroup() %>%
mutate (application_date = as.Date(submitted_date),
issue_date = as.Date(license_issue_date),
expiration_date = as.Date(license_expiration_date),
month = substr(issue_date, 1,7),
year = year(issue_date),
app_type = case_when (
application_type %in% c("Above Threshold", "Below Threshold", "ReActivation") ~ "Reinstatement", #all negative and get removed
#str_detect(obtained_by, "Examination - Out of State Grad") ~ "Exam (non-PA)",
#str_detect(obtained_by, "Examination - PA") ~ "Exam (PA)",
#str_detect(obtained_by, "Examination") ~ "Exam (PA)",
str_detect(obtained_by, "Examination") ~ "Exam",
str_detect(obtained_by, "Endorsement") ~ "Endorsement", #Only a few are specified PA or foreign, so combining all
TRUE ~ obtained_by),
lic_type = case_when (
is.na (license_type_issued) & application_license_type == "Graduate Practical Nurse Permit" ~ "LPN-Provisional",
is.na (license_type_issued) & application_license_type == "Graduate Registered Nurse Permit" ~ "RN-Provisional",
is.na (license_type_issued) & application_license_type == "Practical Nurse Temporary Permit" ~ "LPN-Temp",
is.na (license_type_issued) & application_license_type == "Registered Nurse Temporary Permit" ~ "RN-Temp",
is.na (license_type_issued) & application_license_type == "Registered Nurse" ~ "RN",
is.na (license_type_issued) & application_license_type == "Practical Nurse" ~ "LPN",
is.na (license_type_issued) & application_license_type == "Volunteer Registered Nurse" ~ "Volunteer RN",
license_type_issued == "Practical Nurse" ~ "LPN",
license_type_issued == "Registered Nurse" ~ "RN",
license_type_issued == "Graduate Practical Nurse Permit" ~ "LPN-Provisional",
license_type_issued == "Graduate Registered Nurse Permit" ~ "RN-Provisional",
license_type_issued == "Practical Nurse Temporary Permit" ~ "LPN-Temp",
license_type_issued == "Registered Nurse Temporary Permit" ~ "RN-Temp",
license_type_issued == "Volunteer Registered Nurse" ~ "Volunteer RN")) %>%
mutate (process_time = issue_date - application_date )
#Check status for those that couldn't calculate process_time to find those actively pending, and set them to calculate pending time as process_time2
pa_state %>%
filter (is.na(process_time)) %>%
count (application_status) %>%
arrange (desc(n))
## # A tibble: 11 × 2
## application_status n
## <chr> <int>
## 1 DISCREPANCY 4356
## 2 Submitted 1248
## 3 Expired 1206
## 4 Withdrawn 394
## 5 Duplicate 338
## 6 Null And Void 109
## 7 Denied 14
## 8 License Issued 11
## 9 Duplicate License 1
## 10 Pending Review 1
## 11 Temporary Permit Issued 1
#Calculates how long those left pending have been pending as process_time2
pa_state <- pa_state %>%
mutate (process_time2 = if_else (is.na(process_time) & application_status %in% c("DISCREPANCY", "Submitted", "License Issued", "Pending Review"),
as.Date("2021-09-25") - application_date,
process_time)) %>%
mutate (year = if_else (is.na(year), "Pending as of 09/25/21", as.character(year)),
data_state = "PA",
app_type = if_else (lic_type %in% c("LPN", "RN") & app_type=="Application", "Unknown", app_type)) #Fix provisional/temp licenses obtained_by "application"
#Any mismatches? Should only be renewals/reinstatements
pa_state %>% filter (as.Date(submitted_date) != application_date)
## # A tibble: 0 × 22
## # … with 22 variables: profession <chr>, application_license_type <chr>,
## # application_type <chr>, obtained_by <chr>, full_name <chr>, city <chr>,
## # state <chr>, submitted_date <dttm>, application_status <chr>,
## # license_type_issued <chr>, license_issue_date <dttm>,
## # license_expiration_date <dttm>, application_date <date>, issue_date <date>,
## # expiration_date <date>, month <chr>, year <chr>, app_type <chr>,
## # lic_type <chr>, process_time <drtn>, process_time2 <drtn>, …
pa_state %>% filter (as.Date(license_issue_date) != issue_date)
## # A tibble: 0 × 22
## # … with 22 variables: profession <chr>, application_license_type <chr>,
## # application_type <chr>, obtained_by <chr>, full_name <chr>, city <chr>,
## # state <chr>, submitted_date <dttm>, application_status <chr>,
## # license_type_issued <chr>, license_issue_date <dttm>,
## # license_expiration_date <dttm>, application_date <date>, issue_date <date>,
## # expiration_date <date>, month <chr>, year <chr>, app_type <chr>,
## # lic_type <chr>, process_time <drtn>, process_time2 <drtn>, …
#Remove unformatted date fields and unnecessary fields
pa_state <- pa_state %>%
select (-c(profession, application_license_type, application_type, submitted_date, license_issue_date, license_expiration_date, license_type_issued, obtained_by)) %>%
rename (name = full_name,
app_status = application_status)
pa_state %>%
count (lic_type, app_type, year)
## # A tibble: 177 × 4
## lic_type app_type year n
## <chr> <chr> <chr> <int>
## 1 LPN Endorsement 2019 285
## 2 LPN Endorsement 2020 311
## 3 LPN Endorsement 2021 234
## 4 LPN Endorsement Pending as of 09/25/21 460
## 5 LPN Exam 2019 1459
## 6 LPN Exam 2020 1653
## 7 LPN Exam 2021 1478
## 8 LPN Exam Pending as of 09/25/21 1045
## 9 LPN Reinstatement 1959 1
## 10 LPN Reinstatement 1961 1
## # … with 167 more rows
#count(lic_type, application_type, app_type) #check for mistakes with renewal_app field
# filter (app_type == "Application") %>%
# count (application_type, license_type_issued, application_license_type, obtained_by)
str(pa_state)
## tibble [83,888 × 14] (S3: tbl_df/tbl/data.frame)
## $ name : chr [1:83888] "MARIA BROWNFIELD" "ANNA WALLACE" "BROOKE ALETHA ARNOLD" "CELESTE M TRIPICIAN" ...
## $ city : chr [1:83888] "Carnegie" "WILMINGTON" "CHARDON" "LOXAHATCHEE" ...
## $ state : chr [1:83888] "PENNSYLVANIA" "Delaware" "OHIO" "FLORIDA" ...
## $ app_status : chr [1:83888] "License Issued" "License Issued" "License Issued" "Expired" ...
## $ application_date: Date[1:83888], format: "2019-01-01" "2019-01-01" ...
## $ issue_date : Date[1:83888], format: "2019-01-23" "2019-02-13" ...
## $ expiration_date : Date[1:83888], format: "2020-01-23" "2022-04-30" ...
## $ month : chr [1:83888] "2019-01" "2019-02" "2019-04" "2019-01" ...
## $ year : chr [1:83888] "2019" "2019" "2019" "2019" ...
## $ app_type : chr [1:83888] "Application" "Endorsement" "Endorsement" "Endorsement" ...
## $ lic_type : chr [1:83888] "RN-Provisional" "RN" "RN" "RN-Temp" ...
## $ process_time : 'difftime' num [1:83888] 22 43 106 1 ...
## ..- attr(*, "units")= chr "days"
## $ process_time2 : 'difftime' num [1:83888] 22 43 106 1 ...
## ..- attr(*, "units")= chr "days"
## $ data_state : chr [1:83888] "PA" "PA" "PA" "PA" ...
#pa_state3 %>%
# count (application_type, license_type_issued, application_license_type, obtained_by)
#pa_state2 %>%
#filter (application_license_type=="Practical Nurse" & license_type_issued=="Graduate Practical Nurse Permit")
# filter (application_license_type=="Registered Nurse" & license_type_issued=="Registered Nurse" & obtained_by=="Application") #%>%
# count (application_type)
#These 741 records have duplicates -
dupe_numbers <- pa_state %>%
filter (!is.na(process_time)) %>% #removes pending/null duration
group_by (name, city, lic_type, app_type) %>%
mutate (n = n()) %>%
filter (n > 1) %>%
arrange (name, city, lic_type, app_type)
#737 are reinstatements
dupe_numbers %>% ungroup() %>% count (app_type) %>% arrange (desc(n))
## # A tibble: 2 × 2
## app_type n
## <chr> <int>
## 1 Reinstatement 737
## 2 Endorsement 4
#Only two endorsements are duplicated. Cannot determine which is correct, so will leave.
dupe_numbers <- dupe_numbers %>%
filter (process_time2 > -1)
state <- read_excel("../state-data/SC-Nursing FOIA Request RESPONSE.xlsx"#,
#skip=1,
#col_types = c("text", "text", "text", "text", "text", "text", "text", "text", "date", "date", "text","date", "date")
) %>%
clean_names() %>%
mutate (license_expiration_date = ymd (license_expiration_date))
#Calculate processing time
sc_state <- state %>%
filter (license_issue_date > as.Date("2018-12-31")) %>% #Keeps only 18209 licenses issued 2019-2021
filter (application_type %in% c("Application By reciprocity/endorsement", "Application By Exam")) %>% #removes renewals, which will calculate wrong, and 40 reinstatements
mutate (application_date = as.Date(initial_application_date),
issue_date = as.Date(license_issue_date),
docs_date = as.Date(required_documents_received),
gather_time = docs_date - application_date,
docs_time = issue_date - docs_date,
month = substr(issue_date, 1,7),
year = as.character(year(issue_date)),
lic_type = if_else (license_duration == "Temporary",
paste0(license_type, "-Temp"),
license_type),
app_type = case_when (
str_detect (application_type, "Exam") ~ "Exam",
str_detect (application_type, "endorse") ~ "Endorsement")) %>%
mutate (process_time = issue_date - application_date,
data_state = "SC")
#Any mismatches?
sc_state %>% filter (as.Date(initial_application_date) != application_date)
## # A tibble: 0 × 24
## # … with 24 variables: first_name <chr>, middle_name <chr>, last_name <chr>,
## # city <chr>, state_code <chr>, license_type <chr>, license_duration <chr>,
## # application_type <chr>, initial_application_date <dttm>,
## # required_documents_received <dttm>, license_status <chr>,
## # license_issue_date <dttm>, license_expiration_date <date>,
## # application_date <date>, issue_date <date>, docs_date <date>,
## # gather_time <drtn>, docs_time <drtn>, month <chr>, year <chr>, …
sc_state %>% filter (as.Date(license_issue_date) != issue_date)
## # A tibble: 0 × 24
## # … with 24 variables: first_name <chr>, middle_name <chr>, last_name <chr>,
## # city <chr>, state_code <chr>, license_type <chr>, license_duration <chr>,
## # application_type <chr>, initial_application_date <dttm>,
## # required_documents_received <dttm>, license_status <chr>,
## # license_issue_date <dttm>, license_expiration_date <date>,
## # application_date <date>, issue_date <date>, docs_date <date>,
## # gather_time <drtn>, docs_time <drtn>, month <chr>, year <chr>, …
#Remove unformatted date fields and unnecessary fields
sc_state <- sc_state %>%
select (-c(application_type, license_type, initial_application_date, required_documents_received, license_issue_date)) %>%
rename (state = state_code,
duration = license_duration,
lic_status = license_status,
expiration_date = license_expiration_date)
sc_state %>%
count (lic_type, app_type)
## # A tibble: 8 × 3
## lic_type app_type n
## <chr> <chr> <int>
## 1 LPN Endorsement 757
## 2 LPN Exam 1469
## 3 LPN-Temp Endorsement 40
## 4 LPN-Temp Exam 19
## 5 RN Endorsement 6744
## 6 RN Exam 6509
## 7 RN-Temp Endorsement 293
## 8 RN-Temp Exam 42
#count(lic_type, application_type, app_type) #check they've all coded correctly field
str(sc_state)
## tibble [15,873 × 19] (S3: tbl_df/tbl/data.frame)
## $ first_name : chr [1:15873] "NIVIA" "FRANITRA" "JANESIA" "TERESA" ...
## $ middle_name : chr [1:15873] "SHAYLA" "QUTAVIA" NA "A." ...
## $ last_name : chr [1:15873] "GREEN" "HILL" "GLOVER" "MAGRINI" ...
## $ city : chr [1:15873] "COLUMBIA" "ROCK HILL" "ORANGEBURG" "BELVIDERE" ...
## $ state : chr [1:15873] "SC" "SC" "SC" "NJ" ...
## $ duration : chr [1:15873] "Permanent" "Permanent" "Permanent" "Permanent" ...
## $ lic_status : chr [1:15873] "ACTIVE" "ACTIVE" "ACTIVE" "ACTIVE" ...
## $ expiration_date : Date[1:15873], format: "2022-04-30" "2022-04-30" ...
## $ application_date: Date[1:15873], format: "2020-06-11" "2020-10-12" ...
## $ issue_date : Date[1:15873], format: "2020-11-04" "2020-11-20" ...
## $ docs_date : Date[1:15873], format: "2020-11-04" "2020-11-20" ...
## $ gather_time : 'difftime' num [1:15873] 146 39 30 49 ...
## ..- attr(*, "units")= chr "days"
## $ docs_time : 'difftime' num [1:15873] 0 0 0 0 ...
## ..- attr(*, "units")= chr "days"
## $ month : chr [1:15873] "2020-11" "2020-11" "2020-09" "2021-06" ...
## $ year : chr [1:15873] "2020" "2020" "2020" "2021" ...
## $ lic_type : chr [1:15873] "RN" "LPN" "LPN" "RN" ...
## $ app_type : chr [1:15873] "Exam" "Exam" "Exam" "Endorsement" ...
## $ process_time : 'difftime' num [1:15873] 146 39 30 49 ...
## ..- attr(*, "units")= chr "days"
## $ data_state : chr [1:15873] "SC" "SC" "SC" "SC" ...
#These 2 records have duplicates - different application dates, issue dates. Will leave as is.
dupe_numbers <- sc_state %>%
filter (!is.na(process_time)) %>% #removes pending/null duration
group_by (first_name, middle_name, last_name, city, lic_type, app_type) %>%
mutate (n = n()) %>%
filter (n > 1) %>%
arrange (last_name, first_name, middle_name, city, lic_type, app_type)
state <- read_excel("../state-data/TN-Updated Nursing Data for NPR 02042022 EM.xlsx",
col_types = c("text", "text", "text", "text", "text", "text", "date", "text", "text", "text", "text", "text", "text", "date")) %>%
clean_names()
#Check out what kind of licenses we've got
state %>%
count (license, initial_or_renewal, type_of_initial_licensure) %>%
arrange (desc(n))
## # A tibble: 28 × 4
## license initial_or_renewal type_of_initial_licensure n
## <chr> <chr> <chr> <int>
## 1 RN Initial INITIAL BY EXAM 12344
## 2 RN Initial INITIAL BY ENDORSEMENT 9103
## 3 LPN Renewal Renewal Application 5502
## 4 LPN Renewal Inital Application 4689
## 5 LPN Initial INITIAL BY EXAM 4111
## 6 RN Renewal Renewal Active 3182
## 7 RN Initial Renewal Active 2960
## 8 LPN Renewal Renewal Active 1684
## 9 LPN Initial RENEWAL ACTIVE 1597
## 10 LPN Renewal Renew Certification 829
## # … with 18 more rows
#weirdly, 49001 applications are open and only 901 closed?
state %>%
count (applcation_status, license_status) %>%
arrange (desc(n))
## # A tibble: 4 × 3
## applcation_status license_status n
## <chr> <chr> <int>
## 1 Open Licensed 48952
## 2 Closed Licensed 901
## 3 Open On Probation 33
## 4 Open Suspended 16
#17 types of initial licensure
#3 types of initial or renewal (+ reinstatement + 1 NA)
state %>%
#count (year, license, initial_or_renewal, type_of_initial_licensure)
count (initial_or_renewal, type_of_initial_licensure) #%>%
## # A tibble: 20 × 3
## initial_or_renewal type_of_initial_licensure n
## <chr> <chr> <int>
## 1 Initial INITIAL BY ENDORSEMENT 9857
## 2 Initial INITIAL BY EXAM 16455
## 3 Initial REINSTATE MULTISTATE VOIDS 33
## 4 Initial REINSTATE to ACTIVE 15
## 5 Initial Renewal Active 3659
## 6 Initial RENEWAL ACTIVE 1597
## 7 Reinstatement Renewal Active 891
## 8 Renewal Inital Application 4689
## 9 Renewal INITIAL TESTED 3
## 10 Renewal Reactivate from Expired 175
## 11 Renewal REINSTATE FTR - INSUF. FUNDS 2
## 12 Renewal REINSTATE TO ACTIVE 98
## 13 Renewal REINSTATE TO INACTIVE 1
## 14 Renewal Reinstatement Application 567
## 15 Renewal Renew Certification 834
## 16 Renewal Renewal Active 4866
## 17 Renewal Renewal Application 5502
## 18 Renewal RENEWAL for INACTIVE STATUS 45
## 19 Renewal Request License 612
## 20 <NA> Request License 1
# arrange (desc(n))
#Calculate processing time
tn_state <- state %>%
# select (-date_when_all_required_application_documents_received_if_later_than_initial_application_date_not_tracked) %>%
mutate (issue_date = as.Date(format(date_licensed, "%Y-%m-%d")),
#current_license_expiration_date = ymd(current_license_expiration_date),
app_date = as.Date(format(application_date, "%Y-%m-%d")),
month = substr(issue_date, 1,7),
year = as.character(year(issue_date)),
app_type = case_when (
initial_or_renewal=="Initial" & str_detect (type_of_initial_licensure, "EXAM") ~ "Exam",
initial_or_renewal=="Initial" & str_detect (type_of_initial_licensure, "ENDORSEMENT") ~ "Endorsement",
initial_or_renewal=="Initial" ~ paste0("Initial-", type_of_initial_licensure),
initial_or_renewal=="Reinstatement" ~ "Reinstatement",
initial_or_renewal=="Renewal" ~ paste0("Renewal-", type_of_initial_licensure),
is.na(initial_or_renewal) ~ "Renewal-Request License",
TRUE ~ "???"),
process_time = issue_date - app_date,
data_state = "TN") %>%
rename (lic_type=license)
tn_state %>% count (app_type)
## # A tibble: 19 × 2
## app_type n
## * <chr> <int>
## 1 Endorsement 9857
## 2 Exam 16455
## 3 Initial-REINSTATE MULTISTATE VOIDS 33
## 4 Initial-REINSTATE to ACTIVE 15
## 5 Initial-Renewal Active 3659
## 6 Initial-RENEWAL ACTIVE 1597
## 7 Reinstatement 891
## 8 Renewal-Inital Application 4689
## 9 Renewal-INITIAL TESTED 3
## 10 Renewal-Reactivate from Expired 175
## 11 Renewal-REINSTATE FTR - INSUF. FUNDS 2
## 12 Renewal-REINSTATE TO ACTIVE 98
## 13 Renewal-REINSTATE TO INACTIVE 1
## 14 Renewal-Reinstatement Application 567
## 15 Renewal-Renew Certification 834
## 16 Renewal-Renewal Active 4866
## 17 Renewal-Renewal Application 5502
## 18 Renewal-RENEWAL for INACTIVE STATUS 45
## 19 Renewal-Request License 613
test <- tn_state %>%
filter (app_type == "Initial-RENEWAL ACTIVE")
app_counts <- tn_state %>%
count (app_type) %>%
rename (total = n)
#how many failed? None!
tn_state %>%
filter ( is.na(process_time))
## # A tibble: 0 × 21
## # … with 21 variables: frst_name <chr>, second_name <chr>, surname <chr>,
## # key_name <chr>, city <chr>, state <chr>, application_date <dttm>,
## # applcation_status <chr>, initial_or_renewal <chr>,
## # type_of_initial_licensure <chr>, lic_type <chr>, license_number <chr>,
## # license_status <chr>, date_licensed <dttm>, issue_date <date>,
## # app_date <date>, month <chr>, year <chr>, app_type <chr>,
## # process_time <drtn>, data_state <chr>
#12367 negative process times - ALL EXAM/ENDORSEMENT TIMES ARE POSITIVE
tn_state %>%
filter (process_time < 0) %>%
count (app_type) %>%
full_join (app_counts) %>%
mutate (pct_neg = round ( n / total * 100, digits=1)) %>%
mutate (pct_neg = replace_na (pct_neg, 0)) %>%
arrange (pct_neg)
## # A tibble: 19 × 4
## app_type n total pct_neg
## <chr> <int> <int> <dbl>
## 1 Endorsement NA 9857 0
## 2 Exam NA 16455 0
## 3 Renewal-INITIAL TESTED NA 3 0
## 4 Renewal-Inital Application 9 4689 0.2
## 5 Renewal-Request License 1 613 0.2
## 6 Renewal-RENEWAL for INACTIVE STATUS 14 45 31.1
## 7 Renewal-Reactivate from Expired 65 175 37.1
## 8 Renewal-Renewal Application 2076 5502 37.7
## 9 Renewal-Renew Certification 368 834 44.1
## 10 Renewal-REINSTATE TO ACTIVE 61 98 62.2
## 11 Renewal-Reinstatement Application 360 567 63.5
## 12 Renewal-Renewal Active 3703 4866 76.1
## 13 Reinstatement 691 891 77.6
## 14 Initial-REINSTATE MULTISTATE VOIDS 26 33 78.8
## 15 Initial-Renewal Active 3378 3659 92.3
## 16 Initial-REINSTATE to ACTIVE 15 15 100
## 17 Initial-RENEWAL ACTIVE 1597 1597 100
## 18 Renewal-REINSTATE FTR - INSUF. FUNDS 2 2 100
## 19 Renewal-REINSTATE TO INACTIVE 1 1 100
tn_state %>%
count (lic_type, app_type)
## # A tibble: 27 × 3
## lic_type app_type n
## <chr> <chr> <int>
## 1 LPN Endorsement 754
## 2 LPN Exam 4111
## 3 LPN Initial-REINSTATE MULTISTATE VOIDS 10
## 4 LPN Initial-REINSTATE to ACTIVE 15
## 5 LPN Initial-Renewal Active 699
## 6 LPN Initial-RENEWAL ACTIVE 1597
## 7 LPN Reinstatement 296
## 8 LPN Renewal-Inital Application 4689
## 9 LPN Renewal-Reactivate from Expired 175
## 10 LPN Renewal-REINSTATE FTR - INSUF. FUNDS 2
## # … with 17 more rows
#Any mismatches?
tn_state %>% filter (as.Date(application_date) != app_date)
## # A tibble: 0 × 21
## # … with 21 variables: frst_name <chr>, second_name <chr>, surname <chr>,
## # key_name <chr>, city <chr>, state <chr>, application_date <dttm>,
## # applcation_status <chr>, initial_or_renewal <chr>,
## # type_of_initial_licensure <chr>, lic_type <chr>, license_number <chr>,
## # license_status <chr>, date_licensed <dttm>, issue_date <date>,
## # app_date <date>, month <chr>, year <chr>, app_type <chr>,
## # process_time <drtn>, data_state <chr>
tn_state %>% filter (as.Date(date_licensed) != issue_date)
## # A tibble: 0 × 21
## # … with 21 variables: frst_name <chr>, second_name <chr>, surname <chr>,
## # key_name <chr>, city <chr>, state <chr>, application_date <dttm>,
## # applcation_status <chr>, initial_or_renewal <chr>,
## # type_of_initial_licensure <chr>, lic_type <chr>, license_number <chr>,
## # license_status <chr>, date_licensed <dttm>, issue_date <date>,
## # app_date <date>, month <chr>, year <chr>, app_type <chr>,
## # process_time <drtn>, data_state <chr>
#Remove unformatted date fields and unnecessary fields
tn_state <- tn_state %>%
select (-c(application_date, date_licensed, type_of_initial_licensure, initial_or_renewal)) %>%
rename (lic_status = license_status,
application_date = app_date,
app_status = applcation_status,
name = key_name,
last_name = surname,
middle_name = second_name,
first_name = frst_name)
tn_state %>%
count (lic_type, app_type)
## # A tibble: 27 × 3
## lic_type app_type n
## <chr> <chr> <int>
## 1 LPN Endorsement 754
## 2 LPN Exam 4111
## 3 LPN Initial-REINSTATE MULTISTATE VOIDS 10
## 4 LPN Initial-REINSTATE to ACTIVE 15
## 5 LPN Initial-Renewal Active 699
## 6 LPN Initial-RENEWAL ACTIVE 1597
## 7 LPN Reinstatement 296
## 8 LPN Renewal-Inital Application 4689
## 9 LPN Renewal-Reactivate from Expired 175
## 10 LPN Renewal-REINSTATE FTR - INSUF. FUNDS 2
## # … with 17 more rows
#count(lic_type, application_type, app_type) #check they've all coded correctly field
#35,827 are records for repeated people, some up to 28 times. 5000 are repeated EXAM/ENDORSEMENT
#Checking that repeated license numbers/names are true duplicates
repeats <- tn_state %>%
filter (app_type %in% c("Exam", "Endorsement")) %>%
group_by (license_number) %>%
mutate (count=n()) %>%
filter (count > 1) %>%
arrange (desc(count)) %>%
group_by (license_number, count) %>%
mutate (lic_app = paste0(lic_type, "-", app_type)) %>%
summarise(Type = toString(lic_app),
App_Dates = toString(application_date),
Issue_Dates = toString(issue_date),
Time = toString(process_time)
) %>%
separate(Type, into = paste0("Type", 1:4), sep = ", ", fill = "right", extra = "drop") %>%
separate(App_Dates, into = paste0("App", 1:4), sep = ", ", fill = "right", extra = "drop") %>%
separate(Issue_Dates, into = paste0("Iss", 1:4), sep = ", ", fill = "right", extra = "drop") %>%
separate(Time, into = paste0("Time", 1:4), sep = ", ", fill = "right", extra = "drop") %>%
ungroup()
#2293 have same times for all applications, meaning they're true duplicates; 48 people have different times, many are just 1 or 0 days, probably re-applied and quickly issued. This code uses longer time.
repeats %>%
filter (Time1 != Time2) %>%
arrange (desc(count))
## # A tibble: 48 × 18
## license_number count Type1 Type2 Type3 Type4 App1 App2 App3 App4 Iss1
## <chr> <int> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 94598 4 LPN-E… LPN-E… LPN-E… LPN-… 2019… 2019… 2019… 2019… 2019…
## 2 246173 3 RN-Ex… RN-Ex… RN-Ex… <NA> 2020… 2020… 2020… <NA> 2020…
## 3 251197 3 RN-Ex… RN-Ex… RN-Ex… <NA> 2020… 2021… 2021… <NA> 2021…
## 4 236291 2 RN-En… RN-En… <NA> <NA> 2019… 2020… <NA> <NA> 2020…
## 5 236881 2 RN-En… RN-En… <NA> <NA> 2019… 2021… <NA> <NA> 2021…
## 6 238761 2 RN-En… RN-Ex… <NA> <NA> 2019… 2019… <NA> <NA> 2019…
## 7 239705 2 RN-En… RN-En… <NA> <NA> 2019… 2020… <NA> <NA> 2021…
## 8 241107 2 RN-En… RN-En… <NA> <NA> 2019… 2020… <NA> <NA> 2021…
## 9 243695 2 RN-En… RN-Ex… <NA> <NA> 2021… 2020… <NA> <NA> 2021…
## 10 244482 2 RN-En… RN-En… <NA> <NA> 2021… 2020… <NA> <NA> 2021…
## # … with 38 more rows, and 7 more variables: Iss2 <chr>, Iss3 <chr>,
## # Iss4 <chr>, Time1 <chr>, Time2 <chr>, Time3 <chr>, Time4 <chr>
str(tn_state)
## tibble [49,902 × 17] (S3: tbl_df/tbl/data.frame)
## $ first_name : chr [1:49902] "Wendy" "Logan" "Rita" "Kelly" ...
## $ middle_name : chr [1:49902] "Jean" "Shelley" "Marie" NA ...
## $ last_name : chr [1:49902] "Aarnes" "Aaron" "Aaron" "Aasa" ...
## $ name : chr [1:49902] "AARNES, WENDY JEAN" "AARON, LOGAN SHELLEY" "AARON, RITA MARIE" "AASA, KELLY" ...
## $ city : chr [1:49902] "Olney" "Pall Mall" "Pulaski" "Clarksville" ...
## $ state : chr [1:49902] "IL" "TN" "TN" "TN" ...
## $ app_status : chr [1:49902] "Open" "Open" "Open" "Open" ...
## $ lic_type : chr [1:49902] "RN" "RN" "RN" "RN" ...
## $ license_number : chr [1:49902] "252162" "248994" "246496" "243877" ...
## $ lic_status : chr [1:49902] "Licensed" "Licensed" "Licensed" "Licensed" ...
## $ issue_date : Date[1:49902], format: "2021-03-04" "2020-09-14" ...
## $ application_date: Date[1:49902], format: "2021-02-06" "2020-08-26" ...
## $ month : chr [1:49902] "2021-03" "2020-09" "2020-07" "2020-03" ...
## $ year : chr [1:49902] "2021" "2020" "2020" "2020" ...
## $ app_type : chr [1:49902] "Endorsement" "Endorsement" "Exam" "Endorsement" ...
## $ process_time : 'difftime' num [1:49902] 26 19 74 56 ...
## ..- attr(*, "units")= chr "days"
## $ data_state : chr [1:49902] "TN" "TN" "TN" "TN" ...
#Keep only Exam/Endorsement and cut out repeat records
tn_state <- tn_state %>%
filter (app_type %in% c("Exam", "Endorsement")) %>%
#arrange by processing time to keep longest for 48 repeats
arrange (license_number, desc(process_time)) %>%
distinct (license_number, .keep_all=T)
#TESTING TN AVERAGES
#removes 3741 records from TN dataframe
valid_data2 <- tn_state %>%
filter (app_type %in% c("Exam", "Endorsement")) %>%
filter (process_time > -1) %>% #Keep only valid processing times
filter (!is.na(process_time)) %>%
filter (year != "2022") %>% #remove Jan/Feb 2022
#remove pending apps older than July 2020 (11789 RNs & 1994 LPNs)
#filter (!(str_detect(year, "^Pending") & (application_date < as.Date("2020-07-01")))) %>%
#Standardize timeframe
filter (application_date <= as.Date("2021-09-23")) %>%
filter (issue_date <= as.Date("2021-09-23"))
tn_averages2 <- valid_data2 %>%
#arrange by processing time to keep longest for 48 repeats
arrange (license_number, desc(process_time)) %>%
distinct (license_number, .keep_all=T) %>%
rename (process_time2 = process_time) %>%
group_by (data_state, lic_type, app_type, year) %>%
summarize (mean = round(mean(process_time2)),
median = median (process_time2),
count = n(),
count_over30days = sum(process_time2 > 30),
pct_over30days = count_over30days/count,
count_over60days = sum(process_time2 > 60),
pct_over60days = count_over60days/count,
count_over90days = sum(process_time2 > 90),
pct_over90days = count_over90days/count,
count_over120days = sum(process_time2 > 120),
pct_over120days = count_over120days/count,
count_over180days = sum(process_time2 > 180),
pct_over180days = count_over180days/count,
count_over1year = sum(process_time2 > 365),
pct_over1year = count_over1year/count)
state <- read_excel("../state-data/TX-Approved Licenses-USETHIS2.xlsx") %>%
clean_names()
tx_temp <- read_csv ("../state-data/TX-ApplicationsWithoutLicensure.csv") %>%
clean_names()
#They're all either endorsement or exam
tx_temp %>%
count(license_application_type, basis_for_licensure)
## # A tibble: 11 × 3
## license_application_type basis_for_licensure n
## <chr> <chr> <int>
## 1 "Endorsement LVN" Endorsement 608
## 2 "Endorsement LVN \x96 INTERNATIONAL AND PUERTO RI… Endorsement-Interna… 90
## 3 "Endorsement LVN \x96 U.S. EDUCATED BUT NOT PUERT… Endorsement 781
## 4 "Endorsement RN" Endorsement 3416
## 5 "Endorsement RN \x96 INTERNATIONAL AND PUERTO RIC… Endorsement-Interna… 1063
## 6 "Endorsement RN \x96 U.S. EDUCATED BUT NOT PUERTO… Endorsement 4126
## 7 "Exam LVN" Exam 2568
## 8 "Exam RN" Exam 10175
## 9 "NCLEX Application \x96 International Graduates (… Exam-International 10346
## 10 "NCLEX-PN Application \x96 U.S. Graduates Initial… Exam 2266
## 11 "NCLEX-RN Application \x96 U.S. Graduates Initial… Exam 8439
tx_temp %>%
count (license_type_name_1)
## # A tibble: 3 × 2
## license_type_name_1 n
## * <chr> <int>
## 1 LVN 3176
## 2 LVN/LPN 3255
## 3 RN 37447
#Standardize license and app type
tx_temp2 <- tx_temp %>%
mutate (app_type = case_when (
str_detect (basis_for_licensure, "Exam") ~ "Exam",
str_detect (basis_for_licensure, "Endorsement") ~ "Endorsement"),
lic_type = case_when (
license_type_name_1 %in% c("LVN", "LVN/LPN") ~ "LPN",
license_type_name_1=="RN" ~ "RN") )
tx_temp2 %>% count (lic_type, app_type)
## # A tibble: 4 × 3
## lic_type app_type n
## <chr> <chr> <int>
## 1 LPN Endorsement 1478
## 2 LPN Exam 4953
## 3 RN Endorsement 8606
## 4 RN Exam 28841
#19 pending statuses
tx_temp2 %>% count (application_status)
## # A tibble: 19 × 2
## application_status n
## * <chr> <int>
## 1 APPL.REC 201
## 2 APPLCNT 3701
## 3 Approved-Retest 7622
## 4 Closed 62
## 5 Denied 68
## 6 Denied for Cause 2
## 7 Expired 6997
## 8 FAILED.EX 3631
## 9 On Hold 87
## 10 PASSED.EX 6
## 11 PROC.GN 6
## 12 Processing 11487
## 13 Ready to issue 20
## 14 Retest 38
## 15 SENT.GN 4839
## 16 SENT.GVN 560
## 17 SENT.TEMP 3823
## 18 Void 672
## 19 Withdrawn 56
tx_temp3 <- tx_temp2 %>%
#remove those obviously not still pending (expired, denied, withdrawn, etc)
filter(application_status %in% c("Processing", "Ready to issue", "Retest", "PROC.GN", "PASSED.EX", "On Hold", "Approved-Retest", "APPL.REC", "APPLCNT")) %>%
mutate (application_date = ymd(submitdate),
data_state = "TX",
year = "Pending as of 10/01/21") %>%
filter (application_date >= as.Date("2020-06-15")) %>% #Keep only this applications after system migration
mutate (process_time = as.Date("2021-10-01") - application_date) %>%
rename (state = state_or_territory,
lic_status = application_status) %>%
select (-c(license_type_name_1, license_application_type, basis_for_licensure, license_type_name, submitdate))
tx_temp3 %>%
count (lic_type, app_type, lic_status)
## # A tibble: 14 × 4
## lic_type app_type lic_status n
## <chr> <chr> <chr> <int>
## 1 LPN Endorsement On Hold 3
## 2 LPN Endorsement Processing 481
## 3 LPN Endorsement Ready to issue 3
## 4 LPN Exam Approved-Retest 783
## 5 LPN Exam On Hold 5
## 6 LPN Exam Processing 781
## 7 RN Endorsement On Hold 21
## 8 RN Endorsement Processing 2949
## 9 RN Endorsement Ready to issue 9
## 10 RN Exam Approved-Retest 3604
## 11 RN Exam On Hold 58
## 12 RN Exam Processing 7269
## 13 RN Exam Ready to issue 6
## 14 RN Exam Retest 3
#Calculate processing time
tx_state <- state %>%
select (-date_when_all_required_application_documents_received_if_later_than_initial_application_date_not_tracked) %>%
mutate (issue_date = ymd(original_issuance_date),
current_license_expiration_date = ymd(current_license_expiration_date),
submit_date_available = ymd(submit_date_available),
submit_date_placeholder_data_used_for_migration_during_major_system_update_in_june_2020 = ymd(submit_date_placeholder_data_used_for_migration_during_major_system_update_in_june_2020),
application_date = coalesce(submit_date_available, submit_date_placeholder_data_used_for_migration_during_major_system_update_in_june_2020),
month = substr(issue_date, 1,7),
year = as.character(year(issue_date)),
license_number = as.character(license_number),
app_type = case_when (
str_detect (license_application_template_name, "RETEST Application ñ U.S. Graduates") ~ "Exam-retest (US)",
str_detect (license_application_template_name, "RETEST Application ñ International Graduates") ~ "Exam-retest (non-US)",
str_detect (license_application_template_name, "Application ñ U.S. Graduates") ~ "Exam (US)",
str_detect (license_application_template_name, "NCLEX Application ñ International Graduates") ~ "Exam (non-US)",
str_detect (license_application_template_name, "U.S. EDUCATED BUT NOT PUERTO RICO EDUCATED") ~ "Endorsement (US)",
str_detect (license_application_template_name, "INTERNATIONAL AND PUERTO RICO EDUCATED") ~ "Endorsement (non-US)",
license_application_template_name %in% c("Endorsement RN Application for Graduates of U.S. Programs", "Endorsement LVN Application for Graduates of U.S. Programs") ~ "Endorsement (US)",
license_application_template_name %in% c("Endorsement RN Application for Graduates of International Programs", "Endorsement LVN Application for Graduates of International Programs") ~ "Endorsement (non-US)",
#Renewals/Reactivations won't calculate correctly because include original issue date
str_detect (license_application_template_name, "Renewal") ~ "Renewal",
str_detect (license_application_template_name, "Reactivation") ~ "Reinstatement",
TRUE ~ paste0("Other - ", license_application_template_name)),
lic_type = case_when (
license_type_name=="LVN/LPN" ~ "LPN",
license_type_name=="RN" ~ "RN") ,
process_time = issue_date - application_date,
data_state = "TX") %>%
filter (application_date > as.Date("2020-06-15"))
#Check columns in pending dataframe match issued
comparing <- compare_df_cols(tx_temp3, tx_state)
tx_state %>%
count (lic_type, app_type)
## # A tibble: 22 × 3
## lic_type app_type n
## <chr> <chr> <int>
## 1 LPN Endorsement (non-US) 5
## 2 LPN Endorsement (US) 1204
## 3 LPN Exam (non-US) 1
## 4 LPN Exam (US) 3737
## 5 LPN Exam-retest (non-US) 3
## 6 LPN Exam-retest (US) 465
## 7 LPN Other - Inactivate Application 804
## 8 LPN Other - Retire Application 225
## 9 LPN Other - Volunteer Retire 2
## 10 LPN Reinstatement 252
## # … with 12 more rows
tx_state %>%
filter (process_time < 0) %>%
count (app_type)
## # A tibble: 5 × 2
## app_type n
## * <chr> <int>
## 1 Other - Inactivate Application 2405
## 2 Other - Retire Application 1390
## 3 Other - Volunteer Retire 58
## 4 Reinstatement 1450
## 5 Renewal 282574
#Any mismatches?
tx_state %>% filter (as.Date(submit_date_available) != application_date)
## # A tibble: 0 × 22
## # … with 22 variables: first_name <chr>, middle_name <chr>, last_name <chr>,
## # city <chr>, state_or_territory <chr>, license_type_name <chr>,
## # license_number <chr>, license_application_template_name <chr>,
## # original_issuance_date <dbl>, current_license_expiration_date <date>,
## # initial_type <chr>, submit_date_available <date>,
## # submit_date_placeholder_data_used_for_migration_during_major_system_update_in_june_2020 <date>,
## # status <chr>, issue_date <date>, application_date <date>, month <chr>, …
#tx_state %>% filter (as.Date(original_issuance_date) != issue_date)
#Remove unformatted date fields and unnecessary fields
tx_state <- tx_state %>%
select (-c(license_type_name, license_application_template_name, original_issuance_date, initial_type, submit_date_available, submit_date_placeholder_data_used_for_migration_during_major_system_update_in_june_2020)) %>%
rename (state = state_or_territory,
expiration_date = current_license_expiration_date,
lic_status = status) %>%
bind_rows (tx_temp3) #Add pending records
tx_state %>%
count (lic_type, app_type)
## # A tibble: 26 × 3
## lic_type app_type n
## <chr> <chr> <int>
## 1 LPN Endorsement 487
## 2 LPN Endorsement (non-US) 5
## 3 LPN Endorsement (US) 1204
## 4 LPN Exam 1569
## 5 LPN Exam (non-US) 1
## 6 LPN Exam (US) 3737
## 7 LPN Exam-retest (non-US) 3
## 8 LPN Exam-retest (US) 465
## 9 LPN Other - Inactivate Application 804
## 10 LPN Other - Retire Application 225
## # … with 16 more rows
#count(lic_type, application_type, app_type) #check they've all coded correctly field
str(tx_state)
## tibble [335,164 × 16] (S3: tbl_df/tbl/data.frame)
## $ first_name : chr [1:335164] "GURDEEP" "JOSEPH" "NIMROD" "UGOCHUKWU" ...
## $ middle_name : chr [1:335164] "KAUR" "NWABUEZE" NA "STEPHEN" ...
## $ last_name : chr [1:335164] "RANDHAWA" "EZEKWEM" "KIBAARA" "ONYEBUCHI" ...
## $ city : chr [1:335164] "FRISCO" "Rahway" "Katy" "8 Belleville Drive CAN" ...
## $ state : chr [1:335164] "TX" "NJ" "TX" NA ...
## $ license_number : chr [1:335164] "1034515" "1047338" "1048832" "1050647" ...
## $ expiration_date : Date[1:335164], format: "2022-10-31" "2023-09-30" ...
## $ lic_status : chr [1:335164] "Approved" "Approved" "Approved" "Approved" ...
## $ issue_date : Date[1:335164], format: "2021-10-05" "2021-10-05" ...
## $ application_date: Date[1:335164], format: "2021-07-14" "2021-07-11" ...
## $ month : chr [1:335164] "2021-10" "2021-10" "2021-10" "2021-10" ...
## $ year : chr [1:335164] "2021" "2021" "2021" "2021" ...
## $ app_type : chr [1:335164] "Endorsement (non-US)" "Endorsement (non-US)" "Endorsement (non-US)" "Endorsement (non-US)" ...
## $ lic_type : chr [1:335164] "RN" "RN" "RN" "RN" ...
## $ process_time : 'difftime' num [1:335164] 83 86 69 53 ...
## ..- attr(*, "units")= chr "days"
## $ data_state : chr [1:335164] "TX" "TX" "TX" "TX" ...
#These 128 records are duplicates - but most of them have one RN and one LPN
dupe_numbers <- tx_state %>%
filter (!(app_type %in% c("Reinstatement", "Renewal"))) %>%
filter (!is.na(license_number)) %>% #pending apps will show up as NA
group_by (license_number) %>%
mutate (n = n()) %>%
filter (n > 1) %>%
arrange (license_number)
#Duplicate license numbers are mostly renewal
dupe_numbers %>%
ungroup() %>%
count (lic_type, app_type) %>%
arrange (desc(n))
## # A tibble: 10 × 3
## lic_type app_type n
## <chr> <chr> <int>
## 1 LPN Exam (US) 24
## 2 RN Exam (US) 24
## 3 RN Other - Inactivate Application 17
## 4 RN Other - Retire Application 16
## 5 LPN Endorsement (US) 15
## 6 RN Endorsement (US) 15
## 7 LPN Other - Inactivate Application 6
## 8 LPN Other - Retire Application 5
## 9 RN Exam-retest (US) 5
## 10 RN Other - Volunteer Retire 1
# View
#How many have 1 RN + 1 LPN record? Must have applied for both at once?
#Only six records are of same license type/application type and have very negative processing times, so they'll be filtered out anyway.
#No removal necessary.
dupe_numbers %>%
group_by (license_number, lic_type, app_type) %>%
mutate (n=n()) %>%
filter (n >1) #%>%
## # A tibble: 6 × 17
## # Groups: license_number, lic_type, app_type [3]
## first_name middle_name last_name city state license_number expiration_date
## <chr> <chr> <chr> <chr> <chr> <chr> <date>
## 1 JACQUELYNN S HORNER SAN ANT… TX 250187 2021-06-08
## 2 JACQUELYNN S HORNER SAN ANT… TX 250187 2021-06-08
## 3 JEANIE E PEGUES SALADO TX 56082 2021-03-23
## 4 JEANIE E PEGUES SALADO TX 56082 2021-03-23
## 5 DOLORES MADELINE SHEPARD ROUND R… TX 563747 2021-04-06
## 6 DOLORES MADELINE SHEPARD ROUND R… TX 563747 2021-04-06
## # … with 10 more variables: lic_status <chr>, issue_date <date>,
## # application_date <date>, month <chr>, year <chr>, app_type <chr>,
## # lic_type <chr>, process_time <drtn>, data_state <chr>, n <int>
# view
state <- read_excel("../state-data/VT-NPR_Nursing_PRR_09-23-21_2021-09-23_20-10-30.xlsx") %>%
clean_names()
str(state)
## tibble [17,812 × 13] (S3: tbl_df/tbl/data.frame)
## $ board_name : chr [1:17812] "Nursing" "Nursing" "Nursing" "Nursing" ...
## $ profession_type : chr [1:17812] "Registered Nurse" "Registered Nurse" "Registered Nurse" "Registered Nurse" ...
## $ license_number : chr [1:17812] "026.0137872" "026.0020338" "026.0137878" "026.0137876" ...
## $ status : chr [1:17812] "Active" "Active" "Expired" "Expired" ...
## $ state_code : chr [1:17812] "VT" "MA" "NC" "UT" ...
## $ city : chr [1:17812] "East Montpelier" "Wendell Depot" "Huntersville" "Herriman" ...
## $ first_name : chr [1:17812] "Peggy" "Paul" "Crystal" "Karen" ...
## $ last_name : chr [1:17812] "Fair" "Wanta" "Camp" "Pineda" ...
## $ first_issuance_date: POSIXct[1:17812], format: "2019-01-01 16:03:51" "1992-01-07 00:00:00" ...
## $ application_date : POSIXct[1:17812], format: "2019-01-01" "2019-01-02" ...
## $ effective_date : POSIXct[1:17812], format: "2021-04-01" "2021-04-01" ...
## $ expiration_date : POSIXct[1:17812], format: "2023-03-31 01:00:00" "2023-03-31 01:00:00" ...
## $ basis_of_licensure : chr [1:17812] "Endorsement" "Endorsement" "Endorsement" "Endorsement" ...
#What combinations exist?
state %>%
count (profession_type)
## # A tibble: 7 × 2
## profession_type n
## * <chr> <int>
## 1 Advanced Practice Registered Nurse 819
## 2 Advanced Practice Registered Nurse - Emergency Temporary License 5
## 3 Licensed Practical Nurse 1323
## 4 Licensed Practical Nurse - Emergency Temporary License 134
## 5 Registered Nurse 15295
## 6 Registered Nurse - Emergency Temporary License 235
## 7 Registered Nurse - Temporary Permit for Re-Entry 1
state %>%
count (profession_type, basis_of_licensure)
## # A tibble: 11 × 3
## profession_type basis_of_licensure n
## <chr> <chr> <int>
## 1 Advanced Practice Registered Nurse Certification 819
## 2 Advanced Practice Registered Nurse - Emergency Te… Registration 5
## 3 Licensed Practical Nurse Endorsement 864
## 4 Licensed Practical Nurse Examination 454
## 5 Licensed Practical Nurse Fast Track Endorsem… 5
## 6 Licensed Practical Nurse - Emergency Temporary Li… Registration 134
## 7 Registered Nurse Endorsement 11172
## 8 Registered Nurse Examination 4083
## 9 Registered Nurse Fast Track Endorsem… 40
## 10 Registered Nurse - Emergency Temporary License Registration 235
## 11 Registered Nurse - Temporary Permit for Re-Entry Registration 1
#5694 licenses included here were first issued before Jan 2019, so they are renewals that we can't calculate with certainty because Vermont doesn't track intermediate renewal application dates
state %>%
filter (first_issuance_date < as.Date("2019-01-01")) #%>%
## # A tibble: 5,694 × 13
## board_name profession_type license_number status state_code city first_name
## <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Nursing Registered Nurse 026.0020338 Active MA Wend… Paul
## 2 Nursing Registered Nurse 026.0033970 Active VT West… Nathaniel
## 3 Nursing Registered Nurse 026.0043516 Expir… FL Cass… Robelyn
## 4 Nursing Registered Nurse 026.0035795 Expir… FL wint… Manju
## 5 Nursing Registered Nurse 026.0017845 Active VT East… Margaret
## 6 Nursing Registered Nurse 026.0028491 Active VT West… Cathy
## 7 Nursing Registered Nurse 026.0013623 Active VT Lond… Pamela
## 8 Nursing Registered Nurse 026.0035904 Expir… FL Alta… Nadhiya
## 9 Nursing Registered Nurse 026.0042881 Expir… NH Brad… Kathleen
## 10 Nursing Registered Nurse 026.0038751 Expir… NY Tico… Lacey
## # … with 5,684 more rows, and 6 more variables: last_name <chr>,
## # first_issuance_date <dttm>, application_date <dttm>, effective_date <dttm>,
## # expiration_date <dttm>, basis_of_licensure <chr>
#count (status)
#Any duplicates? 4 duplicates, 788 NAs
state %>%
count(license_number) %>%
arrange(desc(n))
## # A tibble: 17,021 × 2
## license_number n
## <chr> <int>
## 1 <NA> 788
## 2 026.0021267 2
## 3 026.0023408 2
## 4 026.0031314 2
## 5 026.0039235 2
## 6 025.0003890 1
## 7 025.0004944 1
## 8 025.0005242 1
## 9 025.0005651 1
## 10 025.0005731 1
## # … with 17,011 more rows
#Calculate processing time
vt_state <- state %>%
filter (!str_detect(profession_type, "^Advanced")) %>% #remove APRNs
mutate (issue_date = as.Date(first_issuance_date),
effective_renewal_date = as.Date(effective_date),
application_date = as.Date(application_date),
expiration_date = as.Date(expiration_date),
month = substr(issue_date, 1,7),
year = year(issue_date),
lic_type = case_when (
profession_type == "Licensed Practical Nurse" ~ "LPN",
profession_type == "Licensed Practical Nurse - Emergency Temporary License" ~ "LPN-Temp",
profession_type == "Registered Nurse" ~ "RN",
profession_type == "Registered Nurse - Emergency Temporary License" ~ "RN-Temp",
profession_type == "Registered Nurse - Temporary Permit for Re-Entry" ~ "RN-Temp"),
app_type = case_when (
basis_of_licensure == "Reciprocity" ~ "Endorsement",
basis_of_licensure == "Examination" ~ "Exam",
basis_of_licensure == "Registration" ~ "Application",
TRUE ~ basis_of_licensure)) %>%
mutate (process_time = issue_date - application_date)
#Check statuses for those that couldn't calculate process_time, so we can remove clearly inactive applications and calculate pending time for all others as process_time2 in next step -- all appear to be actively pending
vt_state %>%
filter (is.na(process_time)) %>%
count (status) %>%
arrange (desc(n))
## # A tibble: 6 × 2
## status n
## <chr> <int>
## 1 Application Expired 343
## 2 Application Incomplete 202
## 3 Application Withdrawn 115
## 4 Approved For Exam 61
## 5 Application Pending 4
## 6 Application Denied (conduct) 2
#Calculates how long those left pending have been pending as process_time2
vt_state <- vt_state %>%
mutate (process_time2 = if_else (is.na(process_time) & status %in% c("Application Incomplete", "Approved For Exam", "Application Pending"),
as.Date("2021-09-23") - application_date,
process_time),
year = if_else (is.na(year), "Pending as of 09/23/21", as.character(year)),
data_state = "VT")
#Any mismatches?
#vt_state %>% filter (as.Date(submit_date_available) != application_date)
vt_state %>% filter (as.Date(first_issuance_date) != issue_date)
## # A tibble: 0 × 22
## # … with 22 variables: board_name <chr>, profession_type <chr>,
## # license_number <chr>, status <chr>, state_code <chr>, city <chr>,
## # first_name <chr>, last_name <chr>, first_issuance_date <dttm>,
## # application_date <date>, effective_date <dttm>, expiration_date <date>,
## # basis_of_licensure <chr>, issue_date <date>, effective_renewal_date <date>,
## # month <chr>, year <chr>, lic_type <chr>, app_type <chr>,
## # process_time <drtn>, process_time2 <drtn>, data_state <chr>
#Remove unformatted date fields and unnecessary fields
vt_state <- vt_state %>%
select (-c(board_name, effective_date, basis_of_licensure, profession_type, first_issuance_date)) %>%
rename (state = state_code,
lic_status = status)
vt_state %>%
count (lic_type, app_type)
## # A tibble: 8 × 3
## lic_type app_type n
## <chr> <chr> <int>
## 1 LPN Endorsement 864
## 2 LPN Exam 454
## 3 LPN Fast Track Endorsement 5
## 4 LPN-Temp Application 134
## 5 RN Endorsement 11172
## 6 RN Exam 4083
## 7 RN Fast Track Endorsement 40
## 8 RN-Temp Application 236
#count(lic_type, application_type, app_type) #check they've all coded correctly field
str(vt_state)
## tibble [16,988 × 17] (S3: tbl_df/tbl/data.frame)
## $ license_number : chr [1:16988] "026.0137872" "026.0020338" "026.0137878" "026.0137876" ...
## $ lic_status : chr [1:16988] "Active" "Active" "Expired" "Expired" ...
## $ state : chr [1:16988] "VT" "MA" "NC" "UT" ...
## $ city : chr [1:16988] "East Montpelier" "Wendell Depot" "Huntersville" "Herriman" ...
## $ first_name : chr [1:16988] "Peggy" "Paul" "Crystal" "Karen" ...
## $ last_name : chr [1:16988] "Fair" "Wanta" "Camp" "Pineda" ...
## $ application_date : Date[1:16988], format: "2019-01-01" "2019-01-02" ...
## $ expiration_date : Date[1:16988], format: "2023-03-31" "2023-03-31" ...
## $ issue_date : Date[1:16988], format: "2019-01-01" "1992-01-07" ...
## $ effective_renewal_date: Date[1:16988], format: "2021-04-01" "2021-04-01" ...
## $ month : chr [1:16988] "2019-01" "1992-01" "2019-01" "2019-01" ...
## $ year : chr [1:16988] "2019" "1992" "2019" "2019" ...
## $ lic_type : chr [1:16988] "RN" "RN" "RN" "RN" ...
## $ app_type : chr [1:16988] "Endorsement" "Endorsement" "Endorsement" "Endorsement" ...
## $ process_time : 'difftime' num [1:16988] 0 -9857 0 0 ...
## ..- attr(*, "units")= chr "days"
## $ process_time2 : 'difftime' num [1:16988] 0 -9857 0 0 ...
## ..- attr(*, "units")= chr "days"
## $ data_state : chr [1:16988] "VT" "VT" "VT" "VT" ...
#These 8 records have duplicates -
dupe_numbers <- vt_state %>%
filter (!is.na(license_number)) %>% #pending apps will show up as NA
group_by (license_number) %>%
mutate (n=n()) %>%
filter (n > 1) %>%
arrange (license_number)
sheets <- excel_sheets(path = "../state-data/VA-NPR Request RN LPN 30dayapplicationprocessing Jan 1 2019 -Sep 30 2021.xlsx")
state <- tibble()
for (i in sheets){
#i <- "Q1 21"
step_df <-read_excel("../state-data/VA-NPR Request RN LPN 30dayapplicationprocessing Jan 1 2019 -Sep 30 2021.xlsx",
sheet = i) %>%
clean_names() %>%
mutate (entry_date = as_date(entry_date),
last_item_complete_date = as_date(last_item_complete_date),
issue_date = as_date(issue_date)#,
#sheet = i
)
state <- rbind (state, step_df)
}
va_state <- state %>%
mutate (month = substr(issue_date, 1,7),
year = as.character(year(issue_date)),
issue_date = ymd(issue_date),
application_date = ymd(entry_date),
docs_date = ymd(last_item_complete_date),
process_time = issue_date - application_date,
gather_time = docs_date - application_date,
docs_time = issue_date - docs_date,
nurse_id = paste0("nurse", row_number()),
data_state = "VA",
lic_type = case_when (
lic_status=="Temporary" & lic_type=="Registered Nurse" ~ "RN-Temp",
lic_status=="Temporary" & lic_type=="Licensed Practical Nurse" ~ "LPN-Temp",
lic_status=="Provisional" & lic_type=="Registered Nurse" ~ "RN-Provisional",
lic_status=="Provisional" & lic_type=="Licensed Practical Nurse" ~ "LPN-Provisional",
lic_type=="Registered Nurse" ~ "RN",
lic_type=="Licensed Practical Nurse" ~ "LPN"),
app_type = "Unknown") %>%
#remove ~120 entries "entered" years ago with processing times from 387-7241 days
filter (entry_date >= as.Date("2017-01-01"))
#Any mismatches?
#va_state %>% filter (as.Date(submit_date_available) != application_date)
#va_state %>% filter (as.Date(first_issuance_date) != issue_date)
#Remove unformatted date fields and unnecessary fields
va_state <- va_state %>%
select (-c(board, entry_date, last_item_complete_date, days, nurse_id))
va_state %>%
count (lic_type, app_type)
## # A tibble: 4 × 3
## lic_type app_type n
## <chr> <chr> <int>
## 1 LPN Unknown 3929
## 2 RN Unknown 22194
## 3 RN-Provisional Unknown 15
## 4 RN-Temp Unknown 39
str(va_state)
## tibble [26,177 × 12] (S3: tbl_df/tbl/data.frame)
## $ lic_type : chr [1:26177] "LPN" "LPN" "LPN" "LPN" ...
## $ lic_status : chr [1:26177] "Current Active" "Current Active" "Current Active" "Current Active" ...
## $ issue_date : Date[1:26177], format: "2019-03-04" "2019-01-12" ...
## $ month : chr [1:26177] "2019-03" "2019-01" "2019-01" "2019-02" ...
## $ year : chr [1:26177] "2019" "2019" "2019" "2019" ...
## $ application_date: Date[1:26177], format: "2017-03-28" "2017-08-21" ...
## $ docs_date : Date[1:26177], format: "2019-03-04" "2019-01-08" ...
## $ process_time : 'difftime' num [1:26177] 706 509 476 490 ...
## ..- attr(*, "units")= chr "days"
## $ gather_time : 'difftime' num [1:26177] 706 505 476 490 ...
## ..- attr(*, "units")= chr "days"
## $ docs_time : 'difftime' num [1:26177] 0 4 0 0 ...
## ..- attr(*, "units")= chr "days"
## $ data_state : chr [1:26177] "VA" "VA" "VA" "VA" ...
## $ app_type : chr [1:26177] "Unknown" "Unknown" "Unknown" "Unknown" ...
#CANNOT CHECK FOR DUPLICATES BECAUSE NO IDENTIFYING INFORMATION PROVIDED FROM STATE
state <- read_csv("../state-data/WV-NPR Spreadsheet 1-1-19 to 10-7-21-clean.csv") %>%
clean_names()
#Jan 2, 2019 - Oct. 7, 2021 applications
#March 11, 2019 - Oct. 7, 2021 issued
state %>%
summarize (app_range = range(date_initial_application_submit),
issue_range = range (license_issue_date, na.rm=T))
## # A tibble: 2 × 2
## app_range issue_range
## <date> <date>
## 1 2019-01-02 2019-03-11
## 2 2021-10-07 2021-10-07
#Calculate processing time
wv_state <- state %>%
mutate (issue_date = as.Date(license_issue_date),
docs_date = as.Date(date_required_documents_received),
application_date = as.Date(date_initial_application_submit),
month = substr(issue_date, 1,7),
year = year(issue_date),
lic_type = if_else (str_detect(application_type, "Temporary"), "RN-Temp", license_type),
app_type = case_when (
application_type == "RN Retest Initial Exam" ~ "Exam-retest",
application_type == "RN 90-Day Temporary License By Endorsement" ~ "Endorsement",
application_type == "RN Internationally Educated" ~ "International",
str_detect(application_type, "Reinstatement") ~ "Reinstatement",
TRUE ~ str_remove_all(application_type, "RN "))) %>%
mutate (process_time = issue_date - application_date,
docs_time = issue_date - docs_date)
#Check statuses for those that couldn't calculate process_time, so we can remove clearly inactive applications and calculate pending time for all others as process_time2 in next step -- all appear to be actively pending
wv_state %>%
filter (is.na(process_time)) %>%
count (application_status) %>%
arrange (desc(n))
## # A tibble: 13 × 2
## application_status n
## <chr> <int>
## 1 Expired 1204
## 2 Processing 909
## 3 Retest 471
## 4 Closed 196
## 5 Void 108
## 6 Approved 44
## 7 Denied 14
## 8 Ready to issue 7
## 9 Withdrawn 7
## 10 Ready to Reinstate 2
## 11 Approved-Retest 1
## 12 Dropped 1
## 13 Ready to Renew 1
#Calculates how long those left pending have been pending as process_time2
wv_state <- wv_state %>%
mutate (process_time2 = if_else (is.na(process_time) & application_status %in% c("Processing", "Retest", "Approved", "Ready to issue", "Ready to Reinstate", "Approved-Retest", "Ready to Renew"),
as.Date("2021-10-07") - application_date,
process_time),
year = if_else (is.na(year), "Pending as of 10/07/21", as.character(year)),
data_state = "WV")
#Any mismatches?
wv_state %>% filter (as.Date(date_initial_application_submit) != application_date)
## # A tibble: 0 × 25
## # … with 25 variables: first_name <chr>, middle_name <chr>, last_name <chr>,
## # city_of_residence <chr>, state_province <chr>, license_type <chr>,
## # license_duration <chr>, application_type <chr>,
## # date_initial_application_submit <date>,
## # date_required_documents_received <date>, application_status <chr>,
## # license_issue_date <date>, current_license_status <chr>,
## # expiration_date <date>, issue_date <date>, docs_date <date>, …
wv_state %>% filter (as.Date(license_issue_date) != issue_date)
## # A tibble: 0 × 25
## # … with 25 variables: first_name <chr>, middle_name <chr>, last_name <chr>,
## # city_of_residence <chr>, state_province <chr>, license_type <chr>,
## # license_duration <chr>, application_type <chr>,
## # date_initial_application_submit <date>,
## # date_required_documents_received <date>, application_status <chr>,
## # license_issue_date <date>, current_license_status <chr>,
## # expiration_date <date>, issue_date <date>, docs_date <date>, …
#Remove unformatted date fields and unnecessary fields
wv_state <- wv_state %>%
select (-c(license_type, date_initial_application_submit, date_required_documents_received, application_type, license_issue_date)) %>%
rename (state = state_province,
city = city_of_residence,
duration = license_duration,
app_status = application_status,
lic_status = current_license_status)
wv_state %>%
#count (lic_type, license_type, application_type, app_type)
count(lic_type, app_type) #check they've all coded correctly
## # A tibble: 7 × 3
## lic_type app_type n
## <chr> <chr> <int>
## 1 RN Endorsement 4285
## 2 RN Exam 3686
## 3 RN Exam-retest 531
## 4 RN International 45
## 5 RN Reinstatement 2011
## 6 RN Renewal 74378
## 7 RN-Temp Endorsement 918
str(wv_state)
## tibble [85,854 × 20] (S3: tbl_df/tbl/data.frame)
## $ first_name : chr [1:85854] "TRAVIS" "ASHLEY" "EUN SOO" "CASSANDRA" ...
## $ middle_name : chr [1:85854] "MICHAEL" "ELIZABETH" NA "LEA" ...
## $ last_name : chr [1:85854] "HIBNER" "GORDON" "CHO" "JENKINS" ...
## $ city : chr [1:85854] "HUNTINGTON" "Clarksburg" "FAIRMONT" "BARBOURSVILLE" ...
## $ state : chr [1:85854] "WV" "WV" "WV" "WV" ...
## $ duration : chr [1:85854] "Permanent" "Permanent" "Permanent" "Permanent" ...
## $ app_status : chr [1:85854] "Approved" "Approved" "Approved" "Expired" ...
## $ lic_status : chr [1:85854] "Active" "Active" "Lapsed" NA ...
## $ expiration_date : Date[1:85854], format: "2021-10-31" "2021-10-31" ...
## $ issue_date : Date[1:85854], format: "2019-03-15" "2019-05-16" ...
## $ docs_date : Date[1:85854], format: "2019-03-14" "2019-05-15" ...
## $ application_date: Date[1:85854], format: "2019-01-04" "2019-01-04" ...
## $ month : chr [1:85854] "2019-03" "2019-05" "2019-04" NA ...
## $ year : chr [1:85854] "2019" "2019" "2019" "Pending as of 10/07/21" ...
## $ lic_type : chr [1:85854] "RN" "RN" "RN" "RN" ...
## $ app_type : chr [1:85854] "Exam" "Exam" "Exam" "Exam" ...
## $ process_time : 'difftime' num [1:85854] 70 132 88 NA ...
## ..- attr(*, "units")= chr "days"
## $ docs_time : 'difftime' num [1:85854] 1 1 5 NA ...
## ..- attr(*, "units")= chr "days"
## $ process_time2 : 'difftime' num [1:85854] 70 132 88 NA ...
## ..- attr(*, "units")= chr "days"
## $ data_state : chr [1:85854] "WV" "WV" "WV" "WV" ...
#These 70408 records have duplicates -
dupe_numbers <- wv_state %>%
# filter (!is.na(license_number)) %>% #pending apps will show up as NA
group_by (first_name, middle_name, last_name, city, lic_type, app_type) %>%
mutate (n = n()) %>%
filter (n > 1) %>%
arrange (last_name, first_name, middle_name, city, lic_type, app_type)
#Most are renewals, which will fall out anyway
dupe_numbers %>% ungroup() %>% count (app_type) %>% arrange (desc(n))
## # A tibble: 6 × 2
## app_type n
## <chr> <int>
## 1 Renewal 69675
## 2 Endorsement 301
## 3 Reinstatement 198
## 4 Exam-retest 139
## 5 Exam 93
## 6 International 2
#535 records have duplicates that AREN'T renewal or reinstatements, but they've all been voided or expired and only one issued
dupe_numbers2 <- dupe_numbers %>%
ungroup() %>%
filter (!(app_type %in% c("Renewal", "Reinstatement"))) %>%
filter (!is.na(process_time)) %>% #removes all duplicates
group_by (first_name, middle_name, last_name, city, lic_type, app_type) %>%
mutate (count = n()) %>%
# filter (count == 1) #
filter (count > 1)
#No removal necessary
state_end <- read_excel("../state-data/WY-Credentials Issued.xlsx RN End.xlsx") %>%
clean_names() %>%
mutate (lic_type = "RN",
app_type = "Endorsement",
application_date = mdy (application_submitted),
issue_date = mdy (original_issue_date),
expiration_date = mdy (expiration_date),
process_time = issue_date - application_date)
state_exam <- read_excel("../state-data/WY-Credentials Issued.xlsx RN Exam.xlsx", skip=1) %>%
clean_names() %>%
mutate (lic_type = "RN",
app_type = "Exam",
application_date = mdy (application_submitted),
issue_date = mdy (original_issue_date),
expiration_date = mdy (expiration_date),
process_time = issue_date - application_date)
#Any negatives? None
state_exam %>%
filter (process_time < 0)
## # A tibble: 0 × 41
## # … with 41 variables: basis_for_licensure_1 <chr>, license_type_2 <chr>,
## # orbsid <chr>, middle_name <chr>, last_name <chr>, license_number <chr>,
## # license_category <chr>, original_issue_date <chr>, expiration_date <date>,
## # basis_for_licensure_10 <chr>, license_type_11 <chr>, status_name <chr>,
## # application_submitted <chr>, days_from_submission <dbl>,
## # application_reviewed_completed <lgl>, days_from_app_review <lgl>,
## # credential_evaluation_services_completed <lgl>, days_from_ces <lgl>, …
state_end %>% #None
filter (process_time < 0)
## # A tibble: 0 × 37
## # … with 37 variables: license_number <chr>, license_category <chr>,
## # original_issue_date <chr>, expiration_date <date>,
## # basis_for_licensure <chr>, license_type <chr>, application <chr>,
## # status_name <chr>, application_submitted <chr>, days_from_submission <dbl>,
## # application_reviewed_completed <lgl>, days_from_app_review <lgl>,
## # credential_evaluation_services_completed <lgl>, days_from_ces <lgl>,
## # afadavit_completed <lgl>, days_from_afadavit <lgl>, …
state_exam %>% #10 didn't calculate, all subtotal rows
filter (is.na(process_time))
## # A tibble: 10 × 41
## basis_for_licensu… license_type_2 orbsid middle_name last_name license_number
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 <NA> <NA> Total <NA> <NA> <NA>
## 2 <NA> Total 22 <NA> <NA> <NA>
## 3 <NA> <NA> Total <NA> <NA> <NA>
## 4 <NA> Total 785 <NA> <NA> <NA>
## 5 <NA> <NA> Total <NA> <NA> <NA>
## 6 <NA> Total 14 <NA> <NA> <NA>
## 7 <NA> 821 <NA> <NA> <NA> <NA>
## 8 Total <NA> <NA> <NA> <NA> <NA>
## 9 <NA> <NA> <NA> <NA> <NA> <NA>
## 10 <NA> <NA> <NA> <NA> <NA> <NA>
## # … with 35 more variables: license_category <chr>, original_issue_date <chr>,
## # expiration_date <date>, basis_for_licensure_10 <chr>,
## # license_type_11 <chr>, status_name <chr>, application_submitted <chr>,
## # days_from_submission <dbl>, application_reviewed_completed <lgl>,
## # days_from_app_review <lgl>, credential_evaluation_services_completed <lgl>,
## # days_from_ces <lgl>, afadavit_completed <lgl>, days_from_afadavit <lgl>,
## # english_proficiency_completed <lgl>, days_from_english <lgl>, …
state_end %>% #8 didn't calculate, all subtotal rows
filter (is.na(process_time))
## # A tibble: 8 × 37
## license_number license_category original_issue_date expiration_date
## <chr> <chr> <chr> <date>
## 1 <NA> <NA> <NA> NA
## 2 <NA> <NA> <NA> NA
## 3 <NA> <NA> <NA> NA
## 4 <NA> <NA> <NA> NA
## 5 <NA> <NA> <NA> NA
## 6 <NA> <NA> <NA> NA
## 7 <NA> <NA> <NA> NA
## 8 <NA> <NA> <NA> NA
## # … with 33 more variables: basis_for_licensure <chr>, license_type <chr>,
## # application <chr>, status_name <chr>, application_submitted <chr>,
## # days_from_submission <dbl>, application_reviewed_completed <lgl>,
## # days_from_app_review <lgl>, credential_evaluation_services_completed <lgl>,
## # days_from_ces <lgl>, afadavit_completed <lgl>, days_from_afadavit <lgl>,
## # english_proficiency_completed <lgl>, days_from_english <lgl>,
## # nursys_fits_results_completed <lgl>, days_from_nursys_fits <lgl>, …
#9 rows had NAs in Wyoming's calculation but not mine... all with process times over 221 days. Are they trying to skew their average?
state_exam %>%
filter (is.na(days_from_submission) & !is.na(process_time))
## # A tibble: 9 × 41
## basis_for_licensur… license_type_2 orbsid middle_name last_name license_number
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 <NA> <NA> 64299 Diane Hobbs 43315
## 2 <NA> <NA> 61731 <NA> Duru 43326
## 3 <NA> <NA> 80046 FAITH BURKINDI… 43921
## 4 <NA> <NA> 74868 Ann Montague 44571
## 5 <NA> <NA> 80968 <NA> KELLY 44735
## 6 <NA> <NA> 60868 Aurora Wagner 45832
## 7 <NA> <NA> 67011 Jean Olson 46085
## 8 <NA> <NA> 69213 Lynn Jones 46816
## 9 <NA> RN 64002 Rose McCartney 42047
## # … with 35 more variables: license_category <chr>, original_issue_date <chr>,
## # expiration_date <date>, basis_for_licensure_10 <chr>,
## # license_type_11 <chr>, status_name <chr>, application_submitted <chr>,
## # days_from_submission <dbl>, application_reviewed_completed <lgl>,
## # days_from_app_review <lgl>, credential_evaluation_services_completed <lgl>,
## # days_from_ces <lgl>, afadavit_completed <lgl>, days_from_afadavit <lgl>,
## # english_proficiency_completed <lgl>, days_from_english <lgl>, …
state_end %>% #no missing calculations
filter (is.na(days_from_submission) & !is.na(process_time))
## # A tibble: 0 × 37
## # … with 37 variables: license_number <chr>, license_category <chr>,
## # original_issue_date <chr>, expiration_date <date>,
## # basis_for_licensure <chr>, license_type <chr>, application <chr>,
## # status_name <chr>, application_submitted <chr>, days_from_submission <dbl>,
## # application_reviewed_completed <lgl>, days_from_app_review <lgl>,
## # credential_evaluation_services_completed <lgl>, days_from_ces <lgl>,
## # afadavit_completed <lgl>, days_from_afadavit <lgl>, …
state <- bind_rows (state_exam, state_end) #combine exam/endorsement
str(state)
## tibble [2,267 × 44] (S3: tbl_df/tbl/data.frame)
## $ basis_for_licensure_1 : chr [1:2267] "Exam" NA NA NA ...
## $ license_type_2 : chr [1:2267] "RN" NA NA NA ...
## $ orbsid : chr [1:2267] "79020" "78581" "74976" "79583" ...
## $ middle_name : chr [1:2267] "MARIE" "GRACE" "L" "S" ...
## $ last_name : chr [1:2267] "DAVIDSON" "COOLEY" "Hansen" "NEAL" ...
## $ license_number : chr [1:2267] "43411" "43910" "43976" "45699" ...
## $ license_category : chr [1:2267] "Permanent" "Permanent" "Permanent" "Permanent" ...
## $ original_issue_date : chr [1:2267] "06/17/2021" "07/16/2020" "07/16/2020" "06/16/2021" ...
## $ expiration_date : Date[1:2267], format: "2022-12-31" "2020-12-31" ...
## $ basis_for_licensure_10 : chr [1:2267] "Exam" "Exam" "Exam" "Exam" ...
## $ license_type_11 : chr [1:2267] "RN" "RN" "RN" "RN" ...
## $ status_name : chr [1:2267] "Approved" "Approved" "Approved" "Approved" ...
## $ application_submitted : chr [1:2267] "04/19/2021" "05/15/2020" "05/01/2020" "05/03/2021" ...
## $ days_from_submission : num [1:2267] 59 62 76 44 81 77 52 84 57 32 ...
## $ application_reviewed_completed : logi [1:2267] NA NA NA NA NA NA ...
## $ days_from_app_review : logi [1:2267] NA NA NA NA NA NA ...
## $ credential_evaluation_services_completed: logi [1:2267] NA NA NA NA NA NA ...
## $ days_from_ces : logi [1:2267] NA NA NA NA NA NA ...
## $ afadavit_completed : logi [1:2267] NA NA NA NA NA NA ...
## $ days_from_afadavit : logi [1:2267] NA NA NA NA NA NA ...
## $ english_proficiency_completed : logi [1:2267] NA NA NA NA NA NA ...
## $ days_from_english : logi [1:2267] NA NA NA NA NA NA ...
## $ nursys_fits_results_completed : logi [1:2267] NA NA NA NA NA NA ...
## $ days_from_nursys_fits : logi [1:2267] NA NA NA NA NA NA ...
## $ testing_accomodation_completed : logi [1:2267] NA NA NA NA NA NA ...
## $ days_from_testing : logi [1:2267] NA NA NA NA NA NA ...
## $ background_history_completed : logi [1:2267] NA NA NA NA NA NA ...
## $ days_from_background : logi [1:2267] NA NA NA NA NA NA ...
## $ referto_investigations_completed : logi [1:2267] NA NA NA NA NA NA ...
## $ days_from_investigation : logi [1:2267] NA NA NA NA NA NA ...
## $ official_transcripts_completed : logi [1:2267] NA NA NA NA NA NA ...
## $ days_from_transcripts : logi [1:2267] NA NA NA NA NA NA ...
## $ proofof_education_completed : chr [1:2267] "05/11/2021" "05/29/2020" "05/29/2020" "05/18/2021" ...
## $ days_from_education : num [1:2267] 37 48 48 29 57 50 28 28 42 27 ...
## $ final_review_completed : logi [1:2267] NA NA NA NA NA NA ...
## $ days_from_final_review : logi [1:2267] NA NA NA NA NA NA ...
## $ lic_type : chr [1:2267] "RN" "RN" "RN" "RN" ...
## $ app_type : chr [1:2267] "Exam" "Exam" "Exam" "Exam" ...
## $ application_date : Date[1:2267], format: "2021-04-19" "2020-05-15" ...
## $ issue_date : Date[1:2267], format: "2021-06-17" "2020-07-16" ...
## $ process_time : 'difftime' num [1:2267] 59 62 76 44 ...
## ..- attr(*, "units")= chr "days"
## $ basis_for_licensure : chr [1:2267] NA NA NA NA ...
## $ license_type : chr [1:2267] NA NA NA NA ...
## $ application : chr [1:2267] NA NA NA NA ...
#What combinations exist?
state %>%
count (lic_type, app_type)
## # A tibble: 2 × 3
## lic_type app_type n
## <chr> <chr> <int>
## 1 RN Endorsement 1436
## 2 RN Exam 831
#Issued from Jan 3, 2019 to Oct. 14, 2021
state %>%
summarize (app_range = range(application_date, na.rm=T),
issue_range = range (issue_date, na.rm=T))
## # A tibble: 2 × 2
## app_range issue_range
## <date> <date>
## 1 2017-03-23 2019-01-03
## 2 2021-10-01 2021-10-14
#Calculate processing time
wy_state <- state %>%
#Remove blank columns
select(-c(basis_for_licensure_1, license_type_2, orbsid, basis_for_licensure_10, basis_for_licensure, license_type, application, license_type_11, c(15:32), final_review_completed, days_from_final_review)) %>%
mutate (month = substr(issue_date, 1,7),
year = as.character(year(issue_date)),
data_state = "WY") %>%
filter (!is.na(year)) #remove blank subtotal rows
#Any mismatches?
wy_state %>% filter (mdy(application_submitted) != application_date)
## # A tibble: 0 × 19
## # … with 19 variables: middle_name <chr>, last_name <chr>,
## # license_number <chr>, license_category <chr>, original_issue_date <chr>,
## # expiration_date <date>, status_name <chr>, application_submitted <chr>,
## # days_from_submission <dbl>, proofof_education_completed <chr>,
## # days_from_education <dbl>, lic_type <chr>, app_type <chr>,
## # application_date <date>, issue_date <date>, process_time <drtn>,
## # month <chr>, year <chr>, data_state <chr>
wy_state %>% filter (mdy(original_issue_date) != issue_date)
## # A tibble: 0 × 19
## # … with 19 variables: middle_name <chr>, last_name <chr>,
## # license_number <chr>, license_category <chr>, original_issue_date <chr>,
## # expiration_date <date>, status_name <chr>, application_submitted <chr>,
## # days_from_submission <dbl>, proofof_education_completed <chr>,
## # days_from_education <dbl>, lic_type <chr>, app_type <chr>,
## # application_date <date>, issue_date <date>, process_time <drtn>,
## # month <chr>, year <chr>, data_state <chr>
#Remove unformatted date fields and unnecessary fields
wy_state <- wy_state %>%
select (-c(license_category, original_issue_date, application_submitted, days_from_submission, proofof_education_completed, days_from_education)) %>%
rename (lic_status = status_name, )
wy_state %>%
#count (lic_type, license_type, application_type, app_type)
count(lic_type, app_type) #check they've all coded correctly
## # A tibble: 2 × 3
## lic_type app_type n
## <chr> <chr> <int>
## 1 RN Endorsement 1428
## 2 RN Exam 821
str(wy_state)
## tibble [2,249 × 13] (S3: tbl_df/tbl/data.frame)
## $ middle_name : chr [1:2249] "MARIE" "GRACE" "L" "S" ...
## $ last_name : chr [1:2249] "DAVIDSON" "COOLEY" "Hansen" "NEAL" ...
## $ license_number : chr [1:2249] "43411" "43910" "43976" "45699" ...
## $ expiration_date : Date[1:2249], format: "2022-12-31" "2020-12-31" ...
## $ lic_status : chr [1:2249] "Approved" "Approved" "Approved" "Approved" ...
## $ lic_type : chr [1:2249] "RN" "RN" "RN" "RN" ...
## $ app_type : chr [1:2249] "Exam" "Exam" "Exam" "Exam" ...
## $ application_date: Date[1:2249], format: "2021-04-19" "2020-05-15" ...
## $ issue_date : Date[1:2249], format: "2021-06-17" "2020-07-16" ...
## $ process_time : 'difftime' num [1:2249] 59 62 76 44 ...
## ..- attr(*, "units")= chr "days"
## $ month : chr [1:2249] "2021-06" "2020-07" "2020-07" "2021-06" ...
## $ year : chr [1:2249] "2021" "2020" "2020" "2021" ...
## $ data_state : chr [1:2249] "WY" "WY" "WY" "WY" ...
#No duplicate license numbers. No removal necessary.
dupe_numbers <- wy_state %>%
group_by (license_number) %>%
mutate (n=n()) %>%
filter (n>1)
Remove all intermediary dataframes
save_these=(ls() %>% as.data.frame() %>% filter (str_detect(., "_state$")) %>% pull())
rm(list=setdiff(ls(), c("start_time", save_these)))
end_time <- Sys.time()
paste ("Processing time lasted", (end_time-start_time))
## [1] "Processing time lasted 2.27697230180105"
This is how I knew what to rename columns in all the import code chunks above. Originally, there were almost 200 unique column names among the 32 states, which have been pared down to 45 columns for our dataset.
#Compare columns to know which ones to remove from each state dataframe in code above
all_data <- compare_df_cols (ar_state, ca_state, co_state, ct_state, fl_state, hi_state, ia_state, il_state, in_state, ky_state, la_state, ma_state, me_state, mi_state, mn_state, ms_state, mt_state, ne_state, nh_state, nj_state, nm_state, nc_state, oh_state, ok_state, or_state, pa_state, sc_state, tn_state, tx_state, vt_state, va_state, wv_state, wy_state)
names <- all_data %>% select (column_name)
counts <- apply(all_data, MARGIN = 1, function(x) sum(!is.na(x))) %>%
as.data.frame() %>%
magrittr::set_colnames("count") %>%
mutate (count = count - 1)
counts2 <- cbind (names, counts)
#all_data2 %>% filter (is.na(name))
#all_data2 %>% filter (!is.na(suffix) & is.na(name)) #2690 people have a suffix
#Join all state dataframes into one frame
all_data2 <- bind_rows (ar_state, ca_state, co_state, ct_state, fl_state, hi_state, ia_state, il_state, in_state, ky_state, la_state, ma_state, me_state, mn_state, mi_state, mn_state, ms_state, mt_state, ne_state, nh_state, nj_state, nm_state, nc_state, oh_state, ok_state, or_state, pa_state, sc_state, tn_state, tx_state, vt_state, va_state, wv_state, wy_state) %>%
#Create name column for those without
mutate (name = case_when (
is.na(name) & !is.na(suffix) & is.na (middle_name) ~ paste (first_name, last_name, suffix),
is.na(name) & is.na(suffix) & is.na (middle_name) ~ paste (first_name, last_name),
is.na(name) & !is.na(suffix) & !is.na (middle_name) ~ paste (first_name, middle_name, last_name, suffix),
is.na(name) & is.na(suffix) & !is.na (middle_name) ~ paste (first_name, middle_name, last_name),
!is.na(name) ~ name),
#Copy process time into process_time2 for those states where it wasn't needed
process_time2 = if_else (data_state %in% c("AR", "IL", "KY", "ME", "MN", "MS", "NC", "OR", "SC", "TN", "TX", "VA", "WY"), process_time, process_time2)) %>%
#Rearrange columns, leaving out process_time
select (data_state, lic_type, app_type, name, city, state, process_time2, month, year, application_date, docs_date, issue_date, effective_renewal_date, expiration_date, gather_time, docs_time, app_status, lic_status, license_number, duration, app_month, app_year, app_expiration_date, first_name, middle_name, last_name, suffix, maiden_name, address_line1, address_line2, zip_code, county, date_of_birth, gender, ethnicity, phone, email, school_name, graduation_date, highest_education_level, prelicensure_education_preparation, state_of_original_license, deficient)
#all_data2 %>% filter (process_time != process_time2)
all_data2 %>% count (app_type)
## # A tibble: 35 × 2
## app_type n
## * <chr> <int>
## 1 All 1285
## 2 Application 42906
## 3 Archive Record 1
## 4 Emergency 1138
## 5 Endorsement 404002
## 6 Endorsement (foreign) 250
## 7 Endorsement (non-US) 2003
## 8 Endorsement (US) 25338
## 9 Exam 450340
## 10 Exam (foreign) 81
## # … with 25 more rows
How many have negative process times? This happens when the issue date is before the application date.
all_data2 %>%
filter (process_time2 < 0) %>%
#count (lic_type, app_type) %>%
filter (!(str_detect(app_type, "^Renewal")|str_detect(app_type, "^Reinstatement"))) #%>%
## # A tibble: 10,643 × 43
## data_state lic_type app_type name city state process_time2 month year
## <chr> <chr> <chr> <chr> <chr> <chr> <drtn> <chr> <chr>
## 1 CO RN Exam frede… Accok… MD -1 days <NA> Pend…
## 2 CO RN Exam SABRA… Littl… CO -1 days <NA> Pend…
## 3 CO RN Exam Carol… Parker CO -1 days <NA> Pend…
## 4 CO RN Endorsement Savan… Denver CO -1 days <NA> Pend…
## 5 CO LPN Endorsement David… Cente… CO -1 days <NA> Pend…
## 6 CO RN Exam Laure… Denver CO -1 days <NA> Pend…
## 7 CO RN Endorsement Jane … <NA> <NA> -1 days <NA> Pend…
## 8 CO RN Endorsement Cindy… Edwar… IL -1 days <NA> Pend…
## 9 CO RN Exam BROOK… THORN… CO -1 days <NA> Pend…
## 10 CO RN Endorsement Lynn … Denver CO -16811 days 1973… 1973
## # … with 10,633 more rows, and 34 more variables: application_date <date>,
## # docs_date <date>, issue_date <date>, effective_renewal_date <date>,
## # expiration_date <date>, gather_time <drtn>, docs_time <drtn>,
## # app_status <chr>, lic_status <chr>, license_number <chr>, duration <chr>,
## # app_month <chr>, app_year <dbl>, app_expiration_date <date>,
## # first_name <chr>, middle_name <chr>, last_name <chr>, suffix <chr>,
## # maiden_name <chr>, address_line1 <chr>, address_line2 <chr>, …
#arrange (desc(n))
24.3% of data is renewals/reinstatements I cannot use because they have negative process times.
nrow(all_data2 %>% filter (process_time2 < 0) %>%
filter ((str_detect(app_type, "^Renewal")|str_detect(app_type, "^Reinstatement")))) / nrow(all_data2)
## [1] 0.2400707
0.5% of other types have negative process time and cannot be used – input errors.
nrow(all_data2 %>% filter (process_time2 < 0) %>%
filter (!(str_detect(app_type, "^Renewal")|str_detect(app_type, "^Reinstatement")))) / nrow(all_data2)
## [1] 0.004602681
How many process times didn’t calculate? (because denied, withdrawn, etc.)
all_data2 %>%
filter (is.na(process_time2)) %>%
#count (data_state)
count (lic_type, app_type) %>%
arrange (desc(n)) %>%
adorn_totals()
## lic_type app_type n
## RN Exam 9193
## RN Unknown 7675
## RN Endorsement 5733
## Unknown Exam 5616
## Unknown Endorsement 4292
## RN International 2777
## LPN Exam 2700
## LPN Unknown 1199
## RN Exam-retest 1081
## LPN Endorsement 980
## RN Reinstatement 744
## RN-Temp Application 504
## RN-Provisional Application 373
## RN Renewal 270
## RN-Temp Endorsement 209
## LPN Exam-retest 208
## LPN-Provisional Application 164
## LPN Renewal 121
## LPN-Temp Application 108
## RN-Temp All 97
## LPN-Temp Unknown 94
## LPN Reinstatement 80
## LPN International 63
## RN Other 31
## LPN-Temp All 26
## LPN-Temp Endorsement 12
## LPN Other 6
## RN Renewal-Unknown 6
## RN SSL to MSL 5
## LPN SSL to MSL 4
## RN Renewal-Endorsement 4
## RN Foreign Applicant 3
## RN Inactive 3
## Unknown Unknown 3
## RN-Temp Exam 2
## LPN Foreign Applicant 1
## LPN Inactive 1
## LPN Renewal-Endorsement 1
## RN Archive Record 1
## RN Reinstatement-Endorsement 1
## RN-Refresher Temporary Application 1
## Volunteer RN Application 1
## Total - 44393
nrow(all_data2 %>% filter (is.na(process_time2))) / nrow(all_data2)
## [1] 0.01919823
How much valid data do I have? * 1.70M records (including pending) * 1.55M without pending
all_data2 %>%
filter (process_time2 > -1) %>%
filter (year != "2022") %>% #remove 266 errors
filter (!(str_detect(year, "^Pending"))) #%>% #remove all pending apps
## # A tibble: 1,548,989 × 43
## data_state lic_type app_type name city state process_time2 month year
## <chr> <chr> <chr> <chr> <chr> <chr> <drtn> <chr> <chr>
## 1 AR LPN Exam MEGAN BRI… <NA> <NA> 27 days 2019… 2019
## 2 AR LPN Exam MORGAN FO… <NA> <NA> 83 days 2019… 2019
## 3 AR LPN Exam ASHLEY IR… <NA> <NA> 83 days 2019… 2019
## 4 AR RN Exam CASEY HOD… <NA> <NA> 41 days 2019… 2019
## 5 AR LPN Exam SYDNI ROC… <NA> <NA> 48 days 2019… 2019
## 6 AR RN Exam CHRISTINE… <NA> <NA> 31 days 2019… 2019
## 7 AR LPN Exam JODI SCHA… <NA> <NA> 83 days 2019… 2019
## 8 AR RN Exam RAMSEY SH… <NA> <NA> 41 days 2019… 2019
## 9 AR RN Exam MCKENZIE … <NA> <NA> 24 days 2019… 2019
## 10 AR LPN Exam CRESHAE P… <NA> <NA> 41 days 2019… 2019
## # … with 1,548,979 more rows, and 34 more variables: application_date <date>,
## # docs_date <date>, issue_date <date>, effective_renewal_date <date>,
## # expiration_date <date>, gather_time <drtn>, docs_time <drtn>,
## # app_status <chr>, lic_status <chr>, license_number <chr>, duration <chr>,
## # app_month <chr>, app_year <dbl>, app_expiration_date <date>,
## # first_name <chr>, middle_name <chr>, last_name <chr>, suffix <chr>,
## # maiden_name <chr>, address_line1 <chr>, address_line2 <chr>, …
#filter (!(str_detect(year, "^Pending") & (application_date < as.Date("2020-07-01")))) #%>% #remove pending apps older than July 2020 (11789 RNs & 1994 LPNs) %>%
# count (year)
#Louisiana has 258 records missing application dates that didn't calculate, rest of NAs are pending
all_data2 %>%
filter (is.na(process_time2) & year %in% c("2019", "2020", "2021")) %>%
count (data_state)
## # A tibble: 2 × 2
## data_state n
## * <chr> <int>
## 1 LA 258
## 2 MT 327
#266 records (almost all NM) wrongly have 2022 issue dates, 244 renewals - plus CA which provided some records into 2022
all_data2 %>%
filter (year=="2022") #%>%
## # A tibble: 2,107 × 43
## data_state lic_type app_type name city state process_time2 month year
## <chr> <chr> <chr> <chr> <chr> <chr> <drtn> <chr> <chr>
## 1 CA RN Exam 24196… COLLI… TN 26 days <NA> 2022
## 2 CA RN Exam 68386… INGLE… CA 85 days <NA> 2022
## 3 CA RN Exam 74665… VAN N… CA 239 days <NA> 2022
## 4 CA RN Exam 75467… ANAHE… CA 131 days <NA> 2022
## 5 CA RN Endorsement 75315… ONEON… AL 126 days <NA> 2022
## 6 CA RN Endorsement 74180… HOUST… MS 310 days <NA> 2022
## 7 CA RN Endorsement 72256… WOODB… VA 97 days <NA> 2022
## 8 CA RN Endorsement 74688… SAVAGE MN 236 days <NA> 2022
## 9 CA RN Endorsement 74717… BLAINE MN 231 days <NA> 2022
## 10 CA RN Endorsement 74189… ARLIN… MA 308 days <NA> 2022
## # … with 2,097 more rows, and 34 more variables: application_date <date>,
## # docs_date <date>, issue_date <date>, effective_renewal_date <date>,
## # expiration_date <date>, gather_time <drtn>, docs_time <drtn>,
## # app_status <chr>, lic_status <chr>, license_number <chr>, duration <chr>,
## # app_month <chr>, app_year <dbl>, app_expiration_date <date>,
## # first_name <chr>, middle_name <chr>, last_name <chr>, suffix <chr>,
## # maiden_name <chr>, address_line1 <chr>, address_line2 <chr>, …
# count (app_type)
#CT has 5 records of unknown license/application type
all_data2 %>%
filter (lic_type=="Unknown")
## # A tibble: 9,913 × 43
## data_state lic_type app_type name city state process_time2 month year
## <chr> <chr> <chr> <chr> <chr> <chr> <drtn> <chr> <chr>
## 1 CT Unknown Unknown LIANA H… EAST … PA 0 days 2021… 2021
## 2 CT Unknown Unknown KRISTIN… WOONS… RI 0 days 2021… 2021
## 3 CT Unknown Unknown MARJORI… STAMF… CT NA days <NA> Pendi…
## 4 CT Unknown Unknown MARK A … SOUTH… CT NA days <NA> Pendi…
## 5 CT Unknown Unknown EBRIMA … STAMF… CT NA days <NA> Pendi…
## 6 MN Unknown Exam Kayla M… Manka… MN NA days <NA> Pendi…
## 7 MN Unknown Exam Rachell… Scarb… ON NA days <NA> Pendi…
## 8 MN Unknown Exam Nimo Mo… MINNE… MN NA days <NA> Pendi…
## 9 MN Unknown Exam Asha Mo… ST.LO… MN NA days <NA> Pendi…
## 10 MN Unknown Exam Fowsia … Chaska MN NA days <NA> Pendi…
## # … with 9,903 more rows, and 34 more variables: application_date <date>,
## # docs_date <date>, issue_date <date>, effective_renewal_date <date>,
## # expiration_date <date>, gather_time <drtn>, docs_time <drtn>,
## # app_status <chr>, lic_status <chr>, license_number <chr>, duration <chr>,
## # app_month <chr>, app_year <dbl>, app_expiration_date <date>,
## # first_name <chr>, middle_name <chr>, last_name <chr>, suffix <chr>,
## # maiden_name <chr>, address_line1 <chr>, address_line2 <chr>, …
#2,032,084 records for Jan. 1, 2019 - Sept. 23, 2021
filtered_data <- all_data2 %>%
filter (application_date <= as.Date("2021-09-23")) %>%
filter (issue_date <= as.Date("2021-09-23")) %>%
#Standardize 29 app types to simplest version
mutate (standard_app_type = case_when(
str_detect(app_type, "Reinstatement") ~ "Reinstatement",
str_detect(app_type, "Renewal") ~ "Renewal",
str_detect(lic_type, "Provisional") ~ "Temporary",
str_detect(lic_type, "Temp") ~ "Temporary",
lic_type=="Volunteer RN" ~ "Temporary", #for 22 records in PA
str_detect(app_type, "Endorsement") ~ "Endorsement",
str_detect(app_type, "Exam-retest") ~ "Exam-retest",
str_detect(app_type, "Exam") ~ "Exam",
app_type %in% c("Foreign Applicant", "Inactive", "SSL to MSL", "International", "Military Vet Expedited Initial Licensure") ~ "Other",
TRUE ~ app_type))
#How many processing times are negative and will be removed?
#553214 records
filtered_data %>% filter (process_time2 < 0) %>% nrow()
## [1] 553214
#Where are these from and what types of applications?
#Majority are renewals (525793);
#3125 are Exam
#2787 are Endorsement
filtered_data %>% filter (process_time2 < 0) %>% count(standard_app_type) %>%
adorn_totals("row")
## standard_app_type n
## Endorsement 2787
## Exam 3125
## Exam-retest 28
## Other 142
## Other - Inactivate Application 2357
## Other - Retire Application 1352
## Other - Volunteer Retire 57
## Reinstatement 17043
## Renewal 525793
## Temporary 511
## Unknown 19
## Total 553214
#Four application types NPR is analyzing = 226,304 new licenses issued in 2021
#Adding Florida: 253,694
filtered_data %>%
filter (year=="2021" & standard_app_type %in% c("Exam", "Endorsement", "Other", "Unknown")) %>%
count(standard_app_type) %>%
adorn_totals("row")
## standard_app_type n
## Endorsement 106186
## Exam 129951
## Other 1667
## Unknown 15890
## Total 253694
#How many of these had negative process times that were removed from 2021's data?
#Just 77 licenses from six states
filtered_data %>%
filter (year=="2021" & standard_app_type %in% c("Exam", "Endorsement", "Other", "Unknown") & process_time2 < 0) %>%
count(standard_app_type, data_state) %>%
adorn_totals("row")
## standard_app_type data_state n
## Endorsement HI 10
## Endorsement MI 34
## Endorsement VT 1
## Exam HI 3
## Exam MA 24
## Exam MI 1
## Other OK 3
## Unknown CT 1
## Total - 77
#How many processing times are NA and will be removed?
#NONE
filtered_data %>%
filter (is.na(process_time2)) %>%
nrow()
## [1] 0
#Original method to filter down dataset, keeps pending applications
valid_data <- all_data2 %>%
filter (process_time2 > -1) %>% #Keep only valid processing times
filter (!is.na(process_time2)) %>%
filter (year != "2022") %>% #remove 266 errors and those issued in NC/TN
#remove pending apps older than July 2020 (11789 RNs & 1994 LPNs)
filter (!(str_detect(year, "^Pending") & (application_date < as.Date("2020-12-31")))) #"2020-07-01"
#639,067 records removed (NA + negative)
nrow(all_data2)-nrow(valid_data)
## [1] 645108
valid_data %>% count (app_type)
## # A tibble: 31 × 2
## app_type n
## * <chr> <int>
## 1 All 1156
## 2 Application 41181
## 3 Emergency 1138
## 4 Endorsement 376201
## 5 Endorsement (foreign) 250
## 6 Endorsement (non-US) 2003
## 7 Endorsement (US) 25338
## 8 Exam 416082
## 9 Exam (foreign) 81
## 10 Exam (non-US) 513
## # … with 21 more rows
#Standardize 30+ app types to simplest version - 8 types
valid_data2 <- valid_data %>%
mutate (standard_app_type = case_when(
str_detect(app_type, "Reinstatement") ~ "Reinstatement",
str_detect(app_type, "Renewal") ~ "Renewal",
str_detect(lic_type, "Provisional") ~ "Temporary",
str_detect(lic_type, "Temp") ~ "Temporary",
lic_type=="Volunteer RN" ~ "Temporary", #for 22 records in PA
str_detect(app_type, "Endorsement") ~ "Endorsement",
str_detect(app_type, "Exam-retest") ~ "Exam-retest",
str_detect(app_type, "Exam") ~ "Exam",
app_type %in% c("Foreign Applicant", "Inactive", "SSL to MSL", "International", "Military Vet Expedited Initial Licensure") ~ "Other",
TRUE ~ app_type))
#Double check standardization
valid_data2 %>%
count (lic_type, app_type, standard_app_type)
## # A tibble: 80 × 4
## lic_type app_type standard_app_type n
## <chr> <chr> <chr> <int>
## 1 LPN Endorsement Endorsement 25114
## 2 LPN Endorsement (foreign) Endorsement 5
## 3 LPN Endorsement (non-US) Endorsement 5
## 4 LPN Endorsement (US) Endorsement 1913
## 5 LPN Exam Exam 79752
## 6 LPN Exam (foreign) Exam 2
## 7 LPN Exam (non-US) Exam 1
## 8 LPN Exam (US) Exam 4563
## 9 LPN Exam-retest Exam-retest 5750
## 10 LPN Exam-retest (non-US) Exam-retest 3
## # … with 70 more rows
#Double check standardization
valid_data2 %>% count (standard_app_type)
## # A tibble: 8 × 2
## standard_app_type n
## * <chr> <int>
## 1 Endorsement 366005
## 2 Exam 415422
## 3 Exam-retest 23460
## 4 Other 4298
## 5 Reinstatement 20546
## 6 Renewal 660661
## 7 Temporary 114851
## 8 Unknown 61997
#What is the breakdown by license type and application type?
valid_data2 %>%
filter (!(#str_detect(app_type, "Exam-retest")|
str_detect(app_type, "Renewal")|
str_detect(app_type, "Reinstatement"))) %>%
mutate (year = if_else (str_detect(year, "^Pending"), "Pending", year)) %>%
filter (year %in% c("2021", "Pending")) %>%
count (year, lic_type)
## # A tibble: 18 × 3
## year lic_type n
## <chr> <chr> <int>
## 1 2021 LPN 39394
## 2 2021 LPN-Provisional 1676
## 3 2021 LPN-Refresher Temporary 2
## 4 2021 LPN-Temp 2238
## 5 2021 RN 254983
## 6 2021 RN-Provisional 8452
## 7 2021 RN-Refresher Temporary 11
## 8 2021 RN-Temp 23057
## 9 2021 Unknown 2
## 10 2021 Unknown-Temp 3526
## 11 2021 Volunteer RN 13
## 12 Pending LPN 18001
## 13 Pending LPN-Provisional 53
## 14 Pending LPN-Temp 61
## 15 Pending RN 92027
## 16 Pending RN-Provisional 63
## 17 Pending RN-Temp 7037
## 18 Pending Volunteer RN 1
#Where are the unknown application types from?
valid_data2 %>%
filter (standard_app_type=="Unknown") %>%
count (data_state) %>%
arrange (desc(n))
## # A tibble: 5 × 2
## data_state n
## <chr> <int>
## 1 CT 32912
## 2 VA 26123
## 3 MI 2957
## 4 PA 3
## 5 CO 2
#Unknowns are from CT + VA, which didn't specify application_type, plus 3600 pending applications from MI, which weren't specified. Also includes: 5 from PA, 4 from CO
#What are the ranges of application dates and issue dates for each state's data?
valid_data2 %>%
group_by (data_state) %>%
summarize (apps = range(application_date),
lics = range(issue_date, na.rm=T)) %>%
mutate (id = if_else (row_number()==1, "start", "end")) %>%
pivot_wider (names_from = c(id),
values_from = c(apps, lics)) %>%
arrange ((lics_end))
## # A tibble: 33 × 5
## # Groups: data_state [33]
## data_state apps_start apps_end lics_start lics_end
## <chr> <date> <date> <date> <date>
## 1 AR 2018-11-13 2021-09-21 2019-05-30 2021-09-23
## 2 NE 2019-01-02 2021-09-23 2019-01-02 2021-09-23
## 3 VT 2019-01-01 2021-09-23 2019-01-01 2021-09-23
## 4 LA 2019-01-07 2021-09-28 2019-06-28 2021-09-24
## 5 PA 2019-01-01 2021-09-25 2019-01-02 2021-09-24
## 6 SC 2019-01-01 2021-09-24 2019-01-02 2021-09-24
## 7 CO 2019-01-01 2021-09-27 2019-01-03 2021-09-28
## 8 OH 2019-01-01 2021-09-28 2019-01-02 2021-09-28
## 9 VA 2017-01-10 2021-09-27 2019-01-02 2021-09-30
## 10 OR 2019-01-01 2021-09-29 2019-01-02 2021-10-01
## # … with 23 more rows
Remove data points after Sept. 23, 2021, to standardize the timeframe among all states. This code removes any licenses issued or applications filed after that date.
#How many still-pending applications are in full data? >> #118,255
valid_data %>%
filter (str_detect (year, "^Pending")) %>%
nrow()
## [1] 118251
#Remove any applications after Sept. 23, 2021 >> 78027
valid_data2 <- valid_data2 %>%
filter (application_date <= as.Date("2021-09-23"))
nrow(valid_data)-nrow(valid_data2)
## [1] 78027
#How many still-pending applications who applied before Sept. 23 are in this filtered data? >> #82948
valid_data2 %>%
filter (str_detect (year, "^Pending")) %>%
nrow()
## [1] 82944
#Filter out licenses issued after Sept. 23, keeping pending applications
valid_data3 <- valid_data2 %>%
filter (!(year %in% c("2019", "2020", "2021") & issue_date > as.Date("2021-09-23"))) %>%
#Mark that CA + VA redacted names
#Mark that NH redacted nurses' city/state
mutate (name = if_else (data_state %in% c("CA", "VA"), "REDACTED", name),
city = if_else (data_state == "NH", "REDACTED", city),
state = if_else (data_state == "NH", "REDACTED", state))
#removes 105534 total records, to end up with 1,561,710 records. Some overlap if nurses applied and were issued after Sept. 23)
nrow(valid_data)-nrow(valid_data3)
## [1] 105534
#How many still-pending applications who applied before Sept. 23 remain in this filtered data? >> #82948, so none were lost
valid_data3 %>%
filter (str_detect (year, "^Pending")) %>%
nrow()
## [1] 82944
Includes 2019-2021 and still-pending applications submitted after Dec. 31, 2020. 1.66 million rows x 43 columns, so will be a very large file.
#write_csv(valid_data, "../output/nurse_licenses_all_2019_2021.csv")
Write dataset of only licenses issued from Jan. 1 to Sept. 23, 2021.
Write dataset of pending license applications
valid_data21 <- valid_data3 %>%
filter (!(year %in% c("2019", "2020"))) %>%
arrange (data_state, lic_type, app_type, desc(process_time2)) %>%
select (1:3, standard_app_type, 4:44)
valid_data21 %>%
count (year) %>%
adorn_totals()
## year n
## 2021 461639
## Pending as of 01/04/22 19081
## Pending as of 09/23/21 572
## Pending as of 09/25/21 3644
## Pending as of 09/27/21 1300
## Pending as of 09/28/21 9520
## Pending as of 10/01/21 12648
## Pending as of 10/05/21 3658
## Pending as of 10/06/21 360
## Pending as of 10/07/21 4006
## Pending as of 10/15/21 4508
## Pending as of 10/20/21 4827
## Pending as of 10/21/21 521
## Pending as of 11/03/21 796
## Pending as of 11/04/21 1882
## Pending as of 11/18/21 382
## Pending as of 11/19/21 2340
## Pending as of 12/31/21 12899
## Total 544583
valid_data21_issued <- valid_data21 %>%
filter (year=="2021")
valid_data21_pending <- valid_data21 %>%
filter (str_detect(year, "^Pending"))
write_csv (valid_data21_issued, "../output/nurse_licenses_issued_2021.csv")
write_csv (valid_data21_pending, "../output/nurse_licenses_pending_2021.csv")
How many of each type were issued in 2021?
valid_data21_issued %>%
count (lic_type, standard_app_type) %>%
adorn_totals()
## lic_type standard_app_type n
## LPN Endorsement 7719
## LPN Exam 23359
## LPN Exam-retest 1569
## LPN Other 121
## LPN Reinstatement 1373
## LPN Renewal 27157
## LPN Unknown 1423
## LPN-Provisional Temporary 1617
## LPN-Refresher Temporary Temporary 2
## LPN-Temp Temporary 2137
## RN Endorsement 98422
## RN Exam 106564
## RN Exam-retest 4304
## RN Other 1543
## RN Reinstatement 4765
## RN Renewal 134307
## RN Unknown 14464
## RN-Provisional Temporary 8205
## RN-Refresher Temporary Temporary 11
## RN-Temp Temporary 20348
## Unknown Unknown 2
## Unknown-Temp Temporary 2214
## Volunteer RN Temporary 13
## Total - 461639
#How many of each application type were issued in 2021?
valid_data21_issued %>%
count (standard_app_type) %>%
adorn_totals()
## standard_app_type n
## Endorsement 106141
## Exam 129923
## Exam-retest 5873
## Other 1664
## Reinstatement 6138
## Renewal 161464
## Temporary 34547
## Unknown 15889
## Total 461639
Where do “Other” applications come from? << 3,055 issued in 2020 or 2021 or still-pending
valid_data3 %>%
filter (year %in% c("2021", "2020") | str_detect(year, "^Pending")) %>%
filter (standard_app_type=="Other") %>%
count (data_state, app_type) %>%
arrange (desc(n)) %>%
adorn_totals()
## data_state app_type n
## NM International 1413
## OK SSL to MSL 1290
## FL Military Vet Expedited Initial Licensure 215
## NJ Foreign Applicant 57
## NH Other 38
## OK Inactive 20
## WV International 20
## AR International 2
## Total - 3055
Where do “Unknown” applications come from? << 39,703 issued in 2020 or 2021 or still-pending
valid_data3 %>%
filter (year %in% c("2021", "2020") | str_detect(year, "^Pending")) %>%
filter (standard_app_type=="Unknown") %>%
count (data_state, lic_type, app_type) %>%
arrange (desc(n)) %>%
adorn_totals()
## data_state lic_type app_type n
## CT RN Unknown 18982
## VA RN Unknown 14622
## VA LPN Unknown 2454
## MI RN Unknown 2035
## CT LPN Unknown 1289
## MI LPN Unknown 315
## CT Unknown Unknown 2
## CO LPN Unknown 1
## CO RN Unknown 1
## PA LPN Unknown 1
## PA RN Unknown 1
## Total - - 39703
Group by state, license type, application type and year to find state medians, means and counts. Also count how many took longer than a month (using 30 days as proxy), two months (60 days), three months (90 days), etc.
This code also creates separate dataframes breaking records down by license type (RN or LPN) and all new nurses as one lumped group (excluding renewals, reinstatements, exam-retest, since those take notably longer or state boards didn’t track consistently).
Most states didn’t provide any applications submitted before Jan. 1, 2019, meaning 2019’s data is incomplete since those licensed through the first months of 2019 likely applied in late-2018. Additionally, Hawaii, Oklahoma, Texas and Maine didn’t provide any records from 2019.
So, I’ve removed 2019 from the state averages dataframe.
#Break down by state, license type, application type, and year.
state_averages <- valid_data3 %>%
group_by (data_state, lic_type, standard_app_type, year) %>%
summarize (mean = round(mean(process_time2)),
median = median (process_time2),
count = n(),
count_over30days = sum(process_time2 > 30),
pct_over30days = count_over30days/count,
count_over60days = sum(process_time2 > 60),
pct_over60days = count_over60days/count,
count_over90days = sum(process_time2 > 90),
pct_over90days = count_over90days/count,
count_over120days = sum(process_time2 > 120),
pct_over120days = count_over120days/count,
count_over180days = sum(process_time2 > 180),
pct_over180days = count_over180days/count,
count_over1year = sum(process_time2 > 365),
pct_over1year = count_over1year/count) %>%
filter (year != "2019")
#pending_counts <- state_averages %>%
# filter (str_detect(year, "^Pending")) %>%
# mutate (year = if_else(data_state=="CA", "Pending as of 01/04/22", year))
#write_csv (pending_counts, "Pending_counts_by_state.csv")
#Break down new licenses, separating by RN and LPN only (and by state and year).
#Removes exam-retest, reinstatements, renewals
state_avg_lic_type_only <- valid_data3 %>%
filter (lic_type %in% c("LPN", "RN")) %>%
filter (standard_app_type %in% c("Endorsement", "Exam", "Other", "Unknown")) %>%
mutate (standard_app_type = "Endorsement+Exam+Other+Unknown") %>%
group_by (data_state, lic_type, standard_app_type, year) %>%
summarize (mean = round(mean(process_time2)),
median = median (process_time2),
count = n(),
count_over30days = sum(process_time2 > 30),
pct_over30days = count_over30days/count,
count_over60days = sum(process_time2 > 60),
pct_over60days = count_over60days/count,
count_over90days = sum(process_time2 > 90),
pct_over90days = count_over90days/count,
count_over120days = sum(process_time2 > 120),
pct_over120days = count_over120days/count,
count_over180days = sum(process_time2 > 180),
pct_over180days = count_over180days/count,
count_over1year = sum(process_time2 > 365),
pct_over1year = count_over1year/count) %>%
filter (year != "2019")
#Break down new licenses, separating by application type only (and by state and year).
#Removes exam-retest, reinstatements, renewals
state_avg_app_type_only <- valid_data3 %>%
filter (lic_type %in% c("LPN", "RN")) %>%
filter (standard_app_type %in% c("Endorsement", "Exam", "Other", "Unknown")) %>%
mutate (lic_type = "LPN+RN") %>%
group_by (data_state, lic_type, standard_app_type, year) %>%
summarize (mean = round(mean(process_time2)),
median = median (process_time2),
count = n(),
count_over30days = sum(process_time2 > 30),
pct_over30days = count_over30days/count,
count_over60days = sum(process_time2 > 60),
pct_over60days = count_over60days/count,
count_over90days = sum(process_time2 > 90),
pct_over90days = count_over90days/count,
count_over120days = sum(process_time2 > 120),
pct_over120days = count_over120days/count,
count_over180days = sum(process_time2 > 180),
pct_over180days = count_over180days/count,
count_over1year = sum(process_time2 > 365),
pct_over1year = count_over1year/count) %>%
filter (year != "2019")
#Calculate state totals for 2021, combining all new RN/LPN licenses who applied through endorsement, exam, other and unknown (CT/VA).
#Removes exam-retest, reinstatements, renewals
state_averages_combined <- valid_data3 %>%
filter (lic_type %in% c("LPN", "RN")) %>%
filter (standard_app_type %in% c("Endorsement", "Exam", "Other", "Unknown")) %>%
mutate (lic_type = "LPN+RN",
standard_app_type = "Endorsement+Exam+Other+Unknown") %>%
group_by (data_state, lic_type, standard_app_type, year) %>%
summarize (mean = round(mean(process_time2)),
median = median (process_time2),
count = n(),
count_over30days = sum(process_time2 > 30),
pct_over30days = count_over30days/count,
count_over60days = sum(process_time2 > 60),
pct_over60days = count_over60days/count,
count_over90days = sum(process_time2 > 90),
pct_over90days = count_over90days/count,
count_over120days = sum(process_time2 > 120),
pct_over120days = count_over120days/count,
count_over180days = sum(process_time2 > 180),
pct_over180days = count_over180days/count,
count_over1year = sum(process_time2 > 365),
pct_over1year = count_over1year/count) %>%
filter (year != "2019")
#Write summary files
write_csv (state_averages, "../output/state_averages_by_app_lic_type.csv")
write_csv (state_avg_lic_type_only, "../output/state_averages_lic_type_only.csv")
write_csv (state_avg_app_type_only, "../output/state_averages_app_type_only.csv")
write_csv (state_averages_combined, "../output/state_averages_combined.csv")
Which states had the most new nurses (both LPNs and RNs combined) take a long time? Ordering by share of nurses that took over 3 months
state_averages_combined %>%
filter (year=="2021") %>%
arrange (desc(pct_over90days))
## # A tibble: 33 × 19
## # Groups: data_state, lic_type, standard_app_type [33]
## data_state lic_type standard_app_type year mean median count
## <chr> <chr> <chr> <chr> <drtn> <drtn> <int>
## 1 TX LPN+RN Endorsement+Exam+Other+Unknown 2021 105 da… 102 d… 22642
## 2 PA LPN+RN Endorsement+Exam+Other+Unknown 2021 124 da… 97 d… 11944
## 3 CA LPN+RN Endorsement+Exam+Other+Unknown 2021 121 da… 96 d… 24979
## 4 MN LPN+RN Endorsement+Exam+Other+Unknown 2021 112 da… 83 d… 15184
## 5 MS LPN+RN Endorsement+Exam+Other+Unknown 2021 90 da… 82 d… 2947
## 6 IN LPN+RN Endorsement+Exam+Other+Unknown 2021 91 da… 79 d… 6420
## 7 IL LPN+RN Endorsement+Exam+Other+Unknown 2021 108 da… 69 d… 13119
## 8 OH LPN+RN Endorsement+Exam+Other+Unknown 2021 87 da… 70 d… 15482
## 9 NJ LPN+RN Endorsement+Exam+Other+Unknown 2021 98 da… 70 d… 8470
## 10 AR LPN+RN Endorsement+Exam+Other+Unknown 2021 81 da… 77 d… 2907
## # … with 23 more rows, and 12 more variables: count_over30days <int>,
## # pct_over30days <dbl>, count_over60days <int>, pct_over60days <dbl>,
## # count_over90days <int>, pct_over90days <dbl>, count_over120days <int>,
## # pct_over120days <dbl>, count_over180days <int>, pct_over180days <dbl>,
## # count_over1year <int>, pct_over1year <dbl>
FACT CHECK: How many new licenses in 2021?
226,225 (excluding re-tests [which skew medians longer], reinstatements, reactivations, etc.)
avg_counts <- state_averages %>%
ungroup() %>%
filter (year=="2021") %>%
group_by (lic_type, standard_app_type) %>%
summarize (count = sum(count))
avg_counts %>%
ungroup() %>%
filter (lic_type %in% c("RN", "LPN") & standard_app_type %in% c("Endorsement", "Exam", "Other", "Unknown")) %>%
summarize (total = sum(count))
## # A tibble: 1 × 1
## total
## <int>
## 1 253615
valid_data3 %>%
ungroup() %>%
filter (year=="2021") %>%
filter (lic_type %in% c("RN", "LPN") & standard_app_type %in% c("Endorsement", "Exam", "Other", "Unknown")) %>%
summarize (total = n())
## # A tibble: 1 × 1
## total
## <int>
## 1 253615
Repeat state average calculations, but without grouping by state so as to calculate “national” averages for our sample of 32 states that provided records.
#Break down by license type, application type and year
natl_averages <- valid_data3 %>%
mutate (year = if_else (str_detect(year, "^Pending"), "Pending", year)) %>%
group_by (lic_type, standard_app_type, year) %>%
summarize (mean = round(mean(process_time2)),
median = median (process_time2),
count = n(),
min = min(process_time2),
max = max(process_time2),
count_over30days = sum(process_time2 > 30),
pct_over30days = count_over30days/count,
count_over60days = sum(process_time2 > 60),
pct_over60days = count_over60days/count,
count_over90days = sum(process_time2 > 90),
pct_over90days = count_over90days/count,
count_over120days = sum(process_time2 > 120),
pct_over120days = count_over120days/count,
count_over180days = sum(process_time2 > 180),
pct_over180days = count_over180days/count,
count_over1year = sum(process_time2 > 365),
pct_over1year = count_over1year/count) %>%
filter (year != "2019")
#Nationally, it's taking about six days longer (median) to get an RN by examination than last year and three days longer for RN by endorsement.
natl_comparison <- natl_averages %>%
filter (lic_type %in% c("LPN", "RN")) %>%
filter (year %in% c("2020", "2021")) %>%
pivot_wider (id_cols = c(lic_type, standard_app_type),
names_from = year,
values_from = c(mean, median, count, min, max)) %>%
arrange (standard_app_type) %>%
mutate (change_med = median_2021 - median_2020)
#natl_averages2 <- natl_avg_lic_type
#What about for new licenses only, grouped into LPN + RN?
natl_avg_lic_type <- valid_data3 %>%
mutate (year = if_else (str_detect(year, "^Pending"), "Pending", year)) %>%
filter (!(standard_app_type %in% c("Reinstatement", "Renewal", "Exam-retest"))) %>%
group_by (lic_type, year) %>%
summarize (mean = round(mean(process_time2)),
median = median (process_time2),
count = n(),
count_over30days = sum(process_time2 > 30),
pct_over30days = count_over30days/count,
count_over60days = sum(process_time2 > 60),
pct_over60days = count_over60days/count,
count_over90days = sum(process_time2 > 90),
pct_over90days = count_over90days/count,
count_over120days = sum(process_time2 > 120),
pct_over120days = count_over120days/count,
count_over180days = sum(process_time2 > 180),
pct_over180days = count_over180days/count,
count_over1year = sum(process_time2 > 365),
pct_over1year = count_over1year/count) %>%
filter (year != "2019")
#Nationally, RN licenses are taking 5 days longer and LPNs are taking 6 days longer than last year.
natl_comparison2 <- natl_avg_lic_type %>%
#filter (lic_type %in% c("LPN", "RN")) %>%
filter (year %in% c("2020", "2021")) %>%
pivot_wider (id_cols = c(lic_type),
names_from = year,
values_from = c(mean, median, count)) %>%
#arrange (standard_app_type) %>%
mutate (change_med = median_2021 - median_2020)
#Percent of new licenses issued that took longer than a month increased from 78% in 2020 to 80% in 2021.
#Percent of new licenses issued that took longer than 2 months increased from 53% in 2020 to 57% in 2021.
#Percent of new licenses issued that took longer than 3 months increased from 31% in 2020 to 35% in 2021.
#What about for all new RN/LPN licenses, grouping exam, endorsement, other and unknown together?
#app_types: Unknown is mostly CT/VA, which didn't specify application type
natl_averages_combined <- valid_data3 %>%
mutate (year = if_else (str_detect(year, "^Pending"), "Pending", year)) %>%
filter (lic_type %in% c("LPN", "RN")) %>%
filter (!(standard_app_type %in% c("Reinstatement", "Renewal", "Exam-retest"))) %>%
group_by (year) %>%
summarize (mean = round(mean(process_time2)),
median = median (process_time2),
count = n(),
count_over30days = sum(process_time2 > 30),
pct_over30days = count_over30days/count,
count_over60days = sum(process_time2 > 60),
pct_over60days = count_over60days/count,
count_over90days = sum(process_time2 > 90),
pct_over90days = count_over90days/count,
count_over120days = sum(process_time2 > 120),
pct_over120days = count_over120days/count,
count_over180days = sum(process_time2 > 180),
pct_over180days = count_over180days/count,
count_over1year = sum(process_time2 > 365),
pct_over1year = count_over1year/count) %>%
filter (year != "2019")
#Group by application type only
#app_types: Unknown is mostly CT/VA, which didn't specify application type
natl_avg_app_type_only <- valid_data3 %>%
mutate (year = if_else (str_detect(year, "^Pending"), "Pending", year)) %>%
filter (lic_type %in% c("LPN", "RN")) %>%
filter (!(standard_app_type %in% c("Reinstatement", "Renewal", "Exam-retest"))) %>%
mutate (lic_type = "LPN+RN") %>%
group_by (lic_type, standard_app_type, year) %>%
summarize (mean = round(mean(process_time2)),
median = median (process_time2),
count = n(),
count_over30days = sum(process_time2 > 30),
pct_over30days = count_over30days/count,
count_over60days = sum(process_time2 > 60),
pct_over60days = count_over60days/count,
count_over90days = sum(process_time2 > 90),
pct_over90days = count_over90days/count,
count_over120days = sum(process_time2 > 120),
pct_over120days = count_over120days/count,
count_over180days = sum(process_time2 > 180),
pct_over180days = count_over180days/count,
count_over1year = sum(process_time2 > 365),
pct_over1year = count_over1year/count) %>%
filter (year != "2019")
#Write summary files
write_csv (natl_averages, "../output/natl_averages_by_app_lic_type.csv")
write_csv (natl_avg_lic_type, "../output/natl_averages_lic_type_only.csv")
write_csv (natl_avg_app_type_only, "../output/natl_averages_app_type_only.csv")
write_csv (natl_averages_combined, "../output/natl_averages_combined.csv")
end_time <- Sys.time()
paste ("Processing time lasted", (end_time-start_time))
## [1] "Processing time lasted 3.54257833560308"
Hit “Run All Chunks Above” to process to this point.
Most states didn’t provide applications before Jan. 1, 2019, meaning 2019’s data is incomplete since those licensed through the first months of 2019 likely applied in late-2018. Additionally, Hawaii, Oklahoma, Texas and Maine didn’t provide any records from 2019.
#What are the ranges of application dates for each state's data?
valid_data3 %>%
group_by (data_state) %>%
summarize (apps = range(application_date),
lics = range(issue_date, na.rm=T)) %>%
mutate (id = if_else (row_number()==1, "start", "end")) %>%
pivot_wider (names_from = c(id),
values_from = c(apps, lics)) %>%
arrange (desc(apps_start)) %>%
print(n=1e3)
## # A tibble: 33 × 5
## # Groups: data_state [33]
## data_state apps_start apps_end lics_start lics_end
## <chr> <date> <date> <date> <date>
## 1 HI 2021-02-22 2021-09-23 2021-02-22 2021-09-23
## 2 OK 2020-10-01 2021-09-23 2020-10-12 2021-09-23
## 3 TX 2020-06-16 2021-09-23 2020-06-17 2021-09-23
## 4 ME 2020-01-02 2021-09-23 2020-01-14 2021-09-23
## 5 LA 2019-01-07 2021-09-23 2019-06-28 2021-09-23
## 6 MA 2019-01-07 2021-09-23 2019-01-15 2021-09-23
## 7 MS 2019-01-07 2021-09-23 1955-03-16 2021-09-23
## 8 WV 2019-01-04 2021-09-23 2019-03-11 2021-09-23
## 9 IN 2019-01-02 2021-09-23 2019-01-04 2021-09-23
## 10 NE 2019-01-02 2021-09-23 2019-01-02 2021-09-23
## 11 CA 2019-01-01 2021-09-23 2019-01-03 2021-09-23
## 12 CO 2019-01-01 2021-09-23 2019-01-03 2021-09-23
## 13 IA 2019-01-01 2021-09-23 1952-05-20 2021-09-23
## 14 IL 2019-01-01 2021-09-10 2019-01-26 2021-09-23
## 15 KY 2019-01-01 2021-09-18 2019-01-17 2021-09-23
## 16 MT 2019-01-01 2021-09-23 2019-01-11 2021-09-23
## 17 NC 2019-01-01 2021-09-20 2019-01-03 2021-09-23
## 18 NH 2019-01-01 2021-09-23 2019-01-04 2021-09-23
## 19 NJ 2019-01-01 2021-09-23 2019-01-10 2021-09-23
## 20 NM 2019-01-01 2021-09-23 2019-01-01 2021-09-23
## 21 OH 2019-01-01 2021-09-23 2019-01-02 2021-09-23
## 22 OR 2019-01-01 2021-09-21 2019-01-02 2021-09-23
## 23 PA 2019-01-01 2021-09-23 2019-01-02 2021-09-23
## 24 SC 2019-01-01 2021-09-23 2019-01-02 2021-09-23
## 25 TN 2019-01-01 2021-09-20 2019-01-08 2021-09-23
## 26 VT 2019-01-01 2021-09-23 2019-01-01 2021-09-23
## 27 AR 2018-11-13 2021-09-21 2019-05-30 2021-09-23
## 28 MI 2017-07-11 2021-09-23 2019-11-08 2021-09-23
## 29 WY 2017-03-23 2021-09-10 2019-01-03 2021-09-23
## 30 VA 2017-01-10 2021-09-15 2019-01-02 2021-09-23
## 31 CT 2009-09-30 2021-09-23 2019-01-02 2021-09-23
## 32 FL 2006-03-17 2021-09-23 2019-01-02 2021-09-23
## 33 MN 1997-04-09 2021-09-23 2019-01-02 2021-09-23
FACT CHECK Article text: “Almost 1 in 10 nurses issued new licenses last year waited six months or longer, according to an analysis of licensing records from 32 states. More than a third of these 226,000 registered nurses and licensed practical nurses waited at least three months.”
This code compares new licenses by year only, combining all nurses together to see total counts and percents that took over 30 days, 60 days, etc.
Additional details: Over half (57%) the 226,225 RNs and LPNs issued new licenses Jan. 1-Sept. 23, 2021, waited more than two months (129,452 nurses). Over one-third (35%) of nurses (80,056) waited more than 3 months to get a new license issued in 2021.
NOTE: Florida provided its records on March 15, after publication: It issued 27,390 licenses from Jan. 1-Sept. 23, 2021, with relatively fast medians.
compare_pct <- natl_averages %>%
filter (lic_type %in% c("LPN", "RN")) %>%
filter (year %in% c("2020", "2021")) %>%
filter (!(standard_app_type %in% c("Reinstatement", "Renewal", "Exam-retest"))) %>% #
group_by (year) %>%
summarize (count = sum(count),
across(c(count_over30days, count_over60days, count_over90days, count_over120days, count_over180days, count_over1year), ~(sum(.x))/sum(count)))
#COUNTS-Compare new licenses by year only, combining all nurses together
compare_count <- natl_averages %>%
filter (lic_type %in% c("LPN", "RN")) %>%
filter (year %in% c("2020", "2021")) %>%
filter (!(standard_app_type %in% c("Reinstatement", "Renewal", "Exam-retest"))) %>% #
group_by (year) %>%
summarize (count = sum(count),
across(c(count_over30days, count_over60days, count_over90days, count_over120days, count_over180days, count_over1year), ~(sum(.x))))
#Show counts and percents of all new nurses
rbind (compare_count, compare_pct) %>% print(n=1e3)
## # A tibble: 4 × 8
## year count count_over30days count_over60days count_over90days
## <chr> <int> <dbl> <dbl> <dbl>
## 1 2020 255800 196202 131993 75659
## 2 2021 253615 195512 138291 84691
## 3 2020 255800 0.767 0.516 0.296
## 4 2021 253615 0.771 0.545 0.334
## # … with 3 more variables: count_over120days <dbl>, count_over180days <dbl>,
## # count_over1year <dbl>
#How many total new RNs + LPNs in 2021? = 226,225 nurses
compare_pct[compare_pct$year=="2021", "count"]
## # A tibble: 1 × 1
## count
## <int>
## 1 253615
#What percent of these new nurses' licenses took longer than three months? = 35.4%, "over a third"
compare_pct[compare_pct$year=="2021", "count_over90days"]
## # A tibble: 1 × 1
## count_over90days
## <dbl>
## 1 0.334
#What percent of these new nurses' licenses took longer than six months? = 8.8%, "almost one in 10"
compare_pct[compare_pct$year=="2021", "count_over180days"]
## # A tibble: 1 × 1
## count_over180days
## <dbl>
## 1 0.0839
#How many new licenses did Florida issue?
state_averages %>% filter (data_state=="FL" & year=="2021" & standard_app_type!="Exam-retest") %>% ungroup() %>% summarize (count = sum(count))
## # A tibble: 1 × 1
## count
## <int>
## 1 27390
How many nurses waited over six months? 19,924 nurses
valid_data3 %>%
filter (process_time2 > 180) %>%
filter (year=="2021") %>%
filter (standard_app_type %in% c("Endorsement", "Exam", "Other", "Unknown")) %>%
count (data_state) %>%
arrange (desc(n)) %>%
adorn_totals ()
## data_state n
## CA 3831
## MN 1926
## PA 1882
## IL 1806
## TX 1735
## OH 1490
## FL 1362
## NJ 929
## NC 728
## MI 701
## VA 644
## MA 634
## IN 465
## KY 415
## OR 397
## CT 388
## CO 313
## TN 276
## IA 202
## MT 189
## SC 158
## MS 145
## ME 116
## OK 116
## AR 110
## LA 74
## NM 72
## WV 53
## NH 45
## NE 41
## VT 21
## WY 19
## HI 3
## Total 21286
# count (lic_type, standard_app_type)
Which license types have the highest proportion of nurses taking over six months?
state_averages %>%
filter (year=="2021") %>%
filter (standard_app_type %in% c("Endorsement", "Exam", "Other", "Unknown")) %>%
arrange (desc(pct_over180days)) %>%
select (1:7, count_over180days, pct_over180days)
## # A tibble: 135 × 9
## # Groups: data_state, lic_type, standard_app_type [135]
## data_state lic_type standard_app_type year mean median count
## <chr> <chr> <chr> <chr> <drtn> <drtn> <int>
## 1 NJ RN Other 2021 432 days 336 days 15
## 2 PA LPN Endorsement 2021 155 days 126 days 233
## 3 OH LPN Endorsement 2021 137 days 106 days 267
## 4 MN LPN Endorsement 2021 145 days 107 days 452
## 5 OH RN Endorsement 2021 138 days 119 days 3210
## 6 PA LPN Exam 2021 151 days 111 days 1468
## 7 CA RN Endorsement 2021 129 days 103 days 10668
## 8 NJ LPN Exam 2021 136 days 78 days 803
## 9 PA RN Endorsement 2021 122 days 90 days 3989
## 10 CA LPN Endorsement 2021 121 days 78 days 548
## # … with 125 more rows, and 2 more variables: count_over180days <int>,
## # pct_over180days <dbl>
Count up totals by RN/LPN (new licenses only)
natl_averages %>%
filter (lic_type %in% c("LPN", "RN")) %>%
filter (year %in% c("2020", "2021")) %>%
filter (!(standard_app_type %in% c("Reinstatement", "Renewal", "Exam-retest"))) %>% #
group_by (lic_type, year) %>%
summarize (count = sum(count),
across(c(count_over30days, count_over60days, count_over90days, count_over120days, count_over180days, count_over1year), ~(sum(.x)/sum(count))))
## # A tibble: 4 × 9
## # Groups: lic_type [2]
## lic_type year count count_over30days count_over60days count_over90days
## <chr> <chr> <int> <dbl> <dbl> <dbl>
## 1 LPN 2020 36347 0.821 0.525 0.300
## 2 LPN 2021 32622 0.821 0.561 0.344
## 3 RN 2020 219453 0.758 0.515 0.295
## 4 RN 2021 220993 0.763 0.543 0.333
## # … with 3 more variables: count_over120days <dbl>, count_over180days <dbl>,
## # count_over1year <dbl>
Count up totals by RN/LPN and application type (includes reinstatement/renewal/retest)
Adding Florida shifts these down, particularly LPNs by endorsement:
compare_count2 <- natl_averages %>%
filter (lic_type %in% c("LPN", "RN")) %>%
filter (year %in% c("2020", "2021", "Pending")) %>%
#filter (!(standard_app_type %in% c("Reinstatement", "Renewal", "Exam-retest"))) %>%
group_by (lic_type, standard_app_type, year) %>%
summarize (count = sum(count),
across(c(count_over30days, count_over60days, count_over90days, count_over120days, count_over180days, count_over1year), ~(sum(.x))))
compare_pct2 <- natl_averages %>%
filter (lic_type %in% c("LPN", "RN")) %>%
filter (year %in% c("2020", "2021", "Pending")) %>%
#filter (!(standard_app_type %in% c("Reinstatement", "Renewal", "Exam-retest"))) %>%
group_by (lic_type, standard_app_type, year) %>%
summarize (count = sum(count),
across(c(count_over30days, count_over60days, count_over90days, count_over120days, count_over180days, count_over1year), ~(sum(.x))/sum(count)))
rbind (compare_count2, compare_pct2) %>%
filter (year %in% c("2021", "Pending") & standard_app_type %in% c("Exam", "Endorsement")) %>%
print(n=1e3)
## # A tibble: 16 × 10
## # Groups: lic_type, standard_app_type [4]
## lic_type standard_app_type year count count_over30days count_over60days
## <chr> <chr> <chr> <int> <dbl> <dbl>
## 1 LPN Endorsement 2021 7719 4535 2986
## 2 LPN Endorsement Pending 2973 2733 2454
## 3 LPN Exam 2021 23359 20968 14340
## 4 LPN Exam Pending 8450 7665 6681
## 5 RN Endorsement 2021 98422 59774 38270
## 6 RN Endorsement Pending 29552 27063 24104
## 7 RN Exam 2021 106564 99971 75466
## 8 RN Exam Pending 26661 22816 19380
## 9 LPN Endorsement 2021 7719 0.588 0.387
## 10 LPN Endorsement Pending 2973 0.919 0.825
## 11 LPN Exam 2021 23359 0.898 0.614
## 12 LPN Exam Pending 8450 0.907 0.791
## 13 RN Endorsement 2021 98422 0.607 0.389
## 14 RN Endorsement Pending 29552 0.916 0.816
## 15 RN Exam 2021 106564 0.938 0.708
## 16 RN Exam Pending 26661 0.856 0.727
## # … with 4 more variables: count_over90days <dbl>, count_over120days <dbl>,
## # count_over180days <dbl>, count_over1year <dbl>
How many states can break out by application type (exam vs. endorsement)?
state_averages %>%
filter (lic_type %in% c("LPN", "RN")) %>%
filter (year == "2021") %>%
filter (standard_app_type %in% c("Exam", "Endorsement")) %>%
ungroup() %>%
distinct (data_state)
## # A tibble: 31 × 1
## data_state
## <chr>
## 1 AR
## 2 CA
## 3 CO
## 4 FL
## 5 HI
## 6 IA
## 7 IL
## 8 IN
## 9 KY
## 10 LA
## # … with 21 more rows
How many states provided usable pending data?
valid_data %>%
mutate (year = if_else (str_detect(year, "^Pending"), "Pending", year)) %>%
filter (year=="Pending") %>%
ungroup() %>%
distinct (data_state)
## # A tibble: 22 × 1
## data_state
## <chr>
## 1 CA
## 2 CO
## 3 CT
## 4 FL
## 5 HI
## 6 IA
## 7 IN
## 8 LA
## 9 MA
## 10 ME
## # … with 12 more rows
FACT CHECK: How many nurses moved to new states in 2021?
Article text: “At least 147,000 nurses re-applied in a new state last year, according to NPR’s analysis, which is a serious undercount since Georgia, Florida, New York and several other states did not provide records.”
This code looks for endorsement nurses licensed by these 30 states in 2021, as well as currently pending applications from the 20 states that provided pending records. It re-runs a bit of code so as to include those additional records after Sept. 23 because I’m not comparing state to state but the nation as a whole.
Adding Florida * Increases to 126,193 licensed by endorsement in 2021; * 46,868 still-pending (Jan 2021-Dec 2021) * At least 173,061 nurses applied by endorsement in 2021.
#rerun standardization code to include ALL licensed and pending if state included others after Sept. 23
endorsement21 <- valid_data %>%
mutate (standard_app_type = case_when(
str_detect(app_type, "Reinstatement") ~ "Reinstatement",
str_detect(app_type, "Renewal") ~ "Renewal",
str_detect(lic_type, "Provisional") ~ "Temporary",
str_detect(lic_type, "Temp") ~ "Temporary",
lic_type=="Volunteer RN" ~ "Temporary", #for 22 records in PA
str_detect(app_type, "Endorsement") ~ "Endorsement",
str_detect(app_type, "Exam-retest") ~ "Exam-retest",
str_detect(app_type, "Exam") ~ "Exam",
app_type %in% c("Foreign Applicant", "Inactive", "SSL to MSL", "International") ~ "Other",
TRUE ~ app_type)) %>%
mutate (year = if_else (str_detect(year, "^Pending"), "Pending", year)) %>%
filter (year %in% c("2021", "Pending") & standard_app_type %in% c("Exam", "Endorsement")) %>%
count (standard_app_type, year)
#How many total endorsement licenses were issued and applied for in 2021 from these records?
sum(endorsement21[endorsement21$standard_app_type=="Endorsement", "n"])
## [1] 173061
#146,886 nurses -- without Florida
#173,061 with Florida, added March 15
Pull out averages for member station reporting partners in Pennsylvania and Texas
pa_averages <- state_averages %>%
filter (data_state=="PA" & lic_type %in% c("RN", "LPN")) %>%
filter (year == "2021") %>%
select (1:7)
#write_csv (pa_averages, "pa_averages_220301.csv")
tx_averages <- state_averages %>%
filter (data_state=="TX") %>%
filter (year == "2021") #%>%
#select (1:3, 5, 6, over60days, pct_over60days, over90days, pct_over90days)
tx_data <- valid_data3 %>%
filter (data_state=="TX") %>%
filter (year == "2021")
#what were the slowest processing times in Texas in 2021?
tx_data %>%
group_by (standard_app_type) %>%
summarize (max (process_time2))
## # A tibble: 3 × 2
## standard_app_type `max(process_time2)`
## * <chr> <drtn>
## 1 Endorsement 456 days
## 2 Exam 460 days
## 3 Exam-retest 426 days
By license type and application type:
#How do states rank for LPNs by endorsement?
state_averages %>%
filter (standard_app_type=="Endorsement" & year=="2021" & lic_type=="LPN") %>%
arrange (median) %>%
print(n=1e3)
## # A tibble: 29 × 19
## # Groups: data_state, lic_type, standard_app_type [29]
## data_state lic_type standard_app_type year mean median count
## <chr> <chr> <chr> <chr> <drtn> <drtn> <int>
## 1 VT LPN Endorsement 2021 7 days 1.0 days 210
## 2 FL LPN Endorsement 2021 41 days 17.0 days 1680
## 3 CO LPN Endorsement 2021 45 days 19.0 days 149
## 4 SC LPN Endorsement 2021 36 days 21.0 days 214
## 5 MI LPN Endorsement 2021 51 days 24.0 days 273
## 6 NM LPN Endorsement 2021 59 days 24.0 days 52
## 7 NE LPN Endorsement 2021 45 days 26.5 days 38
## 8 KY LPN Endorsement 2021 78 days 27.0 days 196
## 9 ME LPN Endorsement 2021 50 days 28.5 days 68
## 10 MA LPN Endorsement 2021 61 days 31.0 days 253
## 11 IL LPN Endorsement 2021 69 days 33.0 days 267
## 12 MT LPN Endorsement 2021 57 days 34.0 days 56
## 13 NH LPN Endorsement 2021 52 days 35.0 days 74
## 14 OR LPN Endorsement 2021 59 days 35.0 days 285
## 15 AR LPN Endorsement 2021 65 days 35.5 days 92
## 16 OK LPN Endorsement 2021 75 days 44.5 days 70
## 17 IA LPN Endorsement 2021 68 days 45.0 days 63
## 18 HI LPN Endorsement 2021 46 days 46.0 days 51
## 19 IN LPN Endorsement 2021 85 days 48.5 days 214
## 20 LA LPN Endorsement 2021 90 days 51.0 days 85
## 21 NJ LPN Endorsement 2021 91 days 52.0 days 199
## 22 MS LPN Endorsement 2021 82 days 54.0 days 63
## 23 NC LPN Endorsement 2021 89 days 58.5 days 462
## 24 TN LPN Endorsement 2021 73 days 63.0 days 201
## 25 TX LPN Endorsement 2021 90 days 73.5 days 904
## 26 CA LPN Endorsement 2021 121 days 78.0 days 548
## 27 OH LPN Endorsement 2021 137 days 106.0 days 267
## 28 MN LPN Endorsement 2021 145 days 107.0 days 452
## 29 PA LPN Endorsement 2021 155 days 126.0 days 233
## # … with 12 more variables: count_over30days <int>, pct_over30days <dbl>,
## # count_over60days <int>, pct_over60days <dbl>, count_over90days <int>,
## # pct_over90days <dbl>, count_over120days <int>, pct_over120days <dbl>,
## # count_over180days <int>, pct_over180days <dbl>, count_over1year <int>,
## # pct_over1year <dbl>
#How do states rank for RNs by endorsement?
state_averages %>%
filter (standard_app_type=="Endorsement" & year=="2021" & lic_type=="RN") %>%
arrange (median) %>%
print(n=1e3)
## # A tibble: 30 × 19
## # Groups: data_state, lic_type, standard_app_type [30]
## data_state lic_type standard_app_type year mean median count
## <chr> <chr> <chr> <chr> <drtn> <drtn> <int>
## 1 VT RN Endorsement 2021 8 days 1 days 2375
## 2 FL RN Endorsement 2021 40 days 14 days 13541
## 3 CO RN Endorsement 2021 36 days 19 days 3153
## 4 NM RN Endorsement 2021 37 days 21 days 961
## 5 NE RN Endorsement 2021 42 days 23 days 599
## 6 SC RN Endorsement 2021 36 days 24 days 2180
## 7 KY RN Endorsement 2021 71 days 28 days 1552
## 8 MA RN Endorsement 2021 58 days 28 days 5220
## 9 MI RN Endorsement 2021 59 days 29 days 4317
## 10 IL RN Endorsement 2021 65 days 30 days 5103
## 11 ME RN Endorsement 2021 59 days 30 days 1049
## 12 OR RN Endorsement 2021 51 days 31 days 5583
## 13 MT RN Endorsement 2021 58 days 32 days 1063
## 14 WY RN Endorsement 2021 59 days 34 days 345
## 15 WV RN Endorsement 2021 55 days 35 days 939
## 16 NH RN Endorsement 2021 50 days 36 days 1145
## 17 TN RN Endorsement 2021 50 days 36 days 2948
## 18 OK RN Endorsement 2021 55 days 38 days 544
## 19 AR RN Endorsement 2021 67 days 39 days 739
## 20 HI RN Endorsement 2021 46 days 41 days 1450
## 21 IN RN Endorsement 2021 76 days 41 days 2307
## 22 IA RN Endorsement 2021 73 days 44 days 969
## 23 NJ RN Endorsement 2021 89 days 49 days 3777
## 24 MS RN Endorsement 2021 83 days 50 days 563
## 25 NC RN Endorsement 2021 79 days 56 days 4421
## 26 TX RN Endorsement 2021 84 days 70 days 7530
## 27 MN RN Endorsement 2021 116 days 83 days 6182
## 28 PA RN Endorsement 2021 122 days 90 days 3989
## 29 CA RN Endorsement 2021 129 days 103 days 10668
## 30 OH RN Endorsement 2021 138 days 119 days 3210
## # … with 12 more variables: count_over30days <int>, pct_over30days <dbl>,
## # count_over60days <int>, pct_over60days <dbl>, count_over90days <int>,
## # pct_over90days <dbl>, count_over120days <int>, pct_over120days <dbl>,
## # count_over180days <int>, pct_over180days <dbl>, count_over1year <int>,
## # pct_over1year <dbl>
#How do states rank for LPNs by Exam?
state_averages %>%
filter (standard_app_type=="Exam" & year=="2021" & lic_type=="LPN") %>%
arrange (median) %>%
print(n=1e3)
## # A tibble: 29 × 19
## # Groups: data_state, lic_type, standard_app_type [29]
## data_state lic_type standard_app_type year mean median count
## <chr> <chr> <chr> <chr> <drtn> <drtn> <int>
## 1 VT LPN Exam 2021 44 days 36.5 days 140
## 2 ME LPN Exam 2021 61 days 37.0 days 33
## 3 CO LPN Exam 2021 62 days 42.0 days 266
## 4 SC LPN Exam 2021 69 days 44.0 days 403
## 5 MA LPN Exam 2021 74 days 47.0 days 469
## 6 NM LPN Exam 2021 54 days 49.0 days 85
## 7 MI LPN Exam 2021 81 days 50.0 days 785
## 8 OR LPN Exam 2021 90 days 53.0 days 227
## 9 IA LPN Exam 2021 71 days 56.0 days 576
## 10 FL LPN Exam 2021 81 days 57.0 days 1737
## 11 NH LPN Exam 2021 73 days 58.0 days 71
## 12 NC LPN Exam 2021 80 days 59.0 days 711
## 13 NE LPN Exam 2021 67 days 59.5 days 318
## 14 OH LPN Exam 2021 79 days 61.0 days 2356
## 15 IL LPN Exam 2021 99 days 63.0 days 924
## 16 KY LPN Exam 2021 89 days 63.0 days 533
## 17 TN LPN Exam 2021 106 days 73.0 days 729
## 18 MN LPN Exam 2021 104 days 74.0 days 1492
## 19 LA LPN Exam 2021 99 days 75.0 days 743
## 20 MS LPN Exam 2021 86 days 75.0 days 537
## 21 AR LPN Exam 2021 86 days 76.0 days 662
## 22 IN LPN Exam 2021 95 days 77.0 days 417
## 23 NJ LPN Exam 2021 136 days 78.0 days 803
## 24 MT LPN Exam 2021 90 days 78.5 days 34
## 25 OK LPN Exam 2021 95 days 80.0 days 635
## 26 HI LPN Exam 2021 79 days 82.5 days 8
## 27 CA LPN Exam 2021 115 days 83.0 days 3588
## 28 TX LPN Exam 2021 105 days 96.0 days 2609
## 29 PA LPN Exam 2021 151 days 111.0 days 1468
## # … with 12 more variables: count_over30days <int>, pct_over30days <dbl>,
## # count_over60days <int>, pct_over60days <dbl>, count_over90days <int>,
## # pct_over90days <dbl>, count_over120days <int>, pct_over120days <dbl>,
## # count_over180days <int>, pct_over180days <dbl>, count_over1year <int>,
## # pct_over1year <dbl>
#How do states rank for RNs by Exam?
state_averages %>%
filter (standard_app_type=="Exam" & year=="2021" & lic_type=="RN") %>%
arrange (median) %>%
print(n=1e3)
## # A tibble: 30 × 19
## # Groups: data_state, lic_type, standard_app_type [30]
## data_state lic_type standard_app_type year mean median count
## <chr> <chr> <chr> <chr> <drtn> <drtn> <int>
## 1 CO RN Exam 2021 75 days 42 days 2335
## 2 ME RN Exam 2021 60 days 48 days 838
## 3 NM RN Exam 2021 53 days 48 days 615
## 4 VT RN Exam 2021 64 days 50 days 337
## 5 NH RN Exam 2021 57 days 52 days 610
## 6 MA RN Exam 2021 73 days 56 days 3478
## 7 WY RN Exam 2021 64 days 57 days 245
## 8 SC RN Exam 2021 69 days 60 days 2215
## 9 FL RN Exam 2021 82 days 63 days 10358
## 10 OH RN Exam 2021 70 days 65 days 9649
## 11 IA RN Exam 2021 81 days 66 days 1674
## 12 TN RN Exam 2021 78 days 70 days 3626
## 13 MI RN Exam 2021 92 days 73 days 4746
## 14 NE RN Exam 2021 84 days 76 days 1147
## 15 OK RN Exam 2021 76 days 77 days 1614
## 16 NC RN Exam 2021 86 days 78 days 4881
## 17 KY RN Exam 2021 96 days 80 days 2507
## 18 NJ RN Exam 2021 99 days 80 days 3676
## 19 OR RN Exam 2021 97 days 82 days 1470
## 20 MN RN Exam 2021 107 days 83 days 7058
## 21 AR RN Exam 2021 87 days 84 days 1412
## 22 HI RN Exam 2021 88 days 84 days 237
## 23 WV RN Exam 2021 92 days 86 days 1023
## 24 IL RN Exam 2021 142 days 88 days 6825
## 25 MS RN Exam 2021 94 days 92 days 1784
## 26 IN RN Exam 2021 100 days 95 days 3482
## 27 CA RN Exam 2021 115 days 96 days 10175
## 28 MT RN Exam 2021 125 days 97 days 694
## 29 PA RN Exam 2021 117 days 97 days 6254
## 30 TX RN Exam 2021 119 days 118 days 11599
## # … with 12 more variables: count_over30days <int>, pct_over30days <dbl>,
## # count_over60days <int>, pct_over60days <dbl>, count_over90days <int>,
## # pct_over90days <dbl>, count_over120days <int>, pct_over120days <dbl>,
## # count_over180days <int>, pct_over180days <dbl>, count_over1year <int>,
## # pct_over1year <dbl>
Cannot can use change from 2020-2021 for some states where I didn’t get 2019-2021, like Texas, Maine, etc.
Some large decreases, too
rn_exam <- state_averages %>%
filter (lic_type=="RN" & standard_app_type=="Exam" & year %in% c("2020", "2021")) %>%
pivot_wider (id_cols = c(data_state, lic_type, standard_app_type),
names_from = year,
values_from = c(mean, median, count)) %>%
arrange (standard_app_type) %>%
mutate (change_med = median_2021 - median_2020) %>%
arrange (desc(change_med))
rn_end <- state_averages %>%
filter (lic_type=="RN" & standard_app_type=="Endorsement" & year %in% c("2020", "2021")) %>%
pivot_wider (id_cols = c(data_state, lic_type, standard_app_type),
names_from = year,
values_from = c(mean, median, count)) %>%
arrange (standard_app_type) %>%
mutate (change_med = median_2021 - median_2020) %>%
arrange (desc(change_med))
lpn_exam <- state_averages %>%
filter (lic_type=="LPN" & standard_app_type=="Exam" & year %in% c("2020", "2021")) %>%
pivot_wider (id_cols = c(data_state, lic_type, standard_app_type),
names_from = year,
values_from = c(mean, median, count)) %>%
arrange (standard_app_type) %>%
mutate (change_med = median_2021 - median_2020) %>%
arrange (desc(change_med))
lpn_end <- state_averages %>%
filter (lic_type=="LPN" & standard_app_type=="Endorsement" & year %in% c("2020", "2021")) %>%
pivot_wider (id_cols = c(data_state, lic_type, standard_app_type),
names_from = year,
values_from = c(mean, median, count)) %>%
arrange (standard_app_type) %>%
mutate (change_med = median_2021 - median_2020) %>%
arrange (desc(change_med))
exam_end <- state_averages %>%
filter (lic_type %in% c("LPN", "RN") & standard_app_type %in% c("Exam", "Endorsement", "Unknown") & year %in% c("2020", "2021")) %>%
pivot_wider (id_cols = c(data_state, lic_type, standard_app_type),
names_from = year,
values_from = c(mean, median, count, pct_over30days, pct_over60days, pct_over90days, pct_over120days, pct_over180days, pct_over1year)) %>%
arrange (standard_app_type) %>%
#mutate (change_med = median_2021 - median_2020) %>%
filter (count_2021>1) %>%#filter out colorado/michigan unknowns (1 entry)
ungroup() %>%
arrange (data_state) %>%
mutate (data_state = if_else (data_state %in% c("CT", "VA"), paste0(data_state, "*"), data_state))
#https://stackoverflow.com/questions/65849383/finding-the-differences-of-paired-columns-using-dplyr
#Calculate change from 2020 to 2021
exam_end_change <- exam_end %>%
mutate(across(ends_with("_2021"), .names = "{col}_diff") - across(ends_with("_2020"))) %>%
rename_with(~ sub("_\\d+", "", .), ends_with("_diff")) %>%
#reorder using colnames (change[ , order(names(change))]) to print names
select (data_state, lic_type, standard_app_type, count_2020, count_2021, mean_2020, mean_2021, mean_diff, median_2020, median_2021, median_diff, pct_over30days_2020, pct_over30days_2021, pct_over30days_diff, pct_over60days_2020, pct_over60days_2021, pct_over60days_diff, pct_over90days_2020, pct_over90days_2021, pct_over90days_diff,pct_over120days_2020, pct_over120days_2021, pct_over120days_diff, pct_over180days_2020, pct_over180days_2021, pct_over180days_diff, pct_over1year_2020, pct_over1year_2021 , pct_over1year_diff)
#write_csv (exam_end_change, "State-summary-lic-type-app-type_9232021cutoff.csv")
rns <- state_averages %>%
filter (lic_type=="RN" & standard_app_type %in% c("Exam", "Endorsement") & year %in% c("2020", "2021")) %>%
pivot_wider (id_cols = c(data_state, lic_type, standard_app_type),
names_from = year,
values_from = c(mean, median, count)) %>%
arrange (standard_app_type) %>%
mutate (change_med = median_2021 - median_2020)
rn_exam$data_state <- fct_reorder(rn_exam$data_state, (rn_exam$median_2021))
Breaking down license type and application type for all 32 states in one graph.
exam_end %>%
mutate (type = paste0(standard_app_type, "-", lic_type)) %>%
mutate (type = case_when ( #USE TO INCLUDE IN GRAPH BELOW
type=="Unknown-RN" ~ "Exam-RN",
type=="Unknown-LPN" ~ "Exam-LPN",
TRUE ~ type)) %>%
ggplot(aes(x = type, y=median_2021, fill=type)) +
geom_bar(stat='identity', position="dodge") +
coord_flip() +
facet_wrap (~ data_state, ncol= 4, strip.position = "right") +
labs(y="Median count of days",
#x="State",
title="Nurse licenses issued in 2021",
subtitle="Median days from application submission to license issuance",
caption="Source: State boards of nursing\n*CT & VA didn't separate records by application type.",
fill = "Type") +
theme(axis.title.y=element_blank()) +
scale_y_continuous(breaks=c(0,30, 60, 90, 120)) +
theme(axis.text.x = element_text(angle=60, hjust=1))
Breaking down license type and application type for all 32 states in one graph.
exam_end %>%
mutate (type = paste0(standard_app_type, "-", lic_type)) %>%
mutate (type = case_when ( #USE TO INCLUDE IN GRAPH BELOW
type=="Unknown-RN" ~ "Exam-RN",
type=="Unknown-LPN" ~ "Exam-LPN",
TRUE ~ type)) %>%
ggplot(aes(x = type, y=mean_2021, fill=type)) +
geom_bar(stat='identity', position="dodge") +
coord_flip() +
facet_wrap (~ data_state, ncol= 4, strip.position = "right") +
labs(y="Average count of days",
#x="State",
title="Nurse licenses issued in 2021",
subtitle="Average days from application submission to license issuance",
caption="Source: State boards of nursing\n*CT & VA didn't separate records by application type.",
fill = "Type") +
theme(axis.title.y=element_blank()) +
scale_y_continuous(breaks=c(0,30, 60, 90, 120, 150)) +
theme(axis.text.x = element_text(angle=60, hjust=1))
Uses exam_end data frame just above (before NPR graphics version below)
lic_types <- c("RN", "LPN")
app_types <- c("Exam", "Endorsement")
methods <- c("median")
for (license in lic_types){
for (app in app_types){
for (method in methods){
graph <- exam_end %>%
filter (lic_type==license & standard_app_type==app) %>%
mutate (data_state = forcats::fct_reorder(data_state, get(paste0(method, "_2021")))) %>%
ggplot(aes(x = data_state, y=get(paste0(method, "_2021")), fill=get(paste0(method, "_2021")))) +
geom_bar(stat='identity', position="dodge") +
geom_text(aes(label = count_2021), position = position_stack(vjust=0.9), size=2.5) + #position_dodge(0.9)
coord_flip() +
#facet_wrap (~ data_state, ncol= 4, strip.position = "right") +
labs(y=paste(str_to_title(method), "count of days"),
#x="State",
title=paste("New", license, "licenses issued by", app, "in 2021"),
subtitle=paste(str_to_title(method), "days from application submission to license issuance"),
caption="Source: State boards of nursing\n*CT & VA didn't separate records by application type.\nNumber within bar represents count of licenses issued.",
fill = "Type") +
theme(axis.title.y=element_blank()) +
scale_y_continuous(breaks=c(0,30, 60, 90, 120, 150)) +
theme(axis.text.x = element_text(angle=60, hjust=1)) +
scale_fill_gradient2(low=scales::muted("darkgreen"), mid="yellow", high = "red", na.value = NA,
#Set midpoint of color gradiant as national median for this type
midpoint=natl_averages %>% filter (lic_type==license & standard_app_type==app & year=="2021") %>% pull(method) %>% as.numeric()) +
theme(legend.position="none") +
geom_hline(yintercept=(natl_averages %>% filter (lic_type==license & standard_app_type==app & year=="2021") %>% pull(method) %>% as.numeric()), linetype="dashed") +
annotate("text", x=10, y=(natl_averages %>% filter (lic_type==license & standard_app_type==app & year=="2021") %>% pull(method) %>% as.numeric()),
label=paste("National", method, "=", (natl_averages %>% filter (lic_type==license & standard_app_type==app & year=="2021") %>% pull(method) %>% as.numeric()), "days"), size=3, angle=-90, vjust = -0.5)
print(graph)
#Save to file with lines below
# ggsave(plot = graph,
# file = paste0("/Users/austinfast/Documents/NPR/210908-nursing licenses/Graphs/_", license, "_", app, "_", method, ".png"),
#width = 6,
#height = 3
# )
}
}
}
Uses exam_end data frame just above (before NPR graphics version below). We did not use means because they’re sensitive to outliers, and these records have quite a few outliers that took hundreds of days to process.
lic_types <- c("RN", "LPN")
app_types <- c("Exam", "Endorsement")
methods <- c("mean")
for (license in lic_types){
for (app in app_types){
for (method in methods){
graph <- exam_end %>%
filter (lic_type==license & standard_app_type==app) %>%
mutate (data_state = forcats::fct_reorder(data_state, get(paste0(method, "_2021")))) %>%
ggplot(aes(x = data_state, y=get(paste0(method, "_2021")), fill=get(paste0(method, "_2021")))) +
geom_bar(stat='identity', position="dodge") +
geom_text(aes(label = count_2021), position = position_stack(vjust=0.9), size=2.5) + #position_dodge(0.9)
coord_flip() +
#facet_wrap (~ data_state, ncol= 4, strip.position = "right") +
labs(y=paste(str_to_title(method), "count of days"),
#x="State",
title=paste("New", license, "licenses issued by", app, "in 2021"),
subtitle=paste(str_to_title(method), "days from application submission to license issuance"),
caption="Source: State boards of nursing\n*CT & VA didn't separate records by application type.\nNumber within bar represents count of licenses issued.",
fill = "Type") +
theme(axis.title.y=element_blank()) +
scale_y_continuous(breaks=c(0,30, 60, 90, 120, 150)) +
theme(axis.text.x = element_text(angle=60, hjust=1)) +
scale_fill_gradient2(low=scales::muted("darkgreen"), mid="yellow", high = "red", na.value = NA,
#Set midpoint of color gradiant as national median for this type
midpoint=natl_averages %>% filter (lic_type==license & standard_app_type==app & year=="2021") %>% pull(method) %>% as.numeric()) +
theme(legend.position="none") +
geom_hline(yintercept=(natl_averages %>% filter (lic_type==license & standard_app_type==app & year=="2021") %>% pull(method) %>% as.numeric()), linetype="dashed") +
annotate("text", x=10, y=(natl_averages %>% filter (lic_type==license & standard_app_type==app & year=="2021") %>% pull(method) %>% as.numeric()),
label=paste("National", method, "=", (natl_averages %>% filter (lic_type==license & standard_app_type==app & year=="2021") %>% pull(method) %>% as.numeric()), "days"), size=3, angle=-90, vjust = -0.5)
print(graph)
#Save to file with lines below
# ggsave(plot = graph,
# file = paste0("/Users/austinfast/Documents/NPR/210908-nursing licenses/Graphs/_", license, "_", app, "_", method, ".png"),
#width = 6,
#height = 3
# )
}
}
}
Reformatting data for NPR graphics team
exam_end2 <- exam_end %>%
select (-c(count_2020, count_2021)) %>%
pivot_longer (cols = c("mean_2020", "mean_2021", "median_2020", "median_2021"), #, "count_2020", "count_2021", "change_med"
names_to = "type",
values_to = "value")
exam_end3 <- exam_end2 %>%
pivot_wider (id_cols = c("type", "lic_type", "standard_app_type"),
names_from = "data_state",
values_from = "value")
#Keep only RN/LPN + Exam/Endorsement for 2021
exam_end <- state_averages %>%
filter (lic_type %in% c("LPN", "RN") & standard_app_type %in% c("Exam", "Endorsement") & year %in% c("2021")) %>%
select (-year)
#Pivot long
exam_end2 <- exam_end %>%
mutate (mean = as.integer(mean),
median= as.integer(median)) %>%
pivot_longer (cols = c("mean", "median", "count",
#"count_over30days", "pct_over30days",
#"count_over60days", "pct_over60days", "count_over90days", "pct_over90days",
#"count_over120days", "pct_over120days", "count_over180days", "pct_over180days",
#"count_over1year", "pct_over1year"
), #, "count_2020", "count_2021", "change_med"
names_to = "type",
values_to = "value")
#Pivot wider
exam_end3 <- exam_end2 %>%
pivot_wider (id_cols = c("type", "lic_type", "standard_app_type"),
names_from = "data_state",
values_from = "value")
#Analyzing by license type only
exam_end_lic_only <- state_avg_lic_type_only %>%
filter (lic_type %in% c("LPN", "RN") & year %in% c("2021")) %>%
select (-year) %>%
mutate (mean = as.integer(mean),
median= as.integer(median)) %>%
pivot_longer (cols = c("mean", "median", "count",
#"count_over30days", "pct_over30days",
#"count_over60days", "pct_over60days", "count_over90days", "pct_over90days",
#"count_over120days", "pct_over120days", "count_over180days", "pct_over180days",
#"count_over1year", "pct_over1year"
), #, "count_2020", "count_2021", "change_med"
names_to = "type",
values_to = "value") %>%
pivot_wider (id_cols = c("type", "lic_type"),
names_from = "data_state",
values_from = "value") %>%
mutate (standard_app_type = "All new licenses")
#Pull out national mean, median, min, max to add to Connie Jin's spreadsheet
natl_averages3 <- natl_averages %>%
filter (year=="2021" & standard_app_type %in% c("Exam", "Endorsement")) %>%
mutate (mean = as.integer(mean),
median= as.integer(median)) %>%
pivot_longer (cols = c("mean", "median", "count",
#"count_over30days", "pct_over30days",
#"count_over60days", "pct_over60days", "count_over90days", "pct_over90days",
#"count_over120days", "pct_over120days", "count_over180days", "pct_over180days",
#"count_over1year", "pct_over1year"
), #, "count_2020", "count_2021", "change_med"
names_to = "type",
values_to = "natl_look") %>%
select (-year)
min <- state_averages %>%
filter (year=="2021" & standard_app_type %in% c("Exam", "Endorsement")) %>%
mutate (mean = as.integer(mean),
median= as.integer(median)) %>%
group_by (lic_type, standard_app_type) %>%
arrange (lic_type, standard_app_type) %>%
summarize (across(3:17, ~min(.x))) %>%
pivot_longer (cols = c("mean", "median", "count",
#"count_over30days", "pct_over30days",
#"count_over60days", "pct_over60days", "count_over90days", "pct_over90days",
#"count_over120days", "pct_over120days", "count_over180days", "pct_over180days",
#"count_over1year", "pct_over1year"
), #, "count_2020", "count_2021", "change_med"
names_to = "type",
values_to = "state_min")
max <- state_averages %>%
filter (year=="2021" & standard_app_type %in% c("Exam", "Endorsement")) %>%
mutate (mean = as.integer(mean),
median= as.integer(median)) %>%
group_by (lic_type, standard_app_type) %>%
arrange (lic_type, standard_app_type) %>%
summarize (across(3:17, ~max(.x))) %>%
pivot_longer (cols = c("mean", "median", "count",
#"count_over30days", "pct_over30days",
#"count_over60days", "pct_over60days", "count_over90days", "pct_over90days",
#"count_over120days", "pct_over120days", "count_over180days", "pct_over180days",
#"count_over1year", "pct_over1year"
), #, "count_2020", "count_2021", "change_med"
names_to = "type",
values_to = "state_max")
exam_end4 <- exam_end3 %>%
full_join (natl_averages3, by = c("lic_type", "standard_app_type", "type")) %>%
full_join (min, by = c("lic_type", "standard_app_type", "type")) %>%
full_join (max, by = c("lic_type", "standard_app_type", "type")) %>%
select (1:3, natl_look, state_min, state_max, 4:34)
Calculate by license type only, combining exam, endorsement, other, unknown app types THIS ALLOWS FOR CT & VA TO BE ADDED IN SINCE THEY DIDN’T SEPARATE BY APPLICATION TYPE
min_lic_type <- state_avg_lic_type_only %>%
filter (year=="2021") %>%
mutate (mean = as.integer(mean),
median= as.integer(median)) %>%
group_by (lic_type) %>%
summarize (across(3:17, ~min(.x))) %>%
pivot_longer (cols = c("mean", "median", "count",
#"count_over30days", "pct_over30days",
#"count_over60days", "pct_over60days", "count_over90days", "pct_over90days",
#"count_over120days", "pct_over120days", "count_over180days", "pct_over180days",
#"count_over1year", "pct_over1year"
), #, "count_2020", "count_2021", "change_med"
names_to = "type",
values_to = "state_min")
max_lic_type <- state_avg_lic_type_only %>%
filter (year=="2021") %>%
mutate (mean = as.integer(mean),
median= as.integer(median)) %>%
group_by (lic_type) %>%
summarize (across(3:17, ~max(.x))) %>%
pivot_longer (cols = c("mean", "median", "count",
#"count_over30days", "pct_over30days",
#"count_over60days", "pct_over60days", "count_over90days", "pct_over90days",
#"count_over120days", "pct_over120days", "count_over180days", "pct_over180days",
#"count_over1year", "pct_over1year"
), #, "count_2020", "count_2021", "change_med"
names_to = "type",
values_to = "state_max")
natl_averages4 <- natl_avg_lic_type %>%
filter (year=="2021" & lic_type %in% c("LPN", "RN")) %>%
mutate (mean = as.integer(mean),
median= as.integer(median)) %>%
pivot_longer (cols = c("mean", "median", "count",
#"count_over30days", "pct_over30days",
#"count_over60days", "pct_over60days", "count_over90days", "pct_over90days",
#"count_over120days", "pct_over120days", "count_over180days", "pct_over180days",
#"count_over1year", "pct_over1year"
), #, "count_2020", "count_2021", "change_med"
names_to = "type",
values_to = "natl_look") #%>%
#select (-year)
exam_end_lic_only2 <- exam_end_lic_only %>%
full_join (natl_averages4, by = c("lic_type", "type")) %>%
full_join (min_lic_type, by = c("lic_type", "type")) %>%
full_join (max_lic_type, by = c("lic_type", "type")) %>%
mutate (standard_app_type = "All new licenses") %>%
select (1:2, standard_app_type, natl_look, state_min, state_max, 3:35)
Which states have other/unknown application types, meaning “All new licenses” will be more than sum of Exam+Endorsement?
state_averages %>%
filter (year=="2021") %>%
filter (standard_app_type %in% c("Other", "Unknown")) %>%
ungroup() %>%
distinct (data_state)
## # A tibble: 10 × 1
## data_state
## <chr>
## 1 AR
## 2 CO
## 3 CT
## 4 FL
## 5 NH
## 6 NJ
## 7 NM
## 8 OK
## 9 VA
## 10 WV
Double check counts match for exam/endorsement split Diff column should be 0 for all of these
avg_counts <- state_averages %>%
ungroup() %>%
filter (year=="2021") %>%
group_by (lic_type, standard_app_type) %>%
summarize (count = sum(count))
#Was there an error summarizing?
valid_data3 %>%
filter (year=="2021") %>%
group_by (lic_type, standard_app_type) %>%
summarize (raw_count = n()) %>%
full_join (avg_counts, by = c("lic_type", "standard_app_type")) %>%
mutate (diff = count - raw_count) %>%
arrange (desc(diff))
## # A tibble: 23 × 5
## # Groups: lic_type [11]
## lic_type standard_app_type raw_count count diff
## <chr> <chr> <int> <int> <int>
## 1 LPN Endorsement 7719 7719 0
## 2 LPN Exam 23359 23359 0
## 3 LPN Exam-retest 1569 1569 0
## 4 LPN Other 121 121 0
## 5 LPN Reinstatement 1373 1373 0
## 6 LPN Renewal 27157 27157 0
## 7 LPN Unknown 1423 1423 0
## 8 LPN-Provisional Temporary 1617 1617 0
## 9 LPN-Refresher Temporary Temporary 2 2 0
## 10 LPN-Temp Temporary 2137 2137 0
## # … with 13 more rows
Combined RNs+LPNs and combine application type, keeping only exam, endorsement, other, unknown app types THIS ALLOWS FOR CT & VA TO BE ADDED IN
#Analyzing by license type only
exam_end_combined <- state_averages_combined %>%
filter (year %in% c("2021")) %>%
select (-year) %>%
mutate (mean = as.integer(mean),
median= as.integer(median))
min_lic_type2 <- state_averages_combined %>%
filter (year=="2021") %>%
mutate (mean = as.integer(mean),
median= as.integer(median)) %>%
group_by (lic_type) %>%
summarize (across(3:18, ~min(.x))) %>%
add_column(data_state="state_min") %>%
select (-year)
max_lic_type2 <- state_averages_combined %>%
filter (year=="2021") %>%
mutate (mean = as.integer(mean),
median= as.integer(median)) %>%
group_by (lic_type) %>%
summarize (across(3:18, ~max(.x))) %>%
add_column (data_state="state_max") %>%
select (-year)
natl_averages5 <- natl_averages_combined %>%
filter (year=="2021") %>%
mutate (mean = as.integer(mean),
median= as.integer(median)) %>%
add_column(data_state="natl_look") %>%
select (-year)
exam_end_combined2 <- exam_end_combined %>%
rbind (natl_averages5, min_lic_type2, max_lic_type2) %>%
arrange (data_state) %>%
mutate (standard_app_type = "All new licenses (Endorsement, exam, other, unknown)",
lic_type = "All (LPN+RN)") %>%
select (1:6) %>%
pivot_longer (cols = c("mean", "median", "count"),
names_to = "type",
values_to = "value") %>%
pivot_wider (id_cols = c("type","lic_type", "standard_app_type"),
names_from = "data_state",
values_from = "value") %>%
select (1:3, natl_look, state_min, state_max, 4:19, 21:30, 33:39)
for_connie <- rbind (exam_end_combined2, exam_end_lic_only2, exam_end4) %>%
arrange (lic_type, standard_app_type) %>%
rename (app_type = standard_app_type)
write_csv (for_connie, "../output/2021_state_summary.csv")
#Convert back to tidy version
for_connie2 <- for_connie %>%
pivot_longer (cols = 4:38,
names_to = "state",
values_to = "value") %>%
pivot_wider (id_cols = c("lic_type", "app_type", "state"),
values_from = "value",
names_from = "type")
write_csv (for_connie2, "../output/2021_state_summary_tidy.csv")
Create histograms of each state’s number of weeks to license by license type and application type. Optionally, save graphs of each.
This code converts processing time to weeks, rounding up for any portion of a week.
weeks_to_license <- valid_data3 %>%
# group_by (year, data_state, lic_type, standard_app_type) %>%
# mutate (median21 = median (process_time2)) %>%
mutate (#week_count = round(as.numeric(process_time2/7)), #will round down if below 0.5
week_count = ceiling(as.numeric(process_time2/7))) %>% #always round up
# rename (week_count = process_time2) %>%
filter (year=="2021") %>%
count (data_state, lic_type, standard_app_type, week_count) #, median21
# rename (licensee_count = n)
#Check that week_count captures weeks accurately
valid_data3 %>%
mutate (week_count = ceiling(as.numeric(process_time2/7))) %>%
group_by (week_count) %>%
summarize (min (process_time2),
max(process_time2))
## # A tibble: 419 × 3
## week_count `min(process_time2)` `max(process_time2)`
## * <dbl> <drtn> <drtn>
## 1 0 0 days 0 days
## 2 1 1 days 7 days
## 3 2 8 days 14 days
## 4 3 15 days 21 days
## 5 4 22 days 28 days
## 6 5 29 days 35 days
## 7 6 36 days 42 days
## 8 7 43 days 49 days
## 9 8 50 days 56 days
## 10 9 57 days 63 days
## # … with 409 more rows
Convert data for NPR news apps team
valid_data4 <- valid_data3 %>%
filter (year == "2021") %>%
group_by (year, data_state, lic_type, standard_app_type) %>%
summarize (median21 = median (process_time2),
count = n()) %>%
group_by (year, data_state, lic_type, standard_app_type) %>%
mutate (pct = count / sum(count) * 100,
#med_weeks = ceiling (as.numeric(median21/7))
med_weeks = (as.numeric(median21/7)))
#EXPORT FOR NPR graphics team ========================= |
connie_histographs <- weeks_to_license %>%
rename (licensee_count = n) %>%
pivot_wider (id_cols = c(lic_type, standard_app_type, week_count),
names_from = data_state,
values_from = licensee_count)
#write_csv (connie_histographs, "Nurse licenses-weeks to licensure.csv")
#write_csv (weeks_to_license, "Nurse licenses-weeks to licensure-tidy.csv")
#write_csv (valid_data4, "Nurse licenses-weeks to licensure-state medians.csv")
#Separating for Connie into CA, PA, TX
pa_weeks_to_license <- weeks_to_license %>%
rename (licensee_count = n) %>%
filter (data_state == "PA" & standard_app_type %in% c("Exam", "Endorsement")) #%>%
# pivot_wider (id_cols = week_count,
# names_from = c(lic_type, standard_app_type),
# values_from = licensee_count) %>%
# arrange (week_count) %>%
# mutate (across(2:5, ~replace_na(.x, 0)))
#write_csv (tx, "Nurse licenses-weeks to licensure-TX.csv")
pa_weeks_to_license2 <- valid_data3 %>% #FOR JAN-SEPT
ungroup() %>%
filter (data_state=="PA" & year == "2021") %>%
group_by (year, data_state, lic_type, standard_app_type) %>%
summarize (mean21 = mean (process_time2),
median21 = median (process_time2),
count = n()) %>%
group_by (year, data_state, lic_type, standard_app_type) %>%
mutate (med_weeks = ceiling(as.numeric(median21/7)),#ceiling
mean_weeks = ceiling(as.numeric(mean21/7))) %>% #ceiling
filter (lic_type %in% c("RN", "LPN"))
#write_csv (pa_weeks_to_license2, "pa_averages_220307.csv")
Double-check stacked chart NPR news apps team created.
pa_graph <- pa_weeks_to_license %>%
#filter (data_state==state) %>%
#filter (lic_type %in% c("RN")) %>% #"LPN",
filter (standard_app_type %in% c("Exam", "Endorsement")) %>%
arrange (standard_app_type, week_count) %>%
# filter (standard_app_type == "Exam") %>%
# filter (!(standard_app_type %in% c("Reinstatement", "Renewal", "Exam-retest"))) %>%
ggplot(aes(x = week_count, y = licensee_count, group=lic_type, fill=standard_app_type)) +
geom_bar(stat='identity', position='stack') +
facet_wrap (~ lic_type, ncol=1, scales='free',
labeller = label_wrap_gen(multi_line=FALSE)) +
# scale_x_continuous(breaks=c(0,30, 60, 90, 120)) +
scale_x_continuous(
breaks=seq(0, 52, by = 4), #(365 / 7 / 12))
# labels=as.character(seq(0, 12, by = 1)),
limits=c(-0.5, 52)) +
#Add padding so they appear equal width
scale_y_continuous(breaks=seq(0,850, by = 200),#900 Jan-sept #1500 fullyear
limits=c(0,850)
#labels = function(x) stringr::str_pad(x, width = 4, pad = "0")
) +
#Add median lines
geom_vline(data = (pa_weeks_to_license2 %>% filter (lic_type=="RN" & standard_app_type %in% c("Exam", "Endorsement"))), mapping = aes(xintercept = med_weeks)) +
# geom_vline(data = (ca_weeks_to_license5 %>% filter (lic_type=="RN" & standard_app_type %in% c("Exam", "Endorsement"))), mapping = aes(xintercept = mean_weeks), color="steelblue", linetype="twodash") +
# geom_vline (xintercept = 90, linetype = "dashed", colour = "black") +
labs(x="Weeks from application to licensure",
y="Count of licensees",
title=paste("New registered nurse licenses issued in PA in 2021"),
fill = "Application Type",
#subtitle="Weeks from application submission to license issuance",
caption="Source: Pennsylvania State Board of Nursing\nVertical lines represent that group's median processing time.") +#\nIncludes licenses issued Jan. 1 to Sept. 23, 2021.") +
theme(axis.text.x = element_text(angle=60, hjust=1)) +
theme(legend.position = "right")
print(pa_graph)
NPR’s chart below.
How many licenses are cut off of chart?? Nearly 400 RNs
pa_weeks_to_license %>%
filter (week_count > 48) %>%
# group_by (lic_type, standard_app_type) %>%
group_by (lic_type) %>%
summarize (total = sum(licensee_count)) %>%
adorn_totals()
## lic_type total
## LPN 151
## RN 388
## Total 539
#Save states to list
states=(ls() %>% as.data.frame() %>% filter (str_detect(., "_state$")) %>% pull() %>% str_remove_all("_state") %>% str_to_upper())
#states <- str_to_upper(c("ar", "co", "hi", "ia", "il", "in", "ky", "ma", "me", "mi", "mn", "ms", "mt", "ne", "nh", "nj", "nm", "oh", "ok", "or", "pa", "sc", "tx", "vt", "LA", "WY", "WV", "CT", "VA", "NC", "CA", "TN"))
for (i in states){
state <- i
#CT/VA don't separate by exam/endorsement, so change here
if (state %in% c("CT", "VA")){
rn_med <- valid_data4 %>%
filter (data_state==state & lic_type=="RN" & standard_app_type=="Unknown") %>%
mutate (median21 = as.numeric(median21) / 7) %>%
pull (median21)
lpn_med <- valid_data4 %>%
filter (data_state==state & lic_type=="LPN" & standard_app_type=="Unknown") %>%
mutate (median21 = as.numeric(median21) / 7) %>%
pull (median21)
#Make RN graph
rn_graph <- weeks_to_license %>%
filter (data_state==state) %>%
filter (lic_type %in% c("RN")) %>% #"LPN",
#mutate (standard_app_type = "Exam & Endorsement") %>%
ggplot(aes(x = week_count, y = n, group=lic_type, fill=lic_type)) +
geom_bar(stat='identity') +
facet_wrap (~ standard_app_type, ncol=1) +
scale_x_continuous(#breaks=seq(0, 53, by = 4.285714),
breaks=seq(0, 52, by = 4),
#labels=as.character(seq(0, 12, by = 1)),
limits=c(-1, 53)) +
#Add padding so they appear equal width
scale_y_continuous(labels = function(x) stringr::str_pad(x, width = 4, pad = "0")) +
#Add RN median
geom_vline(data = subset(weeks_to_license, standard_app_type == "Unknown"), aes(xintercept = rn_med)) +
labs(x="Weeks from application to licensure",
y="Count of applicants",
title=paste(state, "registered nurse licenses issued in 2021"),
fill = "Type",
#subtitle="Weeks from application submission to license issuance",
caption="Source: State board of nursing") +
theme(axis.text.x = element_text(angle=60, hjust=1)) +
theme(legend.position = "none")
print(rn_graph)
#SAVE TO FILE WITH THIS CODE
#ggsave(plot = rn_graph,
# file = paste0("/Users/austinfast/Documents/NPR/210908-nursing licenses/Graphs/", state, "-RN.png"),
#width = 6,
#height = 3
# )
lpn_graph <- weeks_to_license %>%
filter (data_state==state) %>%
filter (lic_type %in% c("LPN")) %>% #"LPN",
#mutate (standard_app_type = "Exam & Endorsement") %>%
# filter (standard_app_type == "Exam") %>%
# filter (!(standard_app_type %in% c("Reinstatement", "Renewal", "Exam-retest"))) %>%
ggplot(aes(x = week_count, y = n, group=lic_type, fill=lic_type)) +
geom_bar(stat='identity', color = "skyblue", fill ="skyblue") +
facet_wrap (~ standard_app_type, ncol=1) +
scale_x_continuous(#breaks=seq(0, 53, by = 4.285714),
breaks=seq(0, 52, by = 4),
#labels=as.character(seq(0, 12, by = 1)),
limits=c(-1, 53)) +
#Add padding so they appear equal width
scale_y_continuous(labels = function(x) stringr::str_pad(x, width = 4, pad = "0")) +
#ADD ALL MEDIANS TO ALL FACETS HERE
geom_vline(data = subset(weeks_to_license, standard_app_type == "Unknown"), aes(xintercept = lpn_med)) +
# geom_vline (xintercept = 90, linetype = "dashed", colour = "black") +
labs(x="Weeks from application to licensure",
y="Count of applicants",
title=paste(state, "practical nurse licenses issued in 2021"),
fill = "Type",
#subtitle="Weeks from application submission to license issuance",
caption="Source: State board of nursing") +
theme(axis.text.x = element_text(angle=60, hjust=1)) +
theme(legend.position = "none")
print (lpn_graph)
#SAVE TO FILE WITH THIS CODE
#ggsave(plot = lpn_graph,
# file = paste0("/Users/austinfast/Documents/NPR/210908-nursing licenses/Graphs/", state, "-LPN.png")
# )
} else {
#Lousiana only has LPNs, so skip RN graph
if(state=="LA"){
}else{
rn_graph <- weeks_to_license %>%
filter (data_state==state) %>%
filter (lic_type %in% c("RN")) %>% #"LPN",
filter (standard_app_type %in% c("Exam", "Endorsement")) %>%
# filter (standard_app_type == "Exam") %>%
# filter (!(standard_app_type %in% c("Reinstatement", "Renewal", "Exam-retest"))) %>%
ggplot(aes(x = week_count, y = n, group=lic_type, fill=lic_type)) +
geom_bar(stat='identity') +
facet_wrap (~ standard_app_type, ncol=1) +
scale_x_continuous(#breaks=seq(0, 53, by = 4.285714),
breaks=seq(0, 52, by = 4),
#labels=as.character(seq(0, 12, by = 1)),
limits=c(-1, 53)) + #140 to reach max
#Add padding so they appear equal width
scale_y_continuous(labels = function(x) stringr::str_pad(x, width = 4, pad = "0"),
#limits=c(0, 800)
) +
#ADD ALL MEDIANS TO ALL FACETS HERE
geom_vline(data = (valid_data4 %>% filter (data_state == state & lic_type=="RN" & standard_app_type %in% c("Exam", "Endorsement"))), mapping = aes(xintercept = med_weeks)) +
labs(x="Weeks from application to licensure",
y="Count of applicants",
title=paste(state, "registered nurse licenses issued in 2021"),
fill = "Type",
#subtitle="Weeks from application submission to license issuance",
caption="Source: State board of nursing") +
theme(axis.text.x = element_text(angle=60, hjust=1)) +
theme(legend.position = "none")
print(rn_graph)
#SAVE TO FILE WITH THIS CODE
#ggsave(plot = rn_graph,
# file = paste0("/Users/austinfast/Documents/NPR/210908-nursing licenses/Graphs/", state, "-RN.png"),
#width = 6,
#height = 3
# )
}
#WY/WV only have RNs, so skip RN graph
if (state %in% c("WY", "WV")){
}else{
lpn_graph <- weeks_to_license %>%
filter (data_state==state) %>%
filter (lic_type %in% c("LPN")) %>% #"LPN",
filter (standard_app_type %in% c("Exam", "Endorsement")) %>%
# filter (standard_app_type == "Exam") %>%
# filter (!(standard_app_type %in% c("Reinstatement", "Renewal", "Exam-retest"))) %>%
ggplot(aes(x = week_count, y = n, group=lic_type, fill=lic_type)) +
geom_bar(stat='identity', color = "skyblue", fill ="skyblue") +
facet_wrap (~ standard_app_type, ncol=1) +
scale_x_continuous(#breaks=seq(0, 53, by = 4.285714),
breaks=seq(0, 52, by = 4),
#labels=as.character(seq(0, 12, by = 1)),
limits=c(-1, 53)) +
#Add padding so they appear equal width
scale_y_continuous(labels = function(x) stringr::str_pad(x, width = 4, pad = "0")) +
#ADD ALL MEDIANS TO ALL FACETS HERE
geom_vline(data = (valid_data4 %>% filter (data_state == state & lic_type=="LPN" & standard_app_type %in% c("Exam", "Endorsement"))), mapping = aes(xintercept = med_weeks)) +
# geom_vline (xintercept = 90, linetype = "dashed", colour = "black") +
labs(x="Weeks from application to licensure",
y="Count of applicants",
title=paste(state, "practical nurse licenses issued in 2021"),
fill = "Type",
#subtitle="Weeks from application submission to license issuance",
caption="Source: State board of nursing") +
theme(axis.text.x = element_text(angle=60, hjust=1)) +
theme(legend.position = "none")
print (lpn_graph)
#SAVE TO FILE WITH THIS CODE
#ggsave(plot = lpn_graph,
# file = paste0("/Users/austinfast/Documents/NPR/210908-nursing licenses/Graphs/", state, "-LPN.png"),
#width = 6,
#height = 3
# )
}
}
}
What percent of American nurses are represented by these 33 states’ data (including Florida)?
Approx 71% of nurse licenses nationally, using counts of active licenses as a proxy for new nurse applications. Downloaded list of active RN & LPN licenses by state from the National Council of State Boards of Nursing, as of Nov. 23, 2021 https://www.ncsbn.org/14283.htm https://www.ncsbn.org/Aggregate-AllActiveLicensesExcel.xls
nnd <- read_excel ("../state-data/NCSBN-Aggregate-AllActiveLicensesExcel-112321.xls", skip=3) %>%
select(1,4,7) %>%
magrittr::set_colnames(c("state_name", "licenses", "pct")) %>%
filter (!(state_name=="TOTALS"|is.na(state_name)))
fips <- fips_codes %>%
distinct (state, state_name) %>%
mutate (state_name = str_to_upper(state_name),
state = str_to_lower(state))
nnd2 <- nnd %>%
left_join (fips, by = "state_name") %>%
mutate (state = case_when (
state_name=="CALIFORNIA-RN" ~ "ca-rn",
state_name=="CALIFORNIA-VN" ~ "ca-pn",
state_name=="LOUISIANA-RN" ~ "la-rn",
state_name=="LOUISIANA-PN" ~ "la-pn",
state_name=="WEST VIRGINIA-RN" ~ "wv-rn",
state_name=="WEST VIRGINIA-PN" ~ "wv-pn",
TRUE ~ str_to_lower(state))) %>%
add_row (state_name="MICHIGAN", licenses=177633, state="mi") #Michigan not included in NCSBN dataset, so adding figure from 2020 report: https://www.michigan.gov/documents/mdhhs/MI_Nurse_Survey_Report_2020_final_705633_7.pdf
#Create list of _state dataframes found in R environment to mark as data received
save_these=(ls() %>%
as.data.frame() %>%
filter (str_detect(., "_state$")) %>%
magrittr::set_colnames("name") %>%
add_row(name="ca-rn") %>% #manually set those states with multiple boards
add_row(name="ca-pn") %>%
add_row(name="wv-rn") %>%
add_row(name="la-pn") %>%
pull() %>%
str_remove_all("_state"))
nnd3 <- nnd2 %>%
mutate (data = if_else (state %in% save_these, "received", "missing"), #, "fl",
pct = licenses / sum(licenses) * 100)
nnd3 %>%
group_by (data) %>%
summarize (count = sum(licenses),
pct = sum (pct))
## # A tibble: 2 × 3
## data count pct
## * <chr> <dbl> <dbl>
## 1 missing 1789907 28.6
## 2 received 4470549 71.4
Article text: “In Virginia, the Board of Nursing reports that it processes 99% of applications within 30 days. … If you start the clock at the point nurses submitted an application, NPR’s analysis of Virginia’s records shows just 1 in 4 licenses were issued within 30 calendar days, well below the 99% figure.”
“The reason for the discrepancy: The board doesn’t start its clock until an application is marked ‘complete,’ meaning all materials are in and ready for review.”
In va_state, docs_time records days from being marked complete.
This code confirms their 99% claim within 30 days of getting marked complete, but also shows only 24.2% of the applications were issued within 30 calendar days from their submission.
va_summary <- va_state %>%
filter (year=="2021") %>%
group_by (lic_type, year) %>%
summarize (count = n(),
count_within_30 = sum(process_time <= 30),
pct = count_within_30 / count,
docs_within_30 = sum(docs_time <= 30),
docs_pct = docs_within_30 / count)
#What percent of VA's 2021 applicants were licensed within 30 calendar days of their application?
(144 + 1856) / (1051 + 7050) #24.7%, with all data (through 09-30)
## [1] 0.2468831
( va_summary[va_summary$lic_type=="LPN", "count_within_30"] + va_summary[va_summary$lic_type=="RN", "count_within_30"] ) /
( va_summary[va_summary$lic_type=="LPN", "count"] + va_summary[va_summary$lic_type=="RN", "count"] )
## count_within_30
## 1 0.2468831
#Double check looking for process times less than or equal to 30 days
va_state %>%
filter (year=="2021" & process_time <= 30 & lic_type %in% c("RN", "LPN")) %>%
nrow()
## [1] 2000
#Triple checking another way, but this is standardized timeframe, so it will be slightly less
valid_data3 %>%
filter (data_state=="VA" & lic_type %in% c("RN", "LPN") & standard_app_type=="Unknown" & year=="2021") %>%
group_by (data_state, lic_type, standard_app_type, year) %>%
summarize (count = n(),
count_within_30 = sum(process_time2 <= 30),
pct = count_within_30 / count,
docs_within_30 = sum(docs_time <= 30),
docs_pct = docs_within_30 / count)
## # A tibble: 2 × 9
## # Groups: data_state, lic_type, standard_app_type [2]
## data_state lic_type standard_app_type year count count_within_30 pct
## <chr> <chr> <chr> <chr> <int> <int> <dbl>
## 1 VA LPN Unknown 2021 1024 139 0.136
## 2 VA RN Unknown 2021 6889 1778 0.258
## # … with 2 more variables: docs_within_30 <int>, docs_pct <dbl>
#What percent of VA's 2021 applicants were licensed within 30 calendar days of their application from Jan. 1-Sept. 23, 2021?
#24.2%
(139 + 1778) / (1024 + 6889)
## [1] 0.2422596
Article text: “California’s records show registered nurses moving in from other states (known as “endorsement” licenses) waited about two months if they submitted everything necessary on the first go. But only one-quarter of nurses did that. The remaining three-quarters – more than 12,300 nurses – were marked “deficient,” which doubled the processing time."
This code creates a histogram showing differences in counts and time to issuance for California’s deficient vs complete applications.
ca_weeks_to_license <- ca_state %>%
filter (process_time2 > -1) %>% #Keep only valid processing times
filter (!is.na(process_time2)) %>%
filter (year != "2022") %>%
filter (!(str_detect(year, "^Pending") & (application_date < as.Date("2020-07-01")))) #remove pending apps older than July 2020 (11789 RNs & 1994 LPNs)
ca_weeks_to_license2 <- ca_weeks_to_license %>%
mutate (standard_app_type = case_when(
str_detect(app_type, "Reinstatement") ~ "Reinstatement",
str_detect(app_type, "Renewal") ~ "Renewal",
str_detect(lic_type, "Provisional") ~ "Temporary",
str_detect(lic_type, "Temp") ~ "Temporary",
str_detect(app_type, "Endorsement") ~ "Endorsement",
str_detect(app_type, "Exam-retest") ~ "Exam-retest",
str_detect(app_type, "Exam") ~ "Exam",
app_type %in% c("Foreign Applicant", "Inactive", "SSL to MSL", "International") ~ "Other",
TRUE ~ app_type))
ca_weeks_to_license2 %>%
ungroup() %>%
count (year)
## # A tibble: 4 × 2
## year n
## * <chr> <int>
## 1 2019 26740
## 2 2020 33307
## 3 2021 41352
## 4 Pending as of 01/04/22 41876
#38288 pending apps filed in 2021
ca_weeks_to_license2 %>%
ungroup() %>%
filter (year=="Pending") %>%
filter (between(application_date, as.Date("2021-01-01"), as.Date("2021-12-31"))) #%>%
## # A tibble: 0 × 12
## # … with 12 variables: lic_type <chr>, app_type <chr>, first_name <chr>,
## # city <chr>, state <chr>, application_date <date>, issue_date <date>,
## # process_time2 <drtn>, year <chr>, deficient <chr>, data_state <chr>,
## # standard_app_type <chr>
#summarize (range(application_date))
Among all 2021’s applications: (not just those licensed)
Exams are better, probably because schools help with applications
#How many of 2021's 68,362 applications were deficient? >> 29136
ca_weeks_to_license2 %>%
ungroup() %>%
filter (between(application_date, as.Date("2021-01-01"), as.Date("2021-12-31"))) %>%
count (deficient) %>%
mutate (pct = n / sum(n) * 100) %>%
adorn_totals("row")
## deficient n pct
## complete 39226 57.37983
## deficient 29136 42.62017
## Total 68362 100.00000
#55,550 exam/endorsements >> 28754 were deficient
#(18720 RN endorsement + 1092 LPN endorsement + 5993 RN exam + 2949 LPN exam)
ca_weeks_to_license2 %>%
ungroup() %>%
# filter (year=="2021") %>%
# filter (standard_app_type %in% c("Exam", "Endorsement")) %>%
filter (between(application_date, as.Date("2021-01-01"), as.Date("2021-12-31"))) %>%
count (lic_type, standard_app_type, deficient) %>%
group_by (lic_type, standard_app_type) %>%
mutate (pct = n / sum(n) * 100) %>%
adorn_totals("row")
## lic_type standard_app_type deficient n pct
## LPN Endorsement complete 506 31.66458
## LPN Endorsement deficient 1092 68.33542
## LPN Exam complete 4539 60.61699
## LPN Exam deficient 2949 39.38301
## LPN Exam-retest complete 654 63.12741
## LPN Exam-retest deficient 382 36.87259
## RN Endorsement complete 10831 36.65189
## RN Endorsement deficient 18720 63.34811
## RN Exam complete 10908 64.54056
## RN Exam deficient 5993 35.45944
## RN-Temp Temporary complete 11788 100.00000
## Total - - 68362 600.00000
Removes applications filed and licenses issued after Sept 23, 2021, when first states sent records to standardize with Pennsylvania histogram published on NPR.org.
This also removes all pending applications.
ca_weeks_to_license3 <- ca_weeks_to_license2 %>%
filter (application_date <= as.Date("2021-09-23")) %>%
filter (issue_date <= as.Date("2021-09-23"))
#Create dataframe for histogram
ca_weeks_to_license4 <- ca_weeks_to_license3 %>% #change to 3 if want to take only Jan-Sept 2021
ungroup() %>%
mutate (week_count = ceiling(as.numeric(process_time2/7))) %>% #always round up
filter (year=="2021") %>%
count (data_state, lic_type, standard_app_type, deficient, week_count) %>% #, median21
mutate (deficient = if_else (is.na(deficient), "complete", deficient))
#Calculate medians
#ca_weeks_to_license5 <- ca_weeks_to_license2 %>% #FULL YEAR
ca_weeks_to_license5 <- ca_weeks_to_license3 %>% #FOR JAN-SEPT
ungroup() %>%
filter (year == "2021") %>%
# mutate (deficient = if_else (is.na(deficient), "complete", deficient)) %>%
group_by (year, data_state, lic_type, standard_app_type, deficient) %>%
summarize (mean21 = mean (process_time2),
median21 = median (process_time2),
count = n()) %>%
group_by (year, data_state, lic_type, standard_app_type) %>%
mutate (pct = count / sum(count) * 100,
med_weeks = ceiling(as.numeric(median21/7)),#ceiling
mean_weeks = ceiling(as.numeric(mean21/7)))#ceiling
#Create graphic (Jan. 1-Sept. 23)
rn_graph <- ca_weeks_to_license4 %>%
#filter (data_state==state) %>%
filter (lic_type %in% c("RN")) %>% #"LPN",
filter (standard_app_type %in% c("Exam", "Endorsement")) %>%
arrange (standard_app_type, deficient, week_count) %>%
# filter (standard_app_type == "Exam") %>%
# filter (!(standard_app_type %in% c("Reinstatement", "Renewal", "Exam-retest"))) %>%
ggplot(aes(x = week_count, y = n, group=lic_type, fill=standard_app_type)) +
geom_bar(stat='identity') +
facet_wrap (deficient ~ standard_app_type, ncol=2, scales='free',
labeller = label_wrap_gen(multi_line=FALSE)) +
# scale_x_continuous(breaks=c(0,30, 60, 90, 120)) +
scale_x_continuous(breaks=seq(0, 53, by = (365 / 7 / 12)), #4.34523
labels=as.character(seq(0, 12, by = 1)),
limits=c(-0.01, 53)) +
#Add padding so they appear equal width
scale_y_continuous(breaks=seq(0, 900, by = 150),
limits=c(0,900)
#labels = function(x) stringr::str_pad(x, width = 4, pad = "0")
) +
#Add median lines
geom_vline(data = (ca_weeks_to_license5 %>% filter (lic_type=="RN" & standard_app_type %in% c("Exam", "Endorsement"))), mapping = aes(xintercept = med_weeks)) +
# geom_vline(data = (ca_weeks_to_license5 %>% filter (lic_type=="RN" & standard_app_type %in% c("Exam", "Endorsement"))), mapping = aes(xintercept = mean_weeks), color="steelblue", linetype="twodash") +
# geom_vline (xintercept = 90, linetype = "dashed", colour = "black") +
labs(x="Months from application to licensure",
y="Count of licensees",
title=paste("New registered nurse licenses issued in CA in 2021"),
fill = "Type",
#subtitle="Weeks from application submission to license issuance",
caption="Source: California Board of Registered Nursing\nVertical lines represent that group's median processing time.\nIncludes licenses issued Jan. 1 to Sept. 23, 2021.") +#\nIncludes licenses issued Jan. 1 to Sept. 23, 2021.") +
theme(axis.text.x = element_text(angle=60, hjust=1)) +
theme(legend.position = "none")
print(rn_graph)
#ggsave(plot = rn_graph,
# file = paste0("/Users/austinfast/Documents/NPR/210908-nursing licenses/Graphs/CA-RN-fullyear-deficient-app-type.png"),
# width = 6,
# height = 4
# )
What percent of all licenses are deficient?
Find by license type AND application type for LICENSES ISSUED ONLY Jan-Dec 2021
#Calculate medians
ca_weeks_to_license_full_year <- ca_weeks_to_license2 %>% #FULL YEAR
ungroup() %>%
filter (year == "2021") %>%
# mutate (deficient = if_else (is.na(deficient), "complete", deficient)) %>%
group_by (year, data_state, lic_type, standard_app_type, deficient) %>%
summarize (mean21 = mean (process_time2),
median21 = median (process_time2),
count = n()) %>%
group_by (year, data_state, lic_type, standard_app_type) %>%
mutate (pct = count / sum(count) * 100,
med_weeks = ceiling(as.numeric(median21/7)),#ceiling
mean_weeks = ceiling(as.numeric(mean21/7)))#ceiling
#What percent from Jan-Sept. were deficient?
ca_weeks_to_license5 %>% #for Jan-Dec, replace with ca_weeks_to_license_full_year
filter (standard_app_type %in% c("Exam", "Endorsement")) %>%
group_by (deficient) %>%
summarize (count = sum(count)) %>%
ungroup() %>%
mutate (pct = count / sum(count))
## # A tibble: 2 × 3
## deficient count pct
## * <chr> <int> <dbl>
## 1 complete 10960 0.439
## 2 deficient 14019 0.561
#How many of each type were deficient?
ca_weeks_to_license_full_year %>%
ungroup() %>%
filter (lic_type %in% c("RN", "LPN")) %>%
filter (standard_app_type != "Exam-retest" & deficient=="deficient") %>%
select (lic_type, standard_app_type, median21, count, pct) %>%
print(n=1e3)
## # A tibble: 4 × 5
## lic_type standard_app_type median21 count pct
## <chr> <chr> <drtn> <int> <dbl>
## 1 LPN Endorsement 87 days 633 85.1
## 2 LPN Exam 109 days 1736 35.3
## 3 RN Endorsement 130 days 12328 76.2
## 4 RN Exam 118 days 4522 35.1
Export data for NPR’s News Apps team and double-check stacked chart NPR news apps team created.
ca_weeks_to_license5 <- ca_weeks_to_license5 %>%
filter (lic_type=="RN")
#FULL YEAR DATA: 12328 deficient endorsement licenses & 3853 complete endorsement
ca_weeks_to_license4 %>%
group_by (lic_type, standard_app_type, deficient) %>%
summarize (total = sum(n))
## # A tibble: 11 × 4
## # Groups: lic_type, standard_app_type [6]
## lic_type standard_app_type deficient total
## <chr> <chr> <chr> <int>
## 1 LPN Endorsement complete 76
## 2 LPN Endorsement deficient 472
## 3 LPN Exam complete 2285
## 4 LPN Exam deficient 1303
## 5 LPN Exam-retest complete 415
## 6 LPN Exam-retest deficient 310
## 7 RN Endorsement complete 2049
## 8 RN Endorsement deficient 8619
## 9 RN Exam complete 6550
## 10 RN Exam deficient 3625
## 11 RN-Temp Temporary complete 3797
#Create stacked chart like Connie's
rn_graph <- ca_weeks_to_license4 %>%
#filter (data_state==state) %>%
filter (lic_type %in% c("RN")) %>% #"LPN",
filter (standard_app_type %in% c("Exam", "Endorsement")) %>%
arrange (standard_app_type, deficient, week_count) %>%
# filter (standard_app_type == "Exam") %>%
# filter (!(standard_app_type %in% c("Reinstatement", "Renewal", "Exam-retest"))) %>%
ggplot(aes(x = week_count, y = n, group=lic_type, fill=standard_app_type)) +
geom_bar(stat='identity', position='stack') +
facet_wrap (~ deficient, ncol=1, scales='free',
labeller = label_wrap_gen(multi_line=FALSE)) +
# scale_x_continuous(breaks=c(0,30, 60, 90, 120)) +
scale_x_continuous(breaks=seq(0, 52, by = 4), #(365 / 7 / 12))
# labels=as.character(seq(0, 12, by = 1)),
limits=c(-0.5, 52)) +
#Add padding so they appear equal width
scale_y_continuous(breaks=seq(0,850, by = 200),#900 Jan-sept #1500 fullyear
limits=c(0,850)
#labels = function(x) stringr::str_pad(x, width = 4, pad = "0")
) +
#Add median lines
geom_vline(data = (ca_weeks_to_license5 %>% filter (lic_type=="RN" & standard_app_type %in% c("Exam", "Endorsement"))), mapping = aes(xintercept = med_weeks)) +
# geom_vline(data = (ca_weeks_to_license5 %>% filter (lic_type=="RN" & standard_app_type %in% c("Exam", "Endorsement"))), mapping = aes(xintercept = mean_weeks), color="steelblue", linetype="twodash") +
# geom_vline (xintercept = 90, linetype = "dashed", colour = "black") +
labs(x="Weeks from application to licensure",
y="Count of licensees",
title=paste("New registered nurse licenses issued in CA in 2021"),
fill = "Application Type",
#subtitle="Weeks from application submission to license issuance",
caption="Source: California Board of Registered Nursing\nVertical lines represent that group's median processing time.") +#\nIncludes licenses issued Jan. 1 to Sept. 23, 2021.") +
theme(axis.text.x = element_text(angle=60, hjust=1)) +
theme(legend.position = "right")
print(rn_graph)
NPR’s chart below
#Can check how many should be in each week
ca_weeks_to_license4 %>%
filter (lic_type=="RN" & standard_app_type=="Exam")
## # A tibble: 183 × 6
## data_state lic_type standard_app_type deficient week_count n
## <chr> <chr> <chr> <chr> <dbl> <int>
## 1 CA RN Exam complete 0 2
## 2 CA RN Exam complete 1 3
## 3 CA RN Exam complete 2 6
## 4 CA RN Exam complete 3 4
## 5 CA RN Exam complete 4 15
## 6 CA RN Exam complete 5 45
## 7 CA RN Exam complete 6 108
## 8 CA RN Exam complete 7 179
## 9 CA RN Exam complete 8 248
## 10 CA RN Exam complete 9 318
## # … with 173 more rows
How many licenses are cut off of right side of chart? 734 nurses
ca_weeks_to_license4 %>%
filter (week_count > 48 & lic_type == "RN") %>%
group_by (lic_type, standard_app_type) %>%
summarize (total = sum(n)) %>%
adorn_totals()
## lic_type standard_app_type total
## RN Endorsement 443
## RN Exam 291
## Total - 734