-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSM01 CIS-level extract.R
74 lines (70 loc) · 2.12 KB
/
SM01 CIS-level extract.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# This is an option for extracting CIS level data directly
# We need the data to be sorted and to take the first and last for
# each CIS - this is what causes issues.
# Install packages (if required)
# install.packages("odbc")
# install.packages("dplyr")
# install.packages("dbplyr")
# install.packages("janitor")
# Load Libraries
library(odbc)
library(dplyr)
library(dbplyr)
library(janitor)
# Create a connection to SMRA
smra_conn <- dbConnect(
drv = odbc(),
dsn = "SMRA",
uid = Sys.getenv("USER"),
pwd = rstudioapi::askForPassword("SMRA Password:")
)
cis_data <- tbl(smra_conn, in_schema("ANALYSIS", "SMR01_PI")) |>
# Use a semi_join to select only episode which are part of
# a CIS which has a discharge date in Q1-2 2023 with an emergency admission
# This ensures we get all the episodes from any relevant CISs
semi_join(
# Do any required filtering here
# e.g. HB / HSCP diag codes etc.
tbl(smra_conn, "SMR01_PI") |>
filter(
between(DISCHARGE_DATE, "01-APR-2023", "30-SEP-2023"),
ADMISSION_TYPE %in% c(20:22, 30:39)
),
by = c("LINK_NO", "CIS_MARKER")
) |>
# Use window_order + mutate + distinct
# This replicates arrange + summarise (which won't work for first/last)
group_by(LINK_NO, CIS_MARKER) |>
# Window order must come immediately before mutate
window_order(
LINK_NO,
CIS_MARKER,
ADMISSION_DATE,
DISCHARGE_DATE,
ADMISSION,
DISCHARGE,
URI
) |>
mutate(
cis_adm = first(ADMISSION_DATE),
cis_dis = last(DISCHARGE_DATE),
cis_adm_type = first(ADMISSION_TYPE)
) |>
ungroup() |>
# Now all episodes should have the same data (because of the mutate)
# We can use distinct to reduce to one row
distinct(LINK_NO, CIS_MARKER, .keep_all = TRUE) |>
# Now we have the rows we want, select only needed variables
# You might also want to do additional filtering here:
# e.g. Only CIS stays which began with an emergency adm_type
select(
LINK_NO,
CIS_MARKER,
cis_adm,
cis_dis,
cis_adm_type
) |>
# Tidy the variable names (optional, and could have gone anywhere)
clean_names() |>
show_query() |>
collect()