# Source code for medicaid_utils.topics.obgyn.cohort_indicators

"""This module has functions used to flag cohort designations used in OB/GYN
studies"""
import os

import dask.dataframe as dd
import numpy as np
import pandas as pd

data_folder = os.path.join(os.path.dirname(__file__), "data")


def flag_religious_npis(df_claims: dd.DataFrame) -> dd.DataFrame:
    """
    Adds columns denoting categorization of NPIs in claims.

    The categorization comes from the bundled ``religious_provider_npis.xlsx``
    lookup, restricted to rows whose ``hosp_state`` is ``IL``. Claim NPIs are
    whitespace-stripped before being matched against the lookup.

    New columns:

    - catholic_npi: 0 or 1, 1 when the claim NPI has ``rel_aff`` 1 in the
      lookup (catholic hospital).
    - religious_npi: 0 or 1, 1 when the claim NPI has ``rel_aff`` 2 in the
      lookup (hospital with a religious affiliation).
      NOTE(review): NPIs with ``rel_aff`` 1 are not flagged here, so this looks
      like "religious, other than catholic" - confirm against the lookup's
      codebook.
    - secular_npi: 0 or 1, 1 when the claim NPI has ``rel_aff`` 3 in the
      lookup (hospital with no religious affiliation).
    - rural_npi: 1 when the claim NPI has ``urbn_rrl`` ``R`` in the lookup,
      0 when it has ``U``, and -1 when the NPI is in neither group (including
      NPIs that are absent from the lookup).

    Parameters
    ----------
    df_claims: dd.DataFrame
        Claims dataframe. Must have a string ``NPI`` column.

    Returns
    -------
    dd.DataFrame

    """
    pdf_religious_aff = pd.read_excel(
        os.path.join(data_folder, "religious_provider_npis.xlsx"),
        dtype="object",
        engine="openpyxl",
    )
    pdf_religious_aff = pdf_religious_aff.loc[
        pdf_religious_aff["hosp_state"].str.strip() == "IL"
    ]
    pdf_religious_aff = pdf_religious_aff.assign(
        # Kept as a float column: blank / non-numeric codes are coerced to NaN
        # and simply match none of the categories below. Casting to int here
        # would raise as soon as a single value was coerced.
        rel_aff=pd.to_numeric(pdf_religious_aff["rel_aff"], errors="coerce"),
        npi=pdf_religious_aff["npi"].astype(str).str.strip(),
    )

    # Build every lookup list once, up front, so the per-partition function
    # only closes over plain lists instead of the whole lookup dataframe.
    lst_cath_npi = pdf_religious_aff.loc[
        pdf_religious_aff["rel_aff"] == 1
    ].npi.tolist()
    lst_religious_npi = pdf_religious_aff.loc[
        pdf_religious_aff["rel_aff"] == 2
    ].npi.tolist()
    lst_nonreligious_npi = pdf_religious_aff.loc[
        pdf_religious_aff["rel_aff"] == 3
    ].npi.tolist()

    urban_rural_code = pdf_religious_aff["urbn_rrl"].str.strip().str.upper()
    lst_rural_npi = pdf_religious_aff.loc[urban_rural_code == "R"][
        "npi"
    ].tolist()
    lst_urban_npi = pdf_religious_aff.loc[urban_rural_code == "U"][
        "npi"
    ].tolist()

    def _flag_partition(pdf: pd.DataFrame) -> pd.DataFrame:
        """Add the four NPI indicator columns to a single partition."""
        npi = pdf["NPI"].str.strip()
        return pdf.assign(
            catholic_npi=npi.isin(lst_cath_npi).astype(int),
            religious_npi=npi.isin(lst_religious_npi).astype(int),
            secular_npi=npi.isin(lst_nonreligious_npi).astype(int),
            # First matching condition wins: rural -> 1, urban -> 0,
            # anything else -> -1.
            rural_npi=np.select(
                [npi.isin(lst_rural_npi), npi.isin(lst_urban_npi)],
                [1, 0],
                default=-1,
            ),
        )

    return df_claims.map_partitions(_flag_partition)
def flag_transfers(df_claims: dd.DataFrame) -> dd.DataFrame:
    """
    Adds an indicator column denoting whether the claim has a discharge status
    indicating a transfer. Currently only supports MAX files.

    New column:

    - transfer: 0 or 1, 1 when ``PATIENT_STATUS_CD``, read as a number, is one
      of 2, 3, 4, 5, 61, 65, 66 or 71. Blank or non-numeric status codes are
      coerced to NaN and therefore get 0.

    Parameters
    ----------
    df_claims: dd.DataFrame
        IP or LT claim file

    Returns
    -------
    dd.DataFrame

    """
    # Patient status codes treated as transfers.
    # NOTE(review): presumably the MAX discharge-status values for transfers
    # to another facility - confirm against the MAX data dictionary.
    transfer_status_codes = [2, 3, 4, 5, 61, 65, 66, 71]

    def _flag_partition(pdf: pd.DataFrame) -> pd.DataFrame:
        """Add the ``transfer`` indicator column to a single partition."""
        # Numeric comparison so that zero-padded codes such as "02" match 2.
        status_cd = pd.to_numeric(pdf["PATIENT_STATUS_CD"], errors="coerce")
        return pdf.assign(
            transfer=status_cd.isin(transfer_status_codes).astype(int)
        )

    return df_claims.map_partitions(_flag_partition)