Source code for medicaid_utils.adapted_algorithms.py_ip_pqi.data_generation

import os

import pandas as pd

package_folder, filename = os.path.split(__file__)
data_folder = os.path.join(package_folder, "data")


def generate_dxgrp_file() -> None:
    """
    Generate the PQI diagnosis group reference file.

    Creates a CSV file (dxgrp.csv) containing ICD-9 diagnosis code groups
    used by the Prevention Quality Indicators algorithm. The file is saved
    to the package data directory (``data_folder``), which must already
    exist.

    The file has one row per diagnosis group and four columns:
    ``primary_outcome`` (label), ``lst_dx`` (comma-separated codes with all
    spaces removed), ``var_name`` and ``comments``.

    Examples
    --------
    >>> generate_dxgrp_file()  # doctest: +SKIP
    """
    columns = ["primary_outcome", "lst_dx", "var_name", "comments"]

    # One tuple per output row: (primary_outcome, codes, var_name, comments).
    # Codes are kept exactly as listed originally, including the space
    # padding on the shorter codes; the padding is removed when the
    # comma-separated string is built below.
    # NOTE(review): a few labels look like typos ("Populaton", "XCLUDE",
    # "Kidney primary_outcome"). They are written to the CSV, so they are
    # preserved byte-for-byte here -- confirm nothing reads them before
    # correcting.
    dx_groups = [
        (
            "Diabetes with Short Term Complications",
            [
                "25010", "25011", "25012", "25013", "25020", "25021",
                "25022", "25023", "25030", "25031", "25032", "25033",
            ],
            "ACDIASD",
            "Diabetes short term complication rate (ACDIAS) - ACSC #1",
        ),
        (
            "Gastroenteritis",
            [
                "00861", "00862", "00863", "00864", "00865", "00866",
                "00867", "00869", "0088 ", "0090 ", "0091 ", "0092 ",
                "0093 ", "5589 ",
            ],
            "ACPGASD",
            "",
        ),
        (
            "Perforated Appendix",
            ["5400 ", "5401 "],
            "ACSAPPD",
            "Perforated Appendix rate (ACSAPP) - ACSC #2",
        ),
        (
            "Appendicitis (Population at Risk)",
            ["5400 ", "5401 ", "5409 ", "541 "],
            "ACSAP2D",
            "",
        ),
        (
            "Diabetes with Long Term Complication",
            [
                "25040", "25041", "25042", "25043", "25050", "25051",
                "25052", "25053", "25060", "25061", "25062", "25063",
                "25070", "25071", "25072", "25073", "25080", "25081",
                "25082", "25083", "25090", "25091", "25092", "25093",
            ],
            "ACDIALD",
            "Diabetes long term complication rate (ACDIAL) - ACSC #3",
        ),
        (
            "Asthma",
            [
                "49300", "49301", "49302", "49310", "49311", "49312",
                "49320", "49321", "49322", "49381", "49382", "49390",
                "49391", "49392",
            ],
            "ACSASTD",
            # The trailing space is part of the original comment text.
            "Asthma rate (ACSAST) - ACSC #15 - Adult ",
        ),
        (
            "EXCLUDE: CYSTIC FIBROSIS AND ANOMALIES OF RESPIRATORY SYSTEM",
            [
                "27700", "27701", "27702", "27703", "27709", "51661",
                "51662", "51663", "51664", "51669", "74721", "7483 ",
                "7484 ", "7485 ", "74860", "74861", "74869", "7488 ",
                "7489 ", "7503 ", "7593 ", "7707 ",
            ],
            "RESPAN",
            "",
        ),
        (
            "COPD (#1)",
            [
                "4910 ", "4911 ", "49120", "49121", "4918 ", "4919 ",
                "4920 ", "4928 ", "494 ", "4940 ", "4941 ", "496 ",
            ],
            "ACCOPDD",
            "COPD rate (ACCOPD) - ACSC #5",
        ),
        (
            "COPD (#2)",
            ["4660 ", "490 "],
            "ACCPD2D",
            "",
        ),
        (
            "Hypertension",
            [
                "4010 ", "4019 ", "40200", "40210", "40290", "40300",
                "40310", "40390", "40400", "40410", "40490",
            ],
            "ACSHYPD",
            "Hypertension rate (ACSHYP) - ACSC #7",
        ),
        (
            "Exclude: Stage I-IV Kidney primary_outcome",
            ["40300", "40310", "40390", "40400", "40410", "40490"],
            "ACSHY2D",
            "",
        ),
        (
            "EXCLUDE: CHRONIC RENAL FAILURE",
            [
                "40300", "40301", "40310", "40311", "40390", "40391",
                "40400", "40401", "40402", "40403", "40410", "40411",
                "40412", "40413", "40490", "40491", "40492", "40493",
                "585 ", "5855 ", "5856 ",
            ],
            "CRENLFD",
            "",
        ),
        (
            "Renal Failure",
            ["5845", "5846", "5847", "5848", "5849", "586 ", "9975"],
            "PHYSIDB",
            "",
        ),
        (
            "Congestive Heart Failure",
            [
                "39891", "40201", "40211", "40291", "40401", "40403",
                "40411", "40413", "40491", "40493", "4280 ", "4281 ",
                "42820", "42821", "42822", "42823", "42830", "42831",
                "42832", "42833", "42840", "42841", "42842", "42843",
                "4289 ",
            ],
            "ACSCHFD",
            "Congestive Heart Failure rate (ACSCHF) - ACSC #8",
        ),
        (
            "Congestive Heart Failure",
            [
                "39891", "4280 ", "4281 ", "42820", "42821", "42822",
                "42823", "42830", "42831", "42832", "42833", "42840",
                "42841", "42842", "42843", "4289 ",
            ],
            "ACSCH2D",
            "",
        ),
        (
            "Liveborn (Populaton at Risk)",
            [
                "V3000", "V3001", "V3100", "V3101", "V3200", "V3201",
                "V3300", "V3301", "V3400", "V3401", "V3500", "V3501",
                "V3600", "V3601", "V3700", "V3701", "V3900", "V3901",
            ],
            "LIVEBND",
            "Low Birth Weight rate (ACSLBW) - ACSC #9",
        ),
        (
            "Liveborn (Populaton at Risk)",
            ["V290", "V291", "V292", "V293", "V298", "V299"],
            "V29D",
            "",
        ),
        (
            "Liveborn (Populaton at Risk)",
            [
                "V301 ", "V302 ", "V311 ", "V312 ", "V321 ", "V322 ",
                "V331 ", "V332 ", "V341 ", "V342 ", "V351 ", "V352 ",
                "V361 ", "V362 ", "V371 ", "V372 ", "V391 ", "V392 ",
            ],
            "LIVEB2D",
            "",
        ),
        (
            "Dehydration",
            ["2765 ", "27650", "27651", "27652"],
            "ACSDEHD",
            "Dehydration rate (ACSDEH) - ACSC #10",
        ),
        (
            "Bacterial Pneumonia",
            [
                "481 ", "4822 ", "48230", "48231", "48232", "48239",
                "48241", "48242", "4829 ", "4830 ", "4831 ", "4838 ",
                "485 ", "486 ",
            ],
            "ACSBACD",
            "Bacterial Pneumonia rate (ACSBAC) - ACSC #11",
        ),
        (
            "Exclude: Sickle Cell",
            [
                "28241", "28242", "28260", "28261", "28262", "28263",
                "28264", "28268", "28269",
            ],
            "ACSBA2D",
            "",
        ),
        (
            "HYPEROSMOLALITY AND /OR HYPERNATREMIA",
            ["2760 "],
            "HYPERID",
            "",
        ),
        (
            "Urinary Infection",
            [
                "59010", "59011", "5902 ", "5903 ", "59080", "59081",
                "5909 ", "5950 ", "5959 ", "5990 ",
            ],
            "ACSUTID",
            "Urinary Infection rate (ACSUTI) - ACSC #12",
        ),
        (
            "EXCLUDE: IMMUNOCOMPROMISED",
            [
                "042 ", "1363 ", "1992 ", "23873", "23876", "23877",
                "23879", "260 ", "261 ", "262 ", "27900", "27901",
                "27902", "27903", "27904", "27905", "27906", "27909",
                "27910", "27911", "27912", "27913", "27919", "2792 ",
                "2793 ", "2794 ", "27941", "27949", "27950", "27951",
                "27952", "27953", "2798 ", "2799 ", "28409", "2841 ",
                "28411", "28412", "28419", "2880 ", "28800", "28801",
                "28802", "28803", "28809", "2881 ", "2882 ", "2884 ",
                "28850", "28851", "28859", "28953", "28983", "40301",
                "40311", "40391", "40402", "40403", "40412", "40413",
                "40492", "40493", "5793 ", "585 ", "5855 ", "5856 ",
                "9968 ", "99680", "99681", "99682", "99683", "99684",
                "99685", "99686", "99687", "99688", "99689", "V420 ",
                "V421 ", "V426 ", "V427 ", "V428 ", "V4281", "V4282",
                "V4283", "V4284", "V4289", "V451 ", "V4511", "V560 ",
                "V561 ", "V562 ",
            ],
            "IMMUNID",
            "",
        ),
        (
            "XCLUDE: KIDNEY OR URINARY TRACT DISORDER",
            [
                "59000", "59001", "59370", "59371", "59372", "59373",
                "7530 ", "75310", "75311", "75312", "75313", "75314",
                "75315", "75316", "75317", "75319", "75320", "75321",
                "75322", "75323", "75329", "7533 ", "7534 ", "7535 ",
                "7536 ", "7538 ", "7539 ",
            ],
            "KIDNEY",
            "",
        ),
        (
            "Angina",
            ["4111 ", "41181", "41189", "4130 ", "4131 ", "4139 "],
            "ACSANGD",
            "Angina (w/o procedure) rate (ACSANG) - ACSC #13",
        ),
        (
            "Diabetes uncontrolled",
            ["25002", "25003"],
            "ACDIAUD",
            "Diabetes uncontrolled rate (ACDIAU) - ACSC #14",
        ),
        (
            "Include only: Diabetes",
            [
                "25000", "25001", "25002", "25003", "25010", "25011",
                "25012", "25013", "25020", "25021", "25022", "25023",
                "25030", "25031", "25032", "25033", "25040", "25041",
                "25042", "25043", "25050", "25051", "25052", "25053",
                "25060", "25061", "25062", "25063", "25070", "25071",
                "25072", "25073", "25080", "25081", "25082", "25083",
                "25090", "25091", "25092", "25093",
            ],
            "ACSLEAD",
            "",
        ),
        (
            "Exclude: Trauma",
            [
                "8950 ", "8951 ", "8960 ", "8961 ", "8962 ", "8963 ",
                "8970 ", "8971 ", "8972 ", "8973 ", "8974 ", "8975 ",
                "8976 ", "8977 ",
            ],
            "ACLEA2D",
            "",
        ),
        (
            "EXCLUDE: TOE AMPUTATION PROCEDURE",
            ["8411 "],
            "TOEAMIP",
            "",
        ),
    ]

    # Build the whole table in one constructor call instead of growing a
    # DataFrame with one pd.concat per row. Spaces are stripped from the
    # joined code string only; labels and comments are written unchanged.
    df_pqi_dxgroup = pd.DataFrame(
        [
            (outcome, ",".join(codes).replace(" ", ""), var_name, comments)
            for outcome, codes, var_name, comments in dx_groups
        ],
        columns=columns,
    )
    df_pqi_dxgroup.to_csv(
        os.path.join(data_folder, "dxgrp.csv"),
        index=False,
    )
def generate_prgrp_file() -> None:
    """
    Generate the PQI procedure group reference file.

    Creates a CSV file (prgrp.csv) containing ICD-9 procedure code groups
    used by the Prevention Quality Indicators algorithm. The file is saved
    to the package data directory (``data_folder``), which must already
    exist.

    The file has one row per procedure group and four columns:
    ``primary_outcome`` (label), ``lst_pr`` (comma-separated codes with all
    spaces removed), ``var_name`` and ``comments``.

    Examples
    --------
    >>> generate_prgrp_file()  # doctest: +SKIP
    """
    columns = ["primary_outcome", "lst_pr", "var_name", "comments"]

    # One tuple per output row: (primary_outcome, codes, var_name, comments).
    # Codes are kept exactly as listed originally, including the space
    # padding on three-character codes; the padding is removed when the
    # comma-separated string is built below.
    pr_groups = [
        (
            "Haemodialysis",
            ["3895", "3927", "3929", "3942", "3943", "3993", "3994"],
            "ACSHYPP",
            "",
        ),
        (
            "Exclude: Cardiac Procedures",
            [
                "0050", "0051", "0052", "0053", "0054", "0056", "0057",
                "0066", "1751", "1752", "1755", "3500", "3501", "3502",
                "3503", "3504", "3505", "3506", "3507", "3508", "3509",
                "3510", "3511", "3512", "3513", "3514", "3520", "3521",
                "3522", "3523", "3524", "3525", "3526", "3527", "3528",
                "3531", "3532", "3533", "3534", "3535", "3539", "3541",
                "3542", "3550", "3551", "3552", "3553", "3554", "3555",
                "3560", "3561", "3562", "3563", "3570", "3571", "3572",
                "3573", "3581", "3582", "3583", "3584", "3591", "3592",
                "3593", "3594", "3595", "3596", "3597", "3598", "3599",
                "3601", "3602", "3603", "3604", "3605", "3606", "3607",
                "3609", "3610", "3611", "3612", "3613", "3614", "3615",
                "3616", "3617", "3619", "362 ", "363 ", "3631", "3632",
                "3633", "3634", "3639", "3691", "3699", "3731", "3732",
                "3733", "3734", "3735", "3736", "3737", "3741", "375 ",
                "3751", "3752", "3753", "3754", "3755", "3760", "3761",
                "3762", "3763", "3764", "3765", "3766", "3770", "3771",
                "3772", "3773", "3774", "3775", "3776", "3777", "3778",
                "3779", "3780", "3781", "3782", "3783", "3785", "3786",
                "3787", "3789", "3794", "3795", "3796", "3797", "3798",
                "3826",
            ],
            "ACSCARP",
            "",
        ),
        (
            "EXCLUDE: IMMUNOCOMPROMISED",
            [
                "0018", "335 ", "3350", "3351", "3352", "336 ", "375 ",
                "3751", "410 ", "4100", "4101", "4102", "4103", "4104",
                "4105", "4106", "4107", "4108", "4109", "5051", "5059",
                "5280", "5281", "5282", "5283", "5285", "5286", "5569",
            ],
            "IMMUNIP",
            "",
        ),
        (
            "Lower extremity amputation",
            [
                "8410", "8411", "8412", "8413", "8414", "8415", "8416",
                "8417", "8418", "8419",
            ],
            "ACSLEAP",
            "Lower extremity amputation rate(ACSCLEA) - ACSC #16",
        ),
    ]

    # Build the whole table in one constructor call instead of growing a
    # DataFrame with one pd.concat per row. Spaces are stripped from the
    # joined code string only; labels and comments are written unchanged.
    df_pqi_prgrp = pd.DataFrame(
        [
            (outcome, ",".join(codes).replace(" ", ""), var_name, comments)
            for outcome, codes, var_name, comments in pr_groups
        ],
        columns=columns,
    )
    df_pqi_prgrp.to_csv(
        os.path.join(data_folder, "prgrp.csv"),
        index=False,
    )
# Write both reference files (dxgrp.csv and prgrp.csv) to the data folder.
# NOTE(review): as shown here these calls are not behind an
# `if __name__ == "__main__":` guard, so they would also run whenever this
# module is imported -- confirm that is intended.
generate_dxgrp_file()
generate_prgrp_file()