#!/usr/bin/env python
"""This program implements the Pediatric Medical Complexity Algorithm (Tamara D. Simon, Dorothy Lyons, Peter Woodcox et
al 2014) to identify children with complex and non-complex chronic conditions using Medicaid claims data and to
distinguish them from children with neither chronic nor chronic complex conditions (healthy children). ‘Complex’ and
‘Non-Complex’ designations are assigned based on whether a child’s condition(s) identified by ICD-9 code(s) can be
considered chronic, malignant, or progressive, and whether multiple body systems are involved.
"""
__author__ = "Manoradhan Murugesan"
__email__ = "manorathan@uchicago.edu"
import os
import numpy as np
import pandas as pd
import dask.dataframe as dd
[docs]
class PediatricMedicalComplexity:
    """Create PMCA condition indicators and chronic condition categories.

    This class packages functions to create indicator variables for conditions of interest based on their chronic
    nature identified by Seattle Children's Research Institute subset from conditions defined by diagnostic codes
    outlined in the Chronic Illness and Disability Payment System (CDPS) version 5.3.

    Code definitions are read from CSV files in ``data_folder``. Each file is expected to have a ``condition``
    column plus ``include_icd9`` and ``exclude_icd9`` columns holding comma separated code prefixes.
    """

    package_folder, filename = os.path.split(__file__)
    data_folder = os.path.join(package_folder, "data")

    @staticmethod
    def _parse_code_prefixes(raw_codes) -> tuple:
        """Convert one comma separated cell of code prefixes into a tuple.

        Missing cells and empty fragments (e.g. from a trailing comma) are dropped. An empty prefix must never
        reach ``str.startswith``: every string starts with ``""``, so it would match all diagnosis codes.
        """
        if pd.isnull(raw_codes):
            return ()
        fragments = (code.strip() for code in str(raw_codes).split(","))
        return tuple(code for code in fragments if code)

    @classmethod
    def _read_code_table(cls, file_name: str) -> dict:
        """Load a code file from ``data_folder`` as ``{condition: (include_prefixes, exclude_prefixes)}``."""
        # dtype=str keeps codes as text so they are never coerced to numbers.
        pdf_codes = pd.read_csv(os.path.join(cls.data_folder, file_name), dtype=str)
        dct_rows = pdf_codes.set_index("condition").to_dict("index")
        return {
            condn: (
                cls._parse_code_prefixes(row["include_icd9"]),
                cls._parse_code_prefixes(row["exclude_icd9"]),
            )
            for condn, row in dct_rows.items()
        }

    @staticmethod
    def _split_diag_codes(lst_cd) -> list:
        """Split a comma separated diagnosis code string; a missing value yields an empty list."""
        is_missing = (
            lst_cd is None
            or lst_cd is pd.NA
            or (isinstance(lst_cd, float) and np.isnan(lst_cd))
        )
        if is_missing:
            return []
        return lst_cd.split(",")

    @staticmethod
    def _count_matching_codes(lst_codes: list, include_codes: tuple, exclude_codes: tuple) -> int:
        """Count codes that start with an include prefix and with no exclude prefix.

        ``str.startswith`` with an empty tuple is always False, so an empty ``exclude_codes`` excludes nothing
        and an empty ``include_codes`` matches nothing.
        """
        return sum(
            1
            for cd in lst_codes
            if cd.startswith(include_codes) and not cd.startswith(exclude_codes)
        )

    @classmethod
    def _append_condition_columns(
        cls,
        pdf: pd.DataFrame,
        diag_cd_lst_col: str,
        dct_code_sets: dict,
        lst_body_systems: list,
    ) -> pd.DataFrame:
        """Append count and indicator columns to one pandas partition.

        ``dct_code_sets`` maps each output count column to its ``(include, exclude)`` prefix tuples, in output
        column order. ``lst_body_systems`` lists the conditions that also get a ``{condition}_2h`` column.
        """
        # Split every row's code list once and reuse it for all conditions.
        lst_code_lists = [cls._split_diag_codes(lst_cd) for lst_cd in pdf[diag_cd_lst_col]]
        dct_counts = {
            condn: np.array(
                [
                    cls._count_matching_codes(lst_codes, include_codes, exclude_codes)
                    for lst_codes in lst_code_lists
                ],
                dtype=np.int64,
            )
            for condn, (include_codes, exclude_codes) in dct_code_sets.items()
        }
        pdf = pdf.assign(**dct_counts)

        dct_indicators = {
            "any_" + condn: (pdf[condn] > 0).astype(int)
            for condn in lst_body_systems + ["progressive"]
        }
        dct_indicators.update(
            {condn + "_2h": (pdf[condn] >= 2).astype(int) for condn in lst_body_systems}
        )
        return pdf.assign(**dct_indicators)

    @staticmethod
    def _append_categories(pdf: pd.DataFrame, lst_conditions: list) -> pd.DataFrame:
        """Append ``scount_*`` and ``cond_*`` columns to one pandas partition."""
        pdf = pdf.assign(
            scount_less=pdf[["any_" + condn for condn in lst_conditions]].sum(axis="columns"),
            scount_more=pdf[[condn + "_2h" for condn in lst_conditions]].sum(axis="columns"),
        )
        # Any progressive or malignant condition makes the child complex chronic in both versions.
        progressive_or_malignant = (pdf["any_progressive"] == 1) | (pdf["any_malign"] == 1)
        return pdf.assign(
            cond_less=np.select(
                [
                    (pdf["scount_less"] >= 2) | progressive_or_malignant,  # Complex Chronic
                    pdf["scount_less"] == 1,  # Non-complex Chronic
                ],
                [3, 2],
                default=1,
            ),
            # The more conservative version uses scount_more for BOTH tiers; falling back to scount_less for
            # the non-complex tier would label a child chronic on a single matching code.
            cond_more=np.select(
                [
                    (pdf["scount_more"] >= 2) | progressive_or_malignant,  # Complex Chronic
                    pdf["scount_more"] == 1,  # Non-complex Chronic
                ],
                [3, 2],
                default=1,
            ),
        )

    @classmethod
    def create_pmca_condition_counts(cls, df: dd.DataFrame, diag_cd_lst_col: str) -> dd.DataFrame:
        """
        Create PMCA condition counts from diagnosis codes.

        For each PMCA body system condition and for progressive conditions, counts how many diagnosis codes in
        the list start with one of the condition's include prefixes and with none of its exclude prefixes. It
        then creates binary indicator columns: ``any_{condition}`` (count > 0) and ``{condition}_2h``
        (count >= 2). ``progressive`` gets a count and an ``any_progressive`` column but no ``_2h`` column.
        Rows whose diagnosis list is missing get a count of 0 for every condition.

        Parameters
        ----------
        df : dask.DataFrame
            Patient-level DataFrame with a diagnosis code list column.
        diag_cd_lst_col : str
            Name of the column containing comma-separated diagnosis codes.

        Returns
        -------
        dask.DataFrame
            DataFrame with condition count and indicator columns appended.

        Examples
        --------
        >>> # Requires a dask DataFrame with diagnosis code list column
        >>> df = PediatricMedicalComplexity.create_pmca_condition_counts(  # doctest: +SKIP
        ...     df, 'LST_DIAG_CD_RAW')
        """
        dct_conditions = cls._read_code_table("pmca_condition_codes.csv")
        dct_progressives = cls._read_code_table("progressive_condition_codes.csv")
        lst_body_systems = list(dct_conditions)

        # All progressive conditions are pooled into a single include set and a single exclude set.
        tpl_progressive_include_codes = tuple(
            code for include_codes, _ in dct_progressives.values() for code in include_codes
        )
        tpl_progressive_exclude_codes = tuple(
            code for _, exclude_codes in dct_progressives.values() for code in exclude_codes
        )

        # Count columns are emitted as: conditions without exclusions, conditions with exclusions, progressive.
        lst_conditions_without_exclude_codes = [
            condn for condn in lst_body_systems if not dct_conditions[condn][1]
        ]
        lst_conditions_with_exclude_codes = [
            condn for condn in lst_body_systems if dct_conditions[condn][1]
        ]
        dct_code_sets = {
            condn: dct_conditions[condn]
            for condn in lst_conditions_without_exclude_codes + lst_conditions_with_exclude_codes
        }
        dct_code_sets["progressive"] = (
            tpl_progressive_include_codes,
            tpl_progressive_exclude_codes,
        )

        lst_new_columns = (
            list(dct_code_sets)
            + ["any_" + condn for condn in lst_body_systems + ["progressive"]]
            + [condn + "_2h" for condn in lst_body_systems]
        )
        return df.map_partitions(
            lambda pdf: cls._append_condition_columns(
                pdf, diag_cd_lst_col, dct_code_sets, lst_body_systems
            ),
            meta=df._meta.assign(**{col: 0 for col in lst_new_columns}),
        )

    @classmethod
    def get_pmca_chronic_condition_categories(cls, df: dd.DataFrame) -> dd.DataFrame:
        """
        Assign PMCA chronic condition categories.

        Creates cond_less (less conservative) and cond_more (more conservative) category columns with values
        1 (non-chronic), 2 (non-complex chronic), or 3 (complex chronic). The helper columns ``scount_less`` and
        ``scount_more`` are appended as well; they hold the number of conditions, excluding ``malign``, flagged
        by ``any_{condition}`` and ``{condition}_2h`` respectively.

        A row is complex chronic (3) when its count is >= 2 or ``any_progressive`` or ``any_malign`` is 1,
        non-complex chronic (2) when its count is exactly 1, and non-chronic (1) otherwise. ``cond_less`` uses
        ``scount_less`` and ``cond_more`` uses ``scount_more``.

        Parameters
        ----------
        df : dask.DataFrame
            DataFrame with PMCA condition indicator columns from
            ``create_pmca_condition_counts``.

        Returns
        -------
        dask.DataFrame
            DataFrame with cond_less and cond_more columns appended.

        Examples
        --------
        >>> # Requires a dask DataFrame with PMCA condition indicator columns
        >>> df = PediatricMedicalComplexity.get_pmca_chronic_condition_categories(  # doctest: +SKIP
        ...     df)
        """
        pdf_conditions = pd.read_csv(
            os.path.join(cls.data_folder, "pmca_condition_codes.csv"), dtype=str
        )
        # Malignancy is not counted as a body system; it is handled through any_malign instead.
        lst_conditions = [
            condn
            for condn in dict.fromkeys(pdf_conditions["condition"].tolist())
            if condn != "malign"
        ]
        return df.map_partitions(
            lambda pdf: cls._append_categories(pdf, lst_conditions),
            meta=df._meta.assign(scount_less=0, scount_more=0, cond_less=0, cond_more=0),
        )
[docs]
def pmca_chronic_conditions(df: dd.DataFrame, diag_cd_lst_col: str = "LST_DIAG_CD_RAW") -> dd.DataFrame:
    """
    Run the Pediatric Medical Complexity Algorithm on a patient level dataframe.

    The algorithm identifies children with complex and non-complex chronic conditions using Medicaid claims data
    and distinguishes them from children with neither chronic nor chronic complex conditions (healthy children).
    'Complex' and 'Non-Complex' designations are assigned based on whether a child's condition(s) identified by
    ICD-9 code(s) can be considered chronic, malignant, or progressive, and whether multiple body systems are
    involved.

    Definitions of the categories assigned by the algorithm:

    * The less conservative version (``pmca_cond_less``) calculates values as

      * 'Complex Chronic':
        1) more than one body system is involved, or
        2) one or more conditions are progressive, or
        3) one or more conditions are malignant
      * 'Non-complex Chronic':
        1) only one body system is involved, and
        2) the condition is not progressive or malignant
      * 'Non-Chronic':
        1) no body system indicators are present, and
        2) the condition is not progressive or malignant

    * The more conservative version (``pmca_cond_more``) calculates values as

      * 'Complex Chronic':
        1) more than one body system is involved, and each must be indicated in more than one claim, or
        2) one or more conditions are progressive, or
        3) one or more conditions are malignant
      * 'Non-complex Chronic':
        1) only one body system is indicated in more than one claim, and
        2) the condition is not progressive or malignant
      * 'Non-Chronic':
        1) no body system indicators are present in more than one claim, and
        2) the condition is not progressive or malignant

    Body systems of interest and the variables used to indicate them:

    ======================= =========
    Body system             variable
    ======================= =========
    cardiac                 cardiac
    craniofacial            cranio
    dermatological          derm
    endocrinological        endo
    gastrointestinal        gastro
    genetic                 genetic
    genitourinary           genito
    hematological           hemato
    immunological           immuno
    malignancy              malign
    mental health           mh
    metabolic               metab
    musculoskeletal         musculo
    neurological            neuro
    pulmonary-respiratory   pulresp
    renal                   renal
    ophthalmological        opthal
    otologic                otol
    otolaryngological       otolar
    ======================= =========

    Parameters
    ----------
    df : dask.DataFrame
        Patient level dask dataframe
    diag_cd_lst_col : str, default=LST_DIAG_CD_RAW
        Column name that has comma separated list of diagnosis codes in the observation period

    Returns
    -------
    dask.DataFrame
        The input columns followed by ``pmca_cond_less`` and ``pmca_cond_more``; the intermediate count and
        indicator columns are not returned.

    Examples
    --------
    >>> # Requires a patient-level dask DataFrame with diagnosis code list
    >>> df = pmca_chronic_conditions(df, 'LST_DIAG_CD_RAW')  # doctest: +SKIP
    """
    category_cols = ["cond_less", "cond_more"]
    prefixed_cols = ["pmca_" + name for name in category_cols]
    # Remember the caller's columns so every intermediate column can be dropped at the end.
    input_cols = list(df.columns)

    df_counts = PediatricMedicalComplexity.create_pmca_condition_counts(
        df, diag_cd_lst_col
    )
    df_categories = PediatricMedicalComplexity.get_pmca_chronic_condition_categories(
        df_counts
    )
    df_renamed = df_categories.rename(
        columns=dict(zip(category_cols, prefixed_cols))
    )
    return df_renamed[input_cols + prefixed_cols]