# Source code for medicaid_utils.filters.claims.rx
"""This module has functions to add NDC code based indicator flags to claims"""
import dask.dataframe as dd
# [docs]
def flag_prescriptions(
    dct_ndc_codes: dict,
    df_claims: dd.DataFrame,
    ignore_missing_days_supply: bool = False,
) -> dd.DataFrame:
    """
    Flag claims whose NDC code belongs to one of the given code lists.

    One integer indicator column named ``rx_<condition_name>`` is added (or
    overwritten) per key of ``dct_ndc_codes``.

    Before comparison, both the supplied codes and the values of the ``NDC``
    column are converted to strings, stripped of spaces and left-padded with
    zeros to 12 characters, so 11-digit and 12-character representations of
    the same code match each other.

    Parameters
    ----------
    dct_ndc_codes : dict
        Dictionary of NDC codes. Should be in the format

        .. highlight:: python
        .. code-block:: python

            {condition_name: list of codes}

        Eg:

        .. highlight:: python
        .. code-block:: python

            {'buprenorphine': ['00378451905', '00378451993', '00378617005',
                               '00378617077']}

    df_claims : dd.DataFrame
        Claims dataframe. Must have an ``NDC`` column; ``DAYS_SUPPLY`` is
        read when ``ignore_missing_days_supply`` is False.
    ignore_missing_days_supply : bool, default=False
        If False, a claim is flagged only when its ``DAYS_SUPPLY`` value is
        numeric and greater than 0; claims with missing, non-numeric, negative
        or 0 days of supply are flagged as 0, and when the dataframe has no
        ``DAYS_SUPPLY`` column every flag is 0. If True, ``DAYS_SUPPLY`` is
        not consulted and claims are flagged on the NDC code alone.

    Returns
    -------
    dd.DataFrame
        ``df_claims`` with the ``rx_<condition_name>`` columns added.

    Examples
    --------
    >>> import pandas as pd
    >>> import dask.dataframe as dd
    >>> pdf = pd.DataFrame({
    ...     'MSIS_ID': ['A', 'B', 'C'],
    ...     'NDC': ['00378451905', '99999999999', '00378617005'],
    ...     'DAYS_SUPPLY': ['30', '10', '0'],
    ... }).set_index('MSIS_ID')
    >>> ddf = dd.from_pandas(pdf, npartitions=1)
    >>> dct_ndc = {'buprenorphine': ['00378451905', '00378617005']}
    >>> result = flag_prescriptions(dct_ndc, ddf)
    >>> result.compute()['rx_buprenorphine'].tolist()
    [1, 0, 0]

    When ``ignore_missing_days_supply`` is True, claims with zero or missing
    days of supply are still flagged:

    >>> result2 = flag_prescriptions(dct_ndc, ddf, ignore_missing_days_supply=True)
    >>> result2.compute()['rx_buprenorphine'].tolist()
    [1, 0, 1]
    """
    ndc_width = 12

    # Canonical form of the lookup codes: no spaces, zero-padded.
    dct_ndc_codes = {
        condn: [
            str(ndc_code).replace(" ", "").zfill(ndc_width)
            for ndc_code in ndc_codes
        ]
        for condn, ndc_codes in dct_ndc_codes.items()
    }

    has_days_supply = "DAYS_SUPPLY" in df_claims.columns
    if not dct_ndc_codes or not (ignore_missing_days_supply or has_days_supply):
        # Nothing to match, or days of supply is required but unavailable:
        # every requested flag is 0.
        return df_claims.assign(
            **{f"rx_{condn}": 0 for condn in dct_ndc_codes}
        )

    # Bring the claim values to the same canonical form as the lookup codes;
    # otherwise an 11-digit NDC can never equal its 12-character padded code.
    claim_ndc = (
        df_claims["NDC"]
        .astype(str)
        .str.replace(" ", "", regex=False)
        .str.zfill(ndc_width)
    )

    # The days-of-supply mask does not depend on the condition, so it is built
    # once. Unparseable values are coerced to NaN, which compares False.
    if ignore_missing_days_supply:
        has_valid_supply = None
    else:
        has_valid_supply = (
            dd.to_numeric(df_claims["DAYS_SUPPLY"], errors="coerce") > 0
        )

    dct_flags = {}
    for condn, ndc_codes in dct_ndc_codes.items():
        is_flagged = claim_ndc.isin(ndc_codes)
        if has_valid_supply is not None:
            is_flagged = is_flagged & has_valid_supply
        dct_flags[f"rx_{condn}"] = is_flagged.astype(int)

    return df_claims.assign(**dct_flags)