PUMS Demo

Introduction

This notebook demonstrates how to load US Census American Community Survey (ACS) Public-Use Microdata Samples (PUMS). The process closely parallels how we loaded and used US Census redistricting data in the SoMa DIS Demo and Seeing White notebooks.

Imports and configuration

[1]:
# So we can run from within the censusdis project and find the packages we need.
import os
import sys

# Absolute path of the parent of the current working directory; this is the
# directory that gets added to the import search path.
parent_dir = os.path.abspath(os.path.pardir)

# Guard the append so that re-running this cell does not pile up duplicate
# entries on sys.path.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)
[2]:
import censusdis.data as ced
import censusdis.geography as cgeo
import censusdis.states
from censusdis.maps import ShapeReader
[3]:
# Read the API key from the CENSUS_API_KEY environment variable so that a real
# key never has to be saved inside the notebook. When the variable is not set
# this is None; you can still assign a key here by hand if you prefer.
CENSUS_API_KEY = os.environ.get("CENSUS_API_KEY")
[4]:
# Dataset identifier for the ACS5 public-use microdata samples.
DATASET: str = "acs/acs5/pums"
# Year of the data to query.
YEAR: int = 2020
[5]:
# State (MA) whose PUMS records are downloaded and whose PUMA shapes are mapped.
STATE = censusdis.states.MA

Query Metadata

First we will see what variables are available in the dataset.

[6]:
# Fetch variable metadata for the dataset and year. No group name is given
# (None); e.g. the AGEP metadata shown further down reports its group as 'N/A'.
group = ced.variables.get_group(DATASET, YEAR, None)

# The keys of the result are the variable names.
group.keys()
[6]:
dict_keys(['WKW', 'FBATHP', 'DRIVESP', 'WGTP23', 'WGTP22', 'WGTP25', 'WGTP24', 'RACNH', 'FWATP', 'WGTP21', 'WGTP20', 'WGTP27', 'WGTP26', 'WGTP29', 'WGTP28', 'FBROADBNDP', 'FDRATXP', 'FWKWNP', 'HOTWAT', 'FWKHP', 'FJWDP', 'WORKSTAT', 'FRACP', 'FFULP', 'WGTP34', 'WGTP33', 'WGTP36', 'PINCP', 'WGTP35', 'FPOBP', 'WGTP30', 'WGTP32', 'STOV', 'FMHP', 'WGTP31', 'RACAIAN', 'WGTP38', 'WGTP37', 'WGTP39', 'PUBCOV', 'SRNT', 'SEX', 'WGTP45', 'WGTP44', 'FACCESSP', 'WGTP47', 'DOUT', 'WGTP46', 'WGTP41', 'WGTP40', 'OTHSVCEX', 'WGTP43', 'RACPI', 'WGTP42', 'INDP', 'WGTP49', 'WGTP48', 'PRIVCOV', 'SFN', 'FINTP', 'HUPAC', 'SFR', 'WGTP50', 'FBLDP', 'WGTP56', 'WGTP55', 'WGTP58', 'WGTP57', 'WGTP52', 'WGTP51', 'DEAR', 'WGTP54', 'DIS', 'WGTP53', 'ACR', 'VACS', 'FINSP', 'WGTP59', 'FMILPP', 'MARHYP', 'ADJHSG', 'PAP', 'WGTP7', 'PWGTP30', 'WGTP6', 'PWGTP31', 'HINCP', 'WGTP5', 'PWGTP32', 'WKWN', 'WGTP4', 'PWGTP33', 'PWGTP34', 'PWGTP35', 'WGTP9', 'PWGTP36', 'WGTP8', 'WGTP3', 'WGTP2', 'RACWHT', 'WGTP1', 'GASP', 'PWGTP26', 'PWGTP27', 'PWGTP28', 'PWGTP29', 'FBDSP', 'FWKLP', 'FSCHP', 'FMARHTP', 'PWGTP20', 'PWGTP21', 'GRPIP', 'PWGTP22', 'PWGTP23', 'PWGTP24', 'PWGTP25', 'JWTRNS', 'FRNTMP', 'FOTHSVCEXP', 'CIT', 'LAPTOP', 'FMRGIP', 'JWRIP', 'PWGTP15', 'FCOMPOTHXP', 'PWGTP16', 'FSEMP', 'PWGTP17', 'PWGTP18', 'PWGTP19', 'ENG', 'FLANP', 'PWGTP10', 'PWGTP11', 'FCITWP', 'PWGTP12', 'PWGTP13', 'PWGTP14', 'FSSIP', 'FS', 'DIVISION', 'WRK', 'HICOV', 'DRATX', 'SMOCP', 'VPS', 'NWLA', 'FACRP', 'SCIENGRLP', 'FJWTRNSP', 'WGTP12', 'WGTP11', 'WGTP14', 'WGTP13', 'AGS', 'FSTOVP', 'WGTP10', 'WGTP19', 'WGTP16', 'WGTP15', 'WGTP18', 'WGTP17', 'FMARHWP', 'FTELP', 'PWGTP73', 'PWGTP74', 'QTRBIR', 'PWGTP75', 'FPERNP', 'PWGTP76', 'PWGTP77', 'PWGTP78', 'PWGTP79', 'PWGTP70', 'PWGTP71', 'PWGTP72', 'FDISP', 'PWGTP', 'WAGP', 'RWAT', 'PWGTP62', 'PWGTP63', 'PWGTP64', 'PWGTP65', 'PWGTP66', 'PWGTP67', 'PWGTP68', 'PWGTP69', 'FPLMPRP', 'TAXAMT', 'ANC2P', 'NWLK', 'PWGTP60', 'FPUBCOVP', 'PWGTP61', 'REGION', 'FMARHYP', 'FRETP', 'SMP', 'PWGTP59', 
'FTENP', 'BLD', 'MAR', 'SMX', 'FVEHP', 'PWGTP51', 'PWGTP52', 'PWGTP53', 'PWGTP54', 'PWGTP55', 'GASFP', 'PWGTP56', 'PWGTP57', 'PWGTP58', 'VALP', 'PWGTP50', 'FDEYEP', 'FCITP', 'RACNUM', 'PWGTP48', 'PWGTP49', 'SPORDER', 'FANCP', 'PWGTP40', 'PWGTP41', 'DRAT', 'PWGTP42', 'PWGTP43', 'MRGI', 'ESP', 'PWGTP44', 'WGTP', 'PWGTP45', 'OCCP', 'ESR', 'PWGTP46', 'PWGTP47', 'FMRGP', 'MRGP', 'COW', 'TABLET', 'MRGX', 'MULTG', 'MRGT', 'PWGTP37', 'FOCCP', 'PWGTP38', 'PWGTP39', 'FREFRP', 'WGTP61', 'WGTP60', 'WGTP67', 'WGTP66', 'WGTP69', 'WGTP68', 'WGTP63', 'FRWATP', 'WGTP62', 'WGTP65', 'WGTP64', 'RAC1P', 'RNTM', 'DREM', 'MIGSP', 'FHICOVP', 'NWRE', 'FDIALUPP', 'RNTP', 'WGTP70', 'HUPAOC', 'WGTP72', 'WGTP71', 'WGTP78', 'MV', 'WGTP77', 'WGTP79', 'WGTP74', 'WGTP73', 'WGTP76', 'WGTP75', 'FAGSP', 'ANC', 'OIP', 'WGTP80', 'NP', 'NR', 'LNGI', 'ANC1P', 'HISPEED', 'PLM', 'RAC3P', 'OC', 'LANP', 'FLAPTOPP', 'FPRIVCOVP', 'LANX', 'HUPARC', 'FMVP', 'SVAL', 'PWGTP80', 'RWATPR', 'FWKWP', 'RAC2P', 'FPOWSP', 'SSP', 'R18', 'NAICSP', 'WATFP', 'RMSP', 'FDRATP', 'BDSP', 'FGASP', 'RESMODE', 'FSEXP', 'MHP', 'REFR', 'POWSP', 'FPLMP', 'JWAP', 'DIALUP', 'ELEFP', 'RELSHIPP', 'MIG', 'RC', 'MIL', 'WAOB', 'FHINS7P', 'RT', 'INTP', 'RACASN', 'FRELSHIPP', 'FHISPEEDP', 'FINDP', 'FPAP', 'JWMNP', 'FHINS6P', 'FKITP', 'INSP', 'ST', 'FFINCP', 'YBL', 'FINCP', 'YOEP', 'HINS6', 'FSMXHP', 'HINS7', 'FTABLETP', 'FLANXP', 'GRNTP', 'FCOWP', 'FJWMNP', 'HINS1', 'HINS2', 'HINS3', 'HINS4', 'HINS5', 'FDDRSP', 'FWAGP', 'FGRNTP', 'PUMA', 'FHINS5P', 'FSCHGP', 'FHINS5C', 'FVACSP', 'FHOTWATP', 'R60', 'FMARHDP', 'FMIGSP', 'R65', 'MARHT', 'FHINS4P', 'SERIALNO', 'DECADE', 'MARHW', 'MARHM', 'FGCRP', 'FHINS4C', 'PSF', 'RACSOR', 'NOC', 'POBP', 'CPLT', 'TYPEHUGQ', 'NOP', 'FSMARTPHONP', 'FULP', 'FHINS3P', 'SINK', 'KIT', 'FVALP', 'CONCAT_ID', 'JWDP', 'WKEXREL', 'DPHY', 'FWRKP', 'NPF', 'FHINS3C', 'FDEARP', 'NPP', 'FHINS2P', 'FRWATPRP', 'FSCHLP', 'MLPCD', 'FPARC', 'MIGPUMA', 'HUGCL', 'DDRS', 'MARHD', 'FMRGXP', 'SATELLITE', 'POVPIP', 'FULFP', 'BATH', 'WATP', 
'GCM', 'FSATELLITEP', 'GCL', 'FHINS1P', 'WKHP', 'RECORD_TYPE', 'GCR', 'FRNTP', 'NRC', 'HFL', 'FSMP', 'ACCESSINET', 'ADJINC', 'FRMSP', 'POWPUMA', 'PARTNER', 'FELEP', 'CONP', 'FMIGP', 'FFSP', 'FGCMP', 'HISP', 'FESRP', 'HHL', 'AGEP', 'DEYE', 'SEMP', 'HHT', 'OCPIP', 'FGCLP', 'FENGP', 'SCHG', 'FTAXP', 'MSP', 'RACBLK', 'FMRGTP', 'FAGEP', 'SCHL', 'PWGTP9', 'FER', 'MLPFG', 'PWGTP8', 'NATIVITY', 'PWGTP7', 'SMARTPHONE', 'PWGTP6', 'FES', 'PWGTP5', 'PWGTP4', 'VEH', 'PWGTP3', 'FFERP', 'PWGTP2', 'PWGTP1', 'HHT2', 'FYOEP', 'FDREMP', 'NWAB', 'FCONP', 'FYBLP', 'FOIP', 'FHINCP', 'FHISP', 'BROADBND', 'NWAV', 'FDPHYP', 'PAOC', 'FFODP', 'FMARHMP', 'SSIP', 'FJWRIP', 'COMPOTHX', 'FSINKP', 'ELEP', 'FHFLP', 'WIF', 'FSMXSP', 'FOD2P', 'RETP', 'FSSP', 'PLMPRP', 'SCIENGP', 'CITWP', 'FMILSP', 'FPINCP', 'FOD1P', 'FMARP', 'SOCP', 'MLPE', 'MLPH', 'MLPA', 'FSMOCP', 'MLPB', 'FDOUTP', 'PERNP', 'WKL', 'SCH', 'TEL', 'TEN', 'MLPI', 'MLPJ', 'MLPK'])
[7]:
# Inspect the metadata of a single variable. Note the "suggested-weight" entry,
# which is used below to choose the weight column to download.
group["AGEP"]
[7]:
{'label': 'Age',
 'predicateType': 'int',
 'group': 'N/A',
 'limit': 0,
 'suggested-weight': 'PWGTP',
 'values': {'item': {'0': 'Under 1 year'},
  'range': [{'min': '1',
    'max': '99',
    'description': '1 to 99 years (Top-coded)'}]},
 'name': 'AGEP'}

Next we will see what geographies are available. Note that PUMS data is available in far fewer geography hierarchies than the full ACS5 data set.

[8]:
# List the geography hierarchies available for this dataset and year.
cgeo.geo_path_snake_specs(DATASET, YEAR)
[8]:
{'020': ['region'],
 '030': ['division'],
 '040': ['state'],
 '795': ['state', 'public_use_microdata_area']}

Query Age and its Suggested Weight at the PUMA Level

[9]:
# Variables to download; their weight columns are looked up and added below.
query_variables = ["AGEP"]
[10]:
# Map each queried variable to the weight column its metadata suggests.
variable_weights = {
    variable: group[variable]["suggested-weight"] for variable in query_variables
}

# Several variables may share one weight, so de-duplicate. dict.fromkeys keeps
# first-seen order, which makes the downloaded column order the same on every
# run (a set would not guarantee that once there is more than one weight).
unique_weights = list(dict.fromkeys(variable_weights.values()))
[11]:
# Download the variables of interest together with their weights for every
# PUMA in the state. CENSUS_API_KEY from the configuration cell is passed along
# so that a key set there is actually used for the request.
df_acs5_pums = ced.download(
    DATASET,
    YEAR,
    query_variables + unique_weights,
    api_key=CENSUS_API_KEY,
    state=STATE,
    public_use_microdata_area="*",
)

df_acs5_pums
[11]:
STATE PUBLIC_USE_MICRODATA_AREA AGEP PWGTP
0 25 3900 46 14
1 25 3900 46 12
2 25 3900 12 13
3 25 302 52 14
4 25 302 21 17
... ... ... ... ...
335405 25 506 65 12
335406 25 4301 89 20
335407 25 400 51 5
335408 25 400 56 6
335409 25 400 5 6

335410 rows × 4 columns

[12]:
# Reformat and rename for easier merging with the map later.
# Reformat and rename for easier merging with the map later: PUMACE holds the
# PUMA code as a string zero-padded to five characters.
puma_codes = df_acs5_pums["PUBLIC_USE_MICRODATA_AREA"]
df_acs5_pums["PUMACE"] = puma_codes.astype(int).astype(str).str.zfill(5)

Compute Average Age in Each Area

[13]:
# Weight column suggested by the metadata for AGEP.
age_weight = variable_weights["AGEP"]

# Weighted mean age per PUMA: sum(age * weight) / sum(weight).
# The products are computed once for the whole frame and then summed per group,
# which avoids calling a Python lambda for every group (and the pandas warning
# about groupby.apply operating on the grouping column).
df_average_age = (
    df_acs5_pums.assign(weighted_age=df_acs5_pums["AGEP"] * df_acs5_pums[age_weight])
    .groupby("PUMACE")[["weighted_age", age_weight]]
    .sum()
    .pipe(lambda sums: sums["weighted_age"] / sums[age_weight])
    .rename("avg_age")
    .reset_index()
)

Load map data and merge in our PUMS data

[14]:
# Shape reader configured for the same year as the PUMS data.
reader = ShapeReader(year=YEAR)
[15]:
# Read the "puma" shapefile for the state; it is merged with the averages on
# its PUMACE column in the next step.
gdf_puma = reader.read_shapefile(STATE, "puma")
[17]:
# Both frames carry the PUMACE key, so join on it directly to attach each
# area's average age to its shape.
gdf_avg_age = gdf_puma.merge(df_average_age, on="PUMACE")

Plot the Map

[18]:
import matplotlib.pyplot as plt

# Default figure size used for the map.
plt.rcParams["figure.figsize"] = (12, 6)

# Shade each PUMA by its average age, with thin black outlines and a legend.
choropleth_style = {
    "cmap": "Greens",
    "edgecolor": "black",
    "legend": True,
    "linewidth": 0.5,
}
ax = gdf_avg_age.plot("avg_age", **choropleth_style)

title = f"Average Age by Public Use Microdata Area in {censusdis.states.NAMES_FROM_IDS[STATE]}"
ax.set_title(title)

# Turn off the axis ticks and tick labels.
hidden_tick_parts = ("left", "right", "bottom", "labelleft", "labelbottom")
ax.tick_params(**dict.fromkeys(hidden_tick_parts, False))
../_images/nb_PUMS_Demo_25_0.png
[17]: