PUMS Demo
Introduction
This notebook demonstrates how to load US Census American Community Survey (ACS) Public-Use Microdata Samples (PUMS). The process closely parallels how we loaded and used US Census redistricting data in the SoMa DIS Demo and Seeing White notebooks.
Imports and configuration
[1]:
# So we can run from within the censusdis project and find the packages we need.
import os
import sys

# Absolute path of the directory one level above the current working directory.
parent_dir = os.path.abspath(os.path.join(os.path.curdir, os.path.pardir))

# Only append once, so re-running this cell does not pile up duplicate
# entries on sys.path.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)
[2]:
import censusdis.data as ced
import censusdis.geography as cgeo
import censusdis.states
from censusdis.maps import ShapeReader
[3]:
# Provide your API key through the CENSUS_API_KEY environment variable rather
# than hardcoding it in the notebook (saved notebooks are easy to leak).
# The value is None when the variable is not set.
CENSUS_API_KEY = os.environ.get("CENSUS_API_KEY")
[4]:
# Year and dataset identifier passed to the metadata, geography, and
# download calls below. YEAR is also used when constructing the ShapeReader.
YEAR = 2020
DATASET = "acs/acs5/pums"
[5]:
# State used for the data query, the shapefile lookup, and the plot title.
STATE = censusdis.states.MA
Query Metadata
First we will see what variables are available in the dataset.
[6]:
# Fetch variable metadata for the dataset. None is passed as the group name;
# presumably this selects every variable in the dataset - confirm against the
# censusdis documentation. The result is a mapping keyed by variable name.
group = ced.variables.get_group(DATASET, YEAR, None)
group.keys()
[6]:
dict_keys(['WKW', 'FBATHP', 'DRIVESP', 'WGTP23', 'WGTP22', 'WGTP25', 'WGTP24', 'RACNH', 'FWATP', 'WGTP21', 'WGTP20', 'WGTP27', 'WGTP26', 'WGTP29', 'WGTP28', 'FBROADBNDP', 'FDRATXP', 'FWKWNP', 'HOTWAT', 'FWKHP', 'FJWDP', 'WORKSTAT', 'FRACP', 'FFULP', 'WGTP34', 'WGTP33', 'WGTP36', 'PINCP', 'WGTP35', 'FPOBP', 'WGTP30', 'WGTP32', 'STOV', 'FMHP', 'WGTP31', 'RACAIAN', 'WGTP38', 'WGTP37', 'WGTP39', 'PUBCOV', 'SRNT', 'SEX', 'WGTP45', 'WGTP44', 'FACCESSP', 'WGTP47', 'DOUT', 'WGTP46', 'WGTP41', 'WGTP40', 'OTHSVCEX', 'WGTP43', 'RACPI', 'WGTP42', 'INDP', 'WGTP49', 'WGTP48', 'PRIVCOV', 'SFN', 'FINTP', 'HUPAC', 'SFR', 'WGTP50', 'FBLDP', 'WGTP56', 'WGTP55', 'WGTP58', 'WGTP57', 'WGTP52', 'WGTP51', 'DEAR', 'WGTP54', 'DIS', 'WGTP53', 'ACR', 'VACS', 'FINSP', 'WGTP59', 'FMILPP', 'MARHYP', 'ADJHSG', 'PAP', 'WGTP7', 'PWGTP30', 'WGTP6', 'PWGTP31', 'HINCP', 'WGTP5', 'PWGTP32', 'WKWN', 'WGTP4', 'PWGTP33', 'PWGTP34', 'PWGTP35', 'WGTP9', 'PWGTP36', 'WGTP8', 'WGTP3', 'WGTP2', 'RACWHT', 'WGTP1', 'GASP', 'PWGTP26', 'PWGTP27', 'PWGTP28', 'PWGTP29', 'FBDSP', 'FWKLP', 'FSCHP', 'FMARHTP', 'PWGTP20', 'PWGTP21', 'GRPIP', 'PWGTP22', 'PWGTP23', 'PWGTP24', 'PWGTP25', 'JWTRNS', 'FRNTMP', 'FOTHSVCEXP', 'CIT', 'LAPTOP', 'FMRGIP', 'JWRIP', 'PWGTP15', 'FCOMPOTHXP', 'PWGTP16', 'FSEMP', 'PWGTP17', 'PWGTP18', 'PWGTP19', 'ENG', 'FLANP', 'PWGTP10', 'PWGTP11', 'FCITWP', 'PWGTP12', 'PWGTP13', 'PWGTP14', 'FSSIP', 'FS', 'DIVISION', 'WRK', 'HICOV', 'DRATX', 'SMOCP', 'VPS', 'NWLA', 'FACRP', 'SCIENGRLP', 'FJWTRNSP', 'WGTP12', 'WGTP11', 'WGTP14', 'WGTP13', 'AGS', 'FSTOVP', 'WGTP10', 'WGTP19', 'WGTP16', 'WGTP15', 'WGTP18', 'WGTP17', 'FMARHWP', 'FTELP', 'PWGTP73', 'PWGTP74', 'QTRBIR', 'PWGTP75', 'FPERNP', 'PWGTP76', 'PWGTP77', 'PWGTP78', 'PWGTP79', 'PWGTP70', 'PWGTP71', 'PWGTP72', 'FDISP', 'PWGTP', 'WAGP', 'RWAT', 'PWGTP62', 'PWGTP63', 'PWGTP64', 'PWGTP65', 'PWGTP66', 'PWGTP67', 'PWGTP68', 'PWGTP69', 'FPLMPRP', 'TAXAMT', 'ANC2P', 'NWLK', 'PWGTP60', 'FPUBCOVP', 'PWGTP61', 'REGION', 'FMARHYP', 'FRETP', 'SMP', 'PWGTP59', 
'FTENP', 'BLD', 'MAR', 'SMX', 'FVEHP', 'PWGTP51', 'PWGTP52', 'PWGTP53', 'PWGTP54', 'PWGTP55', 'GASFP', 'PWGTP56', 'PWGTP57', 'PWGTP58', 'VALP', 'PWGTP50', 'FDEYEP', 'FCITP', 'RACNUM', 'PWGTP48', 'PWGTP49', 'SPORDER', 'FANCP', 'PWGTP40', 'PWGTP41', 'DRAT', 'PWGTP42', 'PWGTP43', 'MRGI', 'ESP', 'PWGTP44', 'WGTP', 'PWGTP45', 'OCCP', 'ESR', 'PWGTP46', 'PWGTP47', 'FMRGP', 'MRGP', 'COW', 'TABLET', 'MRGX', 'MULTG', 'MRGT', 'PWGTP37', 'FOCCP', 'PWGTP38', 'PWGTP39', 'FREFRP', 'WGTP61', 'WGTP60', 'WGTP67', 'WGTP66', 'WGTP69', 'WGTP68', 'WGTP63', 'FRWATP', 'WGTP62', 'WGTP65', 'WGTP64', 'RAC1P', 'RNTM', 'DREM', 'MIGSP', 'FHICOVP', 'NWRE', 'FDIALUPP', 'RNTP', 'WGTP70', 'HUPAOC', 'WGTP72', 'WGTP71', 'WGTP78', 'MV', 'WGTP77', 'WGTP79', 'WGTP74', 'WGTP73', 'WGTP76', 'WGTP75', 'FAGSP', 'ANC', 'OIP', 'WGTP80', 'NP', 'NR', 'LNGI', 'ANC1P', 'HISPEED', 'PLM', 'RAC3P', 'OC', 'LANP', 'FLAPTOPP', 'FPRIVCOVP', 'LANX', 'HUPARC', 'FMVP', 'SVAL', 'PWGTP80', 'RWATPR', 'FWKWP', 'RAC2P', 'FPOWSP', 'SSP', 'R18', 'NAICSP', 'WATFP', 'RMSP', 'FDRATP', 'BDSP', 'FGASP', 'RESMODE', 'FSEXP', 'MHP', 'REFR', 'POWSP', 'FPLMP', 'JWAP', 'DIALUP', 'ELEFP', 'RELSHIPP', 'MIG', 'RC', 'MIL', 'WAOB', 'FHINS7P', 'RT', 'INTP', 'RACASN', 'FRELSHIPP', 'FHISPEEDP', 'FINDP', 'FPAP', 'JWMNP', 'FHINS6P', 'FKITP', 'INSP', 'ST', 'FFINCP', 'YBL', 'FINCP', 'YOEP', 'HINS6', 'FSMXHP', 'HINS7', 'FTABLETP', 'FLANXP', 'GRNTP', 'FCOWP', 'FJWMNP', 'HINS1', 'HINS2', 'HINS3', 'HINS4', 'HINS5', 'FDDRSP', 'FWAGP', 'FGRNTP', 'PUMA', 'FHINS5P', 'FSCHGP', 'FHINS5C', 'FVACSP', 'FHOTWATP', 'R60', 'FMARHDP', 'FMIGSP', 'R65', 'MARHT', 'FHINS4P', 'SERIALNO', 'DECADE', 'MARHW', 'MARHM', 'FGCRP', 'FHINS4C', 'PSF', 'RACSOR', 'NOC', 'POBP', 'CPLT', 'TYPEHUGQ', 'NOP', 'FSMARTPHONP', 'FULP', 'FHINS3P', 'SINK', 'KIT', 'FVALP', 'CONCAT_ID', 'JWDP', 'WKEXREL', 'DPHY', 'FWRKP', 'NPF', 'FHINS3C', 'FDEARP', 'NPP', 'FHINS2P', 'FRWATPRP', 'FSCHLP', 'MLPCD', 'FPARC', 'MIGPUMA', 'HUGCL', 'DDRS', 'MARHD', 'FMRGXP', 'SATELLITE', 'POVPIP', 'FULFP', 'BATH', 'WATP', 
'GCM', 'FSATELLITEP', 'GCL', 'FHINS1P', 'WKHP', 'RECORD_TYPE', 'GCR', 'FRNTP', 'NRC', 'HFL', 'FSMP', 'ACCESSINET', 'ADJINC', 'FRMSP', 'POWPUMA', 'PARTNER', 'FELEP', 'CONP', 'FMIGP', 'FFSP', 'FGCMP', 'HISP', 'FESRP', 'HHL', 'AGEP', 'DEYE', 'SEMP', 'HHT', 'OCPIP', 'FGCLP', 'FENGP', 'SCHG', 'FTAXP', 'MSP', 'RACBLK', 'FMRGTP', 'FAGEP', 'SCHL', 'PWGTP9', 'FER', 'MLPFG', 'PWGTP8', 'NATIVITY', 'PWGTP7', 'SMARTPHONE', 'PWGTP6', 'FES', 'PWGTP5', 'PWGTP4', 'VEH', 'PWGTP3', 'FFERP', 'PWGTP2', 'PWGTP1', 'HHT2', 'FYOEP', 'FDREMP', 'NWAB', 'FCONP', 'FYBLP', 'FOIP', 'FHINCP', 'FHISP', 'BROADBND', 'NWAV', 'FDPHYP', 'PAOC', 'FFODP', 'FMARHMP', 'SSIP', 'FJWRIP', 'COMPOTHX', 'FSINKP', 'ELEP', 'FHFLP', 'WIF', 'FSMXSP', 'FOD2P', 'RETP', 'FSSP', 'PLMPRP', 'SCIENGP', 'CITWP', 'FMILSP', 'FPINCP', 'FOD1P', 'FMARP', 'SOCP', 'MLPE', 'MLPH', 'MLPA', 'FSMOCP', 'MLPB', 'FDOUTP', 'PERNP', 'WKL', 'SCH', 'TEL', 'TEN', 'MLPI', 'MLPJ', 'MLPK'])
[7]:
# Show the metadata for the age variable; its "suggested-weight" entry is
# what the weight lookup later in the notebook reads.
group["AGEP"]
[7]:
{'label': 'Age',
'predicateType': 'int',
'group': 'N/A',
'limit': 0,
'suggested-weight': 'PWGTP',
'values': {'item': {'0': 'Under 1 year'},
'range': [{'min': '1',
'max': '99',
'description': '1 to 99 years (Top-coded)'}]},
'name': 'AGEP'}
Next we will see what geographies are available. Note that PUMS data is available in far fewer geography hierarchies than the full ACS5 dataset.
[8]:
# List the geography hierarchies this dataset/year can be queried at; the
# names shown are the keyword arguments used in the download call below.
cgeo.geo_path_snake_specs(DATASET, YEAR)
[8]:
{'020': ['region'],
'030': ['division'],
'040': ['state'],
'795': ['state', 'public_use_microdata_area']}
Query Age and its Suggested Weight at the PUMA Level
[9]:
# Variables to download; weights for them are looked up from the metadata.
query_variables = ["AGEP"]
[10]:
# Map each queried variable to the weight column named in its metadata.
variable_weights = {
    variable: group[variable]["suggested-weight"] for variable in query_variables
}

# Deduplicate the weight columns. Sorting (instead of relying on set iteration
# order) keeps the downloaded column order stable from one kernel session to
# the next when more than one distinct weight is involved.
unique_weights = sorted(set(variable_weights.values()))
[11]:
# Download the requested variables together with the weight columns needed to
# aggregate them, for every public use microdata area in the chosen state.
df_acs5_pums = ced.download(
    DATASET,
    YEAR,
    query_variables + unique_weights,
    # Forward the API key configured at the top of the notebook; it was
    # previously defined but never passed to the query.
    api_key=CENSUS_API_KEY,
    state=STATE,
    public_use_microdata_area="*",
)
df_acs5_pums
[11]:
| STATE | PUBLIC_USE_MICRODATA_AREA | AGEP | PWGTP | |
|---|---|---|---|---|
| 0 | 25 | 3900 | 46 | 14 |
| 1 | 25 | 3900 | 46 | 12 |
| 2 | 25 | 3900 | 12 | 13 |
| 3 | 25 | 302 | 52 | 14 |
| 4 | 25 | 302 | 21 | 17 |
| ... | ... | ... | ... | ... |
| 335405 | 25 | 506 | 65 | 12 |
| 335406 | 25 | 4301 | 89 | 20 |
| 335407 | 25 | 400 | 51 | 5 |
| 335408 | 25 | 400 | 56 | 6 |
| 335409 | 25 | 400 | 5 | 6 |
335410 rows × 4 columns
[12]:
# Reformat and rename for easier merging with the map later.
df_acs5_pums["PUMACE"] = df_acs5_pums["PUBLIC_USE_MICRODATA_AREA"].apply(
lambda p: f"{int(p):05d}"
)
Compute Average Age in Each Area
[13]:
# Weighted mean age per PUMA: sum(age * weight) / sum(weight).
#
# This is computed with vectorised grouped sums rather than
# groupby(...).apply(lambda ...): it avoids the pandas deprecation warning
# about apply operating on the grouping columns, avoids running Python code
# once per group, and still yields a well-formed (empty) result when there
# are no rows.
age_weight_column = variable_weights["AGEP"]
puma_key = df_acs5_pums["PUMACE"]

weighted_age_sum = (
    (df_acs5_pums["AGEP"] * df_acs5_pums[age_weight_column]).groupby(puma_key).sum()
)
weight_sum = df_acs5_pums[age_weight_column].groupby(puma_key).sum()

# One row per PUMA with columns "PUMACE" and "avg_age".
df_average_age = (weighted_age_sum / weight_sum).rename("avg_age").reset_index()
Load map data and merge in our PUMS data
[14]:
# Shapefile reader configured with the same year as the data query.
reader = ShapeReader(year=YEAR)
[15]:
# Load the "puma" shapefile for the chosen state; the result is merged with
# the per-PUMA averages in the next step.
gdf_puma = reader.read_shapefile(STATE, "puma")
[16]:
# Attach the per-PUMA average age to the map geometries. The join is an
# inner join (the merge default, spelled out here), matching the shapefile's
# "PUMACE10" column against our zero-padded "PUMACE" key.
gdf_avg_age = gdf_puma.merge(
    df_average_age,
    how="inner",
    left_on="PUMACE10",
    right_on="PUMACE",
)
Plot the Map
[17]:
import matplotlib.pyplot as plt

# Default figure size for this (and any later) figure.
plt.rcParams["figure.figsize"] = (12, 6)

# Choropleth of average age, one polygon per PUMA, with a colour-bar legend.
fig, ax = plt.subplots()
gdf_avg_age.plot(
    column="avg_age",
    ax=ax,
    cmap="Greens",
    edgecolor="black",
    linewidth=0.5,
    legend=True,
)

ax.set_title(
    f"Average Age by Public Use Microdata Area in {censusdis.states.NAMES_FROM_IDS[STATE]}"
)

# Hide tick marks and tick labels on the map axes.
hidden_ticks = dict(left=False, right=False, bottom=False)
hidden_labels = dict(labelleft=False, labelbottom=False)
ax.tick_params(**hidden_ticks, **hidden_labels)
[17]: