PUMS Demo
Introduction
This notebook demonstrates how to load the US Census American Community Survey (ACS) Public Use Microdata Sample (PUMS). The process closely parallels how we loaded and used US Census redistricting data in the SoMa DIS Demo and Seeing White notebooks.
Imports and configuration
[1]:
import os

import censusdis.data as ced
import censusdis.geography as cgeo
import censusdis.states
from censusdis.maps import ShapeReader
[2]:
# Read the Census API key from the CENSUS_API_KEY environment variable rather
# than hard-coding a secret in the notebook. Falls back to None (no key) when
# the variable is unset or empty.
CENSUS_API_KEY = os.environ.get("CENSUS_API_KEY") or None
[3]:
# Dataset identifier and vintage used for every metadata query, download,
# and shapefile lookup in this notebook.
DATASET = "acs/acs5/pums"
YEAR = 2020
[4]:
# State to analyze (MA). Used below to filter the download and to select the
# PUMA shapefile.
STATE = censusdis.states.MA
Query Metadata
First we will see what variables are available in the dataset.
[5]:
# Fetch variable metadata for this dataset and vintage, keyed by variable name.
# NOTE(review): the third argument is None, presumably because PUMS variables
# are not organized into named groups (AGEP below reports group 'N/A'); confirm
# against the censusdis documentation.
group = ced.variables.get_group(DATASET, YEAR, None)
# Show only the variable names.
group.keys()
[5]:
dict_keys(['WKW', 'FBATHP', 'DRIVESP', 'WGTP23', 'WGTP22', 'WGTP25', 'WGTP24', 'RACNH', 'FWATP', 'WGTP21', 'WGTP20', 'WGTP27', 'WGTP26', 'WGTP29', 'WGTP28', 'FBROADBNDP', 'FDRATXP', 'FWKWNP', 'HOTWAT', 'FWKHP', 'FJWDP', 'WORKSTAT', 'FRACP', 'FFULP', 'WGTP34', 'WGTP33', 'WGTP36', 'PINCP', 'WGTP35', 'FPOBP', 'WGTP30', 'WGTP32', 'STOV', 'FMHP', 'WGTP31', 'RACAIAN', 'WGTP38', 'WGTP37', 'WGTP39', 'PUBCOV', 'SRNT', 'SEX', 'WGTP45', 'WGTP44', 'FACCESSP', 'WGTP47', 'DOUT', 'WGTP46', 'WGTP41', 'WGTP40', 'OTHSVCEX', 'WGTP43', 'RACPI', 'WGTP42', 'INDP', 'WGTP49', 'WGTP48', 'PRIVCOV', 'SFN', 'FINTP', 'HUPAC', 'SFR', 'WGTP50', 'FBLDP', 'WGTP56', 'WGTP55', 'WGTP58', 'WGTP57', 'WGTP52', 'WGTP51', 'DEAR', 'WGTP54', 'DIS', 'WGTP53', 'ACR', 'VACS', 'FINSP', 'WGTP59', 'FMILPP', 'MARHYP', 'ADJHSG', 'PAP', 'WGTP7', 'PWGTP30', 'WGTP6', 'PWGTP31', 'HINCP', 'WGTP5', 'PWGTP32', 'WKWN', 'WGTP4', 'PWGTP33', 'PWGTP34', 'PWGTP35', 'WGTP9', 'PWGTP36', 'WGTP8', 'WGTP3', 'WGTP2', 'RACWHT', 'WGTP1', 'GASP', 'PWGTP26', 'PWGTP27', 'PWGTP28', 'PWGTP29', 'FBDSP', 'FWKLP', 'FSCHP', 'FMARHTP', 'PWGTP20', 'PWGTP21', 'GRPIP', 'PWGTP22', 'PWGTP23', 'PWGTP24', 'PWGTP25', 'JWTRNS', 'FRNTMP', 'FOTHSVCEXP', 'CIT', 'LAPTOP', 'FMRGIP', 'JWRIP', 'PWGTP15', 'FCOMPOTHXP', 'PWGTP16', 'FSEMP', 'PWGTP17', 'PWGTP18', 'PWGTP19', 'ENG', 'FLANP', 'PWGTP10', 'PWGTP11', 'FCITWP', 'PWGTP12', 'PWGTP13', 'PWGTP14', 'FSSIP', 'FS', 'DIVISION', 'WRK', 'HICOV', 'DRATX', 'SMOCP', 'VPS', 'NWLA', 'FACRP', 'SCIENGRLP', 'FJWTRNSP', 'WGTP12', 'WGTP11', 'WGTP14', 'WGTP13', 'AGS', 'FSTOVP', 'WGTP10', 'WGTP19', 'WGTP16', 'WGTP15', 'WGTP18', 'WGTP17', 'FMARHWP', 'FTELP', 'PWGTP73', 'PWGTP74', 'QTRBIR', 'PWGTP75', 'FPERNP', 'PWGTP76', 'PWGTP77', 'PWGTP78', 'PWGTP79', 'PWGTP70', 'PWGTP71', 'PWGTP72', 'FDISP', 'PWGTP', 'WAGP', 'RWAT', 'PWGTP62', 'PWGTP63', 'PWGTP64', 'PWGTP65', 'PWGTP66', 'PWGTP67', 'PWGTP68', 'PWGTP69', 'FPLMPRP', 'TAXAMT', 'ANC2P', 'NWLK', 'PWGTP60', 'FPUBCOVP', 'PWGTP61', 'REGION', 'FMARHYP', 'FRETP', 'SMP', 'PWGTP59', 
'FTENP', 'BLD', 'MAR', 'SMX', 'FVEHP', 'PWGTP51', 'PWGTP52', 'PWGTP53', 'PWGTP54', 'PWGTP55', 'GASFP', 'PWGTP56', 'PWGTP57', 'PWGTP58', 'VALP', 'PWGTP50', 'FDEYEP', 'FCITP', 'RACNUM', 'PWGTP48', 'PWGTP49', 'SPORDER', 'FANCP', 'PWGTP40', 'PWGTP41', 'DRAT', 'PWGTP42', 'PWGTP43', 'MRGI', 'ESP', 'PWGTP44', 'WGTP', 'PWGTP45', 'OCCP', 'ESR', 'PWGTP46', 'PWGTP47', 'FMRGP', 'MRGP', 'COW', 'TABLET', 'MRGX', 'MULTG', 'MRGT', 'PWGTP37', 'FOCCP', 'PWGTP38', 'PWGTP39', 'FREFRP', 'WGTP61', 'WGTP60', 'WGTP67', 'WGTP66', 'WGTP69', 'WGTP68', 'WGTP63', 'FRWATP', 'WGTP62', 'WGTP65', 'WGTP64', 'RAC1P', 'RNTM', 'DREM', 'MIGSP', 'FHICOVP', 'NWRE', 'FDIALUPP', 'RNTP', 'WGTP70', 'HUPAOC', 'WGTP72', 'WGTP71', 'WGTP78', 'MV', 'WGTP77', 'WGTP79', 'WGTP74', 'WGTP73', 'WGTP76', 'WGTP75', 'FAGSP', 'ANC', 'OIP', 'WGTP80', 'NP', 'NR', 'LNGI', 'ANC1P', 'HISPEED', 'PLM', 'RAC3P', 'OC', 'LANP', 'FLAPTOPP', 'FPRIVCOVP', 'LANX', 'HUPARC', 'FMVP', 'SVAL', 'PWGTP80', 'RWATPR', 'FWKWP', 'RAC2P', 'FPOWSP', 'SSP', 'R18', 'NAICSP', 'WATFP', 'RMSP', 'FDRATP', 'BDSP', 'FGASP', 'RESMODE', 'FSEXP', 'MHP', 'REFR', 'POWSP', 'FPLMP', 'JWAP', 'DIALUP', 'ELEFP', 'RELSHIPP', 'MIG', 'RC', 'MIL', 'WAOB', 'FHINS7P', 'RT', 'INTP', 'RACASN', 'FRELSHIPP', 'FHISPEEDP', 'FINDP', 'FPAP', 'JWMNP', 'FHINS6P', 'FKITP', 'INSP', 'ST', 'FFINCP', 'YBL', 'FINCP', 'YOEP', 'HINS6', 'FSMXHP', 'HINS7', 'FTABLETP', 'FLANXP', 'GRNTP', 'FCOWP', 'FJWMNP', 'HINS1', 'HINS2', 'HINS3', 'HINS4', 'HINS5', 'FDDRSP', 'FWAGP', 'FGRNTP', 'PUMA', 'FHINS5P', 'FSCHGP', 'FHINS5C', 'FVACSP', 'FHOTWATP', 'R60', 'FMARHDP', 'FMIGSP', 'R65', 'MARHT', 'FHINS4P', 'SERIALNO', 'DECADE', 'MARHW', 'MARHM', 'FGCRP', 'FHINS4C', 'PSF', 'RACSOR', 'NOC', 'POBP', 'CPLT', 'TYPEHUGQ', 'NOP', 'FSMARTPHONP', 'FULP', 'FHINS3P', 'SINK', 'KIT', 'FVALP', 'CONCAT_ID', 'JWDP', 'WKEXREL', 'DPHY', 'FWRKP', 'NPF', 'FHINS3C', 'FDEARP', 'NPP', 'FHINS2P', 'FRWATPRP', 'FSCHLP', 'MLPCD', 'FPARC', 'MIGPUMA', 'HUGCL', 'DDRS', 'MARHD', 'FMRGXP', 'SATELLITE', 'POVPIP', 'FULFP', 'BATH', 'WATP', 
'GCM', 'FSATELLITEP', 'GCL', 'FHINS1P', 'WKHP', 'RECORD_TYPE', 'GCR', 'FRNTP', 'NRC', 'HFL', 'FSMP', 'ACCESSINET', 'ADJINC', 'FRMSP', 'POWPUMA', 'PARTNER', 'FELEP', 'CONP', 'FMIGP', 'FFSP', 'FGCMP', 'HISP', 'FESRP', 'HHL', 'AGEP', 'DEYE', 'SEMP', 'HHT', 'OCPIP', 'FGCLP', 'FENGP', 'SCHG', 'FTAXP', 'MSP', 'RACBLK', 'FMRGTP', 'FAGEP', 'SCHL', 'PWGTP9', 'FER', 'MLPFG', 'PWGTP8', 'NATIVITY', 'PWGTP7', 'SMARTPHONE', 'PWGTP6', 'FES', 'PWGTP5', 'PWGTP4', 'VEH', 'PWGTP3', 'FFERP', 'PWGTP2', 'PWGTP1', 'HHT2', 'FYOEP', 'FDREMP', 'NWAB', 'FCONP', 'FYBLP', 'FOIP', 'FHINCP', 'FHISP', 'BROADBND', 'NWAV', 'FDPHYP', 'PAOC', 'FFODP', 'FMARHMP', 'SSIP', 'FJWRIP', 'COMPOTHX', 'FSINKP', 'ELEP', 'FHFLP', 'WIF', 'FSMXSP', 'FOD2P', 'RETP', 'FSSP', 'PLMPRP', 'SCIENGP', 'CITWP', 'FMILSP', 'FPINCP', 'FOD1P', 'FMARP', 'SOCP', 'MLPE', 'MLPH', 'MLPA', 'FSMOCP', 'MLPB', 'FDOUTP', 'PERNP', 'WKL', 'SCH', 'TEL', 'TEN', 'MLPI', 'MLPJ', 'MLPK'])
[6]:
# Inspect the metadata for the age variable; its "suggested-weight" entry is
# used further down to choose the weight column.
group["AGEP"]
[6]:
{'label': 'Age',
'predicateType': 'int',
'group': 'N/A',
'limit': 0,
'suggested-weight': 'PWGTP',
'values': {'item': {'0': 'Under 1 year'},
'range': [{'min': '1',
'max': '99',
'description': '1 to 99 years (Top-coded)'}]},
'name': 'AGEP'}
Next we will see what geographies are available. Note that PUMS data is available for far fewer geographic hierarchies than the full ACS5 dataset.
[7]:
# List the geography hierarchies at which this dataset/vintage can be queried.
cgeo.geo_path_snake_specs(DATASET, YEAR)
[7]:
{'020': ['region'],
'030': ['division'],
'040': ['state'],
'795': ['state', 'public_use_microdata_area']}
Query Age and its Suggested Weight at the PUMA Level
[8]:
# Variables to download; the weight column for each is looked up from the
# variable metadata below.
query_variables = ["AGEP"]
[9]:
# Map each queried variable to the weight column its metadata recommends.
variable_weights = {
    variable: group[variable]["suggested-weight"] for variable in query_variables
}
# De-duplicate while keeping first-seen order. dict.fromkeys is used instead of
# set() because set iteration order for strings can change between kernel
# sessions, which would reorder the downloaded columns from run to run.
unique_weights = list(dict.fromkeys(variable_weights.values()))
[10]:
# Download the queried variables and their weight columns for every PUMA ("*")
# in the selected state.
df_acs5_pums = ced.download(
    DATASET,
    YEAR,
    query_variables + unique_weights,
    state=STATE,
    public_use_microdata_area="*",
    # Pass the key configured near the top of the notebook; it was previously
    # defined but never handed to the download call.
    api_key=CENSUS_API_KEY,
)
df_acs5_pums
[10]:
| STATE | PUBLIC_USE_MICRODATA_AREA | AGEP | PWGTP | |
|---|---|---|---|---|
| 0 | 25 | 3900 | 46 | 14 |
| 1 | 25 | 3900 | 46 | 12 |
| 2 | 25 | 3900 | 12 | 13 |
| 3 | 25 | 302 | 52 | 14 |
| 4 | 25 | 302 | 21 | 17 |
| ... | ... | ... | ... | ... |
| 335405 | 25 | 506 | 65 | 12 |
| 335406 | 25 | 4301 | 89 | 20 |
| 335407 | 25 | 400 | 51 | 5 |
| 335408 | 25 | 400 | 56 | 6 |
| 335409 | 25 | 400 | 5 | 6 |
335410 rows × 4 columns
[11]:
# Build a zero-padded, five-character PUMA code under the name "PUMACE" so the
# data can later be merged with the map on that column.
df_acs5_pums["PUMACE"] = df_acs5_pums["PUBLIC_USE_MICRODATA_AREA"].apply(
    lambda code: "{:05d}".format(int(code))
)
Compute Average Age in Each Area
[12]:
# Weighted mean age per PUMA: sum(age * weight) / sum(weight).
# Only the columns the computation needs are selected after the groupby, so
# apply() no longer operates on the grouping column. This removes the pandas
# DeprecationWarning about grouping columns being passed to apply().
weight_col = variable_weights["AGEP"]
df_average_age = (
    df_acs5_pums.groupby("PUMACE")[["AGEP", weight_col]]
    .apply(lambda g: (g["AGEP"] * g[weight_col]).sum() / g[weight_col].sum())
    .rename("avg_age")
    .reset_index()
)
/var/folders/78/t8hxf7gn23543dfqt1z2m2d00000gn/T/ipykernel_25316/3164945290.py:2: DeprecationWarning: DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.
df_acs5_pums.groupby("PUMACE")
Load map data and merge in our PUMS data
[13]:
# Shapefile reader for the same vintage as the data.
reader = ShapeReader(year=YEAR)
[14]:
# Read the "puma" shapefile for the selected state; the result is merged with
# the age data on its PUMACE column below.
gdf_puma = reader.read_shapefile(STATE, "puma")
[15]:
# Attach each PUMA's average age to its geometry; both frames share the
# "PUMACE" key column.
gdf_avg_age = gdf_puma.merge(df_average_age, on="PUMACE")
Plot the Map
[16]:
import matplotlib.pyplot as plt

# Size this one figure explicitly instead of changing the global rcParams
# default, so the setting cannot leak into other figures in the session.
fig, ax = plt.subplots(figsize=(12, 6))

# Choropleth of the weighted average age, drawn onto the axes created above.
gdf_avg_age.plot(
    column="avg_age",
    cmap="Greens",
    edgecolor="black",
    legend=True,
    linewidth=0.5,
    ax=ax,
)
ax.set_title(
    f"Average Age by Public Use Microdata Area in {censusdis.states.NAMES_FROM_IDS[STATE]}"
)

# Hide axis ticks and tick labels; only the map itself is of interest.
ax.tick_params(
    left=False,
    right=False,
    bottom=False,
    labelleft=False,
    labelbottom=False,
)
[ ]: