PUMS Demo

Introduction

This notebook demonstrates how to load US Census American Community Survey (ACS) Public-Use Microdata Samples. The process is very much parallel to how we loaded and used US Census redistricting data in the SoMa DIS Demo and Seeing White notebooks.

Imports and configuration

[1]:
# So we can run from within the censusdis project and find the packages we need.
import os
import sys

# Make the parent of the current working directory importable. The guard keeps
# re-running this cell from appending the same entry to sys.path again.
_parent_dir = os.path.abspath(os.path.join(os.path.curdir, os.path.pardir))
if _parent_dir not in sys.path:
    sys.path.append(_parent_dir)
[2]:
import os
import os.path

import pandas as pd

import censusdis.data as ced
import censusdis.geography as cgeo
import censusdis.states
from censusdis.maps import ShapeReader, clip_to_states

import divintseg as dis
[3]:
# Read the API key from the CENSUS_API_KEY environment variable so the secret
# is never saved inside the notebook. The value is None when the variable is
# not set, which matches the previous default.
CENSUS_API_KEY = os.environ.get("CENSUS_API_KEY")
[4]:
# Survey year and dataset path shared by the metadata, geography, and
# download queries in this notebook.
YEAR = 2020
DATASET = "acs/acs5/pums"
[5]:
# State to analyze; used for the data download, the shapefile, and the plot title.
STATE = censusdis.states.STATE_MA

Query Metadata

First we will see what variables are available in the dataset.

[6]:
# Fetch variable metadata for the dataset and year.
# NOTE(review): the third argument (the group name) is None; judging by the
# output this returns every variable in the dataset — confirm against the
# censusdis documentation.
group = ced.variables.get_group(DATASET, YEAR, None)

# Show the names of all variables that were returned.
group.keys()
[6]:
dict_keys(['for', 'in', 'ucgid', 'WKW', 'FBATHP', 'DRIVESP', 'WGTP23', 'WGTP22', 'WGTP25', 'WGTP24', 'RACNH', 'FWATP', 'WGTP21', 'WGTP20', 'WGTP27', 'WGTP26', 'WGTP29', 'WGTP28', 'FBROADBNDP', 'FDRATXP', 'FWKWNP', 'HOTWAT', 'FWKHP', 'FJWDP', 'WORKSTAT', 'FRACP', 'FFULP', 'WGTP34', 'WGTP33', 'WGTP36', 'PINCP', 'WGTP35', 'FPOBP', 'WGTP30', 'WGTP32', 'STOV', 'FMHP', 'WGTP31', 'RACAIAN', 'WGTP38', 'WGTP37', 'WGTP39', 'PUBCOV', 'SRNT', 'SEX', 'WGTP45', 'WGTP44', 'FACCESSP', 'WGTP47', 'DOUT', 'WGTP46', 'WGTP41', 'WGTP40', 'OTHSVCEX', 'WGTP43', 'RACPI', 'WGTP42', 'INDP', 'WGTP49', 'WGTP48', 'PRIVCOV', 'SFN', 'FINTP', 'HUPAC', 'SFR', 'WGTP50', 'FBLDP', 'WGTP56', 'WGTP55', 'WGTP58', 'WGTP57', 'WGTP52', 'WGTP51', 'DEAR', 'WGTP54', 'DIS', 'WGTP53', 'ACR', 'VACS', 'FINSP', 'WGTP59', 'FMILPP', 'MARHYP', 'ADJHSG', 'PAP', 'WGTP7', 'PWGTP30', 'WGTP6', 'PWGTP31', 'HINCP', 'WGTP5', 'PWGTP32', 'WKWN', 'WGTP4', 'PWGTP33', 'PWGTP34', 'PWGTP35', 'WGTP9', 'PWGTP36', 'WGTP8', 'WGTP3', 'WGTP2', 'RACWHT', 'WGTP1', 'GASP', 'PWGTP26', 'PWGTP27', 'PWGTP28', 'PWGTP29', 'FBDSP', 'FWKLP', 'FSCHP', 'FMARHTP', 'PWGTP20', 'PWGTP21', 'GRPIP', 'PWGTP22', 'PWGTP23', 'PWGTP24', 'PWGTP25', 'JWTRNS', 'FRNTMP', 'FOTHSVCEXP', 'CIT', 'LAPTOP', 'FMRGIP', 'JWRIP', 'PWGTP15', 'FCOMPOTHXP', 'PWGTP16', 'FSEMP', 'PWGTP17', 'PWGTP18', 'PWGTP19', 'ENG', 'FLANP', 'PWGTP10', 'PWGTP11', 'FCITWP', 'PWGTP12', 'PWGTP13', 'PWGTP14', 'FSSIP', 'FS', 'DIVISION', 'WRK', 'HICOV', 'DRATX', 'SMOCP', 'VPS', 'NWLA', 'FACRP', 'SCIENGRLP', 'FJWTRNSP', 'WGTP12', 'WGTP11', 'WGTP14', 'WGTP13', 'AGS', 'FSTOVP', 'WGTP10', 'WGTP19', 'WGTP16', 'WGTP15', 'WGTP18', 'WGTP17', 'FMARHWP', 'FTELP', 'PWGTP73', 'PWGTP74', 'QTRBIR', 'PWGTP75', 'FPERNP', 'PWGTP76', 'PWGTP77', 'PWGTP78', 'PWGTP79', 'PWGTP70', 'PWGTP71', 'PWGTP72', 'FDISP', 'PWGTP', 'WAGP', 'RWAT', 'PWGTP62', 'PWGTP63', 'PWGTP64', 'PWGTP65', 'PWGTP66', 'PWGTP67', 'PWGTP68', 'PWGTP69', 'FPLMPRP', 'TAXAMT', 'ANC2P', 'NWLK', 'PWGTP60', 'FPUBCOVP', 'PWGTP61', 'REGION', 'FMARHYP', 'FRETP', 
'SMP', 'PWGTP59', 'FTENP', 'BLD', 'MAR', 'SMX', 'FVEHP', 'PWGTP51', 'PWGTP52', 'PWGTP53', 'PWGTP54', 'PWGTP55', 'GASFP', 'PWGTP56', 'PWGTP57', 'PWGTP58', 'VALP', 'PWGTP50', 'FDEYEP', 'FCITP', 'RACNUM', 'PWGTP48', 'PWGTP49', 'SPORDER', 'FANCP', 'PWGTP40', 'PWGTP41', 'DRAT', 'PWGTP42', 'PWGTP43', 'MRGI', 'ESP', 'PWGTP44', 'WGTP', 'PWGTP45', 'OCCP', 'ESR', 'PWGTP46', 'PWGTP47', 'FMRGP', 'MRGP', 'COW', 'TABLET', 'MRGX', 'MULTG', 'MRGT', 'PWGTP37', 'FOCCP', 'PWGTP38', 'PWGTP39', 'FREFRP', 'WGTP61', 'WGTP60', 'WGTP67', 'WGTP66', 'WGTP69', 'WGTP68', 'WGTP63', 'FRWATP', 'WGTP62', 'WGTP65', 'WGTP64', 'RAC1P', 'RNTM', 'DREM', 'MIGSP', 'FHICOVP', 'NWRE', 'FDIALUPP', 'RNTP', 'WGTP70', 'HUPAOC', 'WGTP72', 'WGTP71', 'WGTP78', 'MV', 'WGTP77', 'WGTP79', 'WGTP74', 'WGTP73', 'WGTP76', 'WGTP75', 'FAGSP', 'ANC', 'OIP', 'WGTP80', 'NP', 'NR', 'LNGI', 'ANC1P', 'HISPEED', 'PLM', 'RAC3P', 'OC', 'LANP', 'FLAPTOPP', 'FPRIVCOVP', 'LANX', 'HUPARC', 'FMVP', 'SVAL', 'PWGTP80', 'RWATPR', 'FWKWP', 'RAC2P', 'FPOWSP', 'SSP', 'R18', 'NAICSP', 'WATFP', 'RMSP', 'FDRATP', 'BDSP', 'FGASP', 'RESMODE', 'FSEXP', 'MHP', 'REFR', 'POWSP', 'FPLMP', 'JWAP', 'DIALUP', 'ELEFP', 'RELSHIPP', 'MIG', 'RC', 'MIL', 'WAOB', 'FHINS7P', 'RT', 'INTP', 'RACASN', 'FRELSHIPP', 'FHISPEEDP', 'FINDP', 'FPAP', 'JWMNP', 'FHINS6P', 'FKITP', 'INSP', 'ST', 'FFINCP', 'YBL', 'FINCP', 'YOEP', 'HINS6', 'FSMXHP', 'HINS7', 'FTABLETP', 'FLANXP', 'GRNTP', 'FCOWP', 'FJWMNP', 'HINS1', 'HINS2', 'HINS3', 'HINS4', 'HINS5', 'FDDRSP', 'FWAGP', 'FGRNTP', 'PUMA', 'FHINS5P', 'FSCHGP', 'FHINS5C', 'FVACSP', 'FHOTWATP', 'R60', 'FMARHDP', 'FMIGSP', 'R65', 'MARHT', 'FHINS4P', 'SERIALNO', 'DECADE', 'MARHW', 'MARHM', 'FGCRP', 'FHINS4C', 'PSF', 'RACSOR', 'NOC', 'POBP', 'CPLT', 'TYPEHUGQ', 'NOP', 'FSMARTPHONP', 'FULP', 'FHINS3P', 'SINK', 'KIT', 'FVALP', 'CONCAT_ID', 'JWDP', 'WKEXREL', 'DPHY', 'FWRKP', 'NPF', 'FHINS3C', 'FDEARP', 'NPP', 'FHINS2P', 'FRWATPRP', 'FSCHLP', 'MLPCD', 'FPARC', 'MIGPUMA', 'HUGCL', 'DDRS', 'MARHD', 'FMRGXP', 'SATELLITE', 'POVPIP', 
'FULFP', 'BATH', 'WATP', 'GCM', 'FSATELLITEP', 'GCL', 'FHINS1P', 'WKHP', 'RECORD_TYPE', 'GCR', 'FRNTP', 'NRC', 'HFL', 'FSMP', 'ACCESSINET', 'ADJINC', 'FRMSP', 'POWPUMA', 'PARTNER', 'FELEP', 'CONP', 'FMIGP', 'FFSP', 'FGCMP', 'HISP', 'FESRP', 'HHL', 'AGEP', 'DEYE', 'SEMP', 'HHT', 'OCPIP', 'FGCLP', 'FENGP', 'SCHG', 'FTAXP', 'MSP', 'RACBLK', 'FMRGTP', 'FAGEP', 'SCHL', 'PWGTP9', 'FER', 'MLPFG', 'PWGTP8', 'NATIVITY', 'PWGTP7', 'SMARTPHONE', 'PWGTP6', 'FES', 'PWGTP5', 'PWGTP4', 'VEH', 'PWGTP3', 'FFERP', 'PWGTP2', 'PWGTP1', 'HHT2', 'FYOEP', 'FDREMP', 'NWAB', 'FCONP', 'FYBLP', 'FOIP', 'FHINCP', 'FHISP', 'BROADBND', 'NWAV', 'FDPHYP', 'PAOC', 'FFODP', 'FMARHMP', 'SSIP', 'FJWRIP', 'COMPOTHX', 'FSINKP', 'ELEP', 'FHFLP', 'WIF', 'FSMXSP', 'FOD2P', 'RETP', 'FSSP', 'PLMPRP', 'SCIENGP', 'CITWP', 'FMILSP', 'FPINCP', 'FOD1P', 'FMARP', 'SOCP', 'MLPE', 'MLPH', 'MLPA', 'FSMOCP', 'MLPB', 'FDOUTP', 'PERNP', 'WKL', 'SCH', 'TEL', 'TEN', 'MLPI', 'MLPJ', 'MLPK'])
[7]:
# Inspect the metadata for the age variable, including its suggested weight.
group["AGEP"]
[7]:
{'label': 'Age',
 'predicateType': 'int',
 'group': 'N/A',
 'limit': 0,
 'suggested-weight': 'PWGTP',
 'values': {'item': {'0': 'Under 1 year'},
  'range': [{'min': '1',
    'max': '99',
    'description': '1 to 99 years (Top-coded)'}]},
 'name': 'AGEP'}

Next we will see what geographies are available. Note that PUMS data is available for far fewer geography hierarchies than the full ACS5 dataset.

[8]:
# List the geography hierarchies that can be queried for this dataset and year.
cgeo.geo_path_snake_specs(DATASET, YEAR)
[8]:
{'020': ['region'],
 '030': ['division'],
 '040': ['state'],
 '795': ['state', 'public_use_microdata_area']}

Query Age and its Suggested Weight at the PUMA Level

[9]:
# Variables to download; their suggested weights are looked up from the
# metadata in `group`.
query_variables = ["AGEP"]
[10]:
# Map each queried variable to the weight column its metadata recommends.
variable_weights = {
    variable: group[variable]["suggested-weight"] for variable in query_variables
}

# De-duplicate the weights while keeping first-seen order. list(set(...)) would
# give an arbitrary order that can change between kernel runs, making the
# column order of the downloaded frame non-reproducible.
unique_weights = list(dict.fromkeys(variable_weights.values()))
[11]:
# Download one row per PUMS person record for every PUMA in the state, with
# the queried variables and the weights needed to aggregate them.
# The API key configured near the top of the notebook is passed explicitly;
# it was previously defined but never used.
df_acs5_pums = ced.download_detail(
    DATASET,
    YEAR,
    query_variables + unique_weights,
    api_key=CENSUS_API_KEY,
    state=STATE,
    public_use_microdata_area="*",
)

df_acs5_pums
[11]:
STATE PUBLIC_USE_MICRODATA_AREA AGEP PWGTP
0 25 3900 46 14
1 25 3900 46 12
2 25 3900 12 13
3 25 302 52 14
4 25 302 21 17
... ... ... ... ...
335405 25 1300 25 38
335406 25 1300 26 32
335407 25 1300 23 33
335408 25 1300 53 27
335409 25 1300 64 20

335410 rows × 4 columns

[12]:
# Build a zero-padded, five-character PUMA code column so it can be joined
# against the PUMA code column of the map data later.
df_acs5_pums["PUMACE"] = df_acs5_pums["PUBLIC_USE_MICRODATA_AREA"].map(
    lambda puma_code: str(int(puma_code)).zfill(5)
)

Compute Average Age in Each Area

[13]:
# Weighted mean age per PUMA: sum(age * weight) / sum(weight).
# Computed with vectorized grouped sums rather than groupby().apply() with a
# Python lambda, which is slower and emits a DeprecationWarning in
# pandas >= 2.2 because it operates on the grouping column.
df_average_age = (
    df_acs5_pums.assign(
        weighted_age=lambda df: df["AGEP"] * df[variable_weights["AGEP"]]
    )
    .groupby("PUMACE")[["weighted_age", variable_weights["AGEP"]]]
    .sum()
    .pipe(lambda sums: sums["weighted_age"] / sums[variable_weights["AGEP"]])
    .rename("avg_age")
    .reset_index()
)

Load map data and merge in our PUMS data

[14]:
# Keep shapefiles under the user's home directory. expanduser("~") resolves
# the home directory even where the HOME environment variable is not set
# (e.g. on Windows), whereas os.environ["HOME"] raises KeyError there.
SHAPEFILE_ROOT = os.path.join(os.path.expanduser("~"), "data", "shapefiles")

# Make sure it is there. makedirs also creates the intermediate "data"
# directory and does nothing if the directory already exists.
os.makedirs(SHAPEFILE_ROOT, exist_ok=True)
[15]:
# Shapefile reader for the survey year, rooted at the local shapefile directory.
# NOTE(review): presumably downloads and caches files under SHAPEFILE_ROOT — confirm.
reader = ShapeReader(SHAPEFILE_ROOT, year=YEAR)
[16]:
# Load the PUMA boundaries for the selected state.
gdf_puma = reader.read_shapefile(STATE, "puma")

Plot the Map

[17]:
# Attach the average age to each PUMA boundary by matching the shapefile's
# PUMACE10 code with our zero-padded PUMACE code. No `how` is given, so this
# uses merge's default join type.
# NOTE(review): with pandas' default (inner) join, PUMAs lacking a match on
# either side are silently dropped — confirm that is intended.
gdf_avg_age = gdf_puma.merge(df_average_age, left_on="PUMACE10", right_on="PUMACE")
[18]:
import matplotlib.pyplot as plt

# Default figure size for the map.
plt.rcParams["figure.figsize"] = (12, 6)

# Choropleth of average age, one polygon per PUMA, with a color-bar legend.
ax = gdf_avg_age.plot(
    column="avg_age",
    cmap="Greens",
    edgecolor="black",
    linewidth=0.5,
    legend=True,
)

ax.set_title(
    f"Average Age by Public Use Microdata Area in {censusdis.states.STATE_NAMES_FROM_IDS[STATE]}"
)

# Hide all ticks and tick labels; coordinates add nothing to this map.
hidden_tick_options = ("left", "right", "bottom", "labelleft", "labelbottom")
ax.tick_params(**dict.fromkeys(hidden_tick_options, False))
../_images/nb_PUMS_Demo_26_0.png
[ ]: