PACs and Small Dollar Donations: Data Cleaning

Environment Preperation

This section clears the current working environment and loads the packages used to visualize the data. I also create the function comma() to format any in-line output values to have thousands separators and only two digits.

# Clear Environment -----------------------------------------------------------
rm(list = ls())

# Load Packages ---------------------------------------------------------------
packages <- c("tidyverse", "stargazer", "foreign", "sqldf", "readxl", 
              "openintro", "lubridate")
lapply(packages, require, character.only = TRUE)

# Inline Formatting -----------------------------------------------------------
comma <- function(x) format(x, digits = 2, big.mark = ",")

# Set Global Chunk Options ----------------------------------------------------
knitr::opts_chunk$set(
  echo = TRUE,
  warning = FALSE,
  message = FALSE,
  comment = "##",
  R.options = list(width = 70)
)

Legislator Data

leg.data <- read_csv("Data/Raw Data/house_roster.csv")

leg.data <- 
  leg.data %>%
  filter(congress == 116) %>%
  select(icpsr, opensecrets_id, freshman, cand_type)

PAC Pledge Data

Data on the 2018 Congressional candidates, whether or not they pledged to reject PAC money, and their contribution totals was compiled by OpenSecrets.org and is available on Google Drive here.

members.2018.data.raw <- read_xlsx("Data/Raw Data/OpenSecrets.org _ No Corp PAC Members.xlsx", 
                                   skip = 1,
                                   sheet = "All Cands 2018")
members.2018.data <- 
  members.2018.data.raw %>%
  mutate(NoCorpPACs = as_factor(NoCorpPACs),
         Party = as_factor(Party),
         Party = relevel(Party, ref = "I")) %>%
  rename(new_member = `New Member?`,
         no_corp_pacs = NoCorpPACs,
         opensecrets_id = `Candidate ID`,
         candidae_name = CRPName,
         party = Party,
         district = District,
         gen_election_result = `General Elec Result`,
         vote_pct = VotePercent,
         tot_pac_money = `Total PAC Money`,
         num_pac_contribs = `Number of PAC Contributions`,
         business_pacs = `Business PACs`,
         labor_pacs = `Labor PACs`,
         ideological_pacs = `Ideological PACs`,
         leadership_pacs = `Lead PACs`,
         other_pacs = `Other PACs`,
         net_receipts = `Net Receipts`,
         net_indiv_total = `Net Individual Total`,
         candidate_contribs = `Candidate Contributions`,
         large_indiv = `Large Indivs`,
         small_indiv = `Small Indivs`,
         net_spent = `Net Spent`) %>%
  separate(candidae_name, into = c("first_name", "last_name"), extra = "drop") %>%
  mutate(first_initial = str_sub(first_name, start = 1L, end = 1L),
         last_stub = str_sub(last_name, start = 1L, end = 3L))

members.data <- 
  members.2018.data %>%
  filter(party == "D") %>%
  filter(!is.na(gen_election_result))

Merge with Legislator Data

members.data <- 
  left_join(members.data, leg.data, by = c("opensecrets_id"))

#write_csv(members.data, file = "Data/Raw Data/members_data.csv")

# read in complete data set
member.data <- read_csv("Data/Raw Data/members_data_working.csv")

# identify chambers
member.data <- 
  member.data %>%
  mutate(chamber = ifelse(str_detect(district, "S1") | 
                            str_detect(district, "S2"), "Senate", "House"))

Demographic Data

The majority of demographic data used was compiled by Daily Kos Elections and is posted on Googe Drive here. Data on the total voting age population was collected from the US Census using the R package tidycensus.

demo.data.raw <- read_excel("Data/Raw Data/Demographic Data/115th Congress Members Guide with Elections and Demographic Data by District.xlsx",
                            sheet = "House - Working")

demo.data <- 
  demo.data.raw %>%
  mutate(district_code = str_replace(district_code, pattern = "-", replacement = ""),
         district_code = ifelse(district == "Alaska At-Large", "AK01", district_code),
         district_code = ifelse(district == "Delaware At-Large", "DE01", district_code),
         district_code = ifelse(district == "Montana At-Large", "MT01", district_code),
         district_code = ifelse(district == "North Dakota At-Large", "ND01", district_code),
         district_code = ifelse(district == "South Dakota At-Large", "SD01", district_code),
         district_code = ifelse(district == "Vermont At-Large", "VT01", district_code),
         district_code = ifelse(district == "Wyoming At-Large", "WY01", district_code)) %>%
  rename(state_dist = district_code)

# total voting age pop
library(tidycensus)
v18 <- load_variables(year = 2017, dataset = "acs5/subject", cache = TRUE)
    
voting.age <- get_acs(geography = "congressional district", 
                      variables = "S0101_C01_026",
                      key = "b52b951af88427dcca3cee53db4fb1f76da8009d")

library(openintro)
voting_age <- 
  voting.age %>%
  rename(tot_voting_age = estimate) %>%
  separate(NAME,
           into = c("district", "state"),
           sep = ",",
           extra = "merge") %>%
  mutate(district = str_replace(district, "\\(116th Congress\\)", ""),
         district = str_replace(district, "Congressional District ", ""),
         district = str_trim(district, side = "both"),
         district = str_pad(district, width = 2, side = "left", pad = "0"),
         state = str_trim(state, side = "both"),
         state_dist = paste(state2abbr(state), district, sep = ""),
         state_dist = str_replace(state_dist, pattern = "\\(at Large\\)", "01")) %>%
  dplyr::select(-variable, -moe, -GEOID, -district, -state)

demo.data <- left_join(demo.data, voting_age, by = c("state_dist"))

Merge Legislator Data with Demographic Data

sum(duplicated(members.2018.data[ , c("district")]))
## [1] 0
sum(duplicated(demo.data[ , c("district")]))
## [1] 0
member.data <- 
  left_join(member.data, demo.data, by = c("district" = "state_dist"))

Election Data

Data on each candidates’ total number of votes for the 2018 midterm election was collected from the MIT Election Data + Science Lab.

# House Elections
election.data.raw <- 
  read_csv("Data/Raw Data/Election Data/1976-2018-house.csv") %>% 
  filter(year == 2018) %>% 
  filter(stage == "gen")

election.data <- 
  election.data.raw %>%
  mutate(district = str_pad(district, width = 2, side = "left", pad = "0"),
         vote_pct = candidatevotes / totalvotes,
         first_initial = str_sub(candidate, start = 1L, end = 1L)) %>%
  filter(!is.na(candidate)) %>%
  separate(candidate, into = c("first_name", "last_name"), extra = "drop") %>%
  mutate(last_stub = str_sub(last_name, start = 1L, end = 3L),
         state_dist = paste(state_po, district, sep = ""),
         party = case_when(party == "democrat" ~ "D",
                           party == "republican" ~ "R")) %>%
  filter(!is.na(party)) %>%
  dplyr::select(state_dist, party, first_initial, first_name, last_name, last_stub,
         candidatevotes, totalvotes, office)

election.data <- 
  election.data %>%
  mutate(state_dist = ifelse(state_dist == "AK00", "AK01", state_dist),
         state_dist = ifelse(state_dist == "DE00", "DE01", state_dist),
         state_dist = ifelse(state_dist == "MT00", "MT01", state_dist),
         state_dist = ifelse(state_dist == "ND00", "ND01", state_dist),
         state_dist = ifelse(state_dist == "SD00", "SD01", state_dist),
         state_dist = ifelse(state_dist == "VT00", "VT01", state_dist),
         state_dist = ifelse(state_dist == "WY00", "WY01", state_dist))

Merge Election Data with Contribution Data

# house elections
sum(duplicated(election.data[ , c("state_dist", "party", "first_initial", "last_stub")]))
## [1] 0
sum(duplicated(member.data[ , c("district", "party", "first_initial", "last_stub")]))
## [1] 0
member.data <- 
  left_join(member.data, election.data,
  by = c("district" = "state_dist", "party", "first_initial", "last_stub"))

member.data <- 
  member.data %>%
  mutate(new_member = as_factor(new_member),
         party = as_factor(party),
         vap_turnout = candidatevotes / tot_voting_age) %>%
  dplyr::select(opensecrets_id, first_name.x, last_name.x, district, party, 
                new_member, no_corp_pacs, tot_pac_money, num_pac_contribs, 
                business_pacs, labor_pacs, ideological_pacs, leadership_pacs, 
                other_pacs, net_receipts, net_indiv_total, candidate_contribs, 
                large_indiv, small_indiv, net_spent, vote_pct, vap_turnout, 
                median_hh_income, pct_clinton_16, pct_obama_12, 
                pct_dem_house_16,pct_dem_house_14, pct_asian, pct_white, 
                pct_black, pct_latino, pct_bachelors, tot_voting_age, chamber,
                last_stub, first_initial) %>%
  rename(first_name = first_name.x,
         last_name = last_name.x)

Contribution Data

This contribution data come from OpenSecrets.org.

Individual Contributions

# Industry Categories
industry.cats <- read_excel("Data/Raw Data/CRP_industry_codes.xlsx")

industry.cats <- 
  industry.cats %>%
  filter(str_detect(Catcode, "0000")) %>%
  mutate(industry_letter = str_sub(Catcode, start = 1L, end = 1L)) %>%
  select(-Catcode, -Catorder)

# Individual Contribution data
indiv.contrib.data.raw <- 
  read_delim("Data/Raw Data/indivs18.txt", 
             delim = ",",
             quote = "|", 
             col_names = FALSE, 
             num_threads = 4) %>%
  rename(year = X1,
         fec_trans_id = X2,
         donor_id = X3,
         contrib_name = X4,
         receipt_id = X5,
         donor_org_name = X6,
         standard_org = X7,
         industry_code = X8,
         contrib_date = X9,
         contrib_amount = X10,
         contrib_city = X12,
         contrib_state = X13, 
         contrib_zip = X14,
         transaction_type = X16) %>%
  mutate(contrib_type = "Individual",
         contrib_date = str_replace(contrib_date, ",,", ","),
         industry_code = str_to_upper(industry_code),
         industry_letter = str_sub(industry_code, start = 1L, end = 1L)) %>%
  filter(str_detect(industry_code, pattern = "^Z4", negate = TRUE)) %>%
  filter(str_detect(industry_code, pattern = "^Z9", negate = TRUE)) %>%
  filter(str_detect(industry_code, pattern = "^z4", negate = TRUE)) %>%
  filter(str_detect(industry_code, pattern = "^z9", negate = TRUE)) %>%
  filter(str_detect(receipt_id, "^N")) %>%
  separate(contrib_date, into = c("drop", "contrib_date", "contrib_amount"), sep = ",") %>%
  mutate(contrib_amount, as.numeric(contrib_amount),
         contrib_data = mdy(contrib_date))

# code transaction types
indiv.contribs <- 
  indiv.contrib.data.raw %>%
  mutate(transaction_type = factor(transaction_type, 
                                   levels = c("10", "11", "15 ", "15E", "15I", "15J", "15T",
                                              "19J", "20Y", "22Y"),
                                   labels = c("Contrib. to Indp. Exp. Cmte.",
                                              "Native American Tribe Contribution",
                                              "Contrib. to Political Cmte.",
                                              "Earmarked Contrib. to Political Cmte.",
                                              "Earmarked Contrib. Check",
                                              "Pct. Recipient Contrib. to Joint Fund. Cmte.",
                                              "Earmarked Contrib. Treasury",
                                              "Pct. Recipient Contrib. Electioneering",
                                              "Nonfederal Refund",
                                              "Contrib. Refund"))) %>%
  filter(str_detect(transaction_type, "Contrib. to Indp. Exp. Cmte.", negate = TRUE)) %>%
  filter(str_detect(transaction_type, "Pct. Recipient Contrib. to Joint Fund. Cmte.", 
                    negate = TRUE)) %>%
  filter(str_detect(transaction_type, "Pct. Recipient Contrib. Electioneering", 
                    negate = TRUE)) %>%
  filter(str_detect(transaction_type, "Nonfederal Refund", negate = TRUE)) %>%
  filter(str_detect(transaction_type, "Contrib. Refund", negate = TRUE))

# filter out non-individual recipients
indiv.contrib.data <- 
  indiv.contrib.data.raw %>%
  filter(str_detect(receipt_id, "^N"))

# aggregate data to recepient-level
indiv.contrib.data <- 
  indiv.contrib.data %>%
  mutate(fec_trans_id = as.character(fec_trans_id),
         contrib_amount = as.numeric(contrib_amount),
         contrib_date = mdy(contrib_date)) %>%
  group_by(receipt_id, industry_code) %>%
  summarise(year = first(year),
            contrib_amount = sum(contrib_amount)) %>%
  mutate(contrib_type = "Individual") %>%
  arrange(receipt_id, industry_code, year)

# remove non-contributions
indiv.contrib.data <- 
  indiv.contrib.data %>%
  mutate(industry_code = str_to_upper(industry_code),
         industry_letter = str_sub(industry_code, start = 1L, end = 1L)) %>%
  filter(industry_code != "Z9100") %>%
  filter(industry_code != "Z9500") %>%
  filter(industry_code != "Z9600") %>%
  filter(industry_code != "Z9700") %>%
  filter(industry_code != "Z9800") %>%
  filter(industry_code != "Z9999") %>%
  mutate(industry_letter = ifelse(industry_code == "Z1000" | 
                                    industry_code == "Z1100" | 
                                    industry_code == "Z1200" |
                                    industry_code == "Z1300" |
                                    industry_code == "Z1400", "CC", industry_letter),
         industry_letter = ifelse(industry_code == "Z4100" | 
                                    industry_code == "Z4200" | 
                                    industry_code == "Z4300" |
                                    industry_code == "Z4400" |
                                    industry_code == "Z4500", "JCC", industry_letter),
         industry_letter = ifelse(industry_code == "Z5000" | 
                                    industry_code == "Z5100" | 
                                    industry_code == "Z5200" |
                                    industry_code == "Z5300", "PC", industry_letter),
         industry_letter = ifelse(industry_code == "Z9000", "Self-finance", industry_letter)) %>%
  group_by(receipt_id, industry_letter) %>%
  summarise(contrib_amount = sum(contrib_amount)) %>%
  arrange(receipt_id, industry_letter)

## Merge with contribution data
indiv.contrib.data <- left_join(
  indiv.contrib.data,
  industry.cats,
  by = c("industry_letter")
)

indiv.contrib.data <- 
  indiv.contrib.data %>%
  mutate(Catname = ifelse(industry_letter == "Z", "Committees", Catname)) %>%
  rename(industry_name = Catname,
         industry = Industry,
         sector = Sector,
         sector_long = `Sector Long`)

indiv.contrib.data <- 
  indiv.contrib.data %>%
  mutate(industry_name = ifelse(industry_letter == "Self-finance", 
                                "indiv_self_finance",
                                industry_name),
         industry_name = ifelse(industry_letter == "CC", 
                                "indiv_cand_committee",
                                industry_name),
         industry_name = ifelse(industry_letter == "JCC", 
                                "indiv_jnt_cand_committee",
                                industry_name),
         industry_name = ifelse(industry_letter == "PC", 
                                "indiv_party_committee",
                                industry_name))

sum(duplicated(indiv.contrib.data[ , c("receipt_id", "industry_letter")]))
## [1] 0
indiv.contrib.data <- 
  indiv.contrib.data %>%
  filter(!is.na(industry_name)) %>%
  select(receipt_id, industry_name, contrib_amount) %>%
  spread(key = industry_name, value = contrib_amount) %>%
  rename(indiv_ag = Agriculture,
         indiv_comms_electric = `Communications & Electronics`,
         indiv_const_pub_wrks = `Construction & Public Works`,
         indiv_defense = Defense,
         indiv_engy_res_envrmt = `Energy, Natural Resources and Environment`,
         indiv_fncs_ins_real = `Finance, Insurance & Real Estate`,
         indiv_commerce = `General commerce`,
         indiv_hlth_edu_human = `Health, Education & Human Resources`,
         indiv_ideological = `Ideological & Single Issue PACs`,
         indiv_labor = `Labor Unions`,
         indiv_legal = `Legal Services`,
         indiv_manu = Manufacturing,
         indiv_other = Other,
         indiv_trans = Transportation,
         indiv_unknown = Unknown) %>%
  mutate_at(vars(indiv_ag:indiv_unknown), funs(ifelse(is.na(.), 0, .))) %>%
  arrange(receipt_id)

indiv.contrib.data <- 
  indiv.contrib.data %>%
  mutate(indiv_biz = sum(indiv_ag, indiv_comms_electric, indiv_const_pub_wrks,
                         indiv_defense, indiv_engy_res_envrmt, indiv_legal,
                         indiv_fncs_ins_real, indiv_commerce, indiv_manu,
                         indiv_hlth_edu_human, indiv_trans)) %>%
  select(receipt_id, indiv_biz, indiv_labor, indiv_ideological, 
         indiv_cand_committee, indiv_party_committee)

PAC Contributions

## 2018 - Congress 116
pac.contrib.data.raw <- 
  read_delim("Data/Raw Data/pacs18.txt", 
             delim = ",",
             quote = "|",
             col_names = FALSE) %>%
  rename(year = X1,
         fec_trans_id = X2,
         pac_id = X3,
         receipt_id = X4,
         industry_code = X7,
         trans_type = X8,
         direct_indirect = X9,
         contrib_date = X6,
         contrib_amount = X5) %>%
  select(year, fec_trans_id, pac_id, receipt_id, industry_code, trans_type, 
         direct_indirect, contrib_date, contrib_amount) %>%
  # separate(contrib_date, into = c("drop", "contrib_amount", "contrib_date"), sep = ",") %>%
  mutate(fec_trans_id = as.character(fec_trans_id),
         contrib_amount = as.numeric(contrib_amount),
         contrib_date = mdy(contrib_date),
         trans_type = factor(trans_type, 
                             levels = c("24A", "24C", "24E", "24F", "24K", "24N", "24Z"),
                             labels = c("independent expenditure against the candidate",
                                        "coordinated expenditure",
                                        "independent expenditure for the candidate",
                                        "communication cost for the candidate",
                                        "direct contribution",
                                        "communication cost against the candidate",
                                        "in kind contribution")),
         direct_indirect = factor(direct_indirect,
                                  levels = c("D", "I"),
                                  labels = c("Direct", "Indirect"))) %>%
  mutate(contrib_type = "PAC") %>%
  filter(trans_type == "independent expenditure for the candidate" | 
           trans_type == "direct contribution" | 
           trans_type == "in kind contribution" |
           trans_type == "coordinated expenditure") %>%
  arrange(receipt_id, industry_code, year) 

# filter out non-individual recipients
pac.contrib.data <- 
  pac.contrib.data.raw %>%
  filter(str_detect(receipt_id, "^N"))

pac.contrib.data <- 
  pac.contrib.data.raw %>%
  mutate(industry_code = str_to_upper(industry_code),
         industry_letter = str_sub(industry_code, start = 1L, end = 1L)) %>%
  filter(industry_code != "Z9100") %>%
  filter(industry_code != "Z9500") %>%
  filter(industry_code != "Z9600") %>%
  filter(industry_code != "Z9700") %>%
  filter(industry_code != "Z9800") %>%
  filter(industry_code != "Z9999") %>%
  mutate(industry_letter = ifelse(industry_code == "Z1000" | 
                                    industry_code == "Z1100" | 
                                    industry_code == "Z1200" |
                                    industry_code == "Z1300" |
                                    industry_code == "Z1400", "pac_cand_committee", industry_letter),
         industry_letter = ifelse(industry_code == "Z4100" | 
                                    industry_code == "Z4200" | 
                                    industry_code == "Z4300" |
                                    industry_code == "Z4400" |
                                    industry_code == "Z4500", "pac_jnt_cand_committee", industry_letter),
         industry_letter = ifelse(industry_code == "Z5000" | 
                                    industry_code == "Z5100" | 
                                    industry_code == "Z5200" |
                                    industry_code == "Z5300", "pac_party_committee", industry_letter),
         industry_letter = ifelse(industry_code == "Z9000", "Self-finance", industry_letter)) %>%
  group_by(receipt_id, industry_letter, trans_type) %>%
  summarise(contrib_amount = sum(contrib_amount)) %>%
  arrange(receipt_id, industry_letter)

## Merge with contribution data
pac.contrib.data <- left_join(
  pac.contrib.data,
  industry.cats,
  by = c("industry_letter")
)

pac.contrib.data <- 
  pac.contrib.data %>%
  rename(industry_name = Catname,
         industry = Industry,
         sector = Sector,
         sector_long = `Sector Long`)

sum(duplicated(pac.contrib.data[ , c("receipt_id", "industry_letter", "trans_type")]))
## [1] 0
pac.contrib.data <- 
  pac.contrib.data %>%
  filter(!is.na(industry_letter)) %>%
  select(receipt_id, contrib_amount, industry_letter, trans_type) %>%
  unite(col = "group", trans_type, industry_letter, sep = "-") %>%
  spread(key = group, value = contrib_amount) %>%
  rename(dir_pac_ag = `direct contribution-A`,
         dir_pac_comms_electric = `direct contribution-C`,
         dir_pac_const_pub_wrks = `direct contribution-B`,
         dir_pac_defense = `direct contribution-D`,
         dir_pac_engy_res_envrmt = `direct contribution-E`,
         dir_pac_fncs_ins_real = `direct contribution-F`,
         dir_pac_commerce = `direct contribution-G`,
         dir_pac_hlth_edu_human = `direct contribution-H`,
         dir_pac_ideological = `direct contribution-J`,
         dir_pac_labor = `direct contribution-L`,
         dir_pac_legal = `direct contribution-K`,
         dir_pac_manu = `direct contribution-M`,
         dir_pac_other = `direct contribution-X`,
         dir_pac_trans = `direct contribution-T`,
         dir_pac_unknown = `direct contribution-Y`,
         dir_pac_cand_committee = `direct contribution-pac_cand_committee`,
         dir_pac_party_committee = `direct contribution-pac_party_committee`,
         ik_pac_ag = `in kind contribution-A`,
         ik_pac_comms_electric = `in kind contribution-C`,
         ik_pac_const_pub_wrks = `in kind contribution-B`,
         ik_pac_defense = `in kind contribution-D`,
         ik_pac_engy_res_envrmt = `in kind contribution-E`,
         ik_pac_fncs_ins_real = `in kind contribution-F`,
         ik_pac_commerce = `in kind contribution-G`,
         ik_pac_hlth_edu_human = `in kind contribution-H`,
         ik_pac_ideological = `in kind contribution-J`,
         ik_pac_labor = `in kind contribution-L`,
         ik_pac_legal = `in kind contribution-K`,
         ik_pac_manu = `in kind contribution-M`,
         ik_pac_other = `in kind contribution-X`,
         ik_pac_trans = `in kind contribution-T`,
         ik_pac_cand_committee = `in kind contribution-pac_cand_committee`,
         ik_pac_party_committee = `in kind contribution-pac_party_committee`,
         ind_pac_engy_res_envrmt = `independent expenditure for the candidate-E`,
         ind_pac_fncs_ins_real = `independent expenditure for the candidate-F`,
         ind_pac_commerce = `independent expenditure for the candidate-G`,
         ind_pac_hlth_edu_human = `independent expenditure for the candidate-H`,
         ind_pac_ideological = `independent expenditure for the candidate-J`,
         ind_pac_labor = `independent expenditure for the candidate-L`,
         ind_pac_manu = `independent expenditure for the candidate-M`,
         ind_pac_unknown = `independent expenditure for the candidate-Y`,
         ind_pac_party_committee = `independent expenditure for the candidate-pac_party_committee`,
         ce_pac_ideological= `coordinated expenditure-J`,
         ce_pac_legal = `coordinated expenditure-L`,
         ce_pac_party_committee = `coordinated expenditure-pac_party_committee`) %>%
  mutate_at(vars(ce_pac_ideological:ind_pac_unknown), funs(ifelse(is.na(.), 0, .))) %>%
  arrange(receipt_id)

pac.contrib.data <- 
  pac.contrib.data %>%
  mutate(dir_business_pacs = sum(dir_pac_ag, dir_pac_comms_electric, 
                                 dir_pac_const_pub_wrks, dir_pac_defense,
                                 dir_pac_engy_res_envrmt, dir_pac_fncs_ins_real,
                                 dir_pac_commerce, dir_pac_hlth_edu_human,
                                 dir_pac_legal, dir_pac_manu, dir_pac_other,
                                 dir_pac_trans, dir_pac_unknown,
                                 na.rm = TRUE),
         in_kind_business_pacs = sum(ik_pac_ag, ik_pac_commerce, 
                                     ik_pac_const_pub_wrks, ik_pac_defense,
                                     ik_pac_engy_res_envrmt, 
                                     ik_pac_fncs_ins_real, ik_pac_commerce, 
                                     ik_pac_hlth_edu_human, ik_pac_legal,
                                     ik_pac_manu, ik_pac_other, ik_pac_trans,
                                     na.rm = TRUE),
         ind_business_pacs = sum(ind_pac_engy_res_envrmt, ind_pac_fncs_ins_real,
                                  ind_pac_commerce, ind_pac_hlth_edu_human, 
                                  ind_pac_manu, ind_pac_unknown,
                                  na.rm = TRUE)) %>%
  select(receipt_id, dir_business_pacs, in_kind_business_pacs, 
         ind_business_pacs, dir_pac_cand_committee, dir_pac_party_committee,
         ik_pac_party_committee, ind_pac_party_committee, 
         ce_pac_party_committee, dir_pac_labor, ik_pac_labor, ind_pac_labor,
         dir_pac_ideological, ik_pac_ideological, ind_pac_ideological, 
         ce_pac_ideological)

Combine Contribution Data

# Combine data
contrib.data <- left_join(
  indiv.contrib.data,
  pac.contrib.data,
  by = c("receipt_id")
)

save(contrib.data, file = "Data/Clean Data/contribution_data.Rda")

Merge with Legislator Data

# load clean contribution data
load("Data/Raw Data/contrib_data.Rda")

member.data <- 
  left_join(member.data, contrib.data, by = c("opensecrets_id" = "receipt_id"))

Select Final Data Set

estimation.data <- 
  member.data %>%
  filter(party == "D") %>%
  select(district, no_corp_pacs, new_member, tot_pac_money, num_pac_contribs, 
         business_pacs, ideological_pacs, in_kind_business_pacs, ind_against_business_pacs,
         ind_for_business_pacs, party_exp, leadership_pacs, net_indiv_total, 
         large_indiv, small_indiv, indiv_ideological, indiv_cand_committee, 
         indiv_party_committee, indiv_biz, indiv_labor, 
         net_spent, net_receipts, vote_pct, vap_turnout, median_hh_income, 
         pct_clinton_16, pct_dem_house_16, pct_asian, pct_white, pct_black, 
         pct_latino, pct_bachelors, tot_voting_age, chamber) %>%
  mutate(vote_pct = vote_pct * 100,
         vap_turnout = vap_turnout * 100,
         log_median_hh_income = log(median_hh_income),
         log_tot_voting_age = log(tot_voting_age))

save(estimation.data, file = "Data/Clean Data/estimation_data2.Rda")

Final Data Set Summary Statistics

psych::describeBy(estimation.data, group = estimation.data$no_corp_pacs)
## 
##  Descriptive statistics by group 
## group: NO
##                           vars   n       mean         sd     median
## district*                    1 211     106.00      61.05     106.00
## no_corp_pacs*                2 211       1.00       0.00       1.00
## new_member*                  3 211       1.14       0.35       1.00
## tot_pac_money                4 211  709462.41  491163.01  573044.00
## num_pac_contribs             5 211     395.00     248.65     340.00
## business_pacs                6 211  471328.24  397024.94  369249.00
## ideological_pacs             7 211   45221.90   47966.69   24235.00
## in_kind_business_pacs        8 211     599.35    1562.72      58.00
## ind_against_business_pacs    9 211       0.47       6.88       0.00
## ind_for_business_pacs       10 211    4838.84   42991.98       0.00
## party_exp                   11 211    3198.88   24028.13       0.00
## leadership_pacs             12 211   55338.02   94753.56   14500.00
## net_indiv_total             13 211 1809157.48 3516390.94  630577.58
## large_indiv                 14 211 1408868.27 2463015.29  574789.37
## small_indiv                 15 211  400246.03 1132144.74   76499.88
## indiv_ideological           16 211  149021.16  358058.50   37935.00
## indiv_cand_committee        17 211      36.79     266.56       0.00
## indiv_party_committee       18 211       4.74      68.84       0.00
## indiv_biz                   19 211 1040628.14 1911582.76  403785.00
## indiv_labor                 20 211    1257.95    4670.65       0.00
## net_spent                   21 211 2334356.80 3920833.74 1134291.71
## net_receipts                22 211 2818376.65 4454548.66 1373799.04
## vote_pct                    23 211      64.99      19.01      65.20
## vap_turnout                 24 137      28.49       7.54      29.28
## median_hh_income            25 191   63081.43   19196.73   60756.00
## pct_clinton_16              26 191      62.51      12.70      60.57
## pct_dem_house_16            27 191      67.33      17.29      66.70
## pct_asian                   28 191       6.41       7.54       3.40
## pct_white                   29 191      57.00      23.64      59.74
## pct_black                   30 191      16.64      18.19       8.79
## pct_latino                  31 191      17.73      18.58      10.66
## pct_bachelors               32 191      33.38      11.96      32.50
## tot_voting_age              33 191  580685.53   41062.65  577328.00
## chamber*                    34 211       1.09       0.28       1.00
## log_median_hh_income        35 191      11.01       0.29      11.01
## log_tot_voting_age          36 191      13.27       0.07      13.27
##                              trimmed       mad       min         max
## district*                     106.00     78.58      1.00      211.00
## no_corp_pacs*                   1.00      0.00      1.00        1.00
## new_member*                     1.05      0.00      1.00        2.00
## tot_pac_money              628616.33 326357.32  17000.00  2673386.00
## num_pac_contribs              361.82    195.70      9.00     1346.00
## business_pacs              413386.73 296666.78      0.00  2216109.00
## ideological_pacs            36080.21  21512.53      0.00   263906.00
## in_kind_business_pacs         288.99     85.99  -4350.00    13569.00
## ind_against_business_pacs       0.00      0.00      0.00      100.00
## ind_for_business_pacs           0.00      0.00      0.00   571700.00
## party_exp                       0.00      0.00      0.00   283737.00
## leadership_pacs             31648.14  18532.50      0.00   467700.00
## net_indiv_total            966185.07 575054.05  27487.08 25591510.53
## large_indiv                804278.58 526129.30  23050.00 15346739.43
## small_indiv                136926.18  91801.87    813.12 10244760.10
## indiv_ideological           70052.61  49570.73      0.00  3353000.00
## indiv_cand_committee            0.00      0.00      0.00     3500.00
## indiv_party_committee           0.00      0.00      0.00     1000.00
## indiv_biz                  592428.17 366497.24  19765.00 12825778.00
## indiv_labor                   342.57      0.00      0.00    53775.00
## net_spent                 1394563.31 721300.32 173085.21 26747405.61
## net_receipts              1747834.45 865030.40 232915.61 29044284.32
## vote_pct                       66.19     13.34      0.00      100.00
## vap_turnout                    28.76      6.28      0.00       48.82
## median_hh_income            61152.41  16963.91  29234.00   125790.00
## pct_clinton_16                 62.15     12.97     31.03       93.77
## pct_dem_house_16               67.19     13.91      0.00      100.00
## pct_asian                       4.92      3.23      0.37       56.17
## pct_white                      57.59     30.22      3.49       95.42
## pct_black                      13.48      8.39      0.63       66.44
## pct_latino                     14.15     10.81      0.92       78.98
## pct_bachelors                  32.67     11.71      9.30       72.20
## tot_voting_age             578898.86  34033.08 427435.00   763472.00
## chamber*                        1.00      0.00      1.00        2.00
## log_median_hh_income           11.00      0.29     10.28       11.74
## log_tot_voting_age             13.27      0.06     12.97       13.55
##                                 range  skew kurtosis        se
## district*                      210.00  0.00    -1.22      4.20
## no_corp_pacs*                    0.00   NaN      NaN      0.00
## new_member*                      1.00  2.09     2.38      0.02
## tot_pac_money              2656386.00  1.70     3.13  33813.06
## num_pac_contribs              1337.00  1.30     1.82     17.12
## business_pacs              2216109.00  1.60     3.12  27332.32
## ideological_pacs            263906.00  1.82     3.38   3302.16
## in_kind_business_pacs        17919.00  4.64    30.26    107.58
## ind_against_business_pacs      100.00 14.32   204.03      0.47
## ind_for_business_pacs       571700.00 11.46   142.67   2959.69
## party_exp                   283737.00  9.38    95.69   1654.16
## leadership_pacs             467700.00  2.50     6.06   6523.10
## net_indiv_total           25564023.45  4.03    18.23 242078.34
## large_indiv               15323689.43  3.52    13.20 169560.97
## small_indiv               10243946.98  5.41    35.06  77940.06
## indiv_ideological          3353000.00  5.39    36.25  24649.76
## indiv_cand_committee          3500.00 10.93   133.81     18.35
## indiv_party_committee         1000.00 14.32   204.03      4.74
## indiv_biz                 12806013.00  3.99    17.59 131598.78
## indiv_labor                  53775.00  8.07    78.79    321.54
## net_spent                 26574320.40  3.88    15.86 269921.33
## net_receipts              28811368.71  3.69    14.46 306663.78
## vote_pct                       100.00 -1.48     4.20      1.31
## vap_turnout                     48.82 -0.89     2.88      0.64
## median_hh_income             96556.00  0.84     0.26   1389.03
## pct_clinton_16                  62.74  0.18    -0.43      0.92
## pct_dem_house_16               100.00 -0.27     1.34      1.25
## pct_asian                       55.81  2.85    11.42      0.55
## pct_white                       91.93 -0.21    -1.14      1.71
## pct_black                       65.81  1.33     0.39      1.32
## pct_latino                      78.06  1.54     1.50      1.34
## pct_bachelors                   62.90  0.51     0.06      0.87
## tot_voting_age              336037.00  0.53     2.27   2971.19
## chamber*                         1.00  2.95     6.72      0.02
## log_median_hh_income             1.46  0.20    -0.51      0.02
## log_tot_voting_age               0.58  0.10     2.15      0.01
## ---------------------------------------------------- 
## group: YES
##                           vars  n       mean         sd     median
## district*                    1 48      24.50      14.00      24.50
## no_corp_pacs*                2 48       1.00       0.00       1.00
## new_member*                  3 48       1.73       0.45       2.00
## tot_pac_money                4 48  235689.25  210827.95  207816.50
## num_pac_contribs             5 48     119.27     107.43      96.00
## business_pacs                6 48   47284.08  113392.45    6750.00
## ideological_pacs             7 48   60837.83   47760.99   59888.00
## in_kind_business_pacs        8 48     201.27     869.54       0.00
## ind_against_business_pacs    9 48       0.00       0.00       0.00
## ind_for_business_pacs       10 48       0.00       0.00       0.00
## party_exp                   11 48    2237.00   15486.16       0.00
## leadership_pacs             12 48   61250.58   64450.14   61850.00
## net_indiv_total             13 48 4001322.39 4912004.29 2765514.13
## large_indiv                 14 48 2764406.94 2471439.59 2178961.28
## small_indiv                 15 48 1236914.83 2837615.74  537468.26
## indiv_ideological           16 48  298297.29  299352.89  213393.00
## indiv_cand_committee        17 48       0.00       0.00       0.00
## indiv_party_committee       18 48       0.00       0.00       0.00
## indiv_biz                   19 48 2019083.29 1622332.35 1737106.50
## indiv_labor                 20 48    2177.69    2646.17    1175.00
## net_spent                   21 48 4144274.62 3964656.57 3239642.83
## net_receipts                22 48 5239448.54 5796262.66 3963065.91
## vote_pct                    23 48      58.23      14.86      53.50
## vap_turnout                 24 30      27.23       7.61      26.55
## median_hh_income            25 44   68972.59   17808.41   69511.50
## pct_clinton_16              26 44      54.16      12.99      49.88
## pct_dem_house_16            27 44      53.88      21.01      45.43
## pct_asian                   28 44       7.49       9.40       4.43
## pct_white                   29 44      67.24      19.53      71.30
## pct_black                   30 44       8.44       9.50       5.09
## pct_latino                  31 44      14.03      14.75       7.51
## pct_bachelors               32 44      36.43      11.99      36.75
## tot_voting_age              33 44  574978.18   43770.18  573281.00
## chamber*                    34 48       1.08       0.28       1.00
## log_median_hh_income        35 44      11.11       0.26      11.15
## log_tot_voting_age          36 44      13.26       0.08      13.26
##                              trimmed        mad       min         max
## district*                      24.50      17.79      1.00       48.00
## no_corp_pacs*                   1.00       0.00      1.00        1.00
## new_member*                     1.77       0.00      1.00        2.00
## tot_pac_money              207373.10  202354.88      0.00   967546.00
## num_pac_contribs              102.70      77.84      4.00      516.00
## business_pacs               16216.78   10007.55      0.00   479465.00
## ideological_pacs            57173.20   51358.01      0.00   184939.00
## in_kind_business_pacs           2.25       0.00   -116.00     5200.00
## ind_against_business_pacs       0.00       0.00      0.00        0.00
## ind_for_business_pacs           0.00       0.00      0.00        0.00
## party_exp                       0.00       0.00      0.00   107293.00
## leadership_pacs             52394.98   70349.37      0.00   282880.00
## net_indiv_total           3082167.15 2289393.28 352468.68 29600511.44
## large_indiv               2369043.96 1843908.10 219166.18 11909673.44
## small_indiv                671508.69  548124.58  15227.27 19012867.30
## indiv_ideological          258620.42  264246.02  14739.00  1659605.00
## indiv_cand_committee            0.00       0.00      0.00        0.00
## indiv_party_committee           0.00       0.00      0.00        0.00
## indiv_biz                 1819291.75 1552629.13 119428.00  8434746.00
## indiv_labor                  1750.07    1742.05      0.00    11279.00
## net_spent                 3454411.88 2922365.85 578179.05 20475491.00
## net_receipts              4153662.11 3272074.02 632548.22 34114039.97
## vote_pct                       57.23       4.74      0.00       97.40
## vap_turnout                    26.35       3.87     11.53       51.03
## median_hh_income            68313.86   18035.83  33995.00   121150.00
## pct_clinton_16                 52.54       8.73     39.32       86.89
## pct_dem_house_16               51.97      11.33      0.00      100.00
## pct_asian                       5.46       4.09      0.42       45.37
## pct_white                      68.26      23.13     29.11       96.02
## pct_black                       6.89       4.32      0.44       56.63
## pct_latino                     11.48       7.29      0.96       58.66
## pct_bachelors                  36.46      10.30      7.90       61.00
## tot_voting_age             575806.28   32506.75 424919.00   675408.00
## chamber*                        1.00       0.00      1.00        2.00
## log_median_hh_income           11.12       0.26     10.43       11.70
## log_tot_voting_age             13.26       0.06     12.96       13.42
##                                 range  skew kurtosis        se
## district*                       47.00  0.00    -1.28      2.02
## no_corp_pacs*                    0.00   NaN      NaN      0.00
## new_member*                      1.00 -1.00    -1.02      0.06
## tot_pac_money               967546.00  1.34     2.01  30430.39
## num_pac_contribs               512.00  1.57     2.77     15.51
## business_pacs               479465.00  2.77     6.33  16366.79
## ideological_pacs            184939.00  0.53    -0.52   6893.70
## in_kind_business_pacs         5316.00  4.67    22.02    125.51
## ind_against_business_pacs        0.00   NaN      NaN      0.00
## ind_for_business_pacs            0.00   NaN      NaN      0.00
## party_exp                   107293.00  6.50    41.12   2235.23
## leadership_pacs             282880.00  1.49     2.85   9302.58
## net_indiv_total           29248042.76  3.46    14.04 708986.75
## large_indiv               11690507.26  1.83     3.78 356721.58
## small_indiv               18997640.03  5.25    29.35 409574.55
## indiv_ideological          1644866.00  2.08     6.58  43207.87
## indiv_cand_committee             0.00   NaN      NaN      0.00
## indiv_party_committee            0.00   NaN      NaN      0.00
## indiv_biz                  8315318.00  1.48     3.15 234163.50
## indiv_labor                  11279.00  1.53     1.88    381.94
## net_spent                 19897311.95  2.08     5.14 572248.88
## net_receipts              33481491.75  3.01    11.13 836618.45
## vote_pct                        97.40 -0.27     4.18      2.14
## vap_turnout                     39.50  1.39     3.13      1.39
## median_hh_income             87155.00  0.45     0.30   2684.72
## pct_clinton_16                  47.57  1.10     0.08      1.96
## pct_dem_house_16               100.00  0.57     0.18      3.17
## pct_asian                       44.94  2.38     5.68      1.42
## pct_white                       66.91 -0.51    -0.99      2.94
## pct_black                       56.19  3.08    12.43      1.43
## pct_latino                      57.70  1.47     1.11      2.22
## pct_bachelors                   53.10 -0.10    -0.24      1.81
## tot_voting_age              250489.00 -0.49     2.08   6598.60
## chamber*                         1.00  2.92     6.67      0.04
## log_median_hh_income             1.27 -0.29    -0.16      0.04
## log_tot_voting_age               0.46 -0.98     3.28      0.01
# no_pacs <- c("YES", "NO")
# titles <- c("Summary Statistics for Candidates who Reject PACs",
#             "Summary Statistics for States who do not Reject PACs")
# 
# for (i in 1:2) {
#   estimation.data %>%
#     filter(no_corp_pacs == no_pacs[i]) %>%
#     fastDummies::dummy_cols(select_columns= c("party", "new_member")) %>%
#   as.data.frame() %>%
#   stargazer(., 
#             type = "text", 
#             style = "apsr", 
#             title = titles[i],
#             covariate.labels = c("Total PAC Money", 
#                                  "Number of PAC Contributors",
#                                  "Business PAC Contributions", 
#                                  "Direct Business PAC Contributions",
#                                  "In-kind Business PAC Contributions",
#                                  "Independent Business PAC Expendatures",
#                                  "Contributions from Candidates",
#                                  "Direct Candidate Contributions",
#                                  "Direct Party Contributions",
#                                  "In-kind Party Contributions",
#                                  "Independent Party Expendatures",
#                                  "Coordinated Party Expendatures",
#                                  "Labor PAC Contributions",
#                                  "Direct Labor PAC Contributions",
#                                  "In-kind Labor PAC Contributions",
#                                  "Independent Labor PAC Expendatures",
#                                  "Ideological PAC Contributions",
#                                  "Direct Ideological PAC Contributions",
#                                  "In-kind Ideological PAC Contributions",
#                                  "Independent Ideological PAC Expendatures",
#                                  "Coordinated Ideological PAC Expendautres",
#                                  "Leadership PAC Contributions",
#                                  "Total Individual Contributions",
#                                  "Large Individual Contributions",
#                                  "Small Individual Contributions",
#                                  "Individual Ideological Contributions",
#                                  "Individual Candidate Contributions",
#                                  "Individual Party Contributions",
#                                  "Self-financing", 
#                                  "Individual Business Contributions",
#                                  "Individual Labor Contributions",
#                                  "Net Spent", "Net Receipts", "Vote Percentage",
#                                  "Voting Age Pop. Turnout", 
#                                  "Median Household Income",
#                                  "% District Vote Share for Clinton in 2016",
#                                  "% District Democratic Vote Share in 2016 Congressional Elections",
#                                  "% Asian Pop. in District", 
#                                  "% White Pop. in District",
#                                  "% Black Pop. in District",
#                                  "% Latino Pop. in District",
#                                  "% District Pop. with Bachelors Degree",
#                                  "Total Voting Age District Pop.",
#                                  "Log(Median Household Income)",
#                                  "Log(Total Voting Age District Pop.)",
#                                  "New Member",
#                                  "Incumbent"))
# }
Next