PACs and Small Dollar Donations: Data Cleaning
Environment Preperation
This section clears the current working environment and loads the packages used to visualize the data. I also create the function comma()
to format any in-line output values to have thousands separators and only two digits.
# Clear Environment -----------------------------------------------------------
rm(list = ls())
# Load Packages ---------------------------------------------------------------
packages <- c("tidyverse", "stargazer", "foreign", "sqldf", "readxl",
"openintro", "lubridate")
lapply(packages, require, character.only = TRUE)
# Inline Formatting -----------------------------------------------------------
comma <- function(x) format(x, digits = 2, big.mark = ",")
# Set Global Chunk Options ----------------------------------------------------
knitr::opts_chunk$set(
echo = TRUE,
warning = FALSE,
message = FALSE,
comment = "##",
R.options = list(width = 70)
)
Legislator Data
leg.data <- read_csv("Data/Raw Data/house_roster.csv")
leg.data <-
leg.data %>%
filter(congress == 116) %>%
select(icpsr, opensecrets_id, freshman, cand_type)
PAC Pledge Data
Data on the 2018 Congressional candidates, whether or not they pledged to reject PAC money, and their contribution totals was compiled by OpenSecrets.org and is available on Google Drive here.
members.2018.data.raw <- read_xlsx("Data/Raw Data/OpenSecrets.org _ No Corp PAC Members.xlsx",
skip = 1,
sheet = "All Cands 2018")
members.2018.data <-
members.2018.data.raw %>%
mutate(NoCorpPACs = as_factor(NoCorpPACs),
Party = as_factor(Party),
Party = relevel(Party, ref = "I")) %>%
rename(new_member = `New Member?`,
no_corp_pacs = NoCorpPACs,
opensecrets_id = `Candidate ID`,
candidae_name = CRPName,
party = Party,
district = District,
gen_election_result = `General Elec Result`,
vote_pct = VotePercent,
tot_pac_money = `Total PAC Money`,
num_pac_contribs = `Number of PAC Contributions`,
business_pacs = `Business PACs`,
labor_pacs = `Labor PACs`,
ideological_pacs = `Ideological PACs`,
leadership_pacs = `Lead PACs`,
other_pacs = `Other PACs`,
net_receipts = `Net Receipts`,
net_indiv_total = `Net Individual Total`,
candidate_contribs = `Candidate Contributions`,
large_indiv = `Large Indivs`,
small_indiv = `Small Indivs`,
net_spent = `Net Spent`) %>%
separate(candidae_name, into = c("first_name", "last_name"), extra = "drop") %>%
mutate(first_initial = str_sub(first_name, start = 1L, end = 1L),
last_stub = str_sub(last_name, start = 1L, end = 3L))
members.data <-
members.2018.data %>%
filter(party == "D") %>%
filter(!is.na(gen_election_result))
Merge with Legislator Data
members.data <-
left_join(members.data, leg.data, by = c("opensecrets_id"))
#write_csv(members.data, file = "Data/Raw Data/members_data.csv")
# read in complete data set
member.data <- read_csv("Data/Raw Data/members_data_working.csv")
# identify chambers
member.data <-
member.data %>%
mutate(chamber = ifelse(str_detect(district, "S1") |
str_detect(district, "S2"), "Senate", "House"))
Demographic Data
The majority of demographic data used was compiled by Daily Kos Elections and is posted on Googe Drive here. Data on the total voting age population was collected from the US Census using the R package tidycensus
.
demo.data.raw <- read_excel("Data/Raw Data/Demographic Data/115th Congress Members Guide with Elections and Demographic Data by District.xlsx",
sheet = "House - Working")
demo.data <-
demo.data.raw %>%
mutate(district_code = str_replace(district_code, pattern = "-", replacement = ""),
district_code = ifelse(district == "Alaska At-Large", "AK01", district_code),
district_code = ifelse(district == "Delaware At-Large", "DE01", district_code),
district_code = ifelse(district == "Montana At-Large", "MT01", district_code),
district_code = ifelse(district == "North Dakota At-Large", "ND01", district_code),
district_code = ifelse(district == "South Dakota At-Large", "SD01", district_code),
district_code = ifelse(district == "Vermont At-Large", "VT01", district_code),
district_code = ifelse(district == "Wyoming At-Large", "WY01", district_code)) %>%
rename(state_dist = district_code)
# total voting age pop
library(tidycensus)
v18 <- load_variables(year = 2017, dataset = "acs5/subject", cache = TRUE)
voting.age <- get_acs(geography = "congressional district",
variables = "S0101_C01_026",
key = "b52b951af88427dcca3cee53db4fb1f76da8009d")
library(openintro)
voting_age <-
voting.age %>%
rename(tot_voting_age = estimate) %>%
separate(NAME,
into = c("district", "state"),
sep = ",",
extra = "merge") %>%
mutate(district = str_replace(district, "\\(116th Congress\\)", ""),
district = str_replace(district, "Congressional District ", ""),
district = str_trim(district, side = "both"),
district = str_pad(district, width = 2, side = "left", pad = "0"),
state = str_trim(state, side = "both"),
state_dist = paste(state2abbr(state), district, sep = ""),
state_dist = str_replace(state_dist, pattern = "\\(at Large\\)", "01")) %>%
dplyr::select(-variable, -moe, -GEOID, -district, -state)
demo.data <- left_join(demo.data, voting_age, by = c("state_dist"))
Merge Legislator Data with Demographic Data
sum(duplicated(members.2018.data[ , c("district")]))
## [1] 0
sum(duplicated(demo.data[ , c("district")]))
## [1] 0
member.data <-
left_join(member.data, demo.data, by = c("district" = "state_dist"))
Election Data
Data on each candidates’ total number of votes for the 2018 midterm election was collected from the MIT Election Data + Science Lab.
# House Elections
election.data.raw <-
read_csv("Data/Raw Data/Election Data/1976-2018-house.csv") %>%
filter(year == 2018) %>%
filter(stage == "gen")
election.data <-
election.data.raw %>%
mutate(district = str_pad(district, width = 2, side = "left", pad = "0"),
vote_pct = candidatevotes / totalvotes,
first_initial = str_sub(candidate, start = 1L, end = 1L)) %>%
filter(!is.na(candidate)) %>%
separate(candidate, into = c("first_name", "last_name"), extra = "drop") %>%
mutate(last_stub = str_sub(last_name, start = 1L, end = 3L),
state_dist = paste(state_po, district, sep = ""),
party = case_when(party == "democrat" ~ "D",
party == "republican" ~ "R")) %>%
filter(!is.na(party)) %>%
dplyr::select(state_dist, party, first_initial, first_name, last_name, last_stub,
candidatevotes, totalvotes, office)
election.data <-
election.data %>%
mutate(state_dist = ifelse(state_dist == "AK00", "AK01", state_dist),
state_dist = ifelse(state_dist == "DE00", "DE01", state_dist),
state_dist = ifelse(state_dist == "MT00", "MT01", state_dist),
state_dist = ifelse(state_dist == "ND00", "ND01", state_dist),
state_dist = ifelse(state_dist == "SD00", "SD01", state_dist),
state_dist = ifelse(state_dist == "VT00", "VT01", state_dist),
state_dist = ifelse(state_dist == "WY00", "WY01", state_dist))
Merge Election Data with Contribution Data
# house elections
sum(duplicated(election.data[ , c("state_dist", "party", "first_initial", "last_stub")]))
## [1] 0
sum(duplicated(member.data[ , c("district", "party", "first_initial", "last_stub")]))
## [1] 0
member.data <-
left_join(member.data, election.data,
by = c("district" = "state_dist", "party", "first_initial", "last_stub"))
member.data <-
member.data %>%
mutate(new_member = as_factor(new_member),
party = as_factor(party),
vap_turnout = candidatevotes / tot_voting_age) %>%
dplyr::select(opensecrets_id, first_name.x, last_name.x, district, party,
new_member, no_corp_pacs, tot_pac_money, num_pac_contribs,
business_pacs, labor_pacs, ideological_pacs, leadership_pacs,
other_pacs, net_receipts, net_indiv_total, candidate_contribs,
large_indiv, small_indiv, net_spent, vote_pct, vap_turnout,
median_hh_income, pct_clinton_16, pct_obama_12,
pct_dem_house_16,pct_dem_house_14, pct_asian, pct_white,
pct_black, pct_latino, pct_bachelors, tot_voting_age, chamber,
last_stub, first_initial) %>%
rename(first_name = first_name.x,
last_name = last_name.x)
Contribution Data
This contribution data come from OpenSecrets.org.
Individual Contributions
# Industry Categories
industry.cats <- read_excel("Data/Raw Data/CRP_industry_codes.xlsx")
industry.cats <-
industry.cats %>%
filter(str_detect(Catcode, "0000")) %>%
mutate(industry_letter = str_sub(Catcode, start = 1L, end = 1L)) %>%
select(-Catcode, -Catorder)
# Individual Contribution data
indiv.contrib.data.raw <-
read_delim("Data/Raw Data/indivs18.txt",
delim = ",",
quote = "|",
col_names = FALSE,
num_threads = 4) %>%
rename(year = X1,
fec_trans_id = X2,
donor_id = X3,
contrib_name = X4,
receipt_id = X5,
donor_org_name = X6,
standard_org = X7,
industry_code = X8,
contrib_date = X9,
contrib_amount = X10,
contrib_city = X12,
contrib_state = X13,
contrib_zip = X14,
transaction_type = X16) %>%
mutate(contrib_type = "Individual",
contrib_date = str_replace(contrib_date, ",,", ","),
industry_code = str_to_upper(industry_code),
industry_letter = str_sub(industry_code, start = 1L, end = 1L)) %>%
filter(str_detect(industry_code, pattern = "^Z4", negate = TRUE)) %>%
filter(str_detect(industry_code, pattern = "^Z9", negate = TRUE)) %>%
filter(str_detect(industry_code, pattern = "^z4", negate = TRUE)) %>%
filter(str_detect(industry_code, pattern = "^z9", negate = TRUE)) %>%
filter(str_detect(receipt_id, "^N")) %>%
separate(contrib_date, into = c("drop", "contrib_date", "contrib_amount"), sep = ",") %>%
mutate(contrib_amount, as.numeric(contrib_amount),
contrib_data = mdy(contrib_date))
# code transaction types
indiv.contribs <-
indiv.contrib.data.raw %>%
mutate(transaction_type = factor(transaction_type,
levels = c("10", "11", "15 ", "15E", "15I", "15J", "15T",
"19J", "20Y", "22Y"),
labels = c("Contrib. to Indp. Exp. Cmte.",
"Native American Tribe Contribution",
"Contrib. to Political Cmte.",
"Earmarked Contrib. to Political Cmte.",
"Earmarked Contrib. Check",
"Pct. Recipient Contrib. to Joint Fund. Cmte.",
"Earmarked Contrib. Treasury",
"Pct. Recipient Contrib. Electioneering",
"Nonfederal Refund",
"Contrib. Refund"))) %>%
filter(str_detect(transaction_type, "Contrib. to Indp. Exp. Cmte.", negate = TRUE)) %>%
filter(str_detect(transaction_type, "Pct. Recipient Contrib. to Joint Fund. Cmte.",
negate = TRUE)) %>%
filter(str_detect(transaction_type, "Pct. Recipient Contrib. Electioneering",
negate = TRUE)) %>%
filter(str_detect(transaction_type, "Nonfederal Refund", negate = TRUE)) %>%
filter(str_detect(transaction_type, "Contrib. Refund", negate = TRUE))
# filter out non-individual recipients
indiv.contrib.data <-
indiv.contrib.data.raw %>%
filter(str_detect(receipt_id, "^N"))
# aggregate data to recepient-level
indiv.contrib.data <-
indiv.contrib.data %>%
mutate(fec_trans_id = as.character(fec_trans_id),
contrib_amount = as.numeric(contrib_amount),
contrib_date = mdy(contrib_date)) %>%
group_by(receipt_id, industry_code) %>%
summarise(year = first(year),
contrib_amount = sum(contrib_amount)) %>%
mutate(contrib_type = "Individual") %>%
arrange(receipt_id, industry_code, year)
# remove non-contributions
indiv.contrib.data <-
indiv.contrib.data %>%
mutate(industry_code = str_to_upper(industry_code),
industry_letter = str_sub(industry_code, start = 1L, end = 1L)) %>%
filter(industry_code != "Z9100") %>%
filter(industry_code != "Z9500") %>%
filter(industry_code != "Z9600") %>%
filter(industry_code != "Z9700") %>%
filter(industry_code != "Z9800") %>%
filter(industry_code != "Z9999") %>%
mutate(industry_letter = ifelse(industry_code == "Z1000" |
industry_code == "Z1100" |
industry_code == "Z1200" |
industry_code == "Z1300" |
industry_code == "Z1400", "CC", industry_letter),
industry_letter = ifelse(industry_code == "Z4100" |
industry_code == "Z4200" |
industry_code == "Z4300" |
industry_code == "Z4400" |
industry_code == "Z4500", "JCC", industry_letter),
industry_letter = ifelse(industry_code == "Z5000" |
industry_code == "Z5100" |
industry_code == "Z5200" |
industry_code == "Z5300", "PC", industry_letter),
industry_letter = ifelse(industry_code == "Z9000", "Self-finance", industry_letter)) %>%
group_by(receipt_id, industry_letter) %>%
summarise(contrib_amount = sum(contrib_amount)) %>%
arrange(receipt_id, industry_letter)
## Merge with contribution data
indiv.contrib.data <- left_join(
indiv.contrib.data,
industry.cats,
by = c("industry_letter")
)
indiv.contrib.data <-
indiv.contrib.data %>%
mutate(Catname = ifelse(industry_letter == "Z", "Committees", Catname)) %>%
rename(industry_name = Catname,
industry = Industry,
sector = Sector,
sector_long = `Sector Long`)
indiv.contrib.data <-
indiv.contrib.data %>%
mutate(industry_name = ifelse(industry_letter == "Self-finance",
"indiv_self_finance",
industry_name),
industry_name = ifelse(industry_letter == "CC",
"indiv_cand_committee",
industry_name),
industry_name = ifelse(industry_letter == "JCC",
"indiv_jnt_cand_committee",
industry_name),
industry_name = ifelse(industry_letter == "PC",
"indiv_party_committee",
industry_name))
sum(duplicated(indiv.contrib.data[ , c("receipt_id", "industry_letter")]))
## [1] 0
indiv.contrib.data <-
indiv.contrib.data %>%
filter(!is.na(industry_name)) %>%
select(receipt_id, industry_name, contrib_amount) %>%
spread(key = industry_name, value = contrib_amount) %>%
rename(indiv_ag = Agriculture,
indiv_comms_electric = `Communications & Electronics`,
indiv_const_pub_wrks = `Construction & Public Works`,
indiv_defense = Defense,
indiv_engy_res_envrmt = `Energy, Natural Resources and Environment`,
indiv_fncs_ins_real = `Finance, Insurance & Real Estate`,
indiv_commerce = `General commerce`,
indiv_hlth_edu_human = `Health, Education & Human Resources`,
indiv_ideological = `Ideological & Single Issue PACs`,
indiv_labor = `Labor Unions`,
indiv_legal = `Legal Services`,
indiv_manu = Manufacturing,
indiv_other = Other,
indiv_trans = Transportation,
indiv_unknown = Unknown) %>%
mutate_at(vars(indiv_ag:indiv_unknown), funs(ifelse(is.na(.), 0, .))) %>%
arrange(receipt_id)
indiv.contrib.data <-
indiv.contrib.data %>%
mutate(indiv_biz = sum(indiv_ag, indiv_comms_electric, indiv_const_pub_wrks,
indiv_defense, indiv_engy_res_envrmt, indiv_legal,
indiv_fncs_ins_real, indiv_commerce, indiv_manu,
indiv_hlth_edu_human, indiv_trans)) %>%
select(receipt_id, indiv_biz, indiv_labor, indiv_ideological,
indiv_cand_committee, indiv_party_committee)
PAC Contributions
## 2018 - Congress 116
pac.contrib.data.raw <-
read_delim("Data/Raw Data/pacs18.txt",
delim = ",",
quote = "|",
col_names = FALSE) %>%
rename(year = X1,
fec_trans_id = X2,
pac_id = X3,
receipt_id = X4,
industry_code = X7,
trans_type = X8,
direct_indirect = X9,
contrib_date = X6,
contrib_amount = X5) %>%
select(year, fec_trans_id, pac_id, receipt_id, industry_code, trans_type,
direct_indirect, contrib_date, contrib_amount) %>%
# separate(contrib_date, into = c("drop", "contrib_amount", "contrib_date"), sep = ",") %>%
mutate(fec_trans_id = as.character(fec_trans_id),
contrib_amount = as.numeric(contrib_amount),
contrib_date = mdy(contrib_date),
trans_type = factor(trans_type,
levels = c("24A", "24C", "24E", "24F", "24K", "24N", "24Z"),
labels = c("independent expenditure against the candidate",
"coordinated expenditure",
"independent expenditure for the candidate",
"communication cost for the candidate",
"direct contribution",
"communication cost against the candidate",
"in kind contribution")),
direct_indirect = factor(direct_indirect,
levels = c("D", "I"),
labels = c("Direct", "Indirect"))) %>%
mutate(contrib_type = "PAC") %>%
filter(trans_type == "independent expenditure for the candidate" |
trans_type == "direct contribution" |
trans_type == "in kind contribution" |
trans_type == "coordinated expenditure") %>%
arrange(receipt_id, industry_code, year)
# filter out non-individual recipients
pac.contrib.data <-
pac.contrib.data.raw %>%
filter(str_detect(receipt_id, "^N"))
pac.contrib.data <-
pac.contrib.data.raw %>%
mutate(industry_code = str_to_upper(industry_code),
industry_letter = str_sub(industry_code, start = 1L, end = 1L)) %>%
filter(industry_code != "Z9100") %>%
filter(industry_code != "Z9500") %>%
filter(industry_code != "Z9600") %>%
filter(industry_code != "Z9700") %>%
filter(industry_code != "Z9800") %>%
filter(industry_code != "Z9999") %>%
mutate(industry_letter = ifelse(industry_code == "Z1000" |
industry_code == "Z1100" |
industry_code == "Z1200" |
industry_code == "Z1300" |
industry_code == "Z1400", "pac_cand_committee", industry_letter),
industry_letter = ifelse(industry_code == "Z4100" |
industry_code == "Z4200" |
industry_code == "Z4300" |
industry_code == "Z4400" |
industry_code == "Z4500", "pac_jnt_cand_committee", industry_letter),
industry_letter = ifelse(industry_code == "Z5000" |
industry_code == "Z5100" |
industry_code == "Z5200" |
industry_code == "Z5300", "pac_party_committee", industry_letter),
industry_letter = ifelse(industry_code == "Z9000", "Self-finance", industry_letter)) %>%
group_by(receipt_id, industry_letter, trans_type) %>%
summarise(contrib_amount = sum(contrib_amount)) %>%
arrange(receipt_id, industry_letter)
## Merge with contribution data
pac.contrib.data <- left_join(
pac.contrib.data,
industry.cats,
by = c("industry_letter")
)
pac.contrib.data <-
pac.contrib.data %>%
rename(industry_name = Catname,
industry = Industry,
sector = Sector,
sector_long = `Sector Long`)
sum(duplicated(pac.contrib.data[ , c("receipt_id", "industry_letter", "trans_type")]))
## [1] 0
pac.contrib.data <-
pac.contrib.data %>%
filter(!is.na(industry_letter)) %>%
select(receipt_id, contrib_amount, industry_letter, trans_type) %>%
unite(col = "group", trans_type, industry_letter, sep = "-") %>%
spread(key = group, value = contrib_amount) %>%
rename(dir_pac_ag = `direct contribution-A`,
dir_pac_comms_electric = `direct contribution-C`,
dir_pac_const_pub_wrks = `direct contribution-B`,
dir_pac_defense = `direct contribution-D`,
dir_pac_engy_res_envrmt = `direct contribution-E`,
dir_pac_fncs_ins_real = `direct contribution-F`,
dir_pac_commerce = `direct contribution-G`,
dir_pac_hlth_edu_human = `direct contribution-H`,
dir_pac_ideological = `direct contribution-J`,
dir_pac_labor = `direct contribution-L`,
dir_pac_legal = `direct contribution-K`,
dir_pac_manu = `direct contribution-M`,
dir_pac_other = `direct contribution-X`,
dir_pac_trans = `direct contribution-T`,
dir_pac_unknown = `direct contribution-Y`,
dir_pac_cand_committee = `direct contribution-pac_cand_committee`,
dir_pac_party_committee = `direct contribution-pac_party_committee`,
ik_pac_ag = `in kind contribution-A`,
ik_pac_comms_electric = `in kind contribution-C`,
ik_pac_const_pub_wrks = `in kind contribution-B`,
ik_pac_defense = `in kind contribution-D`,
ik_pac_engy_res_envrmt = `in kind contribution-E`,
ik_pac_fncs_ins_real = `in kind contribution-F`,
ik_pac_commerce = `in kind contribution-G`,
ik_pac_hlth_edu_human = `in kind contribution-H`,
ik_pac_ideological = `in kind contribution-J`,
ik_pac_labor = `in kind contribution-L`,
ik_pac_legal = `in kind contribution-K`,
ik_pac_manu = `in kind contribution-M`,
ik_pac_other = `in kind contribution-X`,
ik_pac_trans = `in kind contribution-T`,
ik_pac_cand_committee = `in kind contribution-pac_cand_committee`,
ik_pac_party_committee = `in kind contribution-pac_party_committee`,
ind_pac_engy_res_envrmt = `independent expenditure for the candidate-E`,
ind_pac_fncs_ins_real = `independent expenditure for the candidate-F`,
ind_pac_commerce = `independent expenditure for the candidate-G`,
ind_pac_hlth_edu_human = `independent expenditure for the candidate-H`,
ind_pac_ideological = `independent expenditure for the candidate-J`,
ind_pac_labor = `independent expenditure for the candidate-L`,
ind_pac_manu = `independent expenditure for the candidate-M`,
ind_pac_unknown = `independent expenditure for the candidate-Y`,
ind_pac_party_committee = `independent expenditure for the candidate-pac_party_committee`,
ce_pac_ideological= `coordinated expenditure-J`,
ce_pac_legal = `coordinated expenditure-L`,
ce_pac_party_committee = `coordinated expenditure-pac_party_committee`) %>%
mutate_at(vars(ce_pac_ideological:ind_pac_unknown), funs(ifelse(is.na(.), 0, .))) %>%
arrange(receipt_id)
pac.contrib.data <-
pac.contrib.data %>%
mutate(dir_business_pacs = sum(dir_pac_ag, dir_pac_comms_electric,
dir_pac_const_pub_wrks, dir_pac_defense,
dir_pac_engy_res_envrmt, dir_pac_fncs_ins_real,
dir_pac_commerce, dir_pac_hlth_edu_human,
dir_pac_legal, dir_pac_manu, dir_pac_other,
dir_pac_trans, dir_pac_unknown,
na.rm = TRUE),
in_kind_business_pacs = sum(ik_pac_ag, ik_pac_commerce,
ik_pac_const_pub_wrks, ik_pac_defense,
ik_pac_engy_res_envrmt,
ik_pac_fncs_ins_real, ik_pac_commerce,
ik_pac_hlth_edu_human, ik_pac_legal,
ik_pac_manu, ik_pac_other, ik_pac_trans,
na.rm = TRUE),
ind_business_pacs = sum(ind_pac_engy_res_envrmt, ind_pac_fncs_ins_real,
ind_pac_commerce, ind_pac_hlth_edu_human,
ind_pac_manu, ind_pac_unknown,
na.rm = TRUE)) %>%
select(receipt_id, dir_business_pacs, in_kind_business_pacs,
ind_business_pacs, dir_pac_cand_committee, dir_pac_party_committee,
ik_pac_party_committee, ind_pac_party_committee,
ce_pac_party_committee, dir_pac_labor, ik_pac_labor, ind_pac_labor,
dir_pac_ideological, ik_pac_ideological, ind_pac_ideological,
ce_pac_ideological)
Combine Contribution Data
# Combine data
contrib.data <- left_join(
indiv.contrib.data,
pac.contrib.data,
by = c("receipt_id")
)
save(contrib.data, file = "Data/Clean Data/contribution_data.Rda")
Merge with Legislator Data
# load clean contribution data
load("Data/Raw Data/contrib_data.Rda")
member.data <-
left_join(member.data, contrib.data, by = c("opensecrets_id" = "receipt_id"))
Select Final Data Set
estimation.data <-
member.data %>%
filter(party == "D") %>%
select(district, no_corp_pacs, new_member, tot_pac_money, num_pac_contribs,
business_pacs, ideological_pacs, in_kind_business_pacs, ind_against_business_pacs,
ind_for_business_pacs, party_exp, leadership_pacs, net_indiv_total,
large_indiv, small_indiv, indiv_ideological, indiv_cand_committee,
indiv_party_committee, indiv_biz, indiv_labor,
net_spent, net_receipts, vote_pct, vap_turnout, median_hh_income,
pct_clinton_16, pct_dem_house_16, pct_asian, pct_white, pct_black,
pct_latino, pct_bachelors, tot_voting_age, chamber) %>%
mutate(vote_pct = vote_pct * 100,
vap_turnout = vap_turnout * 100,
log_median_hh_income = log(median_hh_income),
log_tot_voting_age = log(tot_voting_age))
save(estimation.data, file = "Data/Clean Data/estimation_data2.Rda")
Final Data Set Summary Statistics
psych::describeBy(estimation.data, group = estimation.data$no_corp_pacs)
##
## Descriptive statistics by group
## group: NO
## vars n mean sd median
## district* 1 211 106.00 61.05 106.00
## no_corp_pacs* 2 211 1.00 0.00 1.00
## new_member* 3 211 1.14 0.35 1.00
## tot_pac_money 4 211 709462.41 491163.01 573044.00
## num_pac_contribs 5 211 395.00 248.65 340.00
## business_pacs 6 211 471328.24 397024.94 369249.00
## ideological_pacs 7 211 45221.90 47966.69 24235.00
## in_kind_business_pacs 8 211 599.35 1562.72 58.00
## ind_against_business_pacs 9 211 0.47 6.88 0.00
## ind_for_business_pacs 10 211 4838.84 42991.98 0.00
## party_exp 11 211 3198.88 24028.13 0.00
## leadership_pacs 12 211 55338.02 94753.56 14500.00
## net_indiv_total 13 211 1809157.48 3516390.94 630577.58
## large_indiv 14 211 1408868.27 2463015.29 574789.37
## small_indiv 15 211 400246.03 1132144.74 76499.88
## indiv_ideological 16 211 149021.16 358058.50 37935.00
## indiv_cand_committee 17 211 36.79 266.56 0.00
## indiv_party_committee 18 211 4.74 68.84 0.00
## indiv_biz 19 211 1040628.14 1911582.76 403785.00
## indiv_labor 20 211 1257.95 4670.65 0.00
## net_spent 21 211 2334356.80 3920833.74 1134291.71
## net_receipts 22 211 2818376.65 4454548.66 1373799.04
## vote_pct 23 211 64.99 19.01 65.20
## vap_turnout 24 137 28.49 7.54 29.28
## median_hh_income 25 191 63081.43 19196.73 60756.00
## pct_clinton_16 26 191 62.51 12.70 60.57
## pct_dem_house_16 27 191 67.33 17.29 66.70
## pct_asian 28 191 6.41 7.54 3.40
## pct_white 29 191 57.00 23.64 59.74
## pct_black 30 191 16.64 18.19 8.79
## pct_latino 31 191 17.73 18.58 10.66
## pct_bachelors 32 191 33.38 11.96 32.50
## tot_voting_age 33 191 580685.53 41062.65 577328.00
## chamber* 34 211 1.09 0.28 1.00
## log_median_hh_income 35 191 11.01 0.29 11.01
## log_tot_voting_age 36 191 13.27 0.07 13.27
## trimmed mad min max
## district* 106.00 78.58 1.00 211.00
## no_corp_pacs* 1.00 0.00 1.00 1.00
## new_member* 1.05 0.00 1.00 2.00
## tot_pac_money 628616.33 326357.32 17000.00 2673386.00
## num_pac_contribs 361.82 195.70 9.00 1346.00
## business_pacs 413386.73 296666.78 0.00 2216109.00
## ideological_pacs 36080.21 21512.53 0.00 263906.00
## in_kind_business_pacs 288.99 85.99 -4350.00 13569.00
## ind_against_business_pacs 0.00 0.00 0.00 100.00
## ind_for_business_pacs 0.00 0.00 0.00 571700.00
## party_exp 0.00 0.00 0.00 283737.00
## leadership_pacs 31648.14 18532.50 0.00 467700.00
## net_indiv_total 966185.07 575054.05 27487.08 25591510.53
## large_indiv 804278.58 526129.30 23050.00 15346739.43
## small_indiv 136926.18 91801.87 813.12 10244760.10
## indiv_ideological 70052.61 49570.73 0.00 3353000.00
## indiv_cand_committee 0.00 0.00 0.00 3500.00
## indiv_party_committee 0.00 0.00 0.00 1000.00
## indiv_biz 592428.17 366497.24 19765.00 12825778.00
## indiv_labor 342.57 0.00 0.00 53775.00
## net_spent 1394563.31 721300.32 173085.21 26747405.61
## net_receipts 1747834.45 865030.40 232915.61 29044284.32
## vote_pct 66.19 13.34 0.00 100.00
## vap_turnout 28.76 6.28 0.00 48.82
## median_hh_income 61152.41 16963.91 29234.00 125790.00
## pct_clinton_16 62.15 12.97 31.03 93.77
## pct_dem_house_16 67.19 13.91 0.00 100.00
## pct_asian 4.92 3.23 0.37 56.17
## pct_white 57.59 30.22 3.49 95.42
## pct_black 13.48 8.39 0.63 66.44
## pct_latino 14.15 10.81 0.92 78.98
## pct_bachelors 32.67 11.71 9.30 72.20
## tot_voting_age 578898.86 34033.08 427435.00 763472.00
## chamber* 1.00 0.00 1.00 2.00
## log_median_hh_income 11.00 0.29 10.28 11.74
## log_tot_voting_age 13.27 0.06 12.97 13.55
## range skew kurtosis se
## district* 210.00 0.00 -1.22 4.20
## no_corp_pacs* 0.00 NaN NaN 0.00
## new_member* 1.00 2.09 2.38 0.02
## tot_pac_money 2656386.00 1.70 3.13 33813.06
## num_pac_contribs 1337.00 1.30 1.82 17.12
## business_pacs 2216109.00 1.60 3.12 27332.32
## ideological_pacs 263906.00 1.82 3.38 3302.16
## in_kind_business_pacs 17919.00 4.64 30.26 107.58
## ind_against_business_pacs 100.00 14.32 204.03 0.47
## ind_for_business_pacs 571700.00 11.46 142.67 2959.69
## party_exp 283737.00 9.38 95.69 1654.16
## leadership_pacs 467700.00 2.50 6.06 6523.10
## net_indiv_total 25564023.45 4.03 18.23 242078.34
## large_indiv 15323689.43 3.52 13.20 169560.97
## small_indiv 10243946.98 5.41 35.06 77940.06
## indiv_ideological 3353000.00 5.39 36.25 24649.76
## indiv_cand_committee 3500.00 10.93 133.81 18.35
## indiv_party_committee 1000.00 14.32 204.03 4.74
## indiv_biz 12806013.00 3.99 17.59 131598.78
## indiv_labor 53775.00 8.07 78.79 321.54
## net_spent 26574320.40 3.88 15.86 269921.33
## net_receipts 28811368.71 3.69 14.46 306663.78
## vote_pct 100.00 -1.48 4.20 1.31
## vap_turnout 48.82 -0.89 2.88 0.64
## median_hh_income 96556.00 0.84 0.26 1389.03
## pct_clinton_16 62.74 0.18 -0.43 0.92
## pct_dem_house_16 100.00 -0.27 1.34 1.25
## pct_asian 55.81 2.85 11.42 0.55
## pct_white 91.93 -0.21 -1.14 1.71
## pct_black 65.81 1.33 0.39 1.32
## pct_latino 78.06 1.54 1.50 1.34
## pct_bachelors 62.90 0.51 0.06 0.87
## tot_voting_age 336037.00 0.53 2.27 2971.19
## chamber* 1.00 2.95 6.72 0.02
## log_median_hh_income 1.46 0.20 -0.51 0.02
## log_tot_voting_age 0.58 0.10 2.15 0.01
## ----------------------------------------------------
## group: YES
## vars n mean sd median
## district* 1 48 24.50 14.00 24.50
## no_corp_pacs* 2 48 1.00 0.00 1.00
## new_member* 3 48 1.73 0.45 2.00
## tot_pac_money 4 48 235689.25 210827.95 207816.50
## num_pac_contribs 5 48 119.27 107.43 96.00
## business_pacs 6 48 47284.08 113392.45 6750.00
## ideological_pacs 7 48 60837.83 47760.99 59888.00
## in_kind_business_pacs 8 48 201.27 869.54 0.00
## ind_against_business_pacs 9 48 0.00 0.00 0.00
## ind_for_business_pacs 10 48 0.00 0.00 0.00
## party_exp 11 48 2237.00 15486.16 0.00
## leadership_pacs 12 48 61250.58 64450.14 61850.00
## net_indiv_total 13 48 4001322.39 4912004.29 2765514.13
## large_indiv 14 48 2764406.94 2471439.59 2178961.28
## small_indiv 15 48 1236914.83 2837615.74 537468.26
## indiv_ideological 16 48 298297.29 299352.89 213393.00
## indiv_cand_committee 17 48 0.00 0.00 0.00
## indiv_party_committee 18 48 0.00 0.00 0.00
## indiv_biz 19 48 2019083.29 1622332.35 1737106.50
## indiv_labor 20 48 2177.69 2646.17 1175.00
## net_spent 21 48 4144274.62 3964656.57 3239642.83
## net_receipts 22 48 5239448.54 5796262.66 3963065.91
## vote_pct 23 48 58.23 14.86 53.50
## vap_turnout 24 30 27.23 7.61 26.55
## median_hh_income 25 44 68972.59 17808.41 69511.50
## pct_clinton_16 26 44 54.16 12.99 49.88
## pct_dem_house_16 27 44 53.88 21.01 45.43
## pct_asian 28 44 7.49 9.40 4.43
## pct_white 29 44 67.24 19.53 71.30
## pct_black 30 44 8.44 9.50 5.09
## pct_latino 31 44 14.03 14.75 7.51
## pct_bachelors 32 44 36.43 11.99 36.75
## tot_voting_age 33 44 574978.18 43770.18 573281.00
## chamber* 34 48 1.08 0.28 1.00
## log_median_hh_income 35 44 11.11 0.26 11.15
## log_tot_voting_age 36 44 13.26 0.08 13.26
## trimmed mad min max
## district* 24.50 17.79 1.00 48.00
## no_corp_pacs* 1.00 0.00 1.00 1.00
## new_member* 1.77 0.00 1.00 2.00
## tot_pac_money 207373.10 202354.88 0.00 967546.00
## num_pac_contribs 102.70 77.84 4.00 516.00
## business_pacs 16216.78 10007.55 0.00 479465.00
## ideological_pacs 57173.20 51358.01 0.00 184939.00
## in_kind_business_pacs 2.25 0.00 -116.00 5200.00
## ind_against_business_pacs 0.00 0.00 0.00 0.00
## ind_for_business_pacs 0.00 0.00 0.00 0.00
## party_exp 0.00 0.00 0.00 107293.00
## leadership_pacs 52394.98 70349.37 0.00 282880.00
## net_indiv_total 3082167.15 2289393.28 352468.68 29600511.44
## large_indiv 2369043.96 1843908.10 219166.18 11909673.44
## small_indiv 671508.69 548124.58 15227.27 19012867.30
## indiv_ideological 258620.42 264246.02 14739.00 1659605.00
## indiv_cand_committee 0.00 0.00 0.00 0.00
## indiv_party_committee 0.00 0.00 0.00 0.00
## indiv_biz 1819291.75 1552629.13 119428.00 8434746.00
## indiv_labor 1750.07 1742.05 0.00 11279.00
## net_spent 3454411.88 2922365.85 578179.05 20475491.00
## net_receipts 4153662.11 3272074.02 632548.22 34114039.97
## vote_pct 57.23 4.74 0.00 97.40
## vap_turnout 26.35 3.87 11.53 51.03
## median_hh_income 68313.86 18035.83 33995.00 121150.00
## pct_clinton_16 52.54 8.73 39.32 86.89
## pct_dem_house_16 51.97 11.33 0.00 100.00
## pct_asian 5.46 4.09 0.42 45.37
## pct_white 68.26 23.13 29.11 96.02
## pct_black 6.89 4.32 0.44 56.63
## pct_latino 11.48 7.29 0.96 58.66
## pct_bachelors 36.46 10.30 7.90 61.00
## tot_voting_age 575806.28 32506.75 424919.00 675408.00
## chamber* 1.00 0.00 1.00 2.00
## log_median_hh_income 11.12 0.26 10.43 11.70
## log_tot_voting_age 13.26 0.06 12.96 13.42
## range skew kurtosis se
## district* 47.00 0.00 -1.28 2.02
## no_corp_pacs* 0.00 NaN NaN 0.00
## new_member* 1.00 -1.00 -1.02 0.06
## tot_pac_money 967546.00 1.34 2.01 30430.39
## num_pac_contribs 512.00 1.57 2.77 15.51
## business_pacs 479465.00 2.77 6.33 16366.79
## ideological_pacs 184939.00 0.53 -0.52 6893.70
## in_kind_business_pacs 5316.00 4.67 22.02 125.51
## ind_against_business_pacs 0.00 NaN NaN 0.00
## ind_for_business_pacs 0.00 NaN NaN 0.00
## party_exp 107293.00 6.50 41.12 2235.23
## leadership_pacs 282880.00 1.49 2.85 9302.58
## net_indiv_total 29248042.76 3.46 14.04 708986.75
## large_indiv 11690507.26 1.83 3.78 356721.58
## small_indiv 18997640.03 5.25 29.35 409574.55
## indiv_ideological 1644866.00 2.08 6.58 43207.87
## indiv_cand_committee 0.00 NaN NaN 0.00
## indiv_party_committee 0.00 NaN NaN 0.00
## indiv_biz 8315318.00 1.48 3.15 234163.50
## indiv_labor 11279.00 1.53 1.88 381.94
## net_spent 19897311.95 2.08 5.14 572248.88
## net_receipts 33481491.75 3.01 11.13 836618.45
## vote_pct 97.40 -0.27 4.18 2.14
## vap_turnout 39.50 1.39 3.13 1.39
## median_hh_income 87155.00 0.45 0.30 2684.72
## pct_clinton_16 47.57 1.10 0.08 1.96
## pct_dem_house_16 100.00 0.57 0.18 3.17
## pct_asian 44.94 2.38 5.68 1.42
## pct_white 66.91 -0.51 -0.99 2.94
## pct_black 56.19 3.08 12.43 1.43
## pct_latino 57.70 1.47 1.11 2.22
## pct_bachelors 53.10 -0.10 -0.24 1.81
## tot_voting_age 250489.00 -0.49 2.08 6598.60
## chamber* 1.00 2.92 6.67 0.04
## log_median_hh_income 1.27 -0.29 -0.16 0.04
## log_tot_voting_age 0.46 -0.98 3.28 0.01
# no_pacs <- c("YES", "NO")
# titles <- c("Summary Statistics for Candidates who Reject PACs",
# "Summary Statistics for States who do not Reject PACs")
#
# for (i in 1:2) {
# estimation.data %>%
# filter(no_corp_pacs == no_pacs[i]) %>%
# fastDummies::dummy_cols(select_columns= c("party", "new_member")) %>%
# as.data.frame() %>%
# stargazer(.,
# type = "text",
# style = "apsr",
# title = titles[i],
# covariate.labels = c("Total PAC Money",
# "Number of PAC Contributors",
# "Business PAC Contributions",
# "Direct Business PAC Contributions",
# "In-kind Business PAC Contributions",
# "Independent Business PAC Expendatures",
# "Contributions from Candidates",
# "Direct Candidate Contributions",
# "Direct Party Contributions",
# "In-kind Party Contributions",
# "Independent Party Expendatures",
# "Coordinated Party Expendatures",
# "Labor PAC Contributions",
# "Direct Labor PAC Contributions",
# "In-kind Labor PAC Contributions",
# "Independent Labor PAC Expendatures",
# "Ideological PAC Contributions",
# "Direct Ideological PAC Contributions",
# "In-kind Ideological PAC Contributions",
# "Independent Ideological PAC Expendatures",
# "Coordinated Ideological PAC Expendautres",
# "Leadership PAC Contributions",
# "Total Individual Contributions",
# "Large Individual Contributions",
# "Small Individual Contributions",
# "Individual Ideological Contributions",
# "Individual Candidate Contributions",
# "Individual Party Contributions",
# "Self-financing",
# "Individual Business Contributions",
# "Individual Labor Contributions",
# "Net Spent", "Net Receipts", "Vote Percentage",
# "Voting Age Pop. Turnout",
# "Median Household Income",
# "% District Vote Share for Clinton in 2016",
# "% District Democratic Vote Share in 2016 Congressional Elections",
# "% Asian Pop. in District",
# "% White Pop. in District",
# "% Black Pop. in District",
# "% Latino Pop. in District",
# "% District Pop. with Bachelors Degree",
# "Total Voting Age District Pop.",
# "Log(Median Household Income)",
# "Log(Total Voting Age District Pop.)",
# "New Member",
# "Incumbent"))
# }