import matplotlib.pyplot as plt
import pandas as pd

# Read the tab-separated ideology file into a DataFrame
dataset_path = "C:/Users/juanc/Downloads/aip_states_ideology_v2022a.tab"
data = pd.read_table(dataset_path)

# Preview the leading rows to see which columns are available
data.head()


# Count null entries in the MRP ideology estimate and its standard-error column
mrp_columns = ["mrp_ideology_se", "mrp_ideology"]
missing_values = data[mrp_columns].isna().sum()

missing_values

mrp_ideology_se    0
mrp_ideology       0
dtype: int64


# Mean mrp_ideology for each state, taken over all of that state's rows.
# reset_index() turns the grouped Series back into a two-column DataFrame
# ('state', 'mrp_ideology') so later cells can sort and plot it by column name.
ideology_by_state = data.groupby('state')['mrp_ideology']
average_ideology_per_state = ideology_by_state.mean().reset_index()

# Histogram of the per-state means (one observation per state, 20 bins).
# `plt` is matplotlib.pyplot, imported at the top of the file.
state_means = average_ideology_per_state['mrp_ideology']
plt.figure(figsize=(10, 6))
plt.hist(state_means, bins=20, color='lightcoral', edgecolor='black')
plt.xlabel('Average MRP Ideology')
plt.ylabel('Number of States')
plt.title('Distribution of Average MRP Ideology per State')
plt.tight_layout()
plt.show()


# Order states by ascending mean so the bars form a monotone ranking
sorted_states = average_ideology_per_state.sort_values(by='mrp_ideology')

# Tall figure: one horizontal bar per state
fig, ax = plt.subplots(figsize=(10, 15))
ax.barh(sorted_states['state'], sorted_states['mrp_ideology'], color='cadetblue')
ax.set_xlabel('Average MRP Ideology')
ax.set_ylabel('State')
ax.set_title('Average MRP Ideology per State across Years')
# Vertical guide lines only, to help read values off the x axis
ax.grid(axis='x', linestyle='--', alpha=0.7)
fig.tight_layout()
plt.show()


import numpy as np
import seaborn as sns

# Mean mrp_ideology across all rows sharing the same presidential_year
ideology_by_year = data.groupby('presidential_year')['mrp_ideology']
average_ideology_per_year = ideology_by_year.mean().reset_index()

# Line chart of the yearly means, with a marker on each year
plt.figure(figsize=(12, 7))
sns.lineplot(
    data=average_ideology_per_year,
    x='presidential_year',
    y='mrp_ideology',
    marker='o',
    color='dodgerblue',
)
plt.xlabel('Year')
plt.ylabel('Average MRP Ideology')
plt.title('Average MRP Ideology over Years')
# Horizontal guide lines only
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.tight_layout()
plt.show()


# Sample size against the standard error of the MRP ideology estimate
sample_size = data['sample_size']
ideology_se = data['mrp_ideology_se']

plt.figure(figsize=(10, 6))
# Raw points first, then a fitted regression line drawn on top without re-plotting the points
sns.scatterplot(x=sample_size, y=ideology_se, alpha=0.6)
sns.regplot(x=sample_size, y=ideology_se, scatter=False, color='red', line_kws={'lw':2})
plt.xlabel('Sample Size')
plt.ylabel('MRP Ideology Standard Error')
plt.title('Relationship between Sample Size and MRP Ideology Standard Error')
plt.grid(axis='both', linestyle='--', alpha=0.7)
plt.tight_layout()
plt.show()

# Pearson correlation between the two columns (Series.corr defaults to Pearson)
correlation_coefficient = sample_size.corr(ideology_se)
correlation_coefficient

-0.31877779833259184


# Joint scatter (with marginal distributions) of the MRP estimate against self-reported ideology
mrp_scores = data['mrp_ideology']
self_reported = data['self_ideology']
sns.jointplot(x=mrp_scores, y=self_reported, color='royalblue')

# y=1.02 lifts the figure-level title slightly above the top of the figure
plt.suptitle('Comparison of MRP Ideology and Self-reported Ideology', y=1.02)
plt.xlabel('MRP Ideology')
plt.ylabel('Self-reported Ideology')
plt.tight_layout()
plt.show()

<ipython-input-26-4d61dd4f668d>:6: UserWarning: The figure layout has changed to tight
  plt.tight_layout()

	state	presidential_year	abb	population_2020	fips	mrp_ideology_se	mrp_ideology	irt_ideology_unweighted	self_ideology	self_ideology_se	irt_ideology_unweighted_sd	irt_ideology_unweighted_se	sample_size	survey_period	demshare_pres
0	Alabama	2008	AL	5024279	1	0.064888	0.209469	0.214234	3.522654	0.018688	0.927312	0.016236	3262	2004-2011	0.391091
1	Alabama	2016	AL	5024279	1	0.036347	0.289731	0.234576	3.312552	0.021363	0.910887	0.017608	2676	2012-2016	0.356259
2	Alabama	2020	AL	5024279	1	0.036717	0.198308	0.208188	3.278374	0.011646	0.941574	0.009281	10293	2017-2021	0.370886
3	Alaska	2008	AK	733391	2	0.073391	0.167004	0.125482	3.297297	0.040185	1.008114	0.034887	835	2004-2011	0.389352
4	Alaska	2016	AK	733391	2	0.050497	0.196960	0.167457	3.225201	0.053345	1.055494	0.053175	394	2012-2016	0.416143

Basic analysis of U.S. political ideology by state

Juan Andrés Cabral