Stats Project Data
Stats Data Analysis
import pandas as pd
import matplotlib.pyplot as plt
def performAnalysis(csv: str, save_path: str, *,
                    title: str = 'Would you buy a new iPhone? - Unbiased',
                    no_label: str = 'No') -> None:
    """Summarize a survey CSV and plot buy/no-buy shares per user group.

    Reads the CSV, splits respondents into iPhone users and non-iPhone
    users, prints how many in each group answered 'Yes' to buying a new
    iPhone, and saves a stacked relative-frequency bar chart.

    NOTE(review): columns are selected by position -- index 2 is treated
    as "is an iPhone user" and index 1 as "would buy a new iPhone", both
    compared against the literal strings 'Yes' / 'No'. Confirm this
    matches the CSV layout.

    Args:
        csv: Path of the survey CSV file, passed to ``pd.read_csv``.
        save_path: Destination of the saved chart image.
        title: Chart title. The default reproduces the original plot.
        no_label: Legend label of the "no buy" bars.

    Raises:
        IndexError: If the CSV has fewer than three columns.
    """
    df = pd.read_csv(csv)

    # Positional column lookup (see NOTE in the docstring).
    iphone_users_col = df.columns[2]
    buy_new_iphone_col = df.columns[1]

    def summarize(group):
        """Return (buyers, total, buy_share, no_buy_share) for one group."""
        total = len(group)
        buyers = int((group[buy_new_iphone_col] == 'Yes').sum())
        if total == 0:
            # An empty group has no meaningful share; report zeros rather
            # than crash with ZeroDivisionError.
            return buyers, total, 0.0, 0.0
        buy_share = buyers / total
        # "No buy" is the complement, so any answer other than 'Yes'
        # counts as no-buy (same as the original computation).
        return buyers, total, buy_share, 1 - buy_share

    iphone_buyers, iphone_total, iphone_buy, iphone_no_buy = summarize(
        df[df[iphone_users_col] == 'Yes'])
    other_buyers, other_total, other_buy, other_no_buy = summarize(
        df[df[iphone_users_col] == 'No'])

    print(f"iPhone users who want to buy a new iPhone: {iphone_buyers} out of {iphone_total} total iPhone users.")
    print(f"Non-iPhone users who want to buy a new iPhone: {other_buyers} out of {other_total} total non-iPhone users.")
    print(
        f"iPhone Users: Buy - {iphone_buy:.2%}, No Buy - {iphone_no_buy:.2%}")
    print(
        f"Non-iPhone Users: Buy - {other_buy:.2%}, No Buy - {other_no_buy:.2%}")

    # Stacked relative-frequency chart: 'Yes' share at the bottom, the
    # complementary share stacked on top.
    categories = ['iPhone Users', 'Non-iPhone Users']
    buy_percentages = [iphone_buy, other_buy]
    no_buy_percentages = [iphone_no_buy, other_no_buy]
    width = 0.35
    x = list(range(len(categories)))

    # Use a dedicated figure so repeated calls never draw onto a figure
    # left over from a previous call.
    fig, ax = plt.subplots()
    try:
        ax.bar(x, buy_percentages, width, label='Yes', color='green')
        ax.bar(x, no_buy_percentages, width, label=no_label, color='red',
               bottom=buy_percentages)
        ax.set_xlabel('User Groups')
        ax.set_ylabel('Relative Frequency')
        ax.set_title(title)
        ax.set_xticks(x)
        ax.set_xticklabels(categories)
        ax.legend()

        # Save before show(): some backends discard the figure on show().
        fig.savefig(save_path)
        plt.show()
    finally:
        plt.close(fig)
# Run the analysis on the unbiased survey responses.
# NOTE(review): the CSV path is absolute and machine-specific.
performAnalysis(
    csv="/home/tirth/vscode/Mort-Pages-Personal/_data/CSV_Stats_Data/UnbiasSurvey.csv",
    save_path="UnbiasedDistribution.png",
)
iPhone users who want to buy a new iPhone: 20 out of 26 total iPhone users.
Non-iPhone users who want to buy a new iPhone: 2 out of 5 total non-iPhone users.
iPhone Users: Buy - 76.92%, No Buy - 23.08%
Non-iPhone Users: Buy - 40.00%, No Buy - 60.00%
import pandas as pd
import matplotlib.pyplot as plt


def performAnalysis(csv: str, save_path: str, *,
                    title: str = 'Would you buy a new iPhone? - Biased',
                    no_label: str = 'No!') -> None:
    """Summarize a survey CSV and plot buy/no-buy shares per user group.

    Reads the CSV, splits respondents into iPhone users and non-iPhone
    users, prints how many in each group answered 'Yes' to buying a new
    iPhone, and saves a stacked relative-frequency bar chart.

    NOTE(review): columns are selected by position -- index 2 is treated
    as "is an iPhone user" and index 1 as "would buy a new iPhone", both
    compared against the literal strings 'Yes' / 'No'. Confirm this
    matches the CSV layout.

    Args:
        csv: Path of the survey CSV file, passed to ``pd.read_csv``.
        save_path: Destination of the saved chart image.
        title: Chart title. The default reproduces the original plot.
        no_label: Legend label of the "no buy" bars.

    Raises:
        IndexError: If the CSV has fewer than three columns.
    """
    df = pd.read_csv(csv)

    # Positional column lookup (see NOTE in the docstring).
    iphone_users_col = df.columns[2]
    buy_new_iphone_col = df.columns[1]

    def summarize(group):
        """Return (buyers, total, buy_share, no_buy_share) for one group."""
        total = len(group)
        buyers = int((group[buy_new_iphone_col] == 'Yes').sum())
        if total == 0:
            # An empty group has no meaningful share; report zeros rather
            # than crash with ZeroDivisionError.
            return buyers, total, 0.0, 0.0
        buy_share = buyers / total
        # "No buy" is the complement, so any answer other than 'Yes'
        # counts as no-buy (same as the original computation).
        return buyers, total, buy_share, 1 - buy_share

    iphone_buyers, iphone_total, iphone_buy, iphone_no_buy = summarize(
        df[df[iphone_users_col] == 'Yes'])
    other_buyers, other_total, other_buy, other_no_buy = summarize(
        df[df[iphone_users_col] == 'No'])

    print(f"iPhone users who want to buy a new iPhone: {iphone_buyers} out of {iphone_total} total iPhone users.")
    print(f"Non-iPhone users who want to buy a new iPhone: {other_buyers} out of {other_total} total non-iPhone users.")
    print(
        f"iPhone Users: Buy - {iphone_buy:.2%}, No Buy - {iphone_no_buy:.2%}")
    print(
        f"Non-iPhone Users: Buy - {other_buy:.2%}, No Buy - {other_no_buy:.2%}")

    # Stacked relative-frequency chart: 'Yes' share at the bottom, the
    # complementary share stacked on top.
    categories = ['iPhone Users', 'Non-iPhone Users']
    buy_percentages = [iphone_buy, other_buy]
    no_buy_percentages = [iphone_no_buy, other_no_buy]
    width = 0.35
    x = list(range(len(categories)))

    # Use a dedicated figure so repeated calls never draw onto a figure
    # left over from a previous call.
    fig, ax = plt.subplots()
    try:
        ax.bar(x, buy_percentages, width, label='Yes', color='green')
        ax.bar(x, no_buy_percentages, width, label=no_label, color='red',
               bottom=buy_percentages)
        ax.set_xlabel('User Groups')
        ax.set_ylabel('Relative Frequency')
        ax.set_title(title)
        ax.set_xticks(x)
        ax.set_xticklabels(categories)
        ax.legend()

        # Save before show(): some backends discard the figure on show().
        fig.savefig(save_path)
        plt.show()
    finally:
        plt.close(fig)
# Run the analysis on the biased survey responses.
# NOTE(review): the CSV path is absolute and machine-specific.
performAnalysis(
    csv="/home/tirth/vscode/Mort-Pages-Personal/_data/CSV_Stats_Data/BiasSurvey.csv",
    save_path="BiasedDistribution.png",
)
iPhone users who want to buy a new iPhone: 20 out of 34 total iPhone users.
Non-iPhone users who want to buy a new iPhone: 1 out of 2 total non-iPhone users.
iPhone Users: Buy - 58.82%, No Buy - 41.18%
Non-iPhone Users: Buy - 50.00%, No Buy - 50.00%