import matplotlib.pyplot as plt
%matplotlib inline
import pandas as pd
import numpy as np
import scipy.stats as stats
# Load the survey responses; later cells read its "Q2:Rolls" and "Q3:Bet" columns.
df = pd.read_csv(filepath_or_buffer="117612.csv")
CS 361 students answered the survey below.
Q1: What's your name? (student names removed for privacy)
Q2: We will roll a 6-sided die. Guess how many rolls it will take to see the first 6.
Q3: If the prize were $100, what's the most you would bet (in dollars) to play this game?
Q4: Have you ever been to Las Vegas?
# Side-by-side histograms of the raw survey answers: rolls on the left, bets on
# the right. Both panels use bin edges 0, 10, ..., 100, so answers outside
# that range are not drawn.
decade_edges = np.arange(0, 101, 10)

rolls_ax = plt.subplot(1, 2, 1)
rolls_ax.hist(df["Q2:Rolls"], bins=decade_edges, color="r", ec="k")
rolls_ax.set_xlabel("Rolls")
rolls_ax.set_ylabel("Count")
rolls_ax.set_title("Histogram of rolls")

bets_ax = plt.subplot(1, 2, 2)
bets_ax.hist(df["Q3:Bet"], bins=decade_edges, color="b", ec="k")
bets_ax.set_xlabel("Bet")
bets_ax.set_ylabel("Count")
bets_ax.set_title("Histogram of bet")

# NOTE(review): left=-0.5 / right=1.5 stretch the subplot region beyond the
# figure's usual 0-1 fraction, presumably to widen the panels inline - confirm.
plt.subplots_adjust(bottom=0, top=1, left=-0.5, right=1.5)
# Print the mean, standard deviation and variance of each answer column.
# pandas' std()/var() default to the sample (ddof=1) estimators.
for heading, column in (("ROLLS", "Q2:Rolls"), ("BETS", "Q3:Bet")):
    answers = df[column]
    print(heading)
    print("Mean:", answers.mean())
    print("Standard deviation:", answers.std())
    print("Variance:", answers.var())
# Convert each answer column to standard coordinates (subtract the mean, divide
# by the standard deviation), then plot the two results side by side.
rolls_raw = df["Q2:Rolls"]
bets_raw = df["Q3:Bet"]
rolls_standardized = (rolls_raw - rolls_raw.mean()) / rolls_raw.std()
bets_standardized = (bets_raw - bets_raw.mean()) / bets_raw.std()

rolls_ax = plt.subplot(1, 2, 1)
rolls_ax.hist(rolls_standardized, bins=10, color="r", ec="k")
rolls_ax.set_xlabel("Rolls")
rolls_ax.set_ylabel("Count")
rolls_ax.set_title("Histogram of standardized rolls")

bets_ax = plt.subplot(1, 2, 2)
bets_ax.hist(bets_standardized, bins=10, color="b", ec="k")
bets_ax.set_xlabel("Bet")
bets_ax.set_ylabel("Count")
bets_ax.set_title("Histogram of standardized bet")

# NOTE(review): left=-0.5 / right=1.5 stretch the subplot region beyond the
# figure's usual 0-1 fraction, presumably to widen the panels inline - confirm.
plt.subplots_adjust(bottom=0, top=1, left=-0.5, right=1.5)
# Print the median (pandas) and interquartile range (scipy.stats.iqr) of each
# answer column.
for heading, column in (("ROLLS", "Q2:Rolls"), ("BETS", "Q3:Bet")):
    answers = df[column]
    print(heading)
    print("Median:", answers.median())
    print("Interquartile range:", stats.iqr(answers))
# Box plots of the two answer columns, rolls on the left and bets on the right.
#
# The tick label is set explicitly instead of through boxplot(labels=...):
# that keyword was renamed to tick_labels in Matplotlib 3.9 and the old
# spelling is deprecated. A single box is drawn at x-position 1 by default,
# so labelling that tick gives the same picture on old and new releases.
rolls_ax = plt.subplot(1, 2, 1)
rolls_ax.boxplot(df["Q2:Rolls"])
rolls_ax.set_xticks([1])
rolls_ax.set_xticklabels(["rolls"])
rolls_ax.set_title("Box plot of rolls")

bets_ax = plt.subplot(1, 2, 2)
bets_ax.boxplot(df["Q3:Bet"])
bets_ax.set_xticks([1])
bets_ax.set_xticklabels(["bet"])
bets_ax.set_title("Box plot of bet")

# NOTE(review): left=-0.5 / right=1.5 stretch the subplot region beyond the
# figure's usual 0-1 fraction, presumably to widen the panels inline - confirm.
plt.subplots_adjust(bottom=0, top=1, left=-0.5, right=1.5)
def _tukey_fences(values, k=1.5):
    """Return the (lower, upper) fences Q1 - k*IQR and Q3 + k*IQR of a Series.

    Series.quantile skips missing entries and uses linear interpolation, so on
    complete data it matches np.percentile / scipy.stats.iqr defaults.
    """
    q1 = values.quantile(0.25)
    q3 = values.quantile(0.75)
    spread = q3 - q1
    return q1 - k * spread, q3 + k * spread


# Keep only respondents whose rolls AND bet both lie inside the 1.5*IQR fences.
# Both pairs of fences are computed from the full survey, not from the
# partially filtered frame. The previous filter applied only the upper fence,
# so unusually low answers were never removed.
rolls_low, rolls_high = _tukey_fences(df["Q2:Rolls"])
bets_low, bets_high = _tukey_fences(df["Q3:Bet"])
df_no_outliers = df[
    df["Q2:Rolls"].between(rolls_low, rolls_high)  # inclusive on both ends
    & df["Q3:Bet"].between(bets_low, bets_high)
]
df_no_outliers.count()
# Histograms of the answers that survived outlier removal.
# Rolls use unit-wide bins with edges 0.5, 1.5, ..., 8.5 (centred on 1..8);
# bets use 8 equal-width bins chosen by matplotlib.
unit_edges = np.arange(0.5, 9.5, 1)

rolls_ax = plt.subplot(1, 2, 1)
rolls_ax.hist(df_no_outliers["Q2:Rolls"], bins=unit_edges, color="r", ec="k")
rolls_ax.set_xlabel("Rolls")
rolls_ax.set_ylabel("Count")
rolls_ax.set_title("Histogram of rolls, outliers removed")

bets_ax = plt.subplot(1, 2, 2)
bets_ax.hist(df_no_outliers["Q3:Bet"], bins=8, color="b", ec="k")
bets_ax.set_xlabel("Bet")
bets_ax.set_ylabel("Count")
bets_ax.set_title("Histogram of bet, outliers removed")

# NOTE(review): left=-0.5 / right=1.5 stretch the subplot region beyond the
# figure's usual 0-1 fraction, presumably to widen the panels inline - confirm.
plt.subplots_adjust(bottom=0, top=1, left=-0.5, right=1.5)
# For each answer column, print every summary statistic computed without
# outliers next to its value on the full survey.
for heading, column in (("ROLLS", "Q2:Rolls"), ("BETS", "Q3:Bet")):
    trimmed = df_no_outliers[column]
    full = df[column]
    print(heading)
    print("Mean:", trimmed.mean(), "(used to be", full.mean(), ")")
    print("Standard deviation:", trimmed.std(), "(used to be", full.std(), ")")
    print("Median:", trimmed.median(), "(used to be", full.median(), ")")
    print("Interquartile range:", stats.iqr(trimmed), "(used to be", stats.iqr(full), ")")
Data from Yahoo Finance (https://finance.yahoo.com/quote/FDX/history and https://finance.yahoo.com/quote/UPS/history)
# Load each ticker's history, keeping only the adjusted close (renamed to the
# ticker symbol). rename(index=str) also maps every row label through str().
# FDX keeps its Date column; UPS contributes prices only.
fdx = pd.read_csv("FDX.csv").rename(index=str, columns={"Adj Close": "FDX"}).filter(['Date', 'FDX'])
ups = pd.read_csv("UPS.csv").rename(index=str, columns={"Adj Close": "UPS"}).filter(['UPS'])

# Columns are glued together by row label, not by date.
# NOTE(review): assumes both CSVs list the same trading days in the same order - confirm.
stock_data = pd.concat([fdx, ups], axis=1, sort=False)

# infer_datetime_format is deprecated since pandas 2.0 (strict format inference
# is now the default and the flag has no effect), so it is no longer passed.
stock_data['Date'] = pd.to_datetime(stock_data['Date'])
stock_data.tail()
# Line plot of both price series against row position.
plt.figure(figsize=(12, 5))
for ticker, line_color, line_style in (
    ("FDX", "mediumorchid", "--"),
    ("UPS", "chocolate", "-"),
):
    plt.plot(stock_data[ticker], color=line_color, linestyle=line_style, label=ticker)
plt.legend()
# Only the two end positions are ticked, with hard-coded date labels.
# NOTE(review): assumes row 0 is Jan 2 and row 165 is Aug 28 - confirm against the CSVs.
plt.xticks(ticks=[0, 165], labels=["Jan 2", "Aug 28"])
plt.ylabel("Price in USD")
plt.title("Daily stock prices in 2018: FDX and UPS")
# Standardize each price series (subtract its mean, divide by its standard
# deviation) in a copy of the frame, so stock_data itself is left untouched.
stock_data_standardized = stock_data.copy()
for ticker in ("FDX", "UPS"):
    prices = stock_data[ticker]
    stock_data_standardized[ticker] = (prices - prices.mean()) / prices.std()

plt.figure(figsize=(12, 5))
plt.plot(stock_data_standardized.FDX, color="mediumorchid", linestyle="--", label='FDX')
plt.plot(stock_data_standardized.UPS, color="chocolate", linestyle="-", label='UPS')
plt.legend()
# Only the two end positions are ticked, with hard-coded date labels.
# NOTE(review): assumes row 0 is Jan 2 and row 165 is Aug 28 - confirm against the CSVs.
plt.xticks([0, 165], ["Jan 2", "Aug 28"])
# Standardized values are unitless, so the axis must not be labelled in USD.
plt.ylabel("Standardized price (z-score)")
plt.title("Standardized daily stock prices in 2018: FDX and UPS")
# UPS against FDX: raw prices on the left, standardized prices on the right.
plt.subplot(1, 2, 1)
plt.scatter(stock_data.FDX, stock_data.UPS, color="red")
plt.title("Scatter plot of UPS vs FDX stock prices in 2018")
plt.xlabel("FDX stock price (USD)")
plt.ylabel("UPS stock price (USD)")

plt.subplot(1, 2, 2)
plt.scatter(stock_data_standardized.FDX, stock_data_standardized.UPS, color="blue")
plt.title("Scatter plot of standardized UPS vs FDX stock prices in 2018")
# The standardized series are unitless, so these axes are not in USD.
plt.xlabel("Standardized FDX stock price")
plt.ylabel("Standardized UPS stock price")

# NOTE(review): left=-0.5 / right=1.5 stretch the subplot region beyond the
# figure's usual 0-1 fraction, presumably to widen the panels inline - confirm.
plt.subplots_adjust(bottom=0, top=1, left=-0.5, right=1.5)
# Correlation matrix of the price columns. stock_data also holds a datetime
# Date column, and whether corr() drops non-numeric columns changed with the
# numeric_only default in pandas 2.0. Selecting numeric/bool columns first
# gives the same FDX/UPS matrix on every pandas version.
print(stock_data.select_dtypes(include=["number", "bool"]).corr())
# Bet against rolls: every respondent on the left, outliers removed on the right.
all_ax = plt.subplot(1, 2, 1)
all_ax.scatter(df["Q2:Rolls"], df["Q3:Bet"], color="red")
all_ax.set_title("Scatter plot of Bet vs Rolls")
all_ax.set_xlabel("Rolls")
all_ax.set_ylabel("Bet")

trimmed_ax = plt.subplot(1, 2, 2)
trimmed_ax.scatter(df_no_outliers["Q2:Rolls"], df_no_outliers["Q3:Bet"], color="blue")
trimmed_ax.set_title("Scatter plot of Bet vs Rolls, outliers removed")
trimmed_ax.set_xlabel("Rolls")
trimmed_ax.set_ylabel("Bet")

# NOTE(review): left=-0.5 / right=1.5 stretch the subplot region beyond the
# figure's usual 0-1 fraction, presumably to widen the panels inline - confirm.
plt.subplots_adjust(bottom=0, top=1, left=-0.5, right=1.5)
# Correlation matrices of the survey answers with and without outliers.
# Since pandas 2.0, corr() no longer drops non-numeric columns by default.
# NOTE(review): the survey frame may contain text answers (e.g. Q4) - confirm.
# Selecting numeric/bool columns first reproduces the older behaviour on
# every pandas version.
numeric_kinds = ["number", "bool"]
print("WITH OUTLIERS")
print(df.select_dtypes(include=numeric_kinds).corr())
print("WITHOUT OUTLIERS")
print(df_no_outliers.select_dtypes(include=numeric_kinds).corr())