In [36]:
import pandas as pd
In [37]:
import numpy as np
In [38]:
from matplotlib import pyplot as plt
In [39]:
df = pd.read_csv('sample_data_sales.csv')
print (df)
    month  budget_sales  actual_sales
0  Jan 21          4000          3800
1  Feb 21          5000          5100
2  Mar 21          5500          5400
3  Apr 21          6000          5900
4  May 21          5000          4500
5  Jun 21          7000          6500
In [40]:
plt.rcParams["figure.figsize"] = [8.50, 4.50]
plt.rcParams["figure.autolayout"] = True
df.set_index('month').plot()
plt.title("Sales / Jan 21 - Jun 21")
plt.xlabel("Month")
plt.ylabel("Budget and Actual")
plt.legend(["Budget", "Actual"])
plt.show()
In [41]:
# find difference between budget sales and actual sales 
budget_sum = df["budget_sales"].sum()
actual_sum = df["actual_sales"].sum()
diff_between_budget_and_actual = -(budget_sum - actual_sum)
diff_between_budget_and_actual_perc = ((actual_sum)/budget_sum)*100
print("Budget Sales sum:",budget_sum)
print("Actual Sales sum:",actual_sum)
print("Difference between Budget Sales sum and Actual Sales sum - Euro:", diff_between_budget_and_actual)
print("Actual Sales sum is", diff_between_budget_and_actual_perc, "% of Budget Sales sum.")
Budget Sales sum: 32500
Actual Sales sum: 31200
Difference between Budget Sales sum and Actual Sales sum - Euro: -1300
Actual Sales sum is 96.0 % of Budget Sales sum.
In [42]:
# add new columns difference_value and difference_perc
df = df.assign(difference_value = -(df["budget_sales"] - df["actual_sales"]))
difference_perc_var = -(((df["budget_sales"] - df["actual_sales"])/df["budget_sales"])*100)
df = df.assign(difference_perc = difference_perc_var)
print (df)
    month  budget_sales  actual_sales  difference_value  difference_perc
0  Jan 21          4000          3800              -200        -5.000000
1  Feb 21          5000          5100               100         2.000000
2  Mar 21          5500          5400              -100        -1.818182
3  Apr 21          6000          5900              -100        -1.666667
4  May 21          5000          4500              -500       -10.000000
5  Jun 21          7000          6500              -500        -7.142857
In [43]:
# save to new csv
df.to_csv("sample_data_sales_with_difference.csv", index=False)
In [44]:
# read new csv
data = pd.read_csv("sample_data_sales_with_difference.csv")
print (data)
    month  budget_sales  actual_sales  difference_value  difference_perc
0  Jan 21          4000          3800              -200        -5.000000
1  Feb 21          5000          5100               100         2.000000
2  Mar 21          5500          5400              -100        -1.818182
3  Apr 21          6000          5900              -100        -1.666667
4  May 21          5000          4500              -500       -10.000000
5  Jun 21          7000          6500              -500        -7.142857
In [45]:
# take data for new csv and create dataframe
data = pd.read_csv("sample_data_sales_with_difference.csv")

print ("------------------------------------------------------------------------------------")
print ("sample_data_sales_with_difference.csv", "view all columns")
print (data)
 
df3 = pd.DataFrame(data, columns=["month", "budget_sales", "actual_sales"])

print ("------------------------------------------------------------------------------------")
print ("sample_data_sales_with_difference.csv", "view columns : month, budget_sales, actual_sales")
print (df3)
 
# plot the dataframe
import seaborn as sns
sns.set_style("dark")
df3.plot(x="month", y=["budget_sales", "actual_sales"], kind="bar")

plt.rcParams["figure.figsize"] = [8.50, 6.50]
plt.rcParams["figure.autolayout"] = True
plt.xticks(rotation=30, horizontalalignment="center")
plt.title("Sales / Jan 21 - Jun 21")
plt.xlabel("Month")
plt.ylabel("Budget and Actual")
plt.legend(["Budget", "Actual"])

 
# print bar graph
plt.show()
------------------------------------------------------------------------------------
sample_data_sales_with_difference.csv view all columns
    month  budget_sales  actual_sales  difference_value  difference_perc
0  Jan 21          4000          3800              -200        -5.000000
1  Feb 21          5000          5100               100         2.000000
2  Mar 21          5500          5400              -100        -1.818182
3  Apr 21          6000          5900              -100        -1.666667
4  May 21          5000          4500              -500       -10.000000
5  Jun 21          7000          6500              -500        -7.142857
------------------------------------------------------------------------------------
sample_data_sales_with_difference.csv view columns : month, budget_sales, actual_sales
    month  budget_sales  actual_sales
0  Jan 21          4000          3800
1  Feb 21          5000          5100
2  Mar 21          5500          5400
3  Apr 21          6000          5900
4  May 21          5000          4500
5  Jun 21          7000          6500
In [46]:
plt.rcParams["figure.figsize"] = [8.50, 6.50]
plt.rcParams["figure.autolayout"] = True

cols = df3.columns
figure, ax1 = plt.subplots()
ax1.plot(df3[cols[0]],df3[cols[1]],linewidth=1,zorder=1, label = "Force1")
ax1.plot(df3[cols[0]],df3[cols[2]],linewidth=1,zorder=1, label = "Force2")
plt.title("Sales / Jan 21 - Jun 21")
plt.xlabel("Month")
plt.ylabel("Budget and Actual")
plt.legend(["Budget", "Actual",])

plt.show()
In [47]:
budget_sales_mean = df["budget_sales"].mean()
print ("Budget Sales mean is:", budget_sales_mean)
Budget Sales mean is: 5416.666666666667
In [48]:
actual_sales_mean = df["actual_sales"].mean()
print ("Actual Sales mean is:", actual_sales_mean)
Actual Sales mean is: 5200.0
In [49]:
# thanks for watching! :)