import pandas as pd
df1 = pd.read_excel('product_data_excel.xlsx')
print (df1)
product_id product_description quantity regular_price category 0 A0001 product 1 10 12 workwear 1 A0002 product 2 11 12 workwear 2 A0003 product 3 12 12 workwear 3 A0004 product 4 13 12 workwear 4 A0005 product 5 14 12 workwear .. ... ... ... ... ... 94 A0095 product 95 104 12 workwear 95 A0096 product 96 105 12 workwear 96 A0097 product 97 106 12 workwear 97 A0098 product 98 107 12 workwear 98 A0099 product 99 108 12 workwear [99 rows x 5 columns]
df2 = pd.read_excel('product_data_excel_2.xlsx')
print (df2)
product_id product_description quantity regular_price category 0 A0001 product 1 11 13.5 workwear 1 A0002 product 2 11 12.0 workwear 2 A0003 product 3 14 11.0 workwear 3 A0004 product 4 13 12.0 workwear 4 A0005 product 5 14 12.0 workwear .. ... ... ... ... ... 102 A0103 product 103 2 10.0 workwear 103 A0104 product 104 9 6.0 workwear 104 A0105 product 105 12 8.0 workwear 105 A0106 product 106 23 6.0 workwear 106 A0107 product 107 10 5.0 workwear [107 rows x 5 columns]
# Method : isin function/method
# find existing records with changes in product_data_excel_2.xlsx (df2)
result1 = df1[~df1.apply(tuple, 1).isin(df2.apply(tuple, 1))]
print(result1)
product_id product_description quantity regular_price category 0 A0001 product 1 10 12 workwear 2 A0003 product 3 12 12 workwear
# Method : isin function/method
# find existing records with no changes in product_data_excel_2.xlsx (df2)
result2 = df1[df1.apply(tuple, 1).isin(df2.apply(tuple, 1))]
print(result2)
product_id product_description quantity regular_price category 1 A0002 product 2 11 12 workwear 3 A0004 product 4 13 12 workwear 4 A0005 product 5 14 12 workwear 5 A0006 product 6 15 12 workwear 6 A0007 product 7 16 12 workwear .. ... ... ... ... ... 94 A0095 product 95 104 12 workwear 95 A0096 product 96 105 12 workwear 96 A0097 product 97 106 12 workwear 97 A0098 product 98 107 12 workwear 98 A0099 product 99 108 12 workwear [97 rows x 5 columns]
# Method : isin function/method
# find existing records with changes and new records in product_data_excel_2.xlsx (df2)
result3 = df2[~df2.apply(tuple, 1).isin(df1.apply(tuple, 1))]
print(result3)
product_id product_description quantity regular_price category 0 A0001 product 1 11 13.5 workwear 2 A0003 product 3 14 11.0 workwear 99 A0100 product 100 5 11.0 workwear 100 A0101 product 101 7 22.0 workwear 101 A0102 product 102 9 17.5 workwear 102 A0103 product 103 2 10.0 workwear 103 A0104 product 104 9 6.0 workwear 104 A0105 product 105 12 8.0 workwear 105 A0106 product 106 23 6.0 workwear 106 A0107 product 107 10 5.0 workwear
display(df1, df2)
product_id | product_description | quantity | regular_price | category | |
---|---|---|---|---|---|
0 | A0001 | product 1 | 10 | 12 | workwear |
1 | A0002 | product 2 | 11 | 12 | workwear |
2 | A0003 | product 3 | 12 | 12 | workwear |
3 | A0004 | product 4 | 13 | 12 | workwear |
4 | A0005 | product 5 | 14 | 12 | workwear |
... | ... | ... | ... | ... | ... |
94 | A0095 | product 95 | 104 | 12 | workwear |
95 | A0096 | product 96 | 105 | 12 | workwear |
96 | A0097 | product 97 | 106 | 12 | workwear |
97 | A0098 | product 98 | 107 | 12 | workwear |
98 | A0099 | product 99 | 108 | 12 | workwear |
99 rows × 5 columns
product_id | product_description | quantity | regular_price | category | |
---|---|---|---|---|---|
0 | A0001 | product 1 | 11 | 13.5 | workwear |
1 | A0002 | product 2 | 11 | 12.0 | workwear |
2 | A0003 | product 3 | 14 | 11.0 | workwear |
3 | A0004 | product 4 | 13 | 12.0 | workwear |
4 | A0005 | product 5 | 14 | 12.0 | workwear |
... | ... | ... | ... | ... | ... |
102 | A0103 | product 103 | 2 | 10.0 | workwear |
103 | A0104 | product 104 | 9 | 6.0 | workwear |
104 | A0105 | product 105 | 12 | 8.0 | workwear |
105 | A0106 | product 106 | 23 | 6.0 | workwear |
106 | A0107 | product 107 | 10 | 5.0 | workwear |
107 rows × 5 columns
# save changed existing records and new records to excel
result3.to_excel("product_data_excel_changed_and_new_records.xlsx", index = False, sheet_name='Sheet1')
# save changed existing records and new records to CSV
result3.to_csv("product_data_excel_changed_and_new_records.csv", index = False)