# how to select rows and columns in pandas
# Credit : Trifonas Papadopoulos, Oct 4th, 2022, Version 2.0
# start of code -----------------------------------------------------------------------------------------
import pandas as pd
import numpy as np
# data
data = [['A0001','','','',0,'variable',0,0],
['A0001','WHITE','S','Small',1,'variation',0,0],
['A0001','WHITE','M','Medium',2,'variation',0,0],
['A0001','WHITE','L','Large',3,'variation',0,0],
['A0001','WHITE','XL','Extra Large',4,'variation',0,0],
['A0001','BLACK','L','Large',5,'variation',0,0],
['A0003','','','',6,'variable',0,0],
['A0003','BLACK','XL','Extra Large',7,'variation',0,0],
['A0002','','','',8,'variable',0,0],
['A0002','BLACK','L','Large',9,'variation',0,0],
['A0002','BLACK','XL','Extra Large',10,'variation',0,0],
['A0004','','','',12,'variable',0,0],
['A0004','WHITE','S','Small',12,'variation',0,0],
['A0004','RED','S','Small',13,'variation',0,0],
['A0004','RED','M','Medium',14,'variation',0,0],
['A0001','WHITE','XXL','2 Extra Large',15,'variation',0,0],
]
# print list of columns
print('--list of columns-----------------------------------------------------------')
cols = list(df.columns.values)
print(cols)
# dataframe with defined column names
df = pd.DataFrame(data, columns=['parent', 'color', 'size', 'size_desc', 'order', 'type', 'quantity','price'])
print('')
print('--dataframe with defined column names---------------------------------------')
print(df)
--list of columns----------------------------------------------------------- ['parent', 'color', 'size', 'size_desc', 'order', 'type', 'quantity', 'price'] --dataframe with defined column names--------------------------------------- parent color size size_desc order type quantity price 0 A0001 0 variable 0 0 1 A0001 WHITE S Small 1 variation 0 0 2 A0001 WHITE M Medium 2 variation 0 0 3 A0001 WHITE L Large 3 variation 0 0 4 A0001 WHITE XL Extra Large 4 variation 0 0 5 A0001 BLACK L Large 5 variation 0 0 6 A0003 6 variable 0 0 7 A0003 BLACK XL Extra Large 7 variation 0 0 8 A0002 8 variable 0 0 9 A0002 BLACK L Large 9 variation 0 0 10 A0002 BLACK XL Extra Large 10 variation 0 0 11 A0004 12 variable 0 0 12 A0004 WHITE S Small 12 variation 0 0 13 A0004 RED S Small 13 variation 0 0 14 A0004 RED M Medium 14 variation 0 0 15 A0001 WHITE XXL 2 Extra Large 15 variation 0 0
# sort by multiple columns: parent, color and size
print('')
print('--data sort by columns (0)parent, (1)color and (2)size----------------------')
df.sort_values(by=['parent','color','size'], inplace=True)
print(df)
--data sort by columns (0)parent, (1)color and (2)size---------------------- parent color size size_desc order type quantity price 0 A0001 0 variable 0 0 5 A0001 BLACK L Large 5 variation 0 0 3 A0001 WHITE L Large 3 variation 0 0 2 A0001 WHITE M Medium 2 variation 0 0 1 A0001 WHITE S Small 1 variation 0 0 4 A0001 WHITE XL Extra Large 4 variation 0 0 15 A0001 WHITE XXL 2 Extra Large 15 variation 0 0 8 A0002 8 variable 0 0 9 A0002 BLACK L Large 9 variation 0 0 10 A0002 BLACK XL Extra Large 10 variation 0 0 6 A0003 6 variable 0 0 7 A0003 BLACK XL Extra Large 7 variation 0 0 11 A0004 12 variable 0 0 14 A0004 RED M Medium 14 variation 0 0 13 A0004 RED S Small 13 variation 0 0 12 A0004 WHITE S Small 12 variation 0 0
# selection of certain columns
print('')
print('----------------------------------------------------------------------------')
df_four_col = df[['parent', 'color','order', 'size']]
print("selection of columns : parent, color, order and size" , df_four_col, sep='\n')
---------------------------------------------------------------------------- selection of columns : parent, color, order and size parent color order size 0 A0001 0 5 A0001 BLACK 5 L 3 A0001 WHITE 3 L 2 A0001 WHITE 2 M 1 A0001 WHITE 1 S 4 A0001 WHITE 4 XL 15 A0001 WHITE 15 XXL 8 A0002 8 9 A0002 BLACK 9 L 10 A0002 BLACK 10 XL 6 A0003 6 7 A0003 BLACK 7 XL 11 A0004 12 14 A0004 RED 14 M 13 A0004 RED 13 S 12 A0004 WHITE 12 S
# selection of certain columns with filter
print('')
print('----------------------------------------------------------------------------')
df_selected_col = df.filter(like='size').head()
print("selection of columns with column name like 'size'" , df_selected_col, sep='\n')
---------------------------------------------------------------------------- selection of columns with column name like 'size' size size_desc 0 5 L Large 3 L Large 2 M Medium 1 S Small
# set of new columns 'price' and 'quantity' to 0
df['quantity'] = 1
df['price'] = 1
print('----------------------------------------------------------------------------')
print("columns : 'parent', 'color', 'size', 'size_desc', 'order', 'type' and creation of 2 new columns : 'quantity', 'price' " , df, sep='\n')
---------------------------------------------------------------------------- columns : 'parent', 'color', 'size', 'size_desc', 'order', 'type' and creation of 2 new columns : 'quantity', 'price' parent color size size_desc order type quantity price 0 A0001 0 variable 1 1 5 A0001 BLACK L Large 5 variation 1 1 3 A0001 WHITE L Large 3 variation 1 1 2 A0001 WHITE M Medium 2 variation 1 1 1 A0001 WHITE S Small 1 variation 1 1 4 A0001 WHITE XL Extra Large 4 variation 1 1 15 A0001 WHITE XXL 2 Extra Large 15 variation 1 1 8 A0002 8 variable 1 1 9 A0002 BLACK L Large 9 variation 1 1 10 A0002 BLACK XL Extra Large 10 variation 1 1 6 A0003 6 variable 1 1 7 A0003 BLACK XL Extra Large 7 variation 1 1 11 A0004 12 variable 1 1 14 A0004 RED M Medium 14 variation 1 1 13 A0004 RED S Small 13 variation 1 1 12 A0004 WHITE S Small 12 variation 1 1
# changing the order of columns with groups
group1 = ['color', 'parent']
group2 = ['order']
group3 = ['size']
group4 = ['quantity','price']
new_cols = group1+group2+group3+group4
set(df.columns)==set(new_cols)
df_2=df[new_cols]
print('')
print('----------------------------------------------------------------------------')
print("order of columns by (group1 : 'color', 'parent') and (group2 : 'order') and (group3 : 'size') and (group4 : 'quantity', 'price')" , df_2, sep='\n')
---------------------------------------------------------------------------- order of columns by (group1 : 'color', 'parent') and (group2 : 'order') and (group3 : 'size') and (group4 : 'quantity', 'price') color parent order size quantity price 0 A0001 0 1 1 5 BLACK A0001 5 L 1 1 3 WHITE A0001 3 L 1 1 2 WHITE A0001 2 M 1 1 1 WHITE A0001 1 S 1 1 4 WHITE A0001 4 XL 1 1 15 WHITE A0001 15 XXL 1 1 8 A0002 8 1 1 9 BLACK A0002 9 L 1 1 10 BLACK A0002 10 XL 1 1 6 A0003 6 1 1 7 BLACK A0003 7 XL 1 1 11 A0004 12 1 1 14 RED A0004 14 M 1 1 13 RED A0004 13 S 1 1 12 WHITE A0004 12 S 1 1
# sort by columns position : (0)parent and (1)color
data.sort(key = lambda x: (x[0],x[1]))
print('')
print("data sort col 0 (parent), col 1 (color)" , data, sep='\n')
# sort by columns position : (4)order and (1)color
print('----------------------------------------------------------------------------')
data.sort(key = lambda x: (x[4],x[1]))
print("data sort col 4 (order), col 1 (color)" , data, sep='\n')
# sort by columns position : (1)color and (0)parent
print('----------------------------------------------------------------------------')
data.sort(key = lambda x: (x[1],x[0]))
print("data sort col 1 (color), col 0 (parent)" , data, sep='\n')
# sort by columns position : (4)order and (0)parent
print('----------------------------------------------------------------------------')
data.sort(key = lambda x: (x[4],x[0]))
print("data sort col 4 (order), col 0 (parent)" , data, sep='\n')
# sort by columns position : (4)order
print('----------------------------------------------------------------------------')
data_sorted = sorted(data, key=lambda x: x[4])
print("data sort col 4 (order)" , data_sorted, sep='\n')
# sort by columns position : (0)parent, (5)type, (1)color and (4)order
print('----------------------------------------------------------------------------')
data_sorted2 = sorted(data, key=lambda x: (x[0],x[5],x[1],x[4]))
print("data sort col 0 (parent), col 5 (type), col 1 (color), col 4 (order)" , data_sorted2, sep='\n')
# end of code -----------------------------------------------------------------------------------------
data sort col 0 (parent), col 1 (color) [['A0001', '', '', '', 0, 'variable', 0, 0], ['A0001', 'BLACK', 'L', 'Large', 5, 'variation', 0, 0], ['A0001', 'WHITE', 'S', 'Small', 1, 'variation', 0, 0], ['A0001', 'WHITE', 'M', 'Medium', 2, 'variation', 0, 0], ['A0001', 'WHITE', 'L', 'Large', 3, 'variation', 0, 0], ['A0001', 'WHITE', 'XL', 'Extra Large', 4, 'variation', 0, 0], ['A0001', 'WHITE', 'XXL', '2 Extra Large', 15, 'variation', 0, 0], ['A0002', '', '', '', 8, 'variable', 0, 0], ['A0002', 'BLACK', 'L', 'Large', 9, 'variation', 0, 0], ['A0002', 'BLACK', 'XL', 'Extra Large', 10, 'variation', 0, 0], ['A0003', '', '', '', 6, 'variable', 0, 0], ['A0003', 'BLACK', 'XL', 'Extra Large', 7, 'variation', 0, 0], ['A0004', '', '', '', 12, 'variable', 0, 0], ['A0004', 'RED', 'S', 'Small', 13, 'variation', 0, 0], ['A0004', 'RED', 'M', 'Medium', 14, 'variation', 0, 0], ['A0004', 'WHITE', 'S', 'Small', 12, 'variation', 0, 0]] ---------------------------------------------------------------------------- data sort col 4 (order), col 1 (color) [['A0001', '', '', '', 0, 'variable', 0, 0], ['A0001', 'WHITE', 'S', 'Small', 1, 'variation', 0, 0], ['A0001', 'WHITE', 'M', 'Medium', 2, 'variation', 0, 0], ['A0001', 'WHITE', 'L', 'Large', 3, 'variation', 0, 0], ['A0001', 'WHITE', 'XL', 'Extra Large', 4, 'variation', 0, 0], ['A0001', 'BLACK', 'L', 'Large', 5, 'variation', 0, 0], ['A0003', '', '', '', 6, 'variable', 0, 0], ['A0003', 'BLACK', 'XL', 'Extra Large', 7, 'variation', 0, 0], ['A0002', '', '', '', 8, 'variable', 0, 0], ['A0002', 'BLACK', 'L', 'Large', 9, 'variation', 0, 0], ['A0002', 'BLACK', 'XL', 'Extra Large', 10, 'variation', 0, 0], ['A0004', '', '', '', 12, 'variable', 0, 0], ['A0004', 'WHITE', 'S', 'Small', 12, 'variation', 0, 0], ['A0004', 'RED', 'S', 'Small', 13, 'variation', 0, 0], ['A0004', 'RED', 'M', 'Medium', 14, 'variation', 0, 0], ['A0001', 'WHITE', 'XXL', '2 Extra Large', 15, 'variation', 0, 0]] ---------------------------------------------------------------------------- data sort col 1 (color), col 0 (parent) [['A0001', '', '', '', 0, 'variable', 0, 0], ['A0002', '', '', '', 8, 'variable', 0, 0], ['A0003', '', '', '', 6, 'variable', 0, 0], ['A0004', '', '', '', 12, 'variable', 0, 0], ['A0001', 'BLACK', 'L', 'Large', 5, 'variation', 0, 0], ['A0002', 'BLACK', 'L', 'Large', 9, 'variation', 0, 0], ['A0002', 'BLACK', 'XL', 'Extra Large', 10, 'variation', 0, 0], ['A0003', 'BLACK', 'XL', 'Extra Large', 7, 'variation', 0, 0], ['A0004', 'RED', 'S', 'Small', 13, 'variation', 0, 0], ['A0004', 'RED', 'M', 'Medium', 14, 'variation', 0, 0], ['A0001', 'WHITE', 'S', 'Small', 1, 'variation', 0, 0], ['A0001', 'WHITE', 'M', 'Medium', 2, 'variation', 0, 0], ['A0001', 'WHITE', 'L', 'Large', 3, 'variation', 0, 0], ['A0001', 'WHITE', 'XL', 'Extra Large', 4, 'variation', 0, 0], ['A0001', 'WHITE', 'XXL', '2 Extra Large', 15, 'variation', 0, 0], ['A0004', 'WHITE', 'S', 'Small', 12, 'variation', 0, 0]] ---------------------------------------------------------------------------- data sort col 4 (order), col 0 (parent) [['A0001', '', '', '', 0, 'variable', 0, 0], ['A0001', 'WHITE', 'S', 'Small', 1, 'variation', 0, 0], ['A0001', 'WHITE', 'M', 'Medium', 2, 'variation', 0, 0], ['A0001', 'WHITE', 'L', 'Large', 3, 'variation', 0, 0], ['A0001', 'WHITE', 'XL', 'Extra Large', 4, 'variation', 0, 0], ['A0001', 'BLACK', 'L', 'Large', 5, 'variation', 0, 0], ['A0003', '', '', '', 6, 'variable', 0, 0], ['A0003', 'BLACK', 'XL', 'Extra Large', 7, 'variation', 0, 0], ['A0002', '', '', '', 8, 'variable', 0, 0], ['A0002', 'BLACK', 'L', 'Large', 9, 'variation', 0, 0], ['A0002', 'BLACK', 'XL', 'Extra Large', 10, 'variation', 0, 0], ['A0004', '', '', '', 12, 'variable', 0, 0], ['A0004', 'WHITE', 'S', 'Small', 12, 'variation', 0, 0], ['A0004', 'RED', 'S', 'Small', 13, 'variation', 0, 0], ['A0004', 'RED', 'M', 'Medium', 14, 'variation', 0, 0], ['A0001', 'WHITE', 'XXL', '2 Extra Large', 15, 'variation', 0, 0]] ---------------------------------------------------------------------------- data sort col 4 (order) [['A0001', '', '', '', 0, 'variable', 0, 0], ['A0001', 'WHITE', 'S', 'Small', 1, 'variation', 0, 0], ['A0001', 'WHITE', 'M', 'Medium', 2, 'variation', 0, 0], ['A0001', 'WHITE', 'L', 'Large', 3, 'variation', 0, 0], ['A0001', 'WHITE', 'XL', 'Extra Large', 4, 'variation', 0, 0], ['A0001', 'BLACK', 'L', 'Large', 5, 'variation', 0, 0], ['A0003', '', '', '', 6, 'variable', 0, 0], ['A0003', 'BLACK', 'XL', 'Extra Large', 7, 'variation', 0, 0], ['A0002', '', '', '', 8, 'variable', 0, 0], ['A0002', 'BLACK', 'L', 'Large', 9, 'variation', 0, 0], ['A0002', 'BLACK', 'XL', 'Extra Large', 10, 'variation', 0, 0], ['A0004', '', '', '', 12, 'variable', 0, 0], ['A0004', 'WHITE', 'S', 'Small', 12, 'variation', 0, 0], ['A0004', 'RED', 'S', 'Small', 13, 'variation', 0, 0], ['A0004', 'RED', 'M', 'Medium', 14, 'variation', 0, 0], ['A0001', 'WHITE', 'XXL', '2 Extra Large', 15, 'variation', 0, 0]] ---------------------------------------------------------------------------- data sort col 0 (parent), col 5 (type), col 1 (color), col 4 (order) [['A0001', '', '', '', 0, 'variable', 0, 0], ['A0001', 'BLACK', 'L', 'Large', 5, 'variation', 0, 0], ['A0001', 'WHITE', 'S', 'Small', 1, 'variation', 0, 0], ['A0001', 'WHITE', 'M', 'Medium', 2, 'variation', 0, 0], ['A0001', 'WHITE', 'L', 'Large', 3, 'variation', 0, 0], ['A0001', 'WHITE', 'XL', 'Extra Large', 4, 'variation', 0, 0], ['A0001', 'WHITE', 'XXL', '2 Extra Large', 15, 'variation', 0, 0], ['A0002', '', '', '', 8, 'variable', 0, 0], ['A0002', 'BLACK', 'L', 'Large', 9, 'variation', 0, 0], ['A0002', 'BLACK', 'XL', 'Extra Large', 10, 'variation', 0, 0], ['A0003', '', '', '', 6, 'variable', 0, 0], ['A0003', 'BLACK', 'XL', 'Extra Large', 7, 'variation', 0, 0], ['A0004', '', '', '', 12, 'variable', 0, 0], ['A0004', 'RED', 'S', 'Small', 13, 'variation', 0, 0], ['A0004', 'RED', 'M', 'Medium', 14, 'variation', 0, 0], ['A0004', 'WHITE', 'S', 'Small', 12, 'variation', 0, 0]]