In [176]:
# how to select rows and columns in pandas

# Credit : Trifonas Papadopoulos, Oct 4th, 2022, Version 2.0 
# start of code -----------------------------------------------------------------------------------------

import pandas as pd
import numpy as np

# data
data = [['A0001','','','',0,'variable',0,0],
            ['A0001','WHITE','S','Small',1,'variation',0,0],
            ['A0001','WHITE','M','Medium',2,'variation',0,0],
            ['A0001','WHITE','L','Large',3,'variation',0,0],
            ['A0001','WHITE','XL','Extra Large',4,'variation',0,0],
            ['A0001','BLACK','L','Large',5,'variation',0,0],
            ['A0003','','','',6,'variable',0,0],
            ['A0003','BLACK','XL','Extra Large',7,'variation',0,0],
            ['A0002','','','',8,'variable',0,0],
            ['A0002','BLACK','L','Large',9,'variation',0,0],
            ['A0002','BLACK','XL','Extra Large',10,'variation',0,0],
            ['A0004','','','',12,'variable',0,0],
            ['A0004','WHITE','S','Small',12,'variation',0,0],
            ['A0004','RED','S','Small',13,'variation',0,0],
            ['A0004','RED','M','Medium',14,'variation',0,0],
            ['A0001','WHITE','XXL','2 Extra Large',15,'variation',0,0],
           ]
            
    # print list of columns
print('--list of columns-----------------------------------------------------------')
cols = list(df.columns.values)
print(cols)

# dataframe with defined column names
df = pd.DataFrame(data, columns=['parent', 'color', 'size', 'size_desc', 'order', 'type', 'quantity','price'])
print('')
print('--dataframe with defined column names---------------------------------------')
print(df)
--list of columns-----------------------------------------------------------
['parent', 'color', 'size', 'size_desc', 'order', 'type', 'quantity', 'price']

--dataframe with defined column names---------------------------------------
   parent  color size      size_desc  order       type  quantity  price
0   A0001                                 0   variable         0      0
1   A0001  WHITE    S          Small      1  variation         0      0
2   A0001  WHITE    M         Medium      2  variation         0      0
3   A0001  WHITE    L          Large      3  variation         0      0
4   A0001  WHITE   XL    Extra Large      4  variation         0      0
5   A0001  BLACK    L          Large      5  variation         0      0
6   A0003                                 6   variable         0      0
7   A0003  BLACK   XL    Extra Large      7  variation         0      0
8   A0002                                 8   variable         0      0
9   A0002  BLACK    L          Large      9  variation         0      0
10  A0002  BLACK   XL    Extra Large     10  variation         0      0
11  A0004                                12   variable         0      0
12  A0004  WHITE    S          Small     12  variation         0      0
13  A0004    RED    S          Small     13  variation         0      0
14  A0004    RED    M         Medium     14  variation         0      0
15  A0001  WHITE  XXL  2 Extra Large     15  variation         0      0
In [177]:
# sort by multiple columns: parent, color and size
print('')
print('--data sort by columns (0)parent, (1)color and (2)size----------------------')
df.sort_values(by=['parent','color','size'], inplace=True)
print(df)
--data sort by columns (0)parent, (1)color and (2)size----------------------
   parent  color size      size_desc  order       type  quantity  price
0   A0001                                 0   variable         0      0
5   A0001  BLACK    L          Large      5  variation         0      0
3   A0001  WHITE    L          Large      3  variation         0      0
2   A0001  WHITE    M         Medium      2  variation         0      0
1   A0001  WHITE    S          Small      1  variation         0      0
4   A0001  WHITE   XL    Extra Large      4  variation         0      0
15  A0001  WHITE  XXL  2 Extra Large     15  variation         0      0
8   A0002                                 8   variable         0      0
9   A0002  BLACK    L          Large      9  variation         0      0
10  A0002  BLACK   XL    Extra Large     10  variation         0      0
6   A0003                                 6   variable         0      0
7   A0003  BLACK   XL    Extra Large      7  variation         0      0
11  A0004                                12   variable         0      0
14  A0004    RED    M         Medium     14  variation         0      0
13  A0004    RED    S          Small     13  variation         0      0
12  A0004  WHITE    S          Small     12  variation         0      0
In [178]:
# selection of certain columns
print('')
print('----------------------------------------------------------------------------')
df_four_col = df[['parent', 'color','order', 'size']]
print("selection of columns : parent, color, order and size" , df_four_col, sep='\n')
----------------------------------------------------------------------------
selection of columns : parent, color, order and size
   parent  color  order size
0   A0001             0     
5   A0001  BLACK      5    L
3   A0001  WHITE      3    L
2   A0001  WHITE      2    M
1   A0001  WHITE      1    S
4   A0001  WHITE      4   XL
15  A0001  WHITE     15  XXL
8   A0002             8     
9   A0002  BLACK      9    L
10  A0002  BLACK     10   XL
6   A0003             6     
7   A0003  BLACK      7   XL
11  A0004            12     
14  A0004    RED     14    M
13  A0004    RED     13    S
12  A0004  WHITE     12    S
In [179]:
# selection of certain columns with filter
print('')
print('----------------------------------------------------------------------------')
df_selected_col = df.filter(like='size').head()
print("selection of columns with column name like 'size'" , df_selected_col, sep='\n')
----------------------------------------------------------------------------
selection of columns with column name like 'size'
  size size_desc
0               
5    L     Large
3    L     Large
2    M    Medium
1    S     Small
In [180]:
# set of new columns 'price' and 'quantity' to 0
df['quantity'] = 1
df['price'] = 1
print('----------------------------------------------------------------------------')
print("columns : 'parent', 'color', 'size', 'size_desc', 'order', 'type' and creation of 2 new columns : 'quantity', 'price' " , df, sep='\n')
----------------------------------------------------------------------------
columns : 'parent', 'color', 'size', 'size_desc', 'order', 'type' and creation of 2 new columns : 'quantity', 'price' 
   parent  color size      size_desc  order       type  quantity  price
0   A0001                                 0   variable         1      1
5   A0001  BLACK    L          Large      5  variation         1      1
3   A0001  WHITE    L          Large      3  variation         1      1
2   A0001  WHITE    M         Medium      2  variation         1      1
1   A0001  WHITE    S          Small      1  variation         1      1
4   A0001  WHITE   XL    Extra Large      4  variation         1      1
15  A0001  WHITE  XXL  2 Extra Large     15  variation         1      1
8   A0002                                 8   variable         1      1
9   A0002  BLACK    L          Large      9  variation         1      1
10  A0002  BLACK   XL    Extra Large     10  variation         1      1
6   A0003                                 6   variable         1      1
7   A0003  BLACK   XL    Extra Large      7  variation         1      1
11  A0004                                12   variable         1      1
14  A0004    RED    M         Medium     14  variation         1      1
13  A0004    RED    S          Small     13  variation         1      1
12  A0004  WHITE    S          Small     12  variation         1      1
In [181]:
# changing the order of columns with groups
group1 = ['color', 'parent']
group2 = ['order']
group3 = ['size']
group4 = ['quantity','price']
new_cols = group1+group2+group3+group4
set(df.columns)==set(new_cols)
df_2=df[new_cols]
print('')
print('----------------------------------------------------------------------------')
print("order of columns by (group1 : 'color', 'parent') and (group2 : 'order') and (group3 : 'size') and (group4 : 'quantity', 'price')" , df_2, sep='\n')
----------------------------------------------------------------------------
order of columns by (group1 : 'color', 'parent') and (group2 : 'order') and (group3 : 'size') and (group4 : 'quantity', 'price')
    color parent  order size  quantity  price
0          A0001      0              1      1
5   BLACK  A0001      5    L         1      1
3   WHITE  A0001      3    L         1      1
2   WHITE  A0001      2    M         1      1
1   WHITE  A0001      1    S         1      1
4   WHITE  A0001      4   XL         1      1
15  WHITE  A0001     15  XXL         1      1
8          A0002      8              1      1
9   BLACK  A0002      9    L         1      1
10  BLACK  A0002     10   XL         1      1
6          A0003      6              1      1
7   BLACK  A0003      7   XL         1      1
11         A0004     12              1      1
14    RED  A0004     14    M         1      1
13    RED  A0004     13    S         1      1
12  WHITE  A0004     12    S         1      1
In [182]:
# sort by columns position : (0)parent and (1)color
data.sort(key = lambda x: (x[0],x[1]))  
print('')
print("data sort col 0 (parent), col 1 (color)" , data, sep='\n')

# sort by columns position : (4)order and (1)color
print('----------------------------------------------------------------------------')
data.sort(key = lambda x: (x[4],x[1]))  
print("data sort col 4 (order), col 1 (color)" , data, sep='\n')

# sort by columns position : (1)color and (0)parent
print('----------------------------------------------------------------------------')
data.sort(key = lambda x: (x[1],x[0]))  
print("data sort col 1 (color), col 0 (parent)" , data, sep='\n')

# sort by columns position : (4)order and (0)parent
print('----------------------------------------------------------------------------')
data.sort(key = lambda x: (x[4],x[0]))  
print("data sort col 4 (order), col 0 (parent)" , data, sep='\n')

# sort by columns position : (4)order
print('----------------------------------------------------------------------------')
data_sorted = sorted(data, key=lambda x: x[4])
print("data sort col 4 (order)" , data_sorted, sep='\n')

# sort by columns position : (0)parent, (5)type, (1)color and (4)order
print('----------------------------------------------------------------------------')
data_sorted2 = sorted(data, key=lambda x: (x[0],x[5],x[1],x[4]))
print("data sort col 0 (parent), col 5 (type), col 1 (color), col 4 (order)" , data_sorted2, sep='\n')



# end of code -----------------------------------------------------------------------------------------
data sort col 0 (parent), col 1 (color)
[['A0001', '', '', '', 0, 'variable', 0, 0], ['A0001', 'BLACK', 'L', 'Large', 5, 'variation', 0, 0], ['A0001', 'WHITE', 'S', 'Small', 1, 'variation', 0, 0], ['A0001', 'WHITE', 'M', 'Medium', 2, 'variation', 0, 0], ['A0001', 'WHITE', 'L', 'Large', 3, 'variation', 0, 0], ['A0001', 'WHITE', 'XL', 'Extra Large', 4, 'variation', 0, 0], ['A0001', 'WHITE', 'XXL', '2 Extra Large', 15, 'variation', 0, 0], ['A0002', '', '', '', 8, 'variable', 0, 0], ['A0002', 'BLACK', 'L', 'Large', 9, 'variation', 0, 0], ['A0002', 'BLACK', 'XL', 'Extra Large', 10, 'variation', 0, 0], ['A0003', '', '', '', 6, 'variable', 0, 0], ['A0003', 'BLACK', 'XL', 'Extra Large', 7, 'variation', 0, 0], ['A0004', '', '', '', 12, 'variable', 0, 0], ['A0004', 'RED', 'S', 'Small', 13, 'variation', 0, 0], ['A0004', 'RED', 'M', 'Medium', 14, 'variation', 0, 0], ['A0004', 'WHITE', 'S', 'Small', 12, 'variation', 0, 0]]
----------------------------------------------------------------------------
data sort col 4 (order), col 1 (color)
[['A0001', '', '', '', 0, 'variable', 0, 0], ['A0001', 'WHITE', 'S', 'Small', 1, 'variation', 0, 0], ['A0001', 'WHITE', 'M', 'Medium', 2, 'variation', 0, 0], ['A0001', 'WHITE', 'L', 'Large', 3, 'variation', 0, 0], ['A0001', 'WHITE', 'XL', 'Extra Large', 4, 'variation', 0, 0], ['A0001', 'BLACK', 'L', 'Large', 5, 'variation', 0, 0], ['A0003', '', '', '', 6, 'variable', 0, 0], ['A0003', 'BLACK', 'XL', 'Extra Large', 7, 'variation', 0, 0], ['A0002', '', '', '', 8, 'variable', 0, 0], ['A0002', 'BLACK', 'L', 'Large', 9, 'variation', 0, 0], ['A0002', 'BLACK', 'XL', 'Extra Large', 10, 'variation', 0, 0], ['A0004', '', '', '', 12, 'variable', 0, 0], ['A0004', 'WHITE', 'S', 'Small', 12, 'variation', 0, 0], ['A0004', 'RED', 'S', 'Small', 13, 'variation', 0, 0], ['A0004', 'RED', 'M', 'Medium', 14, 'variation', 0, 0], ['A0001', 'WHITE', 'XXL', '2 Extra Large', 15, 'variation', 0, 0]]
----------------------------------------------------------------------------
data sort col 1 (color), col 0 (parent)
[['A0001', '', '', '', 0, 'variable', 0, 0], ['A0002', '', '', '', 8, 'variable', 0, 0], ['A0003', '', '', '', 6, 'variable', 0, 0], ['A0004', '', '', '', 12, 'variable', 0, 0], ['A0001', 'BLACK', 'L', 'Large', 5, 'variation', 0, 0], ['A0002', 'BLACK', 'L', 'Large', 9, 'variation', 0, 0], ['A0002', 'BLACK', 'XL', 'Extra Large', 10, 'variation', 0, 0], ['A0003', 'BLACK', 'XL', 'Extra Large', 7, 'variation', 0, 0], ['A0004', 'RED', 'S', 'Small', 13, 'variation', 0, 0], ['A0004', 'RED', 'M', 'Medium', 14, 'variation', 0, 0], ['A0001', 'WHITE', 'S', 'Small', 1, 'variation', 0, 0], ['A0001', 'WHITE', 'M', 'Medium', 2, 'variation', 0, 0], ['A0001', 'WHITE', 'L', 'Large', 3, 'variation', 0, 0], ['A0001', 'WHITE', 'XL', 'Extra Large', 4, 'variation', 0, 0], ['A0001', 'WHITE', 'XXL', '2 Extra Large', 15, 'variation', 0, 0], ['A0004', 'WHITE', 'S', 'Small', 12, 'variation', 0, 0]]
----------------------------------------------------------------------------
data sort col 4 (order), col 0 (parent)
[['A0001', '', '', '', 0, 'variable', 0, 0], ['A0001', 'WHITE', 'S', 'Small', 1, 'variation', 0, 0], ['A0001', 'WHITE', 'M', 'Medium', 2, 'variation', 0, 0], ['A0001', 'WHITE', 'L', 'Large', 3, 'variation', 0, 0], ['A0001', 'WHITE', 'XL', 'Extra Large', 4, 'variation', 0, 0], ['A0001', 'BLACK', 'L', 'Large', 5, 'variation', 0, 0], ['A0003', '', '', '', 6, 'variable', 0, 0], ['A0003', 'BLACK', 'XL', 'Extra Large', 7, 'variation', 0, 0], ['A0002', '', '', '', 8, 'variable', 0, 0], ['A0002', 'BLACK', 'L', 'Large', 9, 'variation', 0, 0], ['A0002', 'BLACK', 'XL', 'Extra Large', 10, 'variation', 0, 0], ['A0004', '', '', '', 12, 'variable', 0, 0], ['A0004', 'WHITE', 'S', 'Small', 12, 'variation', 0, 0], ['A0004', 'RED', 'S', 'Small', 13, 'variation', 0, 0], ['A0004', 'RED', 'M', 'Medium', 14, 'variation', 0, 0], ['A0001', 'WHITE', 'XXL', '2 Extra Large', 15, 'variation', 0, 0]]
----------------------------------------------------------------------------
data sort col 4 (order)
[['A0001', '', '', '', 0, 'variable', 0, 0], ['A0001', 'WHITE', 'S', 'Small', 1, 'variation', 0, 0], ['A0001', 'WHITE', 'M', 'Medium', 2, 'variation', 0, 0], ['A0001', 'WHITE', 'L', 'Large', 3, 'variation', 0, 0], ['A0001', 'WHITE', 'XL', 'Extra Large', 4, 'variation', 0, 0], ['A0001', 'BLACK', 'L', 'Large', 5, 'variation', 0, 0], ['A0003', '', '', '', 6, 'variable', 0, 0], ['A0003', 'BLACK', 'XL', 'Extra Large', 7, 'variation', 0, 0], ['A0002', '', '', '', 8, 'variable', 0, 0], ['A0002', 'BLACK', 'L', 'Large', 9, 'variation', 0, 0], ['A0002', 'BLACK', 'XL', 'Extra Large', 10, 'variation', 0, 0], ['A0004', '', '', '', 12, 'variable', 0, 0], ['A0004', 'WHITE', 'S', 'Small', 12, 'variation', 0, 0], ['A0004', 'RED', 'S', 'Small', 13, 'variation', 0, 0], ['A0004', 'RED', 'M', 'Medium', 14, 'variation', 0, 0], ['A0001', 'WHITE', 'XXL', '2 Extra Large', 15, 'variation', 0, 0]]
----------------------------------------------------------------------------
data sort col 0 (parent), col 5 (type), col 1 (color), col 4 (order)
[['A0001', '', '', '', 0, 'variable', 0, 0], ['A0001', 'BLACK', 'L', 'Large', 5, 'variation', 0, 0], ['A0001', 'WHITE', 'S', 'Small', 1, 'variation', 0, 0], ['A0001', 'WHITE', 'M', 'Medium', 2, 'variation', 0, 0], ['A0001', 'WHITE', 'L', 'Large', 3, 'variation', 0, 0], ['A0001', 'WHITE', 'XL', 'Extra Large', 4, 'variation', 0, 0], ['A0001', 'WHITE', 'XXL', '2 Extra Large', 15, 'variation', 0, 0], ['A0002', '', '', '', 8, 'variable', 0, 0], ['A0002', 'BLACK', 'L', 'Large', 9, 'variation', 0, 0], ['A0002', 'BLACK', 'XL', 'Extra Large', 10, 'variation', 0, 0], ['A0003', '', '', '', 6, 'variable', 0, 0], ['A0003', 'BLACK', 'XL', 'Extra Large', 7, 'variation', 0, 0], ['A0004', '', '', '', 12, 'variable', 0, 0], ['A0004', 'RED', 'S', 'Small', 13, 'variation', 0, 0], ['A0004', 'RED', 'M', 'Medium', 14, 'variation', 0, 0], ['A0004', 'WHITE', 'S', 'Small', 12, 'variation', 0, 0]]
In [ ]: