Filtering a Data Frame in Python

Filter a Data Frame

February 23, 2021 · 18 mins read

Filter a Data Frame with Pandas

import pandas as pd
stats = pd.read_csv('D:\\OneDrive - office365hubs.com\\.Python for data science\\Demographic-Data.csv')
stats.columns = ['CountryName','CountryCode','BirthRate','InternetUsers','IncomeGroup']
stats.head()
CountryName CountryCode BirthRate InternetUsers IncomeGroup
0 Aruba ABW 10.244 78.9 High income
1 Afghanistan AFG 35.253 5.9 Low income
2 Angola AGO 45.985 19.1 Upper middle income
3 Albania ALB 12.877 57.2 Upper middle income
4 United Arab Emirates ARE 11.044 88.0 High income
#Filtering is about rows: a filter picks which rows to keep, all columns stay
stats.head()
CountryName CountryCode BirthRate InternetUsers IncomeGroup
0 Aruba ABW 10.244 78.9 High income
1 Afghanistan AFG 35.253 5.9 Low income
2 Angola AGO 45.985 19.1 Upper middle income
3 Albania ALB 12.877 57.2 Upper middle income
4 United Arab Emirates ARE 11.044 88.0 High income
stats.InternetUsers < 2
0      False
1      False
2      False
3      False
4      False
       ...  
190    False
191    False
192    False
193    False
194    False
Name: InternetUsers, Length: 195, dtype: bool
Filter = stats.InternetUsers < 2
Filter
0      False
1      False
2      False
3      False
4      False
       ...  
190    False
191    False
192    False
193    False
194    False
Name: InternetUsers, Length: 195, dtype: bool
stats[Filter]
CountryName CountryCode BirthRate InternetUsers IncomeGroup
11 Burundi BDI 44.151 1.3 Low income
52 Eritrea ERI 34.800 0.9 Low income
55 Ethiopia ETH 32.925 1.9 Low income
64 Guinea GIN 37.337 1.6 Low income
117 Myanmar MMR 18.119 1.6 Lower middle income
127 Niger NER 49.661 1.7 Low income
154 Sierra Leone SLE 36.729 1.7 Low income
156 Somalia SOM 43.891 1.5 Low income
172 Timor-Leste TLS 35.755 1.1 Lower middle income
Filter.columns = ['Country Name']  #not working: Filter is a boolean Series, not a data frame, so it has no column names to rename
stats[Filter]
CountryName CountryCode BirthRate InternetUsers IncomeGroup
11 Burundi BDI 44.151 1.3 Low income
52 Eritrea ERI 34.800 0.9 Low income
55 Ethiopia ETH 32.925 1.9 Low income
64 Guinea GIN 37.337 1.6 Low income
117 Myanmar MMR 18.119 1.6 Lower middle income
127 Niger NER 49.661 1.7 Low income
154 Sierra Leone SLE 36.729 1.7 Low income
156 Somalia SOM 43.891 1.5 Low income
172 Timor-Leste TLS 35.755 1.1 Lower middle income
ourFilter = stats[Filter]
ourFilter
CountryName CountryCode BirthRate InternetUsers IncomeGroup
11 Burundi BDI 44.151 1.3 Low income
52 Eritrea ERI 34.800 0.9 Low income
55 Ethiopia ETH 32.925 1.9 Low income
64 Guinea GIN 37.337 1.6 Low income
117 Myanmar MMR 18.119 1.6 Lower middle income
127 Niger NER 49.661 1.7 Low income
154 Sierra Leone SLE 36.729 1.7 Low income
156 Somalia SOM 43.891 1.5 Low income
172 Timor-Leste TLS 35.755 1.1 Lower middle income
ourFilter.columns = ['Country Name',	'Country Code',	'Birth Rate','Internet Users','Income Group']
ourFilter
Country Name Country Code Birth Rate Internet Users Income Group
11 Burundi BDI 44.151 1.3 Low income
52 Eritrea ERI 34.800 0.9 Low income
55 Ethiopia ETH 32.925 1.9 Low income
64 Guinea GIN 37.337 1.6 Low income
117 Myanmar MMR 18.119 1.6 Lower middle income
127 Niger NER 49.661 1.7 Low income
154 Sierra Leone SLE 36.729 1.7 Low income
156 Somalia SOM 43.891 1.5 Low income
172 Timor-Leste TLS 35.755 1.1 Lower middle income
#Practice
stats.BirthRate > 40
0      False
1      False
2       True
3      False
4      False
       ...  
190    False
191    False
192     True
193     True
194    False
Name: BirthRate, Length: 195, dtype: bool
Filter2 = stats.BirthRate > 40
stats[Filter2]
CountryName CountryCode BirthRate InternetUsers IncomeGroup
2 Angola AGO 45.985 19.1 Upper middle income
11 Burundi BDI 44.151 1.3 Low income
14 Burkina Faso BFA 40.551 9.1 Low income
65 Gambia, The GMB 42.525 14.0 Low income
115 Mali MLI 44.138 3.5 Low income
127 Niger NER 49.661 1.7 Low income
128 Nigeria NGA 40.045 38.0 Lower middle income
156 Somalia SOM 43.891 1.5 Low income
167 Chad TCD 45.745 2.3 Low income
178 Uganda UGA 43.474 16.2 Low income
192 Congo, Dem. Rep. COD 42.394 2.2 Low income
193 Zambia ZMB 40.471 15.4 Lower middle income
stats[stats.BirthRate>40]
CountryName CountryCode BirthRate InternetUsers IncomeGroup
2 Angola AGO 45.985 19.1 Upper middle income
11 Burundi BDI 44.151 1.3 Low income
14 Burkina Faso BFA 40.551 9.1 Low income
65 Gambia, The GMB 42.525 14.0 Low income
115 Mali MLI 44.138 3.5 Low income
127 Niger NER 49.661 1.7 Low income
128 Nigeria NGA 40.045 38.0 Lower middle income
156 Somalia SOM 43.891 1.5 Low income
167 Chad TCD 45.745 2.3 Low income
178 Uganda UGA 43.474 16.2 Low income
192 Congo, Dem. Rep. COD 42.394 2.2 Low income
193 Zambia ZMB 40.471 15.4 Lower middle income
stats[(stats.BirthRate > 40) & (stats.InternetUsers < 2)]
CountryName CountryCode BirthRate InternetUsers IncomeGroup
11 Burundi BDI 44.151 1.3 Low income
127 Niger NER 49.661 1.7 Low income
156 Somalia SOM 43.891 1.5 Low income
stats[stats.IncomeGroup == 'High income']
CountryName CountryCode BirthRate InternetUsers IncomeGroup
0 Aruba ABW 10.244 78.90 High income
4 United Arab Emirates ARE 11.044 88.00 High income
5 Argentina ARG 17.716 59.90 High income
7 Antigua and Barbuda ATG 16.447 63.40 High income
8 Australia AUS 13.200 83.00 High income
... ... ... ... ... ...
174 Trinidad and Tobago TTO 14.590 63.80 High income
180 Uruguay URY 14.374 57.69 High income
181 United States USA 12.500 84.20 High income
184 Venezuela, RB VEN 19.842 54.90 High income
185 Virgin Islands (U.S.) VIR 10.700 45.30 High income

67 rows × 5 columns

stats.head()
CountryName CountryCode BirthRate InternetUsers IncomeGroup
0 Aruba ABW 10.244 78.9 High income
1 Afghanistan AFG 35.253 5.9 Low income
2 Angola AGO 45.985 19.1 Upper middle income
3 Albania ALB 12.877 57.2 Upper middle income
4 United Arab Emirates ARE 11.044 88.0 High income
stats[(stats.BirthRate > 10) & (stats.IncomeGroup == 'High income')]
CountryName CountryCode BirthRate InternetUsers IncomeGroup
0 Aruba ABW 10.244 78.90000 High income
4 United Arab Emirates ARE 11.044 88.00000 High income
5 Argentina ARG 17.716 59.90000 High income
7 Antigua and Barbuda ATG 16.447 63.40000 High income
8 Australia AUS 13.200 83.00000 High income
12 Belgium BEL 11.200 82.17020 High income
17 Bahrain BHR 15.040 90.00004 High income
18 Bahamas, The BHS 15.339 72.00000 High income
22 Bermuda BMU 10.400 95.30000 High income
25 Barbados BRB 12.188 73.00000 High income
26 Brunei Darussalam BRN 16.405 64.50000 High income
30 Canada CAN 10.900 85.80000 High income
31 Switzerland CHE 10.200 86.34000 High income
32 Chile CHL 13.385 66.50000 High income
42 Cayman Islands CYM 12.500 74.10000 High income
43 Cyprus CYP 11.436 65.45480 High income
44 Czech Republic CZE 10.200 74.11040 High income
54 Estonia EST 10.300 79.40000 High income
56 Finland FIN 10.700 91.51440 High income
58 France FRA 12.300 81.91980 High income
61 United Kingdom GBR 12.200 89.84410 High income
67 Equatorial Guinea GNQ 35.362 16.40000 High income
70 Greenland GRL 14.500 65.80000 High income
72 Guam GUM 17.389 65.40000 High income
81 Ireland IRL 15.000 78.24770 High income
84 Iceland ISL 13.400 96.54680 High income
85 Israel ISR 21.300 70.80000 High income
96 Kuwait KWT 20.575 75.46000 High income
105 Lithuania LTU 10.100 68.45290 High income
106 Luxembourg LUX 11.300 93.77650 High income
107 Latvia LVA 10.200 75.23440 High income
108 Macao SAR, China MAC 11.256 65.80000 High income
126 New Caledonia NCL 17.000 66.00000 High income
130 Netherlands NLD 10.200 93.95640 High income
131 Norway NOR 11.600 95.05340 High income
133 New Zealand NZL 13.120 82.78000 High income
134 Oman OMN 20.419 66.45000 High income
141 Puerto Rico PRI 10.800 73.90000 High income
144 French Polynesia PYF 16.393 56.80000 High income
145 Qatar QAT 11.940 85.30000 High income
147 Russian Federation RUS 13.200 67.97000 High income
149 Saudi Arabia SAU 20.576 60.50000 High income
161 Slovak Republic SVK 10.100 77.88260 High income
162 Slovenia SVN 10.200 72.67560 High income
163 Sweden SWE 11.800 94.78360 High income
165 Seychelles SYC 18.600 50.40000 High income
174 Trinidad and Tobago TTO 14.590 63.80000 High income
180 Uruguay URY 14.374 57.69000 High income
181 United States USA 12.500 84.20000 High income
184 Venezuela, RB VEN 19.842 54.90000 High income
185 Virgin Islands (U.S.) VIR 10.700 45.30000 High income
#how to get the unique categories of a column - useful for spotting categorical data
stats.IncomeGroup.unique()
array(['High income', 'Low income', 'Upper middle income',
       'Lower middle income'], dtype=object)
#quick exercise: find everything about Malta
stats[stats.CountryName == 'Malta']
CountryName CountryCode BirthRate InternetUsers IncomeGroup
116 Malta MLT 9.5 68.9138 High income