Subsetting Data Frames in Pandas

Pandas Library

February 25, 2021 · 29 mins read
import pandas as pd
import os
stats = pd.read_csv('D:\\OneDrive - office365hubs.com\\.Python for data science\\Demographic-Data.csv')

stats
Country Name Country Code Birth rate Internet users Income Group
0 Aruba ABW 10.244 78.9 High income
1 Afghanistan AFG 35.253 5.9 Low income
2 Angola AGO 45.985 19.1 Upper middle income
3 Albania ALB 12.877 57.2 Upper middle income
4 United Arab Emirates ARE 11.044 88.0 High income
... ... ... ... ... ...
190 Yemen, Rep. YEM 32.947 20.0 Lower middle income
191 South Africa ZAF 20.850 46.5 Upper middle income
192 Congo, Dem. Rep. COD 42.394 2.2 Low income
193 Zambia ZMB 40.471 15.4 Lower middle income
194 Zimbabwe ZWE 35.715 18.5 Low income

195 rows × 5 columns

stats.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 195 entries, 0 to 194
Data columns (total 5 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Country Name    195 non-null    object 
 1   Country Code    195 non-null    object 
 2   Birth rate      195 non-null    float64
 3   Internet users  195 non-null    float64
 4   Income Group    195 non-null    object 
dtypes: float64(2), object(3)
memory usage: 7.7+ KB
stats.describe().transpose()
count mean std min 25% 50% 75% max
Birth rate 195.0 21.469928 10.605467 7.9 12.1205 19.68 29.7595 49.6610
Internet users 195.0 42.076471 29.030788 0.9 14.5200 41.00 66.2250 96.5468
### Susetting data frame in Pandas
stats.head()
Country Name Country Code Birth rate Internet users Income Group
0 Aruba ABW 10.244 78.9 High income
1 Afghanistan AFG 35.253 5.9 Low income
2 Angola AGO 45.985 19.1 Upper middle income
3 Albania ALB 12.877 57.2 Upper middle income
4 United Arab Emirates ARE 11.044 88.0 High income
#how to subset by the rows
#how to subset by the columns
#how to combine the two
### Susetting data frame in Pandas
stats.head()
Country Name Country Code Birth rate Internet users Income Group
21 Belize BLZ 23.092 33.60 Upper middle income
22 Bermuda BMU 10.400 95.30 High income
23 Bolivia BOL 24.236 36.94 Lower middle income
24 Brazil BRA 14.931 51.04 Upper middle income
25 Barbados BRB 12.188 73.00 High income
stats[100:110]
Country Name Country Code Birth rate Internet users Income Group
100 Libya LBY 21.425 16.5000 Upper middle income
101 St. Lucia LCA 15.430 46.2000 Upper middle income
102 Liechtenstein LIE 9.200 93.8000 High income
103 Sri Lanka LKA 17.863 21.9000 Lower middle income
104 Lesotho LSO 28.738 5.0000 Lower middle income
105 Lithuania LTU 10.100 68.4529 High income
106 Luxembourg LUX 11.300 93.7765 High income
107 Latvia LVA 10.200 75.2344 High income
108 Macao SAR, China MAC 11.256 65.8000 High income
109 Morocco MAR 21.023 56.0000 Lower middle income
stats[185:]
Country Name Country Code Birth rate Internet users Income Group
185 Virgin Islands (U.S.) VIR 10.700 45.3 High income
186 Vietnam VNM 15.537 43.9 Lower middle income
187 Vanuatu VUT 26.739 11.3 Lower middle income
188 West Bank and Gaza PSE 30.394 46.6 Lower middle income
189 Samoa WSM 26.172 15.3 Lower middle income
190 Yemen, Rep. YEM 32.947 20.0 Lower middle income
191 South Africa ZAF 20.850 46.5 Upper middle income
192 Congo, Dem. Rep. COD 42.394 2.2 Low income
193 Zambia ZMB 40.471 15.4 Lower middle income
194 Zimbabwe ZWE 35.715 18.5 Low income
stats[:10]
Country Name Country Code Birth rate Internet users Income Group
0 Aruba ABW 10.244 78.9000 High income
1 Afghanistan AFG 35.253 5.9000 Low income
2 Angola AGO 45.985 19.1000 Upper middle income
3 Albania ALB 12.877 57.2000 Upper middle income
4 United Arab Emirates ARE 11.044 88.0000 High income
5 Argentina ARG 17.716 59.9000 High income
6 Armenia ARM 13.308 41.9000 Lower middle income
7 Antigua and Barbuda ATG 16.447 63.4000 High income
8 Australia AUS 13.200 83.0000 High income
9 Austria AUT 9.400 80.6188 High income
#%%
Country Name Country Code Birth rate Internet users Income Group
194 Zimbabwe ZWE 35.715 18.5 Low income
193 Zambia ZMB 40.471 15.4 Lower middle income
192 Congo, Dem. Rep. COD 42.394 2.2 Low income
191 South Africa ZAF 20.850 46.5 Upper middle income
190 Yemen, Rep. YEM 32.947 20.0 Lower middle income
... ... ... ... ... ...
4 United Arab Emirates ARE 11.044 88.0 High income
3 Albania ALB 12.877 57.2 Upper middle income
2 Angola AGO 45.985 19.1 Upper middle income
1 Afghanistan AFG 35.253 5.9 Low income
0 Aruba ABW 10.244 78.9 High income

195 rows × 5 columns

stats[:50]
Country Name Country Code Birth rate Internet users Income Group
0 Aruba ABW 10.244 78.90000 High income
1 Afghanistan AFG 35.253 5.90000 Low income
2 Angola AGO 45.985 19.10000 Upper middle income
3 Albania ALB 12.877 57.20000 Upper middle income
4 United Arab Emirates ARE 11.044 88.00000 High income
5 Argentina ARG 17.716 59.90000 High income
6 Armenia ARM 13.308 41.90000 Lower middle income
7 Antigua and Barbuda ATG 16.447 63.40000 High income
8 Australia AUS 13.200 83.00000 High income
9 Austria AUT 9.400 80.61880 High income
10 Azerbaijan AZE 18.300 58.70000 Upper middle income
11 Burundi BDI 44.151 1.30000 Low income
12 Belgium BEL 11.200 82.17020 High income
13 Benin BEN 36.440 4.90000 Low income
14 Burkina Faso BFA 40.551 9.10000 Low income
15 Bangladesh BGD 20.142 6.63000 Lower middle income
16 Bulgaria BGR 9.200 53.06150 Upper middle income
17 Bahrain BHR 15.040 90.00004 High income
18 Bahamas, The BHS 15.339 72.00000 High income
19 Bosnia and Herzegovina BIH 9.062 57.79000 Upper middle income
20 Belarus BLR 12.500 54.17000 Upper middle income
21 Belize BLZ 23.092 33.60000 Upper middle income
22 Bermuda BMU 10.400 95.30000 High income
23 Bolivia BOL 24.236 36.94000 Lower middle income
24 Brazil BRA 14.931 51.04000 Upper middle income
25 Barbados BRB 12.188 73.00000 High income
26 Brunei Darussalam BRN 16.405 64.50000 High income
27 Bhutan BTN 18.134 29.90000 Lower middle income
28 Botswana BWA 25.267 15.00000 Upper middle income
29 Central African Republic CAF 34.076 3.50000 Low income
30 Canada CAN 10.900 85.80000 High income
31 Switzerland CHE 10.200 86.34000 High income
32 Chile CHL 13.385 66.50000 High income
33 China CHN 12.100 45.80000 Upper middle income
34 Cote d'Ivoire CIV 37.320 8.40000 Lower middle income
35 Cameroon CMR 37.236 6.40000 Lower middle income
36 Congo, Rep. COG 37.011 6.60000 Lower middle income
37 Colombia COL 16.076 51.70000 Upper middle income
38 Comoros COM 34.326 6.50000 Low income
39 Cabo Verde CPV 21.625 37.50000 Lower middle income
40 Costa Rica CRI 15.022 45.96000 Upper middle income
41 Cuba CUB 10.400 27.93000 Upper middle income
42 Cayman Islands CYM 12.500 74.10000 High income
43 Cyprus CYP 11.436 65.45480 High income
44 Czech Republic CZE 10.200 74.11040 High income
45 Germany DEU 8.500 84.17000 High income
46 Djibouti DJI 25.486 9.50000 Lower middle income
47 Denmark DNK 10.000 94.62970 High income
48 Dominican Republic DOM 21.198 45.90000 Upper middle income
49 Algeria DZA 24.738 16.50000 Upper middle income
stats[50::-1]
Country Name Country Code Birth rate Internet users Income Group
50 Ecuador ECU 21.070 40.353684 Upper middle income
49 Algeria DZA 24.738 16.500000 Upper middle income
48 Dominican Republic DOM 21.198 45.900000 Upper middle income
47 Denmark DNK 10.000 94.629700 High income
46 Djibouti DJI 25.486 9.500000 Lower middle income
45 Germany DEU 8.500 84.170000 High income
44 Czech Republic CZE 10.200 74.110400 High income
43 Cyprus CYP 11.436 65.454800 High income
42 Cayman Islands CYM 12.500 74.100000 High income
41 Cuba CUB 10.400 27.930000 Upper middle income
40 Costa Rica CRI 15.022 45.960000 Upper middle income
39 Cabo Verde CPV 21.625 37.500000 Lower middle income
38 Comoros COM 34.326 6.500000 Low income
37 Colombia COL 16.076 51.700000 Upper middle income
36 Congo, Rep. COG 37.011 6.600000 Lower middle income
35 Cameroon CMR 37.236 6.400000 Lower middle income
34 Cote d'Ivoire CIV 37.320 8.400000 Lower middle income
33 China CHN 12.100 45.800000 Upper middle income
32 Chile CHL 13.385 66.500000 High income
31 Switzerland CHE 10.200 86.340000 High income
30 Canada CAN 10.900 85.800000 High income
29 Central African Republic CAF 34.076 3.500000 Low income
28 Botswana BWA 25.267 15.000000 Upper middle income
27 Bhutan BTN 18.134 29.900000 Lower middle income
26 Brunei Darussalam BRN 16.405 64.500000 High income
25 Barbados BRB 12.188 73.000000 High income
24 Brazil BRA 14.931 51.040000 Upper middle income
23 Bolivia BOL 24.236 36.940000 Lower middle income
22 Bermuda BMU 10.400 95.300000 High income
21 Belize BLZ 23.092 33.600000 Upper middle income
20 Belarus BLR 12.500 54.170000 Upper middle income
19 Bosnia and Herzegovina BIH 9.062 57.790000 Upper middle income
18 Bahamas, The BHS 15.339 72.000000 High income
17 Bahrain BHR 15.040 90.000040 High income
16 Bulgaria BGR 9.200 53.061500 Upper middle income
15 Bangladesh BGD 20.142 6.630000 Lower middle income
14 Burkina Faso BFA 40.551 9.100000 Low income
13 Benin BEN 36.440 4.900000 Low income
12 Belgium BEL 11.200 82.170200 High income
11 Burundi BDI 44.151 1.300000 Low income
10 Azerbaijan AZE 18.300 58.700000 Upper middle income
9 Austria AUT 9.400 80.618800 High income
8 Australia AUS 13.200 83.000000 High income
7 Antigua and Barbuda ATG 16.447 63.400000 High income
6 Armenia ARM 13.308 41.900000 Lower middle income
5 Argentina ARG 17.716 59.900000 High income
4 United Arab Emirates ARE 11.044 88.000000 High income
3 Albania ALB 12.877 57.200000 Upper middle income
2 Angola AGO 45.985 19.100000 Upper middle income
1 Afghanistan AFG 35.253 5.900000 Low income
0 Aruba ABW 10.244 78.900000 High income
stats = stats[::-1]
stats
Country Name Country Code Birth rate Internet users Income Group
194 Zimbabwe ZWE 35.715 18.5 Low income
193 Zambia ZMB 40.471 15.4 Lower middle income
192 Congo, Dem. Rep. COD 42.394 2.2 Low income
191 South Africa ZAF 20.850 46.5 Upper middle income
190 Yemen, Rep. YEM 32.947 20.0 Lower middle income
... ... ... ... ... ...
4 United Arab Emirates ARE 11.044 88.0 High income
3 Albania ALB 12.877 57.2 Upper middle income
2 Angola AGO 45.985 19.1 Upper middle income
1 Afghanistan AFG 35.253 5.9 Low income
0 Aruba ABW 10.244 78.9 High income

195 rows × 5 columns

stats = stats[::1]
stats = stats[::-1]
stats
Country Name Country Code Birth rate Internet users Income Group
0 Aruba ABW 10.244 78.9 High income
1 Afghanistan AFG 35.253 5.9 Low income
2 Angola AGO 45.985 19.1 Upper middle income
3 Albania ALB 12.877 57.2 Upper middle income
4 United Arab Emirates ARE 11.044 88.0 High income
... ... ... ... ... ...
190 Yemen, Rep. YEM 32.947 20.0 Lower middle income
191 South Africa ZAF 20.850 46.5 Upper middle income
192 Congo, Dem. Rep. COD 42.394 2.2 Low income
193 Zambia ZMB 40.471 15.4 Lower middle income
194 Zimbabwe ZWE 35.715 18.5 Low income

195 rows × 5 columns

#get only every 20th row
stats[::20]
Country Name Country Code Birth rate Internet users Income Group
0 Aruba ABW 10.244 78.9000 High income
20 Belarus BLR 12.500 54.1700 Upper middle income
40 Costa Rica CRI 15.022 45.9600 Upper middle income
60 Gabon GAB 30.555 9.2000 Upper middle income
80 India IND 20.291 15.1000 Lower middle income
100 Libya LBY 21.425 16.5000 Upper middle income
120 Mozambique MOZ 39.705 5.4000 Low income
140 Poland POL 9.600 62.8492 High income
160 Suriname SUR 18.455 37.4000 Upper middle income
180 Uruguay URY 14.374 57.6900 High income
#part 2. Columns
stats.columns
Index(['Country Name', 'Country Code', 'Birth rate', 'Internet users',
       'Income Group'],
      dtype='object')
stats.head()
Country Name Country Code Birth rate Internet users Income Group
0 Aruba ABW 10.244 78.9 High income
1 Afghanistan AFG 35.253 5.9 Low income
2 Angola AGO 45.985 19.1 Upper middle income
3 Albania ALB 12.877 57.2 Upper middle income
4 United Arab Emirates ARE 11.044 88.0 High income
stats['Country Name']
0                     Aruba
1               Afghanistan
2                    Angola
3                   Albania
4      United Arab Emirates
               ...         
190             Yemen, Rep.
191            South Africa
192        Congo, Dem. Rep.
193                  Zambia
194                Zimbabwe
Name: Country Name, Length: 195, dtype: object
stats.columns = ['CountryName','CountryCode','BirthRate','InternetUsers','IncomeGroup']
stats
CountryName CountryCode BirthRate InternetUsers IncomeGroup
0 Aruba ABW 10.244 78.9 High income
1 Afghanistan AFG 35.253 5.9 Low income
2 Angola AGO 45.985 19.1 Upper middle income
3 Albania ALB 12.877 57.2 Upper middle income
4 United Arab Emirates ARE 11.044 88.0 High income
... ... ... ... ... ...
190 Yemen, Rep. YEM 32.947 20.0 Lower middle income
191 South Africa ZAF 20.850 46.5 Upper middle income
192 Congo, Dem. Rep. COD 42.394 2.2 Low income
193 Zambia ZMB 40.471 15.4 Lower middle income
194 Zimbabwe ZWE 35.715 18.5 Low income

195 rows × 5 columns

stats['CountryName']
0                     Aruba
1               Afghanistan
2                    Angola
3                   Albania
4      United Arab Emirates
               ...         
190             Yemen, Rep.
191            South Africa
192        Congo, Dem. Rep.
193                  Zambia
194                Zimbabwe
Name: CountryName, Length: 195, dtype: object
stats['CountryName'].head()
0                   Aruba
1             Afghanistan
2                  Angola
3                 Albania
4    United Arab Emirates
Name: CountryName, dtype: object
['CountryName','BirthRate']
['CountryName', 'BirthRate']
stats[['CountryName', 'BirthRate']].head()
CountryName BirthRate
0 Aruba 10.244
1 Afghanistan 35.253
2 Angola 45.985
3 Albania 12.877
4 United Arab Emirates 11.044
stats['CountryName'].tail()
190         Yemen, Rep.
191        South Africa
192    Congo, Dem. Rep.
193              Zambia
194            Zimbabwe
Name: CountryName, dtype: object
stats[['CountryName','BirthRate']].tail()
CountryName BirthRate
190 Yemen, Rep. 32.947
191 South Africa 20.850
192 Congo, Dem. Rep. 42.394
193 Zambia 40.471
194 Zimbabwe 35.715
#Quick Access - requires the name to be one word
stats.head()
CountryName CountryCode BirthRate InternetUsers IncomeGroup
0 Aruba ABW 10.244 78.9 High income
1 Afghanistan AFG 35.253 5.9 Low income
2 Angola AGO 45.985 19.1 Upper middle income
3 Albania ALB 12.877 57.2 Upper middle income
4 United Arab Emirates ARE 11.044 88.0 High income
stats['BirthRate']
0      10.244
1      35.253
2      45.985
3      12.877
4      11.044
        ...  
190    32.947
191    20.850
192    42.394
193    40.471
194    35.715
Name: BirthRate, Length: 195, dtype: float64
stats.BirthRate
0      10.244
1      35.253
2      45.985
3      12.877
4      11.044
        ...  
190    32.947
191    20.850
192    42.394
193    40.471
194    35.715
Name: BirthRate, Length: 195, dtype: float64
stats.BirthRate.head()
0    10.244
1    35.253
2    45.985
3    12.877
4    11.044
Name: BirthRate, dtype: float64
#Part 3 - combaining the two: rows and columns
stats[2:20][['CountryName','BirthRate']]
CountryName BirthRate
2 Angola 45.985
3 Albania 12.877
4 United Arab Emirates 11.044
5 Argentina 17.716
6 Armenia 13.308
7 Antigua and Barbuda 16.447
8 Australia 13.200
9 Austria 9.400
10 Azerbaijan 18.300
11 Burundi 44.151
12 Belgium 11.200
13 Benin 36.440
14 Burkina Faso 40.551
15 Bangladesh 20.142
16 Bulgaria 9.200
17 Bahrain 15.040
18 Bahamas, The 15.339
19 Bosnia and Herzegovina 9.062