{
"cell_type": "code",
"execution_count": 78,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Sell</th>\n",
" <th>List</th>\n",
" <th>Living</th>\n",
" <th>Rooms</th>\n",
" <th>Beds</th>\n",
" <th>Baths</th>\n",
" <th>Age</th>\n",
" <th>Acres</th>\n",
" <th>Taxes</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>142</td>\n",
" <td>160.0</td>\n",
" <td>28.0</td>\n",
" <td>10.0</td>\n",
" <td>5.0</td>\n",
" <td>3.0</td>\n",
" <td>60.0</td>\n",
" <td>0.28</td>\n",
" <td>3167.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>175</td>\n",
" <td>180.0</td>\n",
" <td>18.0</td>\n",
" <td>8.0</td>\n",
" <td>4.0</td>\n",
" <td>1.0</td>\n",
" <td>12.0</td>\n",
" <td>0.43</td>\n",
" <td>4033.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>129</td>\n",
" <td>132.0</td>\n",
" <td>13.0</td>\n",
" <td>6.0</td>\n",
" <td>3.0</td>\n",
" <td>1.0</td>\n",
" <td>41.0</td>\n",
" <td>0.33</td>\n",
" <td>1471.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>138</td>\n",
" <td>140.0</td>\n",
" <td>17.0</td>\n",
" <td>7.0</td>\n",
" <td>3.0</td>\n",
" <td>1.0</td>\n",
" <td>22.0</td>\n",
" <td>0.46</td>\n",
" <td>3204.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>232</td>\n",
" <td>240.0</td>\n",
" <td>25.0</td>\n",
" <td>8.0</td>\n",
" <td>4.0</td>\n",
" <td>3.0</td>\n",
" <td>5.0</td>\n",
" <td>2.05</td>\n",
" <td>3613.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>135</td>\n",
" <td>140.0</td>\n",
" <td>18.0</td>\n",
" <td>7.0</td>\n",
" <td>4.0</td>\n",
" <td>3.0</td>\n",
" <td>9.0</td>\n",
" <td>0.57</td>\n",
" <td>3028.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>150</td>\n",
" <td>160.0</td>\n",
" <td>20.0</td>\n",
" <td>8.0</td>\n",
" <td>4.0</td>\n",
" <td>3.0</td>\n",
" <td>18.0</td>\n",
" <td>4.00</td>\n",
" <td>3131.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>207</td>\n",
" <td>225.0</td>\n",
" <td>22.0</td>\n",
" <td>8.0</td>\n",
" <td>4.0</td>\n",
" <td>2.0</td>\n",
" <td>16.0</td>\n",
" <td>2.22</td>\n",
" <td>5158.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>271</td>\n",
" <td>285.0</td>\n",
" <td>30.0</td>\n",
" <td>10.0</td>\n",
" <td>5.0</td>\n",
" <td>2.0</td>\n",
" <td>30.0</td>\n",
" <td>0.53</td>\n",
" <td>5702.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>89</td>\n",
" <td>90.0</td>\n",
" <td>10.0</td>\n",
" <td>5.0</td>\n",
" <td>3.0</td>\n",
" <td>1.0</td>\n",
" <td>43.0</td>\n",
" <td>0.30</td>\n",
" <td>2054.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>153</td>\n",
" <td>157.0</td>\n",
" <td>22.0</td>\n",
" <td>8.0</td>\n",
" <td>3.0</td>\n",
" <td>3.0</td>\n",
" <td>18.0</td>\n",
" <td>0.38</td>\n",
" <td>4127.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>87</td>\n",
" <td>90.0</td>\n",
" <td>16.0</td>\n",
" <td>7.0</td>\n",
" <td>3.0</td>\n",
" <td>1.0</td>\n",
" <td>50.0</td>\n",
" <td>0.65</td>\n",
" <td>1445.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>234</td>\n",
" <td>238.0</td>\n",
" <td>25.0</td>\n",
" <td>8.0</td>\n",
" <td>4.0</td>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" <td>1.61</td>\n",
" <td>2087.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>106</td>\n",
" <td>116.0</td>\n",
" <td>20.0</td>\n",
" <td>8.0</td>\n",
" <td>4.0</td>\n",
" <td>1.0</td>\n",
" <td>13.0</td>\n",
" <td>0.22</td>\n",
" <td>2818.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>175</td>\n",
" <td>180.0</td>\n",
" <td>22.0</td>\n",
" <td>8.0</td>\n",
" <td>4.0</td>\n",
" <td>2.0</td>\n",
" <td>15.0</td>\n",
" <td>2.06</td>\n",
" <td>3917.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>165</td>\n",
" <td>170.0</td>\n",
" <td>17.0</td>\n",
" <td>8.0</td>\n",
" <td>4.0</td>\n",
" <td>2.0</td>\n",
" <td>33.0</td>\n",
" <td>0.46</td>\n",
" <td>2220.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>166</td>\n",
" <td>170.0</td>\n",
" <td>23.0</td>\n",
" <td>9.0</td>\n",
" <td>4.0</td>\n",
" <td>2.0</td>\n",
" <td>37.0</td>\n",
" <td>0.27</td>\n",
" <td>3498.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>136</td>\n",
" <td>140.0</td>\n",
" <td>19.0</td>\n",
" <td>7.0</td>\n",
" <td>3.0</td>\n",
" <td>1.0</td>\n",
" <td>22.0</td>\n",
" <td>0.63</td>\n",
" <td>3607.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>148</td>\n",
" <td>160.0</td>\n",
" <td>17.0</td>\n",
" <td>7.0</td>\n",
" <td>3.0</td>\n",
" <td>2.0</td>\n",
" <td>13.0</td>\n",
" <td>0.36</td>\n",
" <td>3648.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>151</td>\n",
" <td>153.0</td>\n",
" <td>19.0</td>\n",
" <td>8.0</td>\n",
" <td>4.0</td>\n",
" <td>2.0</td>\n",
" <td>24.0</td>\n",
" <td>0.34</td>\n",
" <td>3561.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>180</td>\n",
" <td>190.0</td>\n",
" <td>24.0</td>\n",
" <td>9.0</td>\n",
" <td>4.0</td>\n",
" <td>2.0</td>\n",
" <td>10.0</td>\n",
" <td>1.55</td>\n",
" <td>4681.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>293</td>\n",
" <td>305.0</td>\n",
" <td>26.0</td>\n",
" <td>8.0</td>\n",
" <td>4.0</td>\n",
" <td>3.0</td>\n",
" <td>6.0</td>\n",
" <td>0.46</td>\n",
" <td>7088.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>167</td>\n",
" <td>170.0</td>\n",
" <td>20.0</td>\n",
" <td>9.0</td>\n",
" <td>4.0</td>\n",
" <td>2.0</td>\n",
" <td>46.0</td>\n",
" <td>0.46</td>\n",
" <td>3482.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>190</td>\n",
" <td>193.0</td>\n",
" <td>22.0</td>\n",
" <td>9.0</td>\n",
" <td>5.0</td>\n",
" <td>2.0</td>\n",
" <td>37.0</td>\n",
" <td>0.48</td>\n",
" <td>3920.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>184</td>\n",
" <td>190.0</td>\n",
" <td>21.0</td>\n",
" <td>9.0</td>\n",
" <td>5.0</td>\n",
" <td>2.0</td>\n",
" <td>27.0</td>\n",
" <td>1.30</td>\n",
" <td>4162.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>157</td>\n",
" <td>165.0</td>\n",
" <td>20.0</td>\n",
" <td>8.0</td>\n",
" <td>4.0</td>\n",
" <td>2.0</td>\n",
" <td>7.0</td>\n",
" <td>0.30</td>\n",
" <td>3785.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>110</td>\n",
" <td>115.0</td>\n",
" <td>16.0</td>\n",
" <td>8.0</td>\n",
" <td>4.0</td>\n",
" <td>1.0</td>\n",
" <td>26.0</td>\n",
" <td>0.29</td>\n",
" <td>3103.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>135</td>\n",
" <td>145.0</td>\n",
" <td>18.0</td>\n",
" <td>7.0</td>\n",
" <td>4.0</td>\n",
" <td>1.0</td>\n",
" <td>35.0</td>\n",
" <td>0.43</td>\n",
" <td>3363.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>567</td>\n",
" <td>625.0</td>\n",
" <td>64.0</td>\n",
" <td>11.0</td>\n",
" <td>4.0</td>\n",
" <td>4.0</td>\n",
" <td>4.0</td>\n",
" <td>0.85</td>\n",
" <td>12192.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>180</td>\n",
" <td>185.0</td>\n",
" <td>20.0</td>\n",
" <td>8.0</td>\n",
" <td>4.0</td>\n",
" <td>2.0</td>\n",
" <td>11.0</td>\n",
" <td>1.00</td>\n",
" <td>3831.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30</th>\n",
" <td>183</td>\n",
" <td>188.0</td>\n",
" <td>17.0</td>\n",
" <td>7.0</td>\n",
" <td>3.0</td>\n",
" <td>2.0</td>\n",
" <td>16.0</td>\n",
" <td>3.00</td>\n",
" <td>3564.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31</th>\n",
" <td>185</td>\n",
" <td>193.0</td>\n",
" <td>20.0</td>\n",
" <td>9.0</td>\n",
" <td>3.0</td>\n",
" <td>2.0</td>\n",
" <td>56.0</td>\n",
" <td>6.49</td>\n",
" <td>3765.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32</th>\n",
" <td>152</td>\n",
" <td>155.0</td>\n",
" <td>17.0</td>\n",
" <td>8.0</td>\n",
" <td>4.0</td>\n",
" <td>1.0</td>\n",
" <td>33.0</td>\n",
" <td>0.70</td>\n",
" <td>3361.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33</th>\n",
" <td>148</td>\n",
" <td>153.0</td>\n",
" <td>13.0</td>\n",
" <td>6.0</td>\n",
" <td>3.0</td>\n",
" <td>2.0</td>\n",
" <td>22.0</td>\n",
" <td>0.39</td>\n",
" <td>3950.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>34</th>\n",
" <td>152</td>\n",
" <td>159.0</td>\n",
" <td>15.0</td>\n",
" <td>7.0</td>\n",
" <td>3.0</td>\n",
" <td>1.0</td>\n",
" <td>25.0</td>\n",
" <td>0.59</td>\n",
" <td>3055.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>35</th>\n",
" <td>146</td>\n",
" <td>150.0</td>\n",
" <td>16.0</td>\n",
" <td>7.0</td>\n",
" <td>3.0</td>\n",
" <td>1.0</td>\n",
" <td>31.0</td>\n",
" <td>0.36</td>\n",
" <td>2950.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>36</th>\n",
" <td>170</td>\n",
" <td>190.0</td>\n",
" <td>24.0</td>\n",
" <td>10.0</td>\n",
" <td>3.0</td>\n",
" <td>2.0</td>\n",
" <td>33.0</td>\n",
" <td>0.57</td>\n",
" <td>3346.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37</th>\n",
" <td>127</td>\n",
" <td>130.0</td>\n",
" <td>20.0</td>\n",
" <td>8.0</td>\n",
" <td>4.0</td>\n",
" <td>1.0</td>\n",
" <td>65.0</td>\n",
" <td>0.40</td>\n",
" <td>3334.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>38</th>\n",
" <td>265</td>\n",
" <td>270.0</td>\n",
" <td>36.0</td>\n",
" <td>10.0</td>\n",
" <td>6.0</td>\n",
" <td>3.0</td>\n",
" <td>33.0</td>\n",
" <td>1.20</td>\n",
" <td>5853.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>39</th>\n",
" <td>157</td>\n",
" <td>163.0</td>\n",
" <td>18.0</td>\n",
" <td>8.0</td>\n",
" <td>4.0</td>\n",
" <td>2.0</td>\n",
" <td>12.0</td>\n",
" <td>1.13</td>\n",
" <td>3982.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>40</th>\n",
" <td>128</td>\n",
" <td>135.0</td>\n",
" <td>17.0</td>\n",
" <td>9.0</td>\n",
" <td>4.0</td>\n",
" <td>1.0</td>\n",
" <td>25.0</td>\n",
" <td>0.52</td>\n",
" <td>3374.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>41</th>\n",
" <td>110</td>\n",
" <td>120.0</td>\n",
" <td>15.0</td>\n",
" <td>8.0</td>\n",
" <td>4.0</td>\n",
" <td>2.0</td>\n",
" <td>11.0</td>\n",
" <td>0.59</td>\n",
" <td>3119.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>42</th>\n",
" <td>123</td>\n",
" <td>130.0</td>\n",
" <td>18.0</td>\n",
" <td>8.0</td>\n",
" <td>4.0</td>\n",
" <td>2.0</td>\n",
" <td>43.0</td>\n",
" <td>0.39</td>\n",
" <td>3268.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>43</th>\n",
" <td>212</td>\n",
" <td>230.0</td>\n",
" <td>39.0</td>\n",
" <td>12.0</td>\n",
" <td>5.0</td>\n",
" <td>3.0</td>\n",
" <td>202.0</td>\n",
" <td>4.29</td>\n",
" <td>3648.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>44</th>\n",
" <td>145</td>\n",
" <td>145.0</td>\n",
" <td>18.0</td>\n",
" <td>8.0</td>\n",
" <td>4.0</td>\n",
" <td>2.0</td>\n",
" <td>44.0</td>\n",
" <td>0.22</td>\n",
" <td>2783.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45</th>\n",
" <td>129</td>\n",
" <td>135.0</td>\n",
" <td>10.0</td>\n",
" <td>6.0</td>\n",
" <td>3.0</td>\n",
" <td>1.0</td>\n",
" <td>15.0</td>\n",
" <td>1.00</td>\n",
" <td>2438.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>46</th>\n",
" <td>143</td>\n",
" <td>145.0</td>\n",
" <td>21.0</td>\n",
" <td>7.0</td>\n",
" <td>4.0</td>\n",
" <td>2.0</td>\n",
" <td>10.0</td>\n",
" <td>1.20</td>\n",
" <td>3529.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>47</th>\n",
" <td>247</td>\n",
" <td>252.0</td>\n",
" <td>29.0</td>\n",
" <td>9.0</td>\n",
" <td>4.0</td>\n",
" <td>2.0</td>\n",
" <td>4.0</td>\n",
" <td>1.25</td>\n",
" <td>4626.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48</th>\n",
" <td>111</td>\n",
" <td>120.0</td>\n",
" <td>15.0</td>\n",
" <td>8.0</td>\n",
" <td>3.0</td>\n",
" <td>1.0</td>\n",
" <td>97.0</td>\n",
" <td>1.11</td>\n",
" <td>3205.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>49</th>\n",
" <td>133</td>\n",
" <td>145.0</td>\n",
" <td>26.0</td>\n",
" <td>7.0</td>\n",
" <td>3.0</td>\n",
" <td>1.0</td>\n",
" <td>42.0</td>\n",
" <td>0.36</td>\n",
" <td>3059.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50</th>\n",
" <td></td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Sell List Living Rooms Beds Baths Age Acres Taxes\n",
"0 142 160.0 28.0 10.0 5.0 3.0 60.0 0.28 3167.0\n",
"1 175 180.0 18.0 8.0 4.0 1.0 12.0 0.43 4033.0\n",
"2 129 132.0 13.0 6.0 3.0 1.0 41.0 0.33 1471.0\n",
"3 138 140.0 17.0 7.0 3.0 1.0 22.0 0.46 3204.0\n",
"4 232 240.0 25.0 8.0 4.0 3.0 5.0 2.05 3613.0\n",
"5 135 140.0 18.0 7.0 4.0 3.0 9.0 0.57 3028.0\n",
"6 150 160.0 20.0 8.0 4.0 3.0 18.0 4.00 3131.0\n",
"7 207 225.0 22.0 8.0 4.0 2.0 16.0 2.22 5158.0\n",
"8 271 285.0 30.0 10.0 5.0 2.0 30.0 0.53 5702.0\n",
"9 89 90.0 10.0 5.0 3.0 1.0 43.0 0.30 2054.0\n",
"10 153 157.0 22.0 8.0 3.0 3.0 18.0 0.38 4127.0\n",
"11 87 90.0 16.0 7.0 3.0 1.0 50.0 0.65 1445.0\n",
"12 234 238.0 25.0 8.0 4.0 2.0 2.0 1.61 2087.0\n",
"13 106 116.0 20.0 8.0 4.0 1.0 13.0 0.22 2818.0\n",
"14 175 180.0 22.0 8.0 4.0 2.0 15.0 2.06 3917.0\n",
"15 165 170.0 17.0 8.0 4.0 2.0 33.0 0.46 2220.0\n",
"16 166 170.0 23.0 9.0 4.0 2.0 37.0 0.27 3498.0\n",
"17 136 140.0 19.0 7.0 3.0 1.0 22.0 0.63 3607.0\n",
"18 148 160.0 17.0 7.0 3.0 2.0 13.0 0.36 3648.0\n",
"19 151 153.0 19.0 8.0 4.0 2.0 24.0 0.34 3561.0\n",
"20 180 190.0 24.0 9.0 4.0 2.0 10.0 1.55 4681.0\n",
"21 293 305.0 26.0 8.0 4.0 3.0 6.0 0.46 7088.0\n",
"22 167 170.0 20.0 9.0 4.0 2.0 46.0 0.46 3482.0\n",
"23 190 193.0 22.0 9.0 5.0 2.0 37.0 0.48 3920.0\n",
"24 184 190.0 21.0 9.0 5.0 2.0 27.0 1.30 4162.0\n",
"25 157 165.0 20.0 8.0 4.0 2.0 7.0 0.30 3785.0\n",
"26 110 115.0 16.0 8.0 4.0 1.0 26.0 0.29 3103.0\n",
"27 135 145.0 18.0 7.0 4.0 1.0 35.0 0.43 3363.0\n",
"28 567 625.0 64.0 11.0 4.0 4.0 4.0 0.85 12192.0\n",
"29 180 185.0 20.0 8.0 4.0 2.0 11.0 1.00 3831.0\n",
"30 183 188.0 17.0 7.0 3.0 2.0 16.0 3.00 3564.0\n",
"31 185 193.0 20.0 9.0 3.0 2.0 56.0 6.49 3765.0\n",
"32 152 155.0 17.0 8.0 4.0 1.0 33.0 0.70 3361.0\n",
"33 148 153.0 13.0 6.0 3.0 2.0 22.0 0.39 3950.0\n",
"34 152 159.0 15.0 7.0 3.0 1.0 25.0 0.59 3055.0\n",
"35 146 150.0 16.0 7.0 3.0 1.0 31.0 0.36 2950.0\n",
"36 170 190.0 24.0 10.0 3.0 2.0 33.0 0.57 3346.0\n",
"37 127 130.0 20.0 8.0 4.0 1.0 65.0 0.40 3334.0\n",
"38 265 270.0 36.0 10.0 6.0 3.0 33.0 1.20 5853.0\n",
"39 157 163.0 18.0 8.0 4.0 2.0 12.0 1.13 3982.0\n",
"40 128 135.0 17.0 9.0 4.0 1.0 25.0 0.52 3374.0\n",
"41 110 120.0 15.0 8.0 4.0 2.0 11.0 0.59 3119.0\n",
"42 123 130.0 18.0 8.0 4.0 2.0 43.0 0.39 3268.0\n",
"43 212 230.0 39.0 12.0 5.0 3.0 202.0 4.29 3648.0\n",
"44 145 145.0 18.0 8.0 4.0 2.0 44.0 0.22 2783.0\n",
"45 129 135.0 10.0 6.0 3.0 1.0 15.0 1.00 2438.0\n",
"46 143 145.0 21.0 7.0 4.0 2.0 10.0 1.20 3529.0\n",
"47 247 252.0 29.0 9.0 4.0 2.0 4.0 1.25 4626.0\n",
"48 111 120.0 15.0 8.0 3.0 1.0 97.0 1.11 3205.0\n",
"49 133 145.0 26.0 7.0 3.0 1.0 42.0 0.36 3059.0\n",
"50 NaN NaN NaN NaN NaN NaN NaN NaN"
]
},
"execution_count": 78,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.read_excel(\"data/homes.xlsx\")"
]
},
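{
"cell_type": "markdown",
"metadata": {},
"source": [
"A note in case the cell above fails: `pd.read_excel` relies on an optional dependency to read `.xlsx` files (`xlrd` or `openpyxl`, depending on the pandas version). If you get an import error, installing one of them, e.g. with `pip install openpyxl`, should fix it."
]
},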
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Writing CSV files\n",
"Easy!"
]
},
{
"cell_type": "code",
"metadata": {},
"outputs": [],
"source": [
"homes.to_csv(\"test.csv\")"
]
},
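{
"cell_type": "markdown",
"metadata": {},
"source": [
"By default `to_csv` also writes the row index as the first column of the file. If you don't want that, pass `index=False`. A minimal sketch, reusing the `homes` DataFrame from above:\n",
"\n",
"```python\n",
"# Write the DataFrame without the index column\n",
"homes.to_csv(\"test.csv\", index=False)\n",
"```"
]
},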
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Create a DataFrame which consists of all numbers 0 to 1000. Reshape it into 50 rows and save it to a `.csv` file. How many columns did you end up with?"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Exercise 8\n",
"There is a dataset `data/yob2012.txt` which lists the number of newborns registered in 2018 with their names and sex. Open the dataset in pandas, explore it and derive the ratio between male and female newborns."
]
},
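{
"cell_type": "markdown",
"metadata": {},
"source": [
"A hint for the loading step only: a `.txt` file like this can still be read with `pd.read_csv`. The sketch below assumes the file is comma-separated with no header row and one column each for name, sex and count; check the raw file first to confirm.\n",
"\n",
"```python\n",
"# Column names are supplied by hand because the file is assumed to have no header row\n",
"babies = pd.read_csv(\"data/yob2012.txt\", names=[\"name\", \"sex\", \"count\"])\n",
"babies.head()\n",
"```"
]
},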
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Web scraping <a name=\"web\"></a>\n",
"It is also very easy to scrape webpages and extract tables from them.\n",
"\n",
"For example, let's consider extracting the table of failed American banks."
"execution_count": 80,
"metadata": {},
"outputs": [
{
"ename": "ImportError",
"evalue": "lxml not found, please install it",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-80-a8c440c11437>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0murl\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"https://www.fdic.gov/bank/individual/failed/banklist.html\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mbanks\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_html\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0murl\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0mbanks\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mbanks\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/opt/conda/lib/python3.6/site-packages/pandas/io/html.py\u001b[0m in \u001b[0;36mread_html\u001b[0;34m(io, match, flavor, header, index_col, skiprows, attrs, parse_dates, tupleize_cols, thousands, encoding, decimal, converters, na_values, keep_default_na, displayed_only)\u001b[0m\n\u001b[1;32m 985\u001b[0m \u001b[0mdecimal\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdecimal\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mconverters\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mconverters\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mna_values\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mna_values\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 986\u001b[0m \u001b[0mkeep_default_na\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mkeep_default_na\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 987\u001b[0;31m displayed_only=displayed_only)\n\u001b[0m",
"\u001b[0;32m/opt/conda/lib/python3.6/site-packages/pandas/io/html.py\u001b[0m in \u001b[0;36m_parse\u001b[0;34m(flavor, io, match, attrs, encoding, displayed_only, **kwargs)\u001b[0m\n\u001b[1;32m 791\u001b[0m \u001b[0mretained\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 792\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mflav\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mflavor\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 793\u001b[0;31m \u001b[0mparser\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_parser_dispatch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mflav\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 794\u001b[0m \u001b[0mp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mparser\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mio\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcompiled_match\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mattrs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencoding\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdisplayed_only\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 795\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/opt/conda/lib/python3.6/site-packages/pandas/io/html.py\u001b[0m in \u001b[0;36m_parser_dispatch\u001b[0;34m(flavor)\u001b[0m\n\u001b[1;32m 749\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 750\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0m_HAS_LXML\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 751\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mImportError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"lxml not found, please install it\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 752\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0m_valid_parsers\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mflavor\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 753\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mImportError\u001b[0m: lxml not found, please install it"
]
}
],
"source": [
"url = \"https://www.fdic.gov/bank/individual/failed/banklist.html\"\n",
"banks = pd.read_html(url)\n",
"banks = banks[0]"
]
},
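{
"cell_type": "markdown",
"metadata": {},
"source": [
"The `ImportError` above simply means that `pd.read_html` could not find an HTML parser in this environment. Installing one, for example with `pip install lxml` (or `conda install lxml`), and re-running the cell should make it work."
]
},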
{
"cell_type": "code",
"execution_count": 81,
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'banks' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-81-a4667db23f8f>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mbanks\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;31mNameError\u001b[0m: name 'banks' is not defined"
]
}
],
"source": [
"banks"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Powerful no? Now let's turn that into an exercise.\n",
"\n",
"Given the data you just extracted above, can you analyse how many banks have failed per state?\n",
"\n",
"Georgia (GA) should be the state with the most failed banks!\n",
"\n",
"*Hint: try searching the web for pandas counting occurrences* "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Data Cleaning <a name=\"cleaning\"></a>\n",
"While doing data analysis and modeling, a significant amount of time is spent on data preparation: loading, cleaning, transforming and rearranging. Such tasks are often reported to take **up to 80%** or more of a data analyst's time. Often the way the data is stored in files isn't in the correct format and needs to be modified. Researchers usually do this on an ad-hoc basis using programming languages like Python.\n",
"In this chapter, we will discuss tools for handling missing data, duplicate data, string manipulation, and some other analytical data transformations.\n",
"Mussing data occurs commonly in many data analysis applications. One of the goals of pandas is to make working with missing data as painless as possible.\n",
"\n",
"In pandas, missing numeric data is represented by `NaN` (Not a Number) and can easily be handled:"
"execution_count": 82,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 orange\n",
"1 tomato\n",
"2 NaN\n",
"3 avocado\n",
"dtype: object"
]
},
"execution_count": 82,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"string_data = pd.Series(['orange', 'tomato', np.nan, 'avocado'])\n",
"string_data"
]
},
{
"cell_type": "code",
"execution_count": 83,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 False\n",
"1 False\n",
"2 True\n",
"3 False\n",
"dtype: bool"
]
},
"execution_count": 83,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"string_data.isnull()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Furthermore, the pandas `NaN` is functionally equlevant to the standard Python type `NoneType` which can be defined with `x = None`."
]
},
{
"cell_type": "code",
"execution_count": 84,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 None\n",
"1 tomato\n",
"2 NaN\n",
"3 avocado\n",
"dtype: object"
]
},
"execution_count": 84,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"string_data[0] = None\n",
"string_data"
]
},
{
"cell_type": "code",
"execution_count": 85,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 True\n",
"1 False\n",
"2 True\n",
"3 False\n",
"dtype: bool"
]
},
"execution_count": 85,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"string_data.isnull()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Here are some other methods which you can find useful:\n",
" \n",
"| dropna | Filter axis labels based on whether the values of each label have missing data|\n",
"| fillna | Fill in missing data with some value |\n",
"| isnull | Return boolean values indicating which values are missing |\n",
"| notnull | Negation of isnull |\n",
"\n",
"Remove the missing data below using the appropriate method"
]
},
{
"cell_type": "code",
"execution_count": 86,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 1.0\n",
"1 NaN\n",
"2 3.0\n",
"3 4.0\n",
"4 NaN\n",
"5 6.0\n",
"dtype: float64"
]
},
"execution_count": 86,
"metadata": {},
"output_type": "execute_result"
}
],
"data = pd.Series([1, None, 3, 4, None, 6])\n",
"data"
]
},
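{
"cell_type": "markdown",
"metadata": {},
"source": [
"For comparison, `fillna` keeps every entry but replaces the missing values instead of dropping them. A minimal sketch, assuming the `data` Series defined above:\n",
"\n",
"```python\n",
"# Replace every NaN with a fixed value\n",
"data.fillna(0)\n",
"\n",
"# Or fill with the mean of the non-missing values\n",
"data.fillna(data.mean())\n",
"```"
]
},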
{
"cell_type": "markdown",
"metadata": {},
"source": [
"`dropna()` by default removes any row/column that has a missing value. What if we want to remove only rows in which all of the data is missing though?"
]
},
{
"cell_type": "code",
"execution_count": 91,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1.0</td>\n",
" <td>6.5</td>\n",
" <td>3.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>NaN</td>\n",
" <td>6.5</td>\n",
" <td>3.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 0 1 2\n",
"0 1.0 6.5 3.0\n",
"1 1.0 NaN NaN\n",
"2 NaN NaN NaN\n",
"3 NaN 6.5 3.0"
]
},
"execution_count": 91,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data = pd.DataFrame([[1., 6.5, 3.], [1., None, None],\n",
" [None, None, None], [None, 6.5, 3.]])\n",
"data"
]
},
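{
"cell_type": "markdown",
"metadata": {},
"source": [
"A minimal sketch of the difference, assuming the `data` DataFrame defined above: plain `dropna()` drops every row that contains at least one missing value, while `how='all'` drops only the rows in which every value is missing.\n",
"\n",
"```python\n",
"# Drops rows 1, 2 and 3 (each contains at least one NaN)\n",
"data.dropna()\n",
"\n",
"# Drops only row 2, where every value is NaN\n",
"data.dropna(how='all')\n",
"```"
]
},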
{
"cell_type": "code",
"execution_count": 92,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1.0</td>\n",
" <td>6.5</td>\n",
" <td>3.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",