-
Notifications
You must be signed in to change notification settings - Fork 0
/
airbnb_superhost_face_data_analyzer.py
68 lines (61 loc) · 2.34 KB
/
airbnb_superhost_face_data_analyzer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# -*- coding: utf-8 -*-
import json
import os
import pandas
import matplotlib.pyplot as plt
import numpy as np
def read_profile():
    """Load the superhost profile table from disk.

    Reads ``./Data/Superhost/Profile_Super.txt`` (resolved against the
    current working directory), splits the content on newlines, drops empty
    lines and splits every remaining line on tab characters.

    Returns:
        list[list[str]]: one list of tab-separated fields per non-empty
        line, in file order.

    Raises:
        IOError / OSError: if the profile file cannot be opened or read.
    """
    # The context manager guarantees the handle is closed, even when the
    # read raises, instead of leaving it open until garbage collection.
    with open('./Data/Superhost/Profile_Super.txt') as file_in:
        lines = file_in.read().split('\n')
    return [line.split('\t') for line in lines if line != '']
def _save_pie_chart(face_info, group_column, counted_column, title, out_path):
    """Save a pie chart of the number of rows of face_info per group_column value."""
    # Counting any other column per group yields the group sizes; the column
    # is renamed so the pie is drawn from a column literally called 'count'.
    counts = (
        face_info.groupby([group_column])[[counted_column]]
        .count()
        .rename(columns={counted_column: 'count'})
    )
    counts.plot.pie(y='count', figsize=(5, 5), autopct='%.2f')
    plt.title(title)
    plt.savefig(out_path)
    plt.close()


def _save_cdf_chart(values, title, xlabel, out_path):
    """Save a line plot of the empirical CDF of values."""
    # Sorted samples on x against evenly spaced levels in [0, 1] on y.
    plt.plot(np.sort(values), np.linspace(0, 1, len(values)))
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel('CDF')
    plt.savefig(out_path)
    plt.close()


def draw_face_charts():
    """Plot gender, race, age and smiling statistics of superhost faces.

    For every profile returned by ``read_profile()`` the first field is used
    as the user id, and ``./Data/Superhost/Faces/<uid>.json`` is loaded when
    it exists. Each entry of that file's ``'face'`` list contributes its
    ``attribute.gender/age/race/smiling.value`` fields.

    Side effects:
        * Prints the fraction of loaded face files whose ``'face'`` list is
          non-empty.
        * Writes ``gender.eps``, ``race.eps``, ``CDF_age.eps`` and
          ``CDF_smiling.eps`` into ``./Result/``.

    When no face file is found, or none of the files contains a face, a
    message is printed and no chart is written (there is nothing to plot and
    the ratio would otherwise be a division by zero).

    Raises:
        KeyError: if a face file lacks one of the expected keys.
    """
    gender = []
    age = []
    race = []
    smiling = []
    num = 0      # face files successfully loaded
    count = 0    # face files with at least one detected face

    for superhost in read_profile():
        uid = superhost[0]
        face_path = './Data/Superhost/Faces/' + uid + '.json'
        if not os.path.exists(face_path):
            continue
        with open(face_path, 'r') as file_in:
            face_data = json.load(file_in)
        num += 1
        faces = face_data['face']
        if faces:
            count += 1
        for face in faces:
            attribute = face['attribute']
            gender.append(attribute['gender']['value'])
            age.append(attribute['age']['value'])
            race.append(attribute['race']['value'])
            smiling.append(attribute['smiling']['value'])

    if num == 0:
        # Avoid ZeroDivisionError when there is no face data at all.
        print('No face data files found in ./Data/Superhost/Faces/')
        return
    print(float(count) / num)

    if not gender:
        # Every file had an empty 'face' list: the charts would be empty.
        print('No faces detected; no charts were generated')
        return

    # Build the frame column-wise from the lists; passing a lazy ``map``
    # object to the constructor is not accepted by every pandas version.
    face_info = pandas.DataFrame(
        {'gender': gender, 'age': age, 'race': race, 'smiling': smiling},
        columns=['gender', 'age', 'race', 'smiling'],
    )

    _save_pie_chart(face_info, 'gender', 'age',
                    'Gender of Airbnb superhosts', './Result/gender.eps')
    _save_pie_chart(face_info, 'race', 'gender',
                    'Race of Airbnb superhosts', './Result/race.eps')
    _save_cdf_chart(age, 'CDF of Airbnb superhosts age', 'Age',
                    './Result/CDF_age.eps')
    _save_cdf_chart(smiling, 'CDF of Airbnb superhosts smiling index',
                    'Smiling index', './Result/CDF_smiling.eps')
# Script entry point: generate all charts only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    draw_face_charts()