天天看點

資料分析

1 #!/usr/bin/python
  2 #-*- coding: utf-8 -*-
  3 
  4 
  5 
  6 import os
  7 import re
  8 import csv
  9 import time
 10 import json
 11 import jieba
 12 from jieba import analyse
 13 import pandas as pd
 14 import itchat
 15 import base64
 16 from snownlp import SnowNLP
 17 import requests
 18 import sys
 19 from collections import Counter
 20 import matplotlib.pyplot as plt
 21 from pylab import *
 22 from faceApi import FaceAPI
 23 mpl.rcParams['font.sans-serif'] = ['SimHei']
 24 from PIL import Image
 25 import numpy as np
 26 from wordcloud import WordCloud
 27 from pyecharts import Pie, Map, Style, Page, Bar
 28 
 29 def analyseSex(firends):
 30     sexs = list(map(lambda x:x['Sex'],friends[1:]))
 31     counts = Counter(sexs).items()
 32     counts = sorted(counts, key=lambda x:x[0], reverse=False)
 33     counts = list(map(lambda x:x[1],counts))
 34     labels = ['不明','男性','女性']
 35     colors = ['red','yellow','blue']
 36     plt.figure(figsize=(8,5), dpi=80)
 37     plt.axes(aspect=1) 
 38     plt.pie(counts, 
 39             labels=labels, 
 40             colors=colors, 
 41             labeldistance = 1.1, 
 42             autopct = '%3.1f%%',
 43             shadow = False, 
 44             startangle = 90, 
 45             pctdistance = 0.6 
 46     )
 47     plt.legend(loc='upper right',)
 48     plt.title(u'%s的微信好友性别組成' % friends[0]['NickName'])
 49     plt.show()
 50 
 51 def analyseLocation(friends):
 52     freqs = {}
 53     headers = ['NickName','Province','City']
 54     with open('location.csv','w',encoding='utf-8',newline='',) as csvFile:
 55         writer = csv.DictWriter(csvFile, headers)
 56         writer.writeheader()
 57         for friend in friends[1:]:
 58             row = {}
 59             row['NickName'] = friend['NickName']
 60             row['Province'] = friend['Province']
 61             row['City'] = friend['City']
 62             if(friend['Province']!=None):
 63                 if(friend['Province'] not in freqs):
 64                    freqs[friend['Province']] = 1
 65                 else:
 66                    freqs[friend['Province']] = 1
 67             writer.writerow(row)
 68     
 69 
 70 
 71 def analyseHeadImage(frineds):
 72     # Init Path
 73     basePath = os.path.abspath('.')
 74     baseFolder = basePath + '\\HeadImages\\'
 75     if(os.path.exists(baseFolder) == False):
 76         os.makedirs(baseFolder)
 77 
 78     # Analyse Images
 79     faceApi = FaceAPI()
 80     use_face = 0
 81     not_use_face = 0
 82     image_tags = ''
 83     for index in range(1,len(friends)):
 84         friend = friends[index]
 85         # Save HeadImages
 86         imgFile = baseFolder + '\\Image%s.jpg' % str(index)
 87         imgData = itchat.get_head_img(userName = friend['UserName'])
 88         if(os.path.exists(imgFile) == False):
 89             with open(imgFile,'wb') as file:
 90                 file.write(imgData)
 91 
 92         # Detect Faces
 93         time.sleep(1)
 94         result = faceApi.detectFace(imgFile)
 95         if result == True:
 96             use_face += 1
 97         else:
 98             not_use_face += 1 
 99 
100         # Extract Tags
101         result = faceApi.extractTags(imgFile)
102         image_tags += ','.join(list(map(lambda x:x['tag_name'],result)))
103     
104     labels = [u'使用人臉頭像',u'不使用人臉頭像']
105     counts = [use_face,not_use_face]
106     colors = ['red','yellow']
107     plt.figure(figsize=(8,5), dpi=80)
108     plt.axes(aspect=1) 
109     plt.pie(counts, #性别統計結果
110             labels=labels, #性别展示标簽
111             colors=colors, #餅圖區域配色
112             labeldistance = 1.1, #标簽距離圓點距離
113             autopct = '%3.1f%%', #餅圖區域文本格式
114             shadow = False, #餅圖是否顯示陰影
115             startangle = 90, #餅圖起始角度
116             pctdistance = 0.5 #餅圖區域文本距離圓點距離
117     )
118     plt.legend(loc='upper right',)
119     plt.title(u'%s的微信好友使用人臉頭像情況' % friends[0]['NickName'])
120     plt.show() 
121 
122     image_tags = image_tags.encode('iso8859-1').decode('utf-8')
123     back_coloring = np.array(Image.open('face.jpg'))
124     wordcloud = WordCloud(
125         font_path='simfang.ttf',
126         background_color="white",
127         max_words=1200,
128         mask=back_coloring, 
129         max_font_size=85,
130         random_state=75,
131         width=800, 
132         height=480, 
133         margin=15
134     )
135 
136     wordcloud.generate(image_tags)
137     plt.imshow(wordcloud)
138     plt.axis("off")
139     plt.show()
140 
def analyseSignature(friends):
    """Build a keyword word cloud and a sentiment bar chart from signatures.

    Each non-empty signature is cleaned, scored with ``SnowNLP(...).sentiments``
    and reduced to at most five keywords via ``jieba.analyse.extract_tags``.
    The keywords are written to ``signatures.txt`` and rendered as a word
    cloud (also saved as ``signatures.jpg``); the scores are bucketed into
    negative (< 0.33), neutral (0.33..0.66) and positive (> 0.66).

    Args:
        friends: Sequence of friend records (dict-like, with ``'Signature'``
            and ``'NickName'`` keys). Element 0 supplies the nickname for
            the chart title; every element's signature is analysed.

    Returns:
        None. Results are printed, written to disk and displayed.
    """
    keywords = []
    emotions = []
    for friend in friends:
        signature = friend['Signature']
        if signature is None:
            continue
        # Strip the remnants of emoji markup embedded in the signature.
        signature = signature.strip().replace('span', '').replace('class', '').replace('emoji', '')
        # NOTE(review): the greedy ``.+`` removes everything from the first
        # "1f<digit>" to the end of the text, not just the emoji code --
        # left unchanged pending confirmation of the signature format.
        signature = re.sub(r'1f(\d.+)', '', signature)
        if not signature:
            continue
        emotions.append(SnowNLP(signature).sentiments)
        # Collect keywords in a list so words from consecutive signatures
        # stay space-separated instead of being fused together.
        keywords.extend(jieba.analyse.extract_tags(signature, 5))

    signatures = ' '.join(keywords)
    with open('signatures.txt', 'wt', encoding='utf-8') as file:
        file.write(signatures)

    # Signature WordCloud (WordCloud.generate() needs at least one word).
    if signatures:
        back_coloring = np.array(Image.open('flower.jpg'))
        wordcloud = WordCloud(
            font_path='simfang.ttf',
            background_color="white",
            max_words=1200,
            mask=back_coloring,
            max_font_size=75,
            random_state=45,
            width=960,
            height=720,
            margin=15
        )

        wordcloud.generate(signatures)
        plt.imshow(wordcloud)
        plt.axis("off")
        plt.show()
        wordcloud.to_file('signatures.jpg')

    # Signature Emotional Judgment
    if not emotions:
        # Avoid dividing by zero when nobody has a usable signature.
        print('No signatures to analyse.')
        return

    count_good = sum(1 for score in emotions if score > 0.66)
    count_normal = sum(1 for score in emotions if 0.33 <= score <= 0.66)
    count_bad = sum(1 for score in emotions if score < 0.33)
    total = len(emotions)
    print(count_good * 100 / total)
    print(count_normal * 100 / total)
    print(count_bad * 100 / total)
    print(count_good)
    print(count_normal)
    print(count_bad)

    labels = [u'負面消極', u'中性', u'正面積極']
    values = (count_bad, count_normal, count_good)
    plt.rcParams['font.sans-serif'] = ['simHei']
    plt.rcParams['axes.unicode_minus'] = False
    plt.xlabel(u'情感判斷')
    plt.ylabel(u'頻數')
    plt.xticks(range(3), labels)
    # One colour per bar; the string 'rgb' is not a valid colour spec in
    # current matplotlib releases. The legend call was dropped because no
    # artist carries a label, so it only produced a warning.
    plt.bar(range(3), values, color=['r', 'g', 'b'])
    plt.title(u'%s的微信好友簽名資訊情感分析' % friends[0]['NickName'])
    plt.show()
200 
def create_charts(users=None):
    """Render a pyecharts map of friends per province.

    Args:
        users: Optional sequence of friend records (dict-like, with a
            ``'Province'`` key). When omitted, the list is fetched with
            ``itchat.get_friends()``, as before.

    Returns:
        None. The page is written by ``Page.render()`` to its default
        output location.
    """
    if users is None:
        users = itchat.get_friends()
    attr, value = prov_stats(users)

    page = Page()
    style = Style(width=1100, height=600)
    chart = Map('中國地圖', **style.init_style)
    chart.add('', attr, value, is_label_show=True, is_visualmap=True, visual_text_color='#000')
    page.add(chart)
    page.render()
212 
213 
def prov_stats(users):
    """Count users per province, ordered by ascending count.

    Args:
        users: Records accepted by ``pd.DataFrame`` that yield a
            ``'Province'`` column.

    Returns:
        ``(names, counts)``: province names, with the empty string shown
        as '未知', and the matching counts, both sorted by ascending count.
    """
    frame = pd.DataFrame(users)
    per_province = frame.groupby('Province', as_index=True)['Province'].count().sort_values()
    # An empty province name means the user did not specify one.
    names = ['未知' if name == '' else name for name in list(per_province.index)]
    totals = list(per_province)
    return names, totals
219 
# Script entry: log in to WeChat and extract the friend list.
# hotReload=True is passed so itchat can reuse a cached login session
# (presumably avoiding a fresh QR scan -- confirm in the itchat docs).
itchat.auto_login(hotReload = True)
# NOTE: `friends` must stay a module-level name; some of the analysis
# functions above read it as a global rather than through their parameter.
friends = itchat.get_friends(update = True)
# Run every analysis in turn; each plotting call blocks in plt.show()
# until its window is closed.
create_charts()
analyseSex(friends)
analyseSignature(friends)
analyseHeadImage(friends)
analyseLocation(friends)