python 简单应用——网络爬虫AND数据分析
当你学会 Python 之后,是不是想做一些简单的应用呢?刚好猫猫在初学 Python 的时候做过两个简单的应用:网络爬虫和数据分析。只要了解基本语法就能看懂下面的代码,如果你有需要,可以继续看下去。
如果有写错的地方,麻烦留言。
一、网络爬虫
1.要求
对爬取的新闻内容进行分析,用图形显示新闻中的高频关键词。使用requests库,获取网页页面,使用BeautifulSoup库对网页内容进行解析,使用正则表达式获取网页链接字符串。
2.思路
3.程序源码
3.1_main.py
# 1. Crawl the news pages and store them in a fixed text format.
# 2. Show each article's high-frequency words as a word cloud.
import time
# Run timer; taken before the project imports, so their load time is counted.
start_T = time.time()
# Project-local helpers.
from GetNEWContent import getNEWContent
from GetTitleList import getTitleList
from Words_Cloud import wc

# Index page that lists the news articles to crawl.
url = 'https://uscnews.usc.edu.cn/nhyw.htm'
# A single article page, kept for manual testing.
url1 = 'https://uscnews.usc.edu.cn/info/1025/43508.htm'

# Two output files are produced:
#   * title file: one summary line per article (index, the three header
#     fields returned by getNEWContent, and the article URL);
#   * content file: a separator, the same summary line, then the body text.
# Separator written before every article in the content file.
str00 = "*" * 100 + "\n"

# `with` guarantees both files are flushed and closed even when a page fails
# to parse; the explicit encoding keeps the output independent of the
# platform's default encoding.
with open("南华新闻标题锦集.txt", 'w+', encoding='utf-8') as file1, \
        open("南华新闻内容锦集.txt", 'w+', encoding='utf-8') as file2:
    # j is the 1-based index of the article, i is its URL.
    for j, i in enumerate(getTitleList(url), start=1):
        content = getNEWContent(i)
        # content[0] is the list of header fields, content[1] the body text.
        str0 = ("*" + str(j) + "新闻标题:" + (''.join(content[0][0].split())) + " "
                + str(content[0][1]) + " "
                + str(content[0][2]) + " "
                + "网页地址" + i + "\n")
        str1 = str00 + str0 + "#新闻具体内容" + content[1] + "\n"
        file1.write(str0)
        file2.write(str1)
        # One word-cloud image per article, named after its index.
        namestr = "新闻" + str(j)
        wc(content[1], namestr)

end_T = time.time()
print("程序运行时间{:.3f}秒".format(end_T - start_T))
3.2GetNEWContent.py
import re
from pyparsing import Regex
from Reptile import _reptlie
#函数名getNEWContent,
#一个参数是要抓取指定网络的网址,
#函数作用,主要是抓取新闻的具体内容
#抓取的具体内容包括新闻标题,作者,正文
def getNEWContent(url):
    """Fetch one news page and extract its header fields and body text.

    Args:
        url: Address of the article page to download.

    Returns:
        A tuple ``(Titlecontent, _content)``. ``Titlecontent`` is a list of
        three items: the first two non-empty tokens of the title block,
        followed by all remaining tokens joined into one string (each token
        prefixed with a space). ``_content`` is the article body with
        newlines, spaces and a few special characters removed.
    """
    print("正在爬取 {} 内容的……".format(url))
    soup = _reptlie(url)
    # 1.1 Title block: split on spaces, newlines, non-breaking spaces and the
    # "点击:" label, then drop the empty fragments the split leaves behind.
    content = soup.find(name="div", class_="content-title mt20").text
    content = re.split(r" +|\n+|\xa0+|点击:+", content)
    Titlecontent = [i for i in content if i != '']
    # Everything after the first two tokens is collapsed into one string.
    # A dedicated name is used so the builtin ``str`` is not shadowed.
    tail = ''.join(" " + token for token in Titlecontent[2:])
    Titlecontent = Titlecontent[0:2]
    Titlecontent.append(tail)
    # 1.2 Article body.
    _content = soup.find(name="div", class_="v_news_content").text
    # 2.1 Remove non-breaking spaces, em spaces and bullet characters.
    _content = re.sub(r'\xa0+|\u2003+|\u2022+', '', _content)
    # 2.2 Remove newlines and plain spaces so the body is one compact string.
    _content = re.sub(r'\n+| +', '', _content)
    return Titlecontent, _content
3.3GetTitleList.py
import re
from Reptile import _reptlie
def getTitleList(url):
    """Collect the article links listed on the news index page.

    Args:
        url: Address of the index page to download.

    Returns:
        A list of article URLs, in the order they appear on the page.
    """
    print("正在爬取主网页 {} 的链接……".format(url))
    soup = _reptlie(url)
    # The article list is the <ul> with this class; take every anchor in it.
    content = soup.find(name="ul", class_="media-list3 mt10")
    content = content.find_all('a')
    # Extract the href from the serialised anchors. Only anchors written
    # exactly as <a href="..." target="_blank"> are matched.
    re_url = re.compile(r'<a href="(.*?)" target="_blank">')
    link = re.findall(re_url, str(content))
    # Each captured href is prefixed with the site root
    # (assumes the hrefs are site-relative paths — TODO confirm).
    return ["https://uscnews.usc.edu.cn/" + i for i in link]
3.4Reptile.py
import requests
from bs4 import BeautifulSoup
#函数名_reptlie
#一个参数是要爬取指定网络的网址
#函数作用,爬取内容
def _reptlie(url, timeout=10):
    """Download *url* and return it parsed as a BeautifulSoup tree.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The parsed ``BeautifulSoup`` document, or ``None`` when the request
        fails (a message is printed in that case).
    """
    # Browser-like request headers, passed to requests as a plain dict.
    h = {'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
         'Accept-Language': 'zh-CN,zh;q=0.8',
         'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36',
         'Connection': 'keep-alive',
         'referer': 'http://www.163.com/'}
    try:
        # 1. Send the request; without a timeout an unresponsive server
        # could block the crawler indefinitely.
        html = requests.get(url, headers=h, timeout=timeout)
    except requests.RequestException:
        # Only request failures are reported here; anything else
        # (including Ctrl-C) propagates instead of being swallowed.
        print("爬取网页失败……")
        return None
    # 2. Parse the raw bytes with the built-in html.parser backend.
    return BeautifulSoup(html.content, 'html.parser')
3.5Word_cloud_optimization.py
#coding=gbk
import logging
import jieba
#函数名words_count_jieba
#一个参数是处理的内容,类型是字符串
#函数作用,对内容进行筛选,得到使用频率高的中文词语,以字符串的形式进行返回
def words_count_jieba(content, top_n=40, stopwords=None):
    """Return the most frequent multi-character words of *content*.

    Args:
        content: Text to segment with jieba.
        top_n: Maximum number of words to return.
        stopwords: Optional iterable of words to leave out of the ranking.

    Returns:
        The selected words, most frequent first, each preceded by a single
        space and concatenated into one string. Fewer than ``top_n`` words
        are returned when the text has fewer distinct words.
    """
    # Stop words come only from an explicit collection: feeding the text
    # itself to set.update() would merely add its individual characters.
    excluded = set(stopwords) if stopwords is not None else set()
    # Set jieba's logger to INFO level.
    jieba.setLogLevel(logging.INFO)
    counts = {}
    for word in jieba.lcut(content):
        # Skip single-character tokens and excluded words.
        if len(word) == 1 or word in excluded:
            continue
        counts[word] = counts.get(word, 0) + 1
    # Stable sort: words with equal counts keep their first-seen order.
    items = sorted(counts.items(), key=lambda x: x[1], reverse=True)
    # Slicing (rather than indexing a fixed range) copes with short texts.
    return ''.join(" " + word for word, _ in items[:top_n])
3.6Words_Cloud.py
import jieba
from wordcloud import WordCloud
from Word_cloud_optimization import words_count_jieba
#绘制词云图片
def wc(newcontent, namestr, font_path="c:/windows/fonts/simfang.ttf", out_dir="词云图片存放"):
    """Render a word-cloud PNG for one article.

    Args:
        newcontent: Article text to summarise.
        namestr: Base name of the image file (without extension).
        font_path: Font file used to draw the words. A font with Chinese
            glyphs is needed, otherwise the words render as empty boxes.
        out_dir: Folder the image is written to; created if missing.
    """
    import os

    # Reduce the text to its most frequent words first; WordCloud cannot
    # segment Chinese text correctly on its own.
    cut_text = words_count_jieba(newcontent)
    w = WordCloud(
        font_path=font_path,
        # White background, fixed canvas size.
        background_color="white", width=1000, height=880).generate(cut_text)
    # Saving fails if the target folder does not exist yet.
    os.makedirs(out_dir, exist_ok=True)
    strload = out_dir + "/" + str(namestr) + ".png"
    w.to_file(strload)
4.结果展示
二、数据分析
1、要求
熟悉Matplotlib库的基本使用。
使用Matplotlib库,绘制新型冠状病毒部分城市数据对比图。实现两个城市三种数据(确诊,死亡,治愈)的对比,程序可以根据不同输入不同城市和选项(确诊,死亡,治愈),显示相应的对比图。
对数据进行多样化分析。
2.思路:
3、程序源码
3.1_main__start_windows.py
#开始窗口显示
#导入相关库
import tkinter
from tkinter import ttk
import tkinter.messagebox
#导入自定义函数
from matplotlib import pyplot as plt
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
import allData_show
from BUtton_click import click_play_all, click_play_map
from Drawing_Board import CV
from Linechart import linechar
from Barchar import barchar
from Piechar import pie_play
from reptile_data import city_name, datacityname, dataconfirm
#显示默认显示值
city_result=['武汉', '大连']
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.title("欢迎")
plt.show()
#************************************************************************************************************
#1创建一个窗口
start_window=tkinter.Tk()
start_window.title("中国城市疫情情况搜索GUI窗口")#设置窗口标题
screenwidth = start_window.winfo_screenwidth()#求屏幕宽度
screenheight = start_window.winfo_screenheight()#求屏幕高度
start_window.geometry('{}x{}+0+0'.format(screenwidth,screenheight))#设置窗口大小为宽为屏幕宽度,高为屏幕高度,窗口默认出现位置为+0,+0
start_window['bg']="Cyan"#设置窗口背景颜色为蓝色
# #增加背景图片,因为大面积窗口被图表所覆盖所以这里不做演示
# image_file = Image.open("python_gui_backimage_1.png")
# photo = ImageTk.PhotoImage(image_file)
# fwagui=tkinter.Frame(start_window,padx=0,pady=0)
# fwagui.place(x=0,y=0,width=screenwidth,height=screenheight)
# tkinter.Label(fwagui,image=photo).grid(row = 0, column = 0,rowspan=1,columnspan=1)
#2设置标签“城市名称”
cityLabelx=tkinter.Label(start_window,text="城市名称",fg="CornflowerBlue",font=("宋体",20))
cityLabelx.grid(row=1,column=1)#网格布局
# 3.1 Selection handlers for the two city combo boxes.
def _store_city(event, index):
    """Validate the combo box value and store it in ``city_result[index]``."""
    value = event.widget.get()
    print('value:{}'.format(value))
    if value not in city_name:
        # Reject unknown names instead of storing them: the charts look the
        # stored names up later and cannot handle a name that does not exist.
        tkinter.messagebox.showinfo('提示', '输入错误请重新输入')
        return
    city_result[index] = value


# 3.1.1 Handler for combo box 1.
def choose1(event):
    """Store the city chosen in the first combo box."""
    _store_city(event, 0)


# 3.1.2 Handler for combo box 2.
def choose2(event):
    """Store the city chosen in the second combo box."""
    _store_city(event, 1)
# 3.2 The two city combo boxes share every option except their text
# variable, selection handler and grid column.
def _city_combobox(text_var, on_select, column):
    """Create one city combo box in row 1 of the main window and return it."""
    box = ttk.Combobox(
        master=start_window,     # parent container
        height=10,               # number of entries shown in the drop-down
        width=20,
        state='normal',          # selectable and editable
        cursor='arrow',          # mouse cursor shape over the widget
        font=('', 14),
        textvariable=text_var,   # StringVar holding the displayed text
        values=values,           # drop-down options
    )
    box.bind('<<ComboboxSelected>>', on_select)
    box.grid(row=1, column=column)
    return box


# Options offered by both drop-downs.
values = city_name

# 3.2.1 Combo box 1, pre-filled with a prompt.
value1 = tkinter.StringVar(value='请输入第一个城市的名字')
combobox1 = _city_combobox(value1, choose1, 2)

# 3.2.2 Combo box 2, pre-filled with a prompt.
value2 = tkinter.StringVar(value='请输入第二个城市的名字')
combobox2 = _city_combobox(value2, choose2, 3)
# 4 Search button.
# Legacy click flag; still defined for compatibility but no longer consulted.
on_click = True


# 4.1 Click handler.
def click_searchB():
    """Redraw the chart area for the currently selected pair of cities."""
    print("搜索按钮点击成功")
    # CV only builds the canvas; its Tk widget must still be laid out before
    # the new figure becomes visible, as in the initial draw at the end of
    # this script.
    CV(start_window, city_result).get_tk_widget().grid(row=1, column=1)


# 4.2 The button itself.
searchB = tkinter.Button(start_window, text="搜索", font=("宋体", 20), command=click_searchB)
searchB.grid(row=1, column=4)
# 5 Button that opens the epidemic map.
play_mapB = tkinter.Button(start_window, text="疫情地图", font=("宋体", 10), command=click_play_map)
play_mapB.grid(row=2, column=1)

# 6 Button that shows the confirmed-case share pie chart of the two cities.
# Legacy click flag; still defined for compatibility but no longer consulted
# (it was never set to False, so the handler always took the same branch).
on_click_play_pie = True


# 6.1 Click handler.
def click_play_pie():
    """Show the pie chart for the two selected cities in its own window."""
    print("各城市与全国疫情人数占比")
    pie_play(city_result[0], city_result[1], 1)


# 6.2 The button itself.
s2B = tkinter.Button(start_window, text="城市确诊人数占比", font=("宋体", 10), command=click_play_pie)
s2B.grid(row=2, column=4)
# 7 Button that shows the healed-vs-dead bar chart of the two cities.
# Legacy click flag; still defined for compatibility but no longer consulted.
on_click_play_bar = True


# 7.1 Click handler.
def click_play_b():
    """Show the bar chart for the two selected cities on every click."""
    barchar(city_result[0], city_result[1], 1)
    print("疫情死亡与治愈对比条形图点击成功")


# 7.2 The button itself.
s3B = tkinter.Button(start_window, text="疫情死亡与治愈对比", font=("宋体", 10), command=click_play_b)
s3B.grid(row=2, column=3)

# Button that shows the confirmed-case chart of all provinces.
s4B = tkinter.Button(start_window, text="所有省的数据展示", font=("宋体", 10), command=click_play_all)
s4B.grid(row=2, column=2)
# Legacy click flag; still defined for compatibility but no longer consulted.
on_click_play_line = True


def click_play_line():
    """Show the line chart for the two selected cities on every click."""
    linechar(city_result[0], city_result[1], 1)
    # The message names the line chart rather than repeating the bar-chart text.
    print("所有数据的折线图点击成功")


# Button that shows the line chart; it gets its own name so the handle of the
# all-provinces button is not overwritten.
s5B = tkinter.Button(start_window, text="所有数据的折线图", font=("宋体", 10), command=click_play_line)
s5B.grid(row=2, column=7)

# Initial draw of the chart area with the default cities.
CV(start_window, city_result).get_tk_widget().grid(row=1, column=1)
# Enter the Tk event loop and show the window.
start_window.mainloop()
3.2allData_show.py
#参考网站 https://www.cnblogs.com/springionic/p/11150192.html
from matplotlib import pyplot as plt
import matplotlib
def P_T(x, y, b):
    """Draw a horizontal bar chart on the current Matplotlib axes.

    Args:
        x: Labels, one per bar (placed on the vertical axis).
        y: Bar lengths, in the same order as ``x``.
        b: When non-zero the figure is shown immediately; 0 only draws, so
            the caller can embed the plot elsewhere.
    """
    # Use a font that can render the Chinese labels.
    matplotlib.rc('font', family='SimHei')
    positions = range(len(x))
    # Horizontal bars: `height` controls the bar thickness.
    plt.barh(positions, y, height=0.3, color='red')
    # Put the labels on the vertical axis ticks.
    plt.yticks(positions, x)
    plt.grid(alpha=0.3)
    plt.ylabel('城市名')
    plt.xlabel('确诊人数')
    plt.title('中国境内所有省疫情数据展示')
    if b == 0:
        return
    plt.show()
3.3Barchar.py
#参考资料 https://www.pythonf.cn/read/125298
import numpy as np
from matplotlib import pyplot as plt
from search_data import search
def barchar(cityname0, cityname1, b):
    """Draw healed vs. dead counts of two cities as mirrored horizontal bars.

    Healed counts extend to the right of zero, dead counts to the left.

    Args:
        cityname0: Name of the first city.
        cityname1: Name of the second city.
        b: When non-zero the figure is shown immediately; 0 only draws.
    """
    plt.title("城市治愈人数与死亡人数对比")
    # Chinese labels need an explicit font; also render minus signs correctly.
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.rcParams['axes.unicode_minus'] = False
    first = search(cityname0)
    second = search(cityname1)
    # NumPy arrays so the dead counts can be negated element-wise below.
    healed = np.array([first["治愈人数"], second["治愈人数"]])
    dead = np.array([first["死亡人数"], second["死亡人数"]])
    rows = list(np.arange(len(healed)))
    plt.barh(rows, healed, label="治愈人数", color='g', alpha=0.5,
             tick_label=[cityname0, cityname1])
    plt.barh(rows, -dead, label="死亡人数", color='b', alpha=0.5)
    # Leave one unit of margin on both sides of the longest bars.
    plt.xlim(-max(dead) - 1, max(healed) + 1)
    plt.ylim(-1, len(healed) + 1)
    plt.legend()
    if b == 0:
        return
    plt.show()
3.4BUtton_click.py
#************************************************************************************************************
#按钮点击事件的处理函数
#包含“疫情地图”按钮和“所有省的数据展示”按钮的回调,均无参数
import webbrowser
import allData_show
from reptile_data import datacityname, dataconfirm
# Legacy click flag; still defined for compatibility but no longer consulted.
on_click_play_map = True


def click_play_map():
    """Render the province map to HTML and open it in a browser tab."""
    from pathlib import Path
    # Imported on demand, as in the original handler.
    from map_show import map_show

    print("疫情地图点击成功")
    map_show(datacityname, dataconfirm)
    # Hand the browser an absolute file:// URL; opening a bare relative
    # file name through webbrowser is not portable.
    webbrowser.open_new_tab(Path('中国疫情地图.html').resolve().as_uri())
# Legacy click flag; still defined for compatibility but no longer consulted.
on_click_play_barall = True


def click_play_all():
    """Show the confirmed-case bar chart of all provinces on every click."""
    print("展示所有省疫情数据点击成功")
    allData_show.P_T(datacityname, dataconfirm, 1)
3.5Drawing_Board.py
import tkinter
from matplotlib import pyplot as plt
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
import allData_show
from Linechart import linechar
from Barchar import barchar
from Piechar import pie_play
from reptile_data import datacityname, dataconfirm
#函数名c
#参数是两个城市名
#目的实现窗口下面的画布
def c(cityr):
    """Build the 2x2 overview figure for the two selected cities.

    Args:
        cityr: Sequence whose first two items are the city names.

    Returns:
        The Matplotlib figure containing the four charts.
    """
    f = plt.figure(figsize=(21, 9), dpi=60, tight_layout=True, facecolor="WhiteSmoke")
    # (subplot slot, drawing call) pairs; every chart is drawn with its
    # "show" flag set to 0 so nothing pops up in a separate window.
    panels = (
        (221, lambda: allData_show.P_T(datacityname, dataconfirm, 0)),
        (222, lambda: barchar(cityr[0], cityr[1], 0)),
        (223, lambda: pie_play(cityr[0], cityr[1], 0)),
        (224, lambda: linechar(cityr[0], cityr[1], 0)),
    )
    for slot, draw in panels:
        plt.subplot(slot)
        draw()
    return f
#函数名CV
#第一个参数是画布出现的窗口,第二个是城市名(列表)
#目的初始化窗口下面的画布
def CV(win, city_result):
    """Create the chart area below the controls and return its figure canvas.

    Args:
        win: Window the chart area is placed in.
        city_result: The two city names to plot.

    Returns:
        The ``FigureCanvasTkAgg`` wrapping the overview figure. This function
        does not lay out the canvas's Tk widget; the caller does that.
    """
    holder = tkinter.Canvas(win, background='Cyan')
    holder.grid_remove()
    # Occupy the lower 90% of the window, full width.
    holder.place(relx=0.0, rely=0.1, relwidth=1, relheight=0.9)
    figure = c(city_result)
    return FigureCanvasTkAgg(figure, master=holder)
3.6Get_url_content.py
import json
import requests
#爬取疫情数据
def get_url_content(url, timeout=10):
    """Download *url* and return its body decoded from JSON.

    Args:
        url: Address of the JSON endpoint.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON document.

    Raises:
        requests.RequestException: If the request fails, times out or the
            server answers with an HTTP error status.
        ValueError: If the body is not valid JSON.
    """
    # 1. Browser-like User-Agent.
    header = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) '
                            'Chrome/51.0.2704.63 Safari/537.36'}
    # 2. Send the request; a timeout keeps an unresponsive server from
    # blocking the caller indefinitely.
    response = requests.get(url, headers=header, timeout=timeout)
    # Report HTTP errors directly instead of trying to parse an error page.
    response.raise_for_status()
    # 3. Decode the body as UTF-8.
    content = response.content.decode('utf-8')
    # 4. Parse the JSON text.
    return json.loads(content)
3.7Linechart.py
import matplotlib.pyplot as plt
from search_data import search
def linechar(cityname0, cityname1, b):
    """Plot confirmed, dead and healed counts of two cities as two lines.

    Args:
        cityname0: Name of the first city.
        cityname1: Name of the second city.
        b: When non-zero the figure is shown immediately; 0 only draws.
    """
    plt.rcParams['font.sans-serif'] = ['SimHei']
    first = search(cityname0)
    second = search(cityname1)
    # The category labels double as the keys of the per-city records.
    x = ["确诊人数", "死亡人数", "治愈人数"]
    y1 = [first[key] for key in x]
    y2 = [second[key] for key in x]
    plt.plot(x, y1, label=cityname0, linewidth=3, color='black', marker='o',
             markerfacecolor='red', markersize=12)
    plt.plot(x, y2, label=cityname1, linewidth=3, color='b', marker='o',
             markerfacecolor='y', markersize=12)
    plt.ylabel('人数')
    plt.title('城市疫情情况')
    plt.legend()
    if b == 0:
        return
    plt.show()
3.8map_show.py
#绘制中国省份疫情地图的数据,用html进行保存
##参考资料:pyecharts官方文档 https://pyecharts.org/#/zh-cn/geography_charts?id=map%ef%bc%9a%e5%9c%b0%e5%9b%be
from pyecharts.charts import Map
from pyecharts import options as opts
def map_show(datacityname_, dataconfirm_):
    """Render the confirmed-case map of China to ``中国疫情地图.html``.

    Args:
        datacityname_: Region names, in the same order as ``dataconfirm_``.
        dataconfirm_: Cumulative confirmed-case count for each region.

    Returns:
        The value returned by the pyecharts ``render`` call.
    """
    # Colour buckets of the piecewise legend. The ranges do not overlap:
    # the top bucket starts at 10000, matching its label.
    pieces = [{"max": 999999, "min": 10000, "label": ">=10000", "color": "#8A0808"},
              {"max": 9999, "min": 1000, "label": "1000-9999", "color": "#FF69B4"},
              {"max": 999, "min": 100, "label": "100-999", "color": "#F08080"},
              {"max": 99, "min": 10, "label": "10-99", "color": "#F78181"},
              {"max": 9, "min": 1, "label": "1-9", "color": "#0000FF"},
              {"max": 0, "min": 0, "label": "0", "color": "#B4CDCD"},
              ]
    c = (
        # Chart size.
        Map(init_opts=opts.InitOpts(width='1000px', height='880px'))
        # One [name, value] pair per region on the "china" base map.
        .add("累计确诊人数", [list(z) for z in zip(datacityname_, dataconfirm_)], "china", is_map_symbol_show=False)
        .set_global_opts(
            title_opts=opts.TitleOpts(title="中国疫情地图分布", pos_left="center", pos_top="10px"),
            legend_opts=opts.LegendOpts(is_show=False),
            visualmap_opts=opts.VisualMapOpts(max_=200, is_piecewise=True,
                                              pieces=pieces
                                              ),
        )
        .render("中国疫情地图.html")
    )
    return c
3.9Piechar.py
import numpy as np
from matplotlib import pyplot as plt
from reptile_data import sumdata
from search_data import search
#函数名pie_play
#第一个参数是第一个城市名,第二个参数是第二个城市名
def pie_play(cityname0, cityname1, b):
    """Draw the share of two cities in the national confirmed-case total.

    Args:
        cityname0: Name of the first city.
        cityname1: Name of the second city.
        b: When non-zero the figure is shown immediately; 0 only draws.
    """
    # Font able to render the Chinese labels.
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.title("各城市与全国疫情人数占比")
    confirmed0 = search(cityname0)['确诊人数']
    confirmed1 = search(cityname1)['确诊人数']
    # Third slice: the national total minus the two cities.
    data = np.array([confirmed0, confirmed1, sumdata - confirmed0 - confirmed1])
    plt.pie(
        data,
        labels=[cityname0, cityname1, "其他地区"],
        labeldistance=1.1,      # distance of the labels from the centre
        autopct='%.1f%%',       # percentage text format
        pctdistance=0.5,        # position of the percentage text
        explode=[0.1, 0.2, 0],  # offset of each slice from the centre
        colors=['red', 'blue', 'yellow'],
        shadow=True,
        startangle=90,          # angle at which the first slice starts
    )
    if b == 0:
        return
    plt.show()
3.10python_gui_backimage_1.png
(随便一张图片都OK)
3.11reptile_data.py
#这里主要是对中国境内疫情数据分析
import json
from Get_url_content import get_url_content
url = "https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5"
Data = get_url_content(url)
# The 'data' field of the response is itself a JSON string, decoded here.
data = json.loads(Data['data'])
# Per-province entries (children of the first areaTree node).
ChinaData = data["areaTree"][0]['children']
# National confirmed total.
sumdata = data['chinaTotal']['confirm']

# The same figures are kept in several shapes for the different charts; this
# costs some memory but keeps the plotting code simple.
# Province rows: [name, confirmed, dead, healed].
updata_provinceChinaData = []
# Province name -> raw list of its city entries.
updata_province_cityChinaData = {}
# City rows: [name, confirmed, dead, healed].
updata_cityChinaData = []
# Province names and confirmed counts as parallel lists, for the charts.
datacityname = []
dataconfirm = []
# Names of all selectable cities.
city_name = []

for province in ChinaData:
    total = province['total']
    datacityname.append(province['name'])
    dataconfirm.append(total['confirm'])
    updata_provinceChinaData.append(
        [province['name'], total['confirm'], total['dead'], total['heal']])
    updata_province_cityChinaData[province['name']] = province['children']
    for city in province['children']:
        # Skip the two placeholder entries that are not real cities.
        if city['name'] == "地区待确认" or city['name'] == "境外输入":
            continue
        city_name.append(city['name'])
        updata_cityChinaData.append([city['name'],
                                     city['total']['confirm'],
                                     city['total']['dead'],
                                     city['total']['heal']])

# City name -> {'确诊人数', '死亡人数', '治愈人数'} record. If the same name
# appears more than once, the last occurrence is the one kept.
data_cityChinaData = {
    row[0]: {'确诊人数': row[1], '死亡人数': row[2], '治愈人数': row[3]}
    for row in updata_cityChinaData
}
3.12search_data.py
from reptile_data import data_cityChinaData
#函数名search
#一个参数城市名(str)
#目的是可以通过函数名来获得该城市的数据
def search(City_name):
    """Return the statistics record stored for *City_name*.

    Args:
        City_name: Name of the city to look up.

    Returns:
        The matching entry of ``data_cityChinaData``.

    Raises:
        KeyError: If no record exists for that name.
    """
    record = data_cityChinaData[City_name]
    return record
3.13中国疫情地图.html
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Awesome-pyecharts</title>
<script type="text/javascript" src="https://assets.pyecharts.org/assets/echarts.min.js"></script>
<script type="text/javascript" src="https://assets.pyecharts.org/assets/maps/china.js"></script>
</head>
<body>
<div id="4f73a20678e340a8b9fe3dec140d826c" class="chart-container" style="width:1000px; height:880px;"></div>
<script>
var chart_4f73a20678e340a8b9fe3dec140d826c = echarts.init(
document.getElementById('4f73a20678e340a8b9fe3dec140d826c'), 'white', {renderer: 'canvas'});
var option_4f73a20678e340a8b9fe3dec140d826c = {
"animation": true,
"animationThreshold": 2000,
"animationDuration": 1000,
"animationEasing": "cubicOut",
"animationDelay": 0,
"animationDurationUpdate": 300,
"animationEasingUpdate": "cubicOut",
"animationDelayUpdate": 0,
"color": [
"#c23531",
"#2f4554",
"#61a0a8",
"#d48265",
"#749f83",
"#ca8622",
"#bda29a",
"#6e7074",
"#546570",
"#c4ccd3",
"#f05b72",
"#ef5b9c",
"#f47920",
"#905a3d",
"#fab27b",
"#2a5caa",
"#444693",
"#726930",
"#b2d235",
"#6d8346",
"#ac6767",
"#1d953f",
"#6950a1",
"#918597"
],
"series": [
{
"type": "map",
"name": "\u7d2f\u8ba1\u786e\u8bca\u4eba\u6570",
"label": {
"show": true,
"position": "top",
"margin": 8
},
"mapType": "china",
"data": [
{
"name": "\u53f0\u6e7e",
"value": 16652
},
{
"name": "\u5185\u8499\u53e4",
"value": 955
},
{
"name": "\u4e91\u5357",
"value": 1736
},
{
"name": "\u9999\u6e2f",
"value": 12461
},
{
"name": "\u5e7f\u897f",
"value": 417
},
{
"name": "\u8fbd\u5b81",
"value": 786
},
{
"name": "\u6cb3\u5357",
"value": 1637
},
{
"name": "\u4e0a\u6d77",
"value": 2877
},
{
"name": "\u5e7f\u4e1c",
"value": 3323
},
{
"name": "\u5c71\u4e1c",
"value": 1021
},
{
"name": "\u9ed1\u9f99\u6c5f",
"value": 2017
},
{
"name": "\u56db\u5ddd",
"value": 1277
},
{
"name": "\u6cb3\u5317",
"value": 1457
},
{
"name": "\u5929\u6d25",
"value": 535
},
{
"name": "\u6d59\u6c5f",
"value": 1501
},
{
"name": "\u798f\u5efa",
"value": 1322
},
{
"name": "\u5317\u4eac",
"value": 1194
},
{
"name": "\u6c5f\u82cf",
"value": 1619
},
{
"name": "\u9655\u897f",
"value": 709
},
{
"name": "\u5409\u6797",
"value": 585
},
{
"name": "\u5c71\u897f",
"value": 265
},
{
"name": "\u91cd\u5e86",
"value": 610
},
{
"name": "\u6e56\u5357",
"value": 1199
},
{
"name": "\u6e56\u5317",
"value": 68311
},
{
"name": "\u897f\u85cf",
"value": 1
},
{
"name": "\u65b0\u7586",
"value": 981
},
{
"name": "\u6d77\u5357",
"value": 190
},
{
"name": "\u7518\u8083",
"value": 344
},
{
"name": "\u6fb3\u95e8",
"value": 77
},
{
"name": "\u6c5f\u897f",
"value": 959
},
{
"name": "\u9752\u6d77",
"value": 30
},
{
"name": "\u5b81\u590f",
"value": 122
},
{
"name": "\u5b89\u5fbd",
"value": 1008
},
{
"name": "\u8d35\u5dde",
"value": 159
}
],
"roam": true,
"aspectScale": 0.75,
"nameProperty": "name",
"selectedMode": false,
"zoom": 1,
"mapValueCalculation": "sum",
"showLegendSymbol": false,
"emphasis": {}
}
],
"legend": [
{
"data": [
"\u7d2f\u8ba1\u786e\u8bca\u4eba\u6570"
],
"selected": {
"\u7d2f\u8ba1\u786e\u8bca\u4eba\u6570": true
},
"show": false,
"padding": 5,
"itemGap": 10,
"itemWidth": 25,
"itemHeight": 14
}
],
"tooltip": {
"show": true,
"trigger": "item",
"triggerOn": "mousemove|click",
"axisPointer": {
"type": "line"
},
"showContent": true,
"alwaysShowContent": false,
"showDelay": 0,
"hideDelay": 100,
"textStyle": {
"fontSize": 14
},
"borderWidth": 0,
"padding": 5
},
"title": [
{
"text": "\u4e2d\u56fd\u75ab\u60c5\u5730\u56fe\u5206\u5e03",
"left": "center",
"top": "10px",
"padding": 5,
"itemGap": 10
}
],
"visualMap": {
"show": true,
"type": "piecewise",
"min": 0,
"max": 200,
"inRange": {
"color": [
"#50a3ba",
"#eac763",
"#d94e5d"
]
},
"calculable": true,
"inverse": false,
"splitNumber": 5,
"orient": "vertical",
"showLabel": true,
"itemWidth": 20,
"itemHeight": 14,
"borderWidth": 0,
"pieces": [
{
"max": 999999,
"min": 1001,
"label": ">10000",
"color": "#8A0808"
},
{
"max": 9999,
"min": 1000,
"label": "1000-9999",
"color": "#FF69B4"
},
{
"max": 999,
"min": 100,
"label": "100-999",
"color": "#F08080"
},
{
"max": 99,
"min": 10,
"label": "10-99",
"color": "#F78181"
},
{
"max": 9,
"min": 1,
"label": "1-9",
"color": "#0000FF"
},
{
"max": 0,
"min": 0,
"label": "0",
"color": "#B4CDCD"
}
]
}
};
chart_4f73a20678e340a8b9fe3dec140d826c.setOption(option_4f73a20678e340a8b9fe3dec140d826c);
</script>
</body>
</html>
4、结果
实现了对疫情数据的爬取,练习了Matplotlib库和Tkinter库的基本使用:通过Matplotlib实现折线图、扇形图、条形图;使用Tkinter库的下拉框Combobox、按钮Button、画布Canvas;利用pyecharts生成html,绘制疫情地图。
5.还需改进
爬取数据和存储浪费了时间和空间资源,后期可以改进以节省开销。目前程序从启动到窗口出现大约需要3秒,但受网络状况等多个因素影响,窗口出现的时间并不固定。
作者:爱喝冰美式的猫猫