python 简单应用——网络爬虫AND数据分析

学会 Python 之后,是不是想动手做一些简单的应用呢?刚好猫猫在初学 Python 的时候做过两个小应用:网络爬虫和数据分析。只要了解基本语法就能看懂下面的代码,如果你有需要,可以继续看下去。

如果有写错的地方,麻烦留言。

一、网络爬虫

1.要求
对爬取的新闻内容进行分析,用图形显示新闻中的高频关键词。使用requests库,获取网页页面,使用BeautifulSoup库对网页内容进行解析,使用正则表达式获取网页链接字符串。
2.思路

3.程序源码

3.1_main.py

# 1新闻内容进行爬取,并按格式存储。
# 2使用词云的方法展现高频词汇。
import time
#用于程序记时
start_T=time.time()
#导入自定义函数
from GetNEWContent import getNEWContent
from GetTitleList import getTitleList
from Words_Cloud import wc

#所要爬取的网页地址
url = 'https://uscnews.usc.edu.cn/nhyw.htm'#主网页
#测试副网页
url1='https://uscnews.usc.edu.cn/info/1025/43508.htm'

#存入文档
#第一个文档是新闻的日期+作者+新闻标题+新闻网页地址信息
#第二个文档是每个新闻的具体内容
#^^^^^^^^正则表达式
# Separator line written in front of every article in the content file.
str00 = "*" * 100 + "\n"

# Two output files are produced:
#   * the title file gets one summary line per article (running number, the
#     header fields returned by getNEWContent, and the article URL);
#   * the content file gets the separator, the same summary line and the
#     article body.
# The ``with`` statement closes (and therefore flushes) both files even when a
# page fails half-way through the crawl, and the explicit UTF-8 encoding keeps
# the Chinese text writable whatever the platform default encoding is.
with open("南华新闻标题锦集.txt", 'w+', encoding="utf-8") as file1, \
        open("南华新闻内容锦集.txt", 'w+', encoding="utf-8") as file2:
    # j is the 1-based article number; i is the article URL.
    for j, i in enumerate(getTitleList(url), start=1):
        content = getNEWContent(i)
        # content[0] holds the header fields, content[1] the body text.
        # Whitespace inside the first header field is removed.
        title = ''.join(content[0][0].split())
        str0 = "*" + str(j) + "新闻标题:" + title + "  " \
               + str(content[0][1]) + "  " \
               + str(content[0][2]) + "  " \
               + "网页地址" + i + "\n"
        str1 = str00 + str0 + "#新闻具体内容" + content[1] + "\n"
        # Write the summary line and the full record.
        file1.write(str0)
        file2.write(str1)
        # One word-cloud image per article, named after its number.
        namestr = "新闻" + str(j)
        wc(content[1], namestr)

end_T = time.time()
print("程序运行时间{:.3f}秒".format(end_T - start_T))

3.2GetNEWContent.py

import re
from pyparsing import Regex
from Reptile import _reptlie

def getNEWContent(url):
    """Fetch one news page and return its header fields and cleaned body text.

    Args:
        url: Address of the article page to download.

    Returns:
        A tuple ``(header, body)``.

        ``header`` is a list built from the text of the
        ``div.content-title.mt20`` element, split on spaces, newlines,
        non-breaking spaces and the "点击:" marker: the first two non-empty
        tokens are kept as separate items and every remaining token is
        appended to one final string (each token preceded by a space).

        ``body`` is the text of the ``div.v_news_content`` element with
        non-breaking spaces, em spaces, bullets, newlines and blanks removed.

    Raises:
        RuntimeError: If the page could not be downloaded, i.e. ``_reptlie``
            returned ``None``.
    """
    print("正在爬取 {} 内容的……".format(url))
    soup = _reptlie(url)
    if soup is None:
        # _reptlie reports failures by returning None; stop here with a clear
        # message instead of failing on ``None.find`` below.
        raise RuntimeError("failed to fetch article page: {}".format(url))

    # 1. Header block: tokenise and drop the empty pieces left by the split.
    header_text = soup.find(name="div", class_="content-title mt20").text
    tokens = [
        piece
        for piece in re.split(r" +|\n+|\xa0+|点击:+", header_text)
        if piece != ''
    ]
    # Everything after the first two tokens is folded into a single string.
    remainder = "".join(" " + piece for piece in tokens[2:])
    Titlecontent = tokens[0:2]
    Titlecontent.append(remainder)

    # 2. Article body: strip unwanted characters, then all line breaks/blanks.
    _content = soup.find(name="div", class_="v_news_content").text
    _content = re.sub(r'\xa0+|\u2003+|\u2022+', '', _content)
    _content = re.sub(r'\n+| +', '', _content)
    return Titlecontent, _content

3.3GetTitleList.py

import re

from Reptile import _reptlie

def getTitleList(url):
    """Collect the article links listed on the news index page.

    Args:
        url: Address of the index page.

    Returns:
        A list of absolute article URLs, in the order the links appear inside
        the ``ul.media-list3.mt10`` element. Each URL is the site prefix
        ``https://uscnews.usc.edu.cn/`` followed by the ``href`` value found
        on the page.

    Raises:
        RuntimeError: If the index page could not be downloaded, i.e.
            ``_reptlie`` returned ``None``.
    """
    print("正在爬取主网页 {} 的链接……".format(url))
    soup = _reptlie(url)
    if soup is None:
        # _reptlie reports failures by returning None.
        raise RuntimeError("failed to fetch index page: {}".format(url))

    # Narrow the page down to the news list, then to its anchor tags.
    content = soup.find(name="ul", class_="media-list3 mt10")
    content = content.find_all('a')
    # Extract the href of every anchor that opens in a new tab; the pattern is
    # applied to the textual form of the anchor list.
    re_url = re.compile(r'<a href="(.*?)" target="_blank">')
    link = re.findall(re_url, str(content))
    # Build the result without overwriting the ``url`` argument.
    return ["https://uscnews.usc.edu.cn/" + href for href in link]

3.4Reptile.py

import requests
from bs4 import BeautifulSoup

def _reptlie(url):
    """Download *url* and parse it into a BeautifulSoup tree.

    Args:
        url: Address of the page to download.

    Returns:
        The parsed ``BeautifulSoup`` object, or ``None`` when downloading or
        parsing fails (a message is printed in that case).
    """
    # Browser-like request headers.
    h = {'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
         'Accept-Language': 'zh-CN,zh;q=0.8',
         'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36',
         'Connection': 'keep-alive',
         'referer': 'http://www.163.com/'}
    try:
        # The timeout keeps one unresponsive page from hanging the crawl.
        html = requests.get(url, headers=h, timeout=10)
        # The raw bytes are handed to BeautifulSoup, which works out the
        # encoding itself.
        return BeautifulSoup(html.content, 'html.parser')
    except Exception as exc:
        # Best effort: report the failure and let the caller see None.
        # ``Exception`` (rather than a bare ``except``) lets Ctrl-C and
        # interpreter shutdown propagate.
        print("爬取网页失败……", exc)
        return None

3.5Word_cloud_optimization.py

#coding=gbk
import logging

import jieba

def words_count_jieba(content):
    """Return the most frequent multi-character words of *content*.

    The text is segmented with ``jieba.lcut``; segments that are exactly one
    character long are ignored and the remaining ones are counted.

    Args:
        content: The text to analyse (a string).

    Returns:
        A string containing up to 40 words, most frequent first (ties keep
        their order of first appearance), each preceded by one space. The
        string is empty when the text yields no countable word.
    """
    from collections import Counter

    # Raise jieba's log level -- presumably to hide its start-up messages.
    jieba.setLogLevel(logging.INFO)
    # Count every segment except the single-character ones. No stop-word list
    # is applied: a set built from the individual characters of ``content``
    # could never match the multi-character words counted here.
    counts = Counter(word for word in jieba.lcut(content) if len(word) != 1)
    # most_common() simply returns fewer entries for short texts, so there is
    # no need for 40 distinct words to exist.
    top_words = counts.most_common(40)
    # Space-separated string, ready to be fed to the word cloud.
    return "".join(" " + word for word, _ in top_words)

3.6Words_Cloud.py

import jieba
from wordcloud import WordCloud
from Word_cloud_optimization import words_count_jieba
#绘制词云图片

def wc(newcontent, namestr, font_path="c:/windows/fonts/simfang.ttf",
       out_dir="词云图片存放"):
    """Render a word-cloud image for one article and save it as a PNG.

    Args:
        newcontent: Article text; it is reduced to its most frequent words by
            ``words_count_jieba`` before rendering.
        namestr: Base name of the image file (``.png`` is appended).
        font_path: Font file handed to ``WordCloud``. The default is a Windows
            system font path; pass another font file on other systems. A font
            with Chinese glyphs is needed for the words to render correctly.
        out_dir: Directory that receives the image. It is created when it
            does not exist yet.
    """
    import os

    # WordCloud expects space-separated words, so segment the text first.
    cut_text = words_count_jieba(newcontent)
    w = WordCloud(
        font_path=font_path,
        # White background, fixed canvas size.
        background_color="white", width=1000, height=880).generate(cut_text)
    # Make sure the target directory exists before writing into it.
    os.makedirs(out_dir, exist_ok=True)
    strload = os.path.join(out_dir, str(namestr) + ".png")
    w.to_file(strload)

4.结果展示

二、数据分析

1、要求
熟悉Matplotlib库的基本使用。
使用Matplotlib库,绘制新型冠状病毒部分城市数据对比图。实现两个城市三种数据(确诊,死亡,治愈)的对比,程序可以根据不同输入不同城市和选项(确诊,死亡,治愈),显示相应的对比图。
对数据进行多样化分析。
2.思路:

3、程序源码

3.1_main__start_windows.py

#开始窗口显示
#导入相关库
import tkinter
from tkinter import ttk
import tkinter.messagebox
#导入自定义函数
from matplotlib import pyplot as plt
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg

import allData_show
from BUtton_click import  click_play_all, click_play_map
from Drawing_Board import CV
from Linechart import linechar
from Barchar import barchar
from Piechar import pie_play
from reptile_data import city_name, datacityname, dataconfirm

# Default pair of cities compared until the user picks other ones.
city_result=['武汉', '大连']
# Use the SimHei font so Chinese labels can be rendered.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.title("欢迎")
# NOTE(review): plt.show() normally blocks until the figure window is closed,
# so the Tk window below presumably appears only afterwards -- confirm intent.
plt.show()
#************************************************************************************************************
# 1. Create the main window.
start_window=tkinter.Tk()
start_window.title("中国城市疫情情况搜索GUI窗口")# window title
screenwidth = start_window.winfo_screenwidth()# screen width
screenheight = start_window.winfo_screenheight()# screen height
start_window.geometry('{}x{}+0+0'.format(screenwidth,screenheight))# size the window to the whole screen, positioned at +0,+0
start_window['bg']="Cyan"# window background colour
# # Optional background image; disabled because the charts cover most of the window.
# image_file = Image.open("python_gui_backimage_1.png")
# photo = ImageTk.PhotoImage(image_file)
# fwagui=tkinter.Frame(start_window,padx=0,pady=0)
# fwagui.place(x=0,y=0,width=screenwidth,height=screenheight)
# tkinter.Label(fwagui,image=photo).grid(row = 0, column = 0,rowspan=1,columnspan=1)

# 2. "City name" label in front of the two input boxes.
cityLabelx=tkinter.Label(start_window,text="城市名称",fg="CornflowerBlue",font=("宋体",20))
cityLabelx.grid(row=1,column=1)# grid layout

#3.1输入文本框,主要是接收需要查询的城市名称
#3.1.1输入框1的事件
def choose1(event):
    """Store the value of the first city box as the first city to compare.

    Args:
        event: Tk event whose ``widget`` is the combobox that fired it.

    The value is written to ``city_result[0]`` only when it is one of the
    known city names; otherwise a message box is shown and the previous
    choice is kept, so later lookups never receive an unknown name.
    """
    value = event.widget.get()
    print('value:{}'.format(value))
    if value not in city_name:
        tkinter.messagebox.showinfo('提示', '输入错误请重新输入')
        return
    city_result[0] = value
#3.1.2输入框2的事件
def choose2(event):
    """Store the value of the second city box as the second city to compare.

    Args:
        event: Tk event whose ``widget`` is the combobox that fired it.

    The value is written to ``city_result[1]`` only when it is one of the
    known city names; otherwise a message box is shown and the previous
    choice is kept, so later lookups never receive an unknown name.
    """
    value = event.widget.get()
    print('value:{}'.format(value))
    if value not in city_name:
        tkinter.messagebox.showinfo('提示', '输入错误请重新输入')
        return
    city_result[1] = value
# 3.2.1 First city box: an editable drop-down pre-filled with a prompt text
# and offering every known city name.
value1 = tkinter.StringVar()
value1.set('请输入第一个城市的名字')
values =city_name
combobox1= ttk.Combobox(
            master=start_window,  # parent container
            height=10,  # number of entries shown in the drop-down list
            width=20,  # width
            state='normal',  # normal (select or type), readonly (select only) or disabled
            cursor='arrow',  # mouse cursor shape: arrow, circle, cross, plus...
            font=('', 14),  # font
            textvariable=value1,  # StringVar holding the displayed text
            values=values,  # entries offered in the drop-down list
            )
# NOTE(review): only the list-selection event is bound; text typed into the
# box presumably does not reach choose1 -- confirm.
combobox1.bind('<<ComboboxSelected>>', choose1)  # run choose1 on selection
combobox1.grid(row=1, column=2)
# 3.2.2 Second city box, configured like the first one.
value2 = tkinter.StringVar()
value2.set('请输入第二个城市的名字')
values =city_name
combobox2= ttk.Combobox(
            master=start_window,  # parent container
            height=10,  # number of entries shown in the drop-down list
            width=20,  # width
            state='normal',  # normal (select or type), readonly (select only) or disabled
            cursor='arrow',  # mouse cursor shape: arrow, circle, cross, plus...
            font=('', 14),  # font
            textvariable=value2,  # StringVar holding the displayed text
            values=values,  # entries offered in the drop-down list
            )
combobox2.bind('<<ComboboxSelected>>', choose2)  # run choose2 on selection
combobox2.grid(row=1, column=3)
# 4. Search button
# 4.1 Click handler
on_click=True
def click_searchB():
    """Rebuild the chart area for the currently selected pair of cities.

    ``on_click`` is flipped on every call: the charts are rebuilt when the
    flag is True, and the next call only re-arms the flag.
    NOTE(review): as a result every second click does nothing visible --
    confirm that this is intended.
    """
    global on_click
    if on_click==False:
        on_click=True
    else:
        on_click=False
        print("搜索按钮点击成功")
        # Build a fresh canvas and lay out its figure widget. Without the
        # grid() call the new figure widget has no geometry manager and is
        # never displayed; this mirrors how the initial canvas is shown.
        CV(start_window, city_result).get_tk_widget().grid(row=1, column=1)
# 4.2 The button itself
searchB=tkinter.Button(start_window, text="搜索", font=("宋体",20), command=click_searchB)
searchB.grid(row=1, column=4)

# 5. Button that opens the epidemic map
play_mapB=tkinter.Button(start_window, text="疫情地图", font=("宋体", 10), command=click_play_map)
play_mapB.grid(row=2, column=1)

# 6. Pie chart: share of the two selected cities in the national total
# 6.1 Click handler
on_click_play_pie = True


def click_play_pie():
    """Show the pie chart for the two currently selected cities.

    When ``on_click_play_pie`` is False the call only sets it back to True.
    Nothing in this handler ever sets the flag to False.
    """
    global on_click_play_pie
    if not on_click_play_pie:
        on_click_play_pie = True
        return
    print("各城市与全国疫情人数占比")
    pie_play(city_result[0], city_result[1], 1)


# 6.2 The button itself
s2B = tkinter.Button(
    start_window,
    text="城市确诊人数占比",
    font=("宋体", 10),
    command=click_play_pie,
)
s2B.grid(row=2, column=4)

# 7. Bar chart comparing healed and dead counts of the two selected cities
# 7.1 Click handler
on_click_play_bar = True


def click_play_b():
    """Show the healed/dead bar chart for the two selected cities.

    ``on_click_play_bar`` is flipped on every call; the chart is drawn only
    on calls that find the flag True, the other calls merely re-arm it.
    """
    global on_click_play_bar
    if not on_click_play_bar:
        on_click_play_bar = True
        return
    on_click_play_bar = False
    barchar(city_result[0], city_result[1], 1)
    print("疫情死亡与治愈对比条形图点击成功")


# 7.2 The button itself
s3B = tkinter.Button(
    start_window,
    text="疫情死亡与治愈对比",
    font=("宋体", 10),
    command=click_play_b,
)
s3B.grid(row=2, column=3)

# Button that shows the chart with the data of all provinces.
s4B = tkinter.Button(
    start_window,
    text="所有省的数据展示",
    font=("宋体", 10),
    command=click_play_all,
)
s4B.grid(row=2, column=2)

# Line chart with all three figures of the two selected cities
on_click_play_line=True
def click_play_line():
    """Show the line chart for the two currently selected cities.

    ``on_click_play_line`` is flipped on every call; the chart is drawn only
    on calls that find the flag True, the other calls merely re-arm it.
    """
    global on_click_play_line
    if on_click_play_line==False:
        on_click_play_line=True
    else:
        on_click_play_line=False
        linechar(city_result[0],city_result[1],1)
        # Log text names the line chart (it used to repeat the bar-chart text).
        print("所有数据的折线图点击成功")
s4B=tkinter.Button(start_window,text="所有数据的折线图",font=("宋体",10),command=click_play_line)
s4B.grid(row=2, column=7)

# Initial chart area for the default cities.
CV(start_window, city_result).get_tk_widget().grid(row=1, column=1)

# Enter the Tk event loop; this displays the window.
start_window.mainloop()

3.2allData_show.py

#参考网站 https://www.cnblogs.com/springionic/p/11150192.html
from matplotlib import pyplot as plt
import matplotlib
def P_T(x,y,b):
    """Draw a horizontal bar chart of the values ``y`` labelled by ``x``.

    Args:
        x: Labels, one per bar, placed on the vertical axis.
        y: Bar lengths, in the same order as ``x``.
        b: When 0 the chart is only drawn on the current axes; any other
            value additionally calls ``plt.show()``.
    """
    # SimHei is selected so that Chinese text can be rendered.
    matplotlib.rc('font', family='SimHei')
    positions = range(len(x))
    # Horizontal bars: ``height`` controls the bar thickness.
    plt.barh(positions, y, height=0.3, color='red')
    # Put the labels next to their bars.
    plt.yticks(positions, x)
    plt.grid(alpha=0.3)
    plt.ylabel('城市名')
    plt.xlabel('确诊人数')
    plt.title('中国境内所有省疫情数据展示')
    if b == 0:
        return
    plt.show()

3.3Barchar.py

#参考资料 https://www.pythonf.cn/read/125298
import numpy as np
from matplotlib import pyplot as plt

from search_data import search

def barchar(cityname0,cityname1,b):
    """Draw healed vs. dead counts of two cities as opposing horizontal bars.

    Healed counts extend to the right of zero, dead counts to the left.

    Args:
        cityname0: Name of the first city (looked up with ``search``).
        cityname1: Name of the second city.
        b: When 0 the chart is only drawn on the current axes; any other
            value additionally calls ``plt.show()``.
    """
    plt.title("城市治愈人数与死亡人数对比")
    # Chinese labels need an explicit font; also keep the minus sign readable.
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.rcParams['axes.unicode_minus'] = False
    records = [search(cityname0), search(cityname1)]
    # NumPy arrays so the dead counts can be negated element-wise below.
    healed = np.array([record["治愈人数"] for record in records])
    dead = np.array([record["死亡人数"] for record in records])
    rows = list(np.arange(len(healed)))
    plt.barh(rows, healed, label="治愈人数", color='g', alpha=0.5,
             tick_label=[cityname0, cityname1])
    plt.barh(rows, -dead, label="死亡人数", color='b', alpha=0.5)
    # Leave one unit of margin on both sides of the longest bars.
    plt.xlim(-max(dead) - 1, max(healed) + 1)
    plt.ylim(-1, len(healed) + 1)
    plt.legend()
    if b == 0:
        return
    plt.show()

3.4BUtton_click.py

#************************************************************************************************************
#搜索按钮的事件的函数
#无参数,主要作用是获取输入文本框的内容
import webbrowser

import allData_show
from reptile_data import datacityname, dataconfirm

on_click_play_map = True


def click_play_map():
    """Render the province map to HTML and open it in a browser tab.

    ``on_click_play_map`` is flipped on every call; the map is produced only
    on calls that find the flag True, the other calls merely re-arm it.
    """
    global on_click_play_map
    if not on_click_play_map:
        on_click_play_map = True
        return
    on_click_play_map = False
    print("疫情地图点击成功")
    # Imported here, at call time, rather than at module import.
    from reptile_data import datacityname, dataconfirm
    from map_show import map_show
    map_show(datacityname, dataconfirm)
    webbrowser.open_new_tab('中国疫情地图.html')

on_click_play_barall = True


def click_play_all():
    """Show the bar chart with the confirmed cases of all provinces.

    ``on_click_play_barall`` is flipped on every call; the chart is shown only
    on calls that find the flag True, the other calls merely re-arm it.
    """
    global on_click_play_barall
    if not on_click_play_barall:
        on_click_play_barall = True
        return
    on_click_play_barall = False
    print("展示所有省疫情数据点击成功")
    allData_show.P_T(datacityname, dataconfirm, 1)

3.5Drawing_Board.py

import tkinter
from matplotlib import pyplot as plt
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg

import allData_show
from Linechart import linechar
from Barchar import barchar
from Piechar import pie_play
from reptile_data import datacityname, dataconfirm

def c(cityr):
    """Build the 2x2 overview figure shown in the lower part of the window.

    Args:
        cityr: Sequence whose first two items are the city names to compare.

    Returns:
        The Matplotlib figure holding, in subplot order, the all-provinces bar
        chart, the healed/dead bar chart, the pie chart and the line chart.
    """
    f = plt.figure(figsize=(21, 9), dpi=60, tight_layout=True, facecolor="WhiteSmoke")
    # Each drawer is called with its "show" flag set to 0, so it only draws
    # into the subplot selected just before.
    panels = (
        (221, lambda: allData_show.P_T(datacityname, dataconfirm, 0)),
        (222, lambda: barchar(cityr[0], cityr[1], 0)),
        (223, lambda: pie_play(cityr[0], cityr[1], 0)),
        (224, lambda: linechar(cityr[0], cityr[1], 0)),
    )
    for position, draw in panels:
        plt.subplot(position)
        draw()
    return f

def CV(win,city_result):
    """Create the chart area of the window and return its figure canvas.

    Args:
        win: Window (or other Tk container) that receives the chart area.
        city_result: List with the two city names to compare.

    Returns:
        The ``FigureCanvasTkAgg`` wrapping the overview figure. The caller is
        responsible for laying out ``get_tk_widget()``.
    """
    holder = tkinter.Canvas(win, background='Cyan')
    holder.grid_remove()
    # Occupy the lower 90 % of the window, full width.
    holder.place(relx=0.0, rely=0.1, relwidth=1, relheight=0.9)
    figure = c(city_result)
    return FigureCanvasTkAgg(figure, master=holder)

3.6Get_url_content.py

import json
import requests
#爬取疫情数据
def get_url_content(url):
    """Download *url* and return its body parsed as JSON.

    Args:
        url: Address of a resource that answers with UTF-8 encoded JSON.

    Returns:
        The decoded JSON document (whatever ``json.loads`` produces for it).

    Raises:
        requests.RequestException: On connection problems, on a timeout, or
            when the server answers with an HTTP error status.
        ValueError: If the body is not valid UTF-8 or not valid JSON.
    """
    # 1. Browser-like User-Agent header.
    header={'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) '
                     'Chrome/51.0.2704.63 Safari/537.36'}
    # 2. Send the request; the timeout keeps an unresponsive server from
    #    blocking the caller forever.
    response=requests.get(url,headers=header,timeout=10)
    # Fail with a clear HTTP error instead of trying to parse an error page.
    response.raise_for_status()
    # 3. Decode the raw bytes as UTF-8.
    content=response.content.decode('utf-8')
    # 4. Parse the JSON text.
    return json.loads(content)


3.7Linechart.py

import matplotlib.pyplot as plt
from search_data import search

def linechar(cityname0,cityname1,b):
    """Plot confirmed, dead and healed counts of two cities as two lines.

    Args:
        cityname0: Name of the first city (looked up with ``search``).
        cityname1: Name of the second city.
        b: When 0 the chart is only drawn on the current axes; any other
            value additionally calls ``plt.show()``.
    """
    plt.rcParams['font.sans-serif'] = ['SimHei']
    first_record = search(cityname0)
    second_record = search(cityname1)
    # The category names double as the keys of the per-city records.
    categories = ["确诊人数", "死亡人数", "治愈人数"]
    first_counts = [first_record[key] for key in categories]
    second_counts = [second_record[key] for key in categories]
    # (label, values, line colour, marker fill colour) for each city.
    series = (
        (cityname0, first_counts, 'black', 'red'),
        (cityname1, second_counts, 'b', 'y'),
    )
    for label, counts, line_color, marker_color in series:
        plt.plot(categories, counts, label=label, linewidth=3,
                 color=line_color, marker='o',
                 markerfacecolor=marker_color, markersize=12)
    plt.ylabel('人数')
    plt.title('城市疫情情况')
    plt.legend()
    if b == 0:
        return
    plt.show()

3.8map_show.py

#绘制中国省份疫情地图的数据,用html进行保存
##参考资料:pyecharts官方文档 https://pyecharts.org/#/zh-cn/geography_charts?id=map%ef%bc%9a%e5%9c%b0%e5%9b%be
from pyecharts.charts import Map
from pyecharts import options as opts
def map_show(datacityname_, dataconfirm_):
    """Render a map of China coloured by confirmed cases into an HTML file.

    Args:
        datacityname_: Region names, one per map area.
        dataconfirm_: Confirmed-case counts, in the same order as the names.

    The page is written to ``中国疫情地图.html`` in the current working
    directory.
    """
    # Colour bands of the legend. The bands do not overlap: the top band
    # starts at 10000, matching its label, so values between 1001 and 9999
    # belong to the "1000-9999" band only.
    pieces = [{"max": 999999, "min": 10000, "label": ">=10000", "color": "#8A0808"},
            {"max": 9999, "min": 1000, "label": "1000-9999", "color": "#FF69B4"},
            {"max": 999, "min": 100, "label": "100-999", "color": "#F08080"},
            {"max": 99, "min": 10, "label": "10-99", "color": "#F78181"},
            {"max": 9, "min": 1, "label": "1-9", "color": "#0000FF"},
            {"max": 0, "min": 0, "label": "0", "color": "#B4CDCD"},
            ]
    (
        # Chart size.
        Map(init_opts=opts.InitOpts(width='1000px', height='880px'))
            # One [name, value] pair per region.
            .add("累计确诊人数", [list(z) for z in zip(datacityname_, dataconfirm_)], "china", is_map_symbol_show=False)
            .set_global_opts(
            title_opts=opts.TitleOpts(title="中国疫情地图分布",  pos_left="center", pos_top="10px"),
            legend_opts=opts.LegendOpts(is_show=False),
            visualmap_opts=opts.VisualMapOpts(max_=200, is_piecewise=True,
                                            pieces=pieces
                                             ),
        )
    .render("中国疫情地图.html")
        )

3.9Piechar.py

import numpy as np
from matplotlib import pyplot as plt

from reptile_data import sumdata
from search_data import search

def pie_play(cityname0,cityname1,b):
    """Draw a pie chart of two cities' confirmed cases against the rest.

    The third slice is ``sumdata`` minus the two cities' confirmed counts.

    Args:
        cityname0: Name of the first city (looked up with ``search``).
        cityname1: Name of the second city.
        b: When 0 the chart is only drawn on the current axes; any other
            value additionally calls ``plt.show()``.
    """
    # SimHei is selected so that Chinese labels can be rendered.
    plt.rcParams['font.sans-serif']=['SimHei']
    plt.title("各城市与全国疫情人数占比")
    slice_labels = [cityname0, cityname1, "其他地区"]
    first_confirmed = search(cityname0)['确诊人数']
    second_confirmed = search(cityname1)['确诊人数']
    shares = np.array([
        first_confirmed,
        second_confirmed,
        sumdata - first_confirmed - second_confirmed,
    ])
    plt.pie(
        shares,
        labels=slice_labels,
        labeldistance=1.1,      # distance of the labels from the centre
        autopct='%.1f%%',       # format of the percentage text
        pctdistance=0.5,        # position of the percentage text
        explode=[0.1, 0.2, 0],  # offset of each slice from the centre
        colors=['red', 'blue', 'yellow'],
        shadow=True,
        startangle=90,          # angle at which the first slice starts
    )
    if b == 0:
        return
    plt.show()

3.10python_gui_backimage_1.png
(随便一张图片都OK)
3.11reptile_data.py

#这里主要是对中国境内疫情数据分析
import json

from Get_url_content import get_url_content

url="https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5"
Data=get_url_content(url)
# The 'data' field of the response is itself a JSON string; parse it again.
data=json.loads(Data['data'])
# Children of the first area-tree entry: one record per province-level region.
ChinaData=data["areaTree"][0]['children']
# Nationwide number of confirmed cases.
sumdata=data['chinaTotal']['confirm']

# The same figures are stored in several shapes so that each chart can use the
# one that suits it; this costs some memory.
# One [name, confirmed, dead, healed] row per province.
updata_provinceChinaData=[]
# Province name -> raw list of its city records.
updata_province_cityChinaData={}
# One [name, confirmed, dead, healed] row per city.
updata_cityChinaData=[]
# Province names and their confirmed counts as parallel lists, plus the list
# of all city names.
datacityname=[]
dataconfirm=[]
city_name=[]
for province in ChinaData:
    province_name = province['name']
    province_total = province['total']
    datacityname.append(province_name)
    dataconfirm.append(province_total['confirm'])
    # e.g. ['台湾', 16578, 848, 13742], ['辽宁', 783, 2, 611]
    updata_provinceChinaData.append([province_name,
                                     province_total['confirm'],
                                     province_total['dead'],
                                     province_total['heal']])
    updata_province_cityChinaData[province_name]=province['children']
    for city in province['children']:
        # Skip the two entries that are not real cities.
        if city['name'] != "地区待确认" and city['name'] != "境外输入":
            city_name.append(city['name'])
            updata_cityChinaData.append([city['name'],
                                         city['total']['confirm'],
                                         city['total']['dead'],
                                         city['total']['heal']])  # ['桂林', 32, 0, 32]

# City name -> its three figures; a later row replaces an earlier one that has
# the same city name.
data_cityChinaData={}
for city_row in updata_cityChinaData:
    data_cityChinaData[city_row[0]]={'确诊人数':city_row[1],'死亡人数':city_row[2],'治愈人数':city_row[3]}

3.12search_data.py

from reptile_data import data_cityChinaData
def search(City_name):
    """Return the figures stored for one city.

    Args:
        City_name: City name (str) used as key into ``data_cityChinaData``.

    Returns:
        The value stored under that name.

    Raises:
        KeyError: If the name is not a key of ``data_cityChinaData``.
    """
    city_record = data_cityChinaData[City_name]
    return city_record

3.13中国疫情地图.html

<!DOCTYPE html>
<html>
<head>
    <meta charset="UTF-8">
    <title>Awesome-pyecharts</title>
            <script type="text/javascript" src="https://assets.pyecharts.org/assets/echarts.min.js"></script>
        <script type="text/javascript" src="https://assets.pyecharts.org/assets/maps/china.js"></script>

</head>
<body>
    <div id="4f73a20678e340a8b9fe3dec140d826c" class="chart-container" style="width:1000px; height:880px;"></div>
    <script>
        var chart_4f73a20678e340a8b9fe3dec140d826c = echarts.init(
            document.getElementById('4f73a20678e340a8b9fe3dec140d826c'), 'white', {renderer: 'canvas'});
        var option_4f73a20678e340a8b9fe3dec140d826c = {
    "animation": true,
    "animationThreshold": 2000,
    "animationDuration": 1000,
    "animationEasing": "cubicOut",
    "animationDelay": 0,
    "animationDurationUpdate": 300,
    "animationEasingUpdate": "cubicOut",
    "animationDelayUpdate": 0,
    "color": [
        "#c23531",
        "#2f4554",
        "#61a0a8",
        "#d48265",
        "#749f83",
        "#ca8622",
        "#bda29a",
        "#6e7074",
        "#546570",
        "#c4ccd3",
        "#f05b72",
        "#ef5b9c",
        "#f47920",
        "#905a3d",
        "#fab27b",
        "#2a5caa",
        "#444693",
        "#726930",
        "#b2d235",
        "#6d8346",
        "#ac6767",
        "#1d953f",
        "#6950a1",
        "#918597"
    ],
    "series": [
        {
            "type": "map",
            "name": "\u7d2f\u8ba1\u786e\u8bca\u4eba\u6570",
            "label": {
                "show": true,
                "position": "top",
                "margin": 8
            },
            "mapType": "china",
            "data": [
                {
                    "name": "\u53f0\u6e7e",
                    "value": 16652
                },
                {
                    "name": "\u5185\u8499\u53e4",
                    "value": 955
                },
                {
                    "name": "\u4e91\u5357",
                    "value": 1736
                },
                {
                    "name": "\u9999\u6e2f",
                    "value": 12461
                },
                {
                    "name": "\u5e7f\u897f",
                    "value": 417
                },
                {
                    "name": "\u8fbd\u5b81",
                    "value": 786
                },
                {
                    "name": "\u6cb3\u5357",
                    "value": 1637
                },
                {
                    "name": "\u4e0a\u6d77",
                    "value": 2877
                },
                {
                    "name": "\u5e7f\u4e1c",
                    "value": 3323
                },
                {
                    "name": "\u5c71\u4e1c",
                    "value": 1021
                },
                {
                    "name": "\u9ed1\u9f99\u6c5f",
                    "value": 2017
                },
                {
                    "name": "\u56db\u5ddd",
                    "value": 1277
                },
                {
                    "name": "\u6cb3\u5317",
                    "value": 1457
                },
                {
                    "name": "\u5929\u6d25",
                    "value": 535
                },
                {
                    "name": "\u6d59\u6c5f",
                    "value": 1501
                },
                {
                    "name": "\u798f\u5efa",
                    "value": 1322
                },
                {
                    "name": "\u5317\u4eac",
                    "value": 1194
                },
                {
                    "name": "\u6c5f\u82cf",
                    "value": 1619
                },
                {
                    "name": "\u9655\u897f",
                    "value": 709
                },
                {
                    "name": "\u5409\u6797",
                    "value": 585
                },
                {
                    "name": "\u5c71\u897f",
                    "value": 265
                },
                {
                    "name": "\u91cd\u5e86",
                    "value": 610
                },
                {
                    "name": "\u6e56\u5357",
                    "value": 1199
                },
                {
                    "name": "\u6e56\u5317",
                    "value": 68311
                },
                {
                    "name": "\u897f\u85cf",
                    "value": 1
                },
                {
                    "name": "\u65b0\u7586",
                    "value": 981
                },
                {
                    "name": "\u6d77\u5357",
                    "value": 190
                },
                {
                    "name": "\u7518\u8083",
                    "value": 344
                },
                {
                    "name": "\u6fb3\u95e8",
                    "value": 77
                },
                {
                    "name": "\u6c5f\u897f",
                    "value": 959
                },
                {
                    "name": "\u9752\u6d77",
                    "value": 30
                },
                {
                    "name": "\u5b81\u590f",
                    "value": 122
                },
                {
                    "name": "\u5b89\u5fbd",
                    "value": 1008
                },
                {
                    "name": "\u8d35\u5dde",
                    "value": 159
                }
            ],
            "roam": true,
            "aspectScale": 0.75,
            "nameProperty": "name",
            "selectedMode": false,
            "zoom": 1,
            "mapValueCalculation": "sum",
            "showLegendSymbol": false,
            "emphasis": {}
        }
    ],
    "legend": [
        {
            "data": [
                "\u7d2f\u8ba1\u786e\u8bca\u4eba\u6570"
            ],
            "selected": {
                "\u7d2f\u8ba1\u786e\u8bca\u4eba\u6570": true
            },
            "show": false,
            "padding": 5,
            "itemGap": 10,
            "itemWidth": 25,
            "itemHeight": 14
        }
    ],
    "tooltip": {
        "show": true,
        "trigger": "item",
        "triggerOn": "mousemove|click",
        "axisPointer": {
            "type": "line"
        },
        "showContent": true,
        "alwaysShowContent": false,
        "showDelay": 0,
        "hideDelay": 100,
        "textStyle": {
            "fontSize": 14
        },
        "borderWidth": 0,
        "padding": 5
    },
    "title": [
        {
            "text": "\u4e2d\u56fd\u75ab\u60c5\u5730\u56fe\u5206\u5e03",
            "left": "center",
            "top": "10px",
            "padding": 5,
            "itemGap": 10
        }
    ],
    "visualMap": {
        "show": true,
        "type": "piecewise",
        "min": 0,
        "max": 200,
        "inRange": {
            "color": [
                "#50a3ba",
                "#eac763",
                "#d94e5d"
            ]
        },
        "calculable": true,
        "inverse": false,
        "splitNumber": 5,
        "orient": "vertical",
        "showLabel": true,
        "itemWidth": 20,
        "itemHeight": 14,
        "borderWidth": 0,
        "pieces": [
            {
                "max": 999999,
                "min": 1001,
                "label": ">10000",
                "color": "#8A0808"
            },
            {
                "max": 9999,
                "min": 1000,
                "label": "1000-9999",
                "color": "#FF69B4"
            },
            {
                "max": 999,
                "min": 100,
                "label": "100-999",
                "color": "#F08080"
            },
            {
                "max": 99,
                "min": 10,
                "label": "10-99",
                "color": "#F78181"
            },
            {
                "max": 9,
                "min": 1,
                "label": "1-9",
                "color": "#0000FF"
            },
            {
                "max": 0,
                "min": 0,
                "label": "0",
                "color": "#B4CDCD"
            }
        ]
    }
};
        chart_4f73a20678e340a8b9fe3dec140d826c.setOption(option_4f73a20678e340a8b9fe3dec140d826c);
    </script>
</body>
</html>

4、结果
对疫情数据的爬取。Matplotlib库和Tkinter库的基本使用。通过Matplotlib实现折线图,扇形图,条形图。Tkinter库的下拉框Combobox,按钮Button,画布Canvas的使用。利用pyecharts来生成html,绘制疫情地图。




5.还需改进
爬取数据和存储浪费了时间和空间资源,后期可以改进来节省。目前程序从开始运行到窗口出现大约需要3秒,但网络状况等多个因素会造成窗口出现的时间不稳定。

作者:爱喝冰美式的猫猫

物联沃分享整理
物联沃-IOTWORD物联网 » python 简单应用——网络爬虫AND数据分析

发表回复