使用Python对CMIP6数据进行偏差校正Delta处理

降水要素

import xarray as xr
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymannkendall
from scipy.stats import pearsonr
from sklearn.metrics import r2_score,mean_squared_error

# 思路
# bias = obs / gcm
# gcm_downscale = gcm * bias

# 读取数据
gcm_pre = xr.open_dataset(r"G:\CMIP6\precip\ew_six_cmip_precip.nc")
obs_pre = xr.open_dataset(r"G:\Data\tp_processed\del_runnian_ERA5_tp_canjian_1985_2021_daily.nc")
# 计算多年月平均降水
obs_pre_monthlymean = obs_pre.tp.resample(time="1M").sum()
gcm_pre_monthlymean = gcm_pre.precip.resample(time="1M").sum()
obs_pre_multimonthlymean = obs_pre_monthlymean.groupby(obs_pre_monthlymean.time.dt.month).mean()
gcm_pre_multimonthlymean = gcm_pre_monthlymean.groupby(gcm_pre_monthlymean.time.dt.month).mean()

obs_nc = xr.Dataset({"precip":
                        (('time','lat','lon'),obs_pre_multimonthlymean.values.astype("float32"))},
                        coords={"time":gcm_pre_multimonthlymean.month.values,
                                "lat":gcm_pre_multimonthlymean.lat.values,
                                "lon":gcm_pre_multimonthlymean.lon.values})

gcm_nc = xr.Dataset({"precip":
                        (('time','lat','lon'),gcm_pre_multimonthlymean.values.astype("float32"))},
                        coords={"time":gcm_pre_multimonthlymean.month.values,
                                "lat":gcm_pre_multimonthlymean.lat.values,
                                "lon":gcm_pre_multimonthlymean.lon.values})

# 计算偏差
delta_pre = obs_nc.precip/gcm_nc.precip
delta_pre.to_netcdf(r"G:\CMIP6\precip\delta_hist_precip.nc")

# 降尺度
result = []
for i in range(1,13)[:]:
    itmp = gcm_pre.sel(time=gcm_pre.time.dt.month==i)*delta_pre.sel(time=i)
    itmp = itmp.persist()
    itmp2 = xr.Dataset({"precip":
                        (('time','lat','lon'),itmp.precip.values.astype("float32"))},
                        coords={"time":itmp.time.values,
                                "lat":itmp.lat.values,
                                "lon":itmp.lon.values})
    print(i)
    itmp2.to_netcdf(str(i).zfill(2)+".nc")
    result.append(itmp2)

# 合并逐月降尺度结果
pre_gcm_downscaled = xr.merge(result)
# pre_gcm_downscaled.to_netcdf(r"D:\Data2022\ERA5\downscaled\downscale\downscaled_precip.nc")

# 降尺度评估
# 日评估
pre_gcm_downscaled = xr.open_dataset(r"D:\Data2022\ERA5\downscaled\downscale\downscaled_precip.nc")
obs_pre = xr.open_dataset(r"D:\Data2022\ERA5\downscaled\data\era5_pre_v1.nc")
obs_data = obs_pre.tp.values.ravel() # 构造样本序列
gcm_data = gcm_pre.precip.values.ravel()
downscale_data = pre_gcm_downscaled.precip.values.ravel()

obs_data = np.nan_to_num(obs_data) # 构造样本序列
gcm_data = np.nan_to_num(gcm_data)
downscale_data = np.nan_to_num(downscale_data)

r,p = pearsonr(obs_data,downscale_data)
rmse = np.sqrt(mean_squared_error(obs_data,downscale_data))
print(r,rmse)
r0,p0 = pearsonr(obs_data,gcm_data)
rmse0 = np.sqrt(mean_squared_error(obs_data,gcm_data))
print(r0,rmse0)

# 月评估
obs_month = obs_pre.precip.resample(time='1M').sum()
downscale_month = pre_gcm_downscaled.pre_gcm.resample(time='1M').sum()
obs_data = obs_month.values.ravel() # 构造样本序列
downscale_data = downscale_month.values.ravel()
r,p = pearsonr(obs_data,downscale_data)
rmse = np.sqrt(mean_squared_error(obs_data,downscale_data))
print(r,rmse)

# 季节评估
# obs_season = obs_tmax.tmax.groupby('time.season')
# obs_season_JJA = obs_season['SON']
# obs_summer = obs_season_JJA.resample(time='1AS').mean()
# downscale_season = tmax_gcm_downscaled.tmax_gcm.groupby('time.season')
# downscale_season_JJA = downscale_season['SON']
# downscale_summer = downscale_season_JJA.resample(time='1AS').sum()
# obs_data = obs_summer.values.ravel() # 构造样本序列
# downscale_data = downscale_summer.values.ravel()
# r,p = pearsonr(obs_data,downscale_data)
# rmse = np.sqrt(mean_squared_error(obs_data,downscale_data))

# 年评估
obs_year = obs_pre.precip.resample(time='1AS').sum()
downscale_year = pre_gcm_downscaled.pre_gcm.resample(time='1AS').sum()
obs_data = obs_year.values.ravel() # 构造样本序列
downscale_data = downscale_year.values.ravel()
r,p = pearsonr(obs_data,downscale_data)
rmse = np.sqrt(mean_squared_error(obs_data,downscale_data))
print(r,rmse)

评估时间:30年 1985-2014年

评估结果:日尺度CC从0.2041升为0.241384;RMSE从3.946625升为3.9088051

气温要素

import xarray as xr
import os
import numpy as np
import pandas as pd
import pymannkendall
import regionmask
import geopandas as gpd
import matplotlib.pyplot as plt
from scipy.stats import pearsonr
from sklearn.metrics import r2_score,mean_squared_error

# 思路
# bias = gcm - obs
# gcm_downscale = gcm - bias

# 读取数据
gcm_tmax = xr.open_dataset(r"G:\CMIP6\tas\ew_six_cmip_tas.nc")
gcm_tmax = gcm_tmax.sortby(gcm_tmax.time)
obs_tmax = xr.open_dataset(r"G:\Data\2m_tem_processed\del_runnian_ERA5_tavg_canjian_1985_2021_daily.nc")
# 计算多年月平均气温
obs_tmax_monthlymean = obs_tmax.t2m.resample(time="1M").mean()
gcm_tmax_monthlymean = gcm_tmax.tas.resample(time="1M").mean()
obs_tmax_multimonthlymean = obs_tmax_monthlymean.groupby(obs_tmax_monthlymean.time.dt.month).mean()
gcm_tmax_multimonthlymean = gcm_tmax_monthlymean.groupby(gcm_tmax_monthlymean.time.dt.month).mean()



obs_nc = xr.Dataset({"tas":
                        (('time','lat','lon'),obs_tmax_multimonthlymean.values.astype("float32"))},
                        coords={"time":gcm_tmax_multimonthlymean.month.values,
                                "lat":gcm_tmax_multimonthlymean.lat.values,
                                "lon":gcm_tmax_multimonthlymean.lon.values})

gcm_nc = xr.Dataset({"tas":
                        (('time','lat','lon'),gcm_tmax_multimonthlymean.values.astype("float32"))},
                        coords={"time":gcm_tmax_multimonthlymean.month.values,
                                "lat":gcm_tmax_multimonthlymean.lat.values,
                                "lon":gcm_tmax_multimonthlymean.lon.values})

# 计算偏差
delta_tmax = gcm_nc.tas - obs_nc.tas
delta_tmax.to_netcdf(r"G:\CMIP6\tas\delta_hist_tas.nc")

# 降尺度
result = []
for i in range(1,13)[:]:
    itmp = gcm_tmax.sel(time=gcm_tmax.time.dt.month==i)-delta_tmax.sel(time=i)
    itmp = itmp.persist()
    itmp2 = xr.Dataset({"tmax_gcm":
                        (('time','lat','lon'),itmp.tas.values.astype("float32"))},
                        coords={"time":itmp.time.values,
                                "lat":itmp.lat.values,
                                "lon":itmp.lon.values})
    print(i)
    itmp2.to_netcdf(str(i).zfill(2)+".nc")
    result.append(itmp2)

# 合并逐月降尺度结果
tmax_gcm_downscaled = xr.merge(result)
# tmax_gcm_downscaled.to_netcdf(r"D:\Data2022\ERA5\downscaled\downscale\downscaled_tmax_v1.nc")

# # 查看插值到某点上的订正效果
# downscaled_point = tmax_gcm_downscaled.interp(lon=[115],lat=[48]).tmax_gcm
# obs = obs_tmax.interp(lon=[115],lat=[48]).tmax
# gcm = gcm_tmax.interp(lon=[115],lat=[48]).tmax
# plt.plot(downscaled_point.time.values,downscaled_point.squeeze(),'b-')
# plt.plot(obs.time.values,obs.squeeze(),'r-')
# plt.plot(gcm.time.values,gcm.squeeze(),'g-')
# plt.show()
# # downscaled_point.plot()
# # obs_tmax.tmax.plot(c="r")
# # gcm_tmax.tmax.plot(c="g")

# 裁剪
neimenggu_shp = r"G:\Data\基础地理数据\china\内蒙古自治区\neimenggu_xian.shp" #输入内蒙古矢量文件
mask_gdf = gpd.read_file(neimenggu_shp)
print(mask_gdf)

neimenggu_mask = regionmask.mask_geopandas(mask_gdf, gcm_tmax.lon, gcm_tmax.lat) # 创造掩膜
gcm_data_neimenggu = gcm_tmax.where(~np.isnan(neimenggu_mask)) # 裁剪后
neimenggu_mask0 = regionmask.mask_geopandas(mask_gdf, obs_tmax.longitude, obs_tmax.latitude) # 创造掩膜
obs_data_neimenggu = obs_tmax.where(~np.isnan(neimenggu_mask0)) # 裁剪后
neimenggu_mask1 = regionmask.mask_geopandas(mask_gdf, tmax_gcm_downscaled.lon, tmax_gcm_downscaled.lat) # 创造掩膜
gcmdownscaled_data_neimenggu = tmax_gcm_downscaled.where(~np.isnan(neimenggu_mask1)) # 裁剪后


# 降尺度评估
# 日评估
# tmax_gcm_downscaled = xr.open_dataset(r"D:\Data2022\ERA5\downscaled\downscale\downscaled_tmax_v1.nc")
# obs_tmax = xr.open_dataset(r"D:\Data2022\ERA5\downscaled\data\era5_tmax_v1.nc")
obs_data = obs_data_neimenggu.t2m.values.ravel() # 构造样本序列
obs_data = obs_data[~np.isnan(obs_data)]
# obs_data = np.nan_to_num(obs_data)

gcm_data = gcm_data_neimenggu.tas.values.ravel()
gcm_data = gcm_data[~np.isnan(gcm_data)]
# gcm_data = np.nan_to_num(gcm_data)

downscale_data = gcmdownscaled_data_neimenggu.tmax_gcm.values.ravel()
downscale_data = downscale_data[~np.isnan(downscale_data)]
# downscale_data = np.nan_to_num(downscale_data)

r,p = pearsonr(obs_data,downscale_data)
rmse = np.sqrt(mean_squared_error(obs_data,downscale_data))
print(r,rmse)

r0,p0 = pearsonr(obs_data,gcm_data)
rmse0 = np.sqrt(mean_squared_error(obs_data,gcm_data))
print(r0,rmse0)

# 月评估
obs_month = obs_tmax.tmax.resample(time='1M').mean()
downscale_month = tmax_gcm_downscaled.tmax_gcm.resample(time='1M').mean()
obs_data = obs_month.values.ravel() # 构造样本序列
downscale_data = downscale_month.values.ravel()
r,p = pearsonr(obs_data,downscale_data)
rmse = np.sqrt(mean_squared_error(obs_data,downscale_data))
print(r,rmse)

# 季节评估
# obs_season = obs_tmax.tmax.groupby('time.season')
# obs_season_JJA = obs_season['SON']
# obs_summer = obs_season_JJA.resample(time='1AS').mean()
# downscale_season = tmax_gcm_downscaled.tmax_gcm.groupby('time.season')
# downscale_season_JJA = downscale_season['SON']
# downscale_summer = downscale_season_JJA.resample(time='1AS').sum()
# obs_data = obs_summer.values.ravel() # 构造样本序列
# downscale_data = downscale_summer.values.ravel()
# r,p = pearsonr(obs_data,downscale_data)
# rmse = np.sqrt(mean_squared_error(obs_data,downscale_data))

# 年评估
obs_year = obs_tmax.tmax.resample(time='1AS').mean()
downscale_year = tmax_gcm_downscaled.tmax_gcm.resample(time='1AS').mean()
obs_data = obs_year.values.ravel() # 构造样本序列
downscale_data = downscale_year.values.ravel()
r,p = pearsonr(obs_data,downscale_data)
rmse = np.sqrt(mean_squared_error(obs_data,downscale_data))
print(r,rmse)

评估时间:30年 1985-2014年

评估结果:日尺度CC从0.94952515升为0.95353693;RMSE从4.57162915升为4.317777

物联沃分享整理
物联沃-IOTWORD物联网 » 使用Python对CMIP6数据进行偏差校正Delta处理

发表评论