import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy as sp
https://powietrze.gios.gov.pl/pjp/current Chcemy symulować PM10 w Warszawie, w okresie zimowym.
df = pd.read_excel('2021_PM10_1g.xlsx', index_col=0)
df
| MzWarAlNiepo | MzWarBajkowa | MzWarChrosci | MzWarTolstoj | MzWarWokalna | |
|---|---|---|---|---|---|
| Date | |||||
| 2021-12-01 00:00:00 | 11.38 | 8.10 | 8.25 | 8.90 | 6.42 |
| 2021-12-01 01:00:00 | 14.05 | 10.39 | 10.78 | 22.33 | 8.36 |
| 2021-12-01 02:00:00 | 13.43 | 11.11 | 12.45 | 20.55 | 10.16 |
| 2021-12-01 03:00:00 | 14.24 | 10.96 | 11.46 | 19.57 | 9.71 |
| 2021-12-01 04:00:00 | 14.92 | 12.19 | 12.32 | 21.54 | 10.62 |
| ... | ... | ... | ... | ... | ... |
| 2022-02-28 19:00:00 | 43.70 | 48.00 | 58.20 | 30.00 | 29.50 |
| 2022-02-28 20:00:00 | 56.10 | 41.70 | 101.80 | 31.30 | 34.00 |
| 2022-02-28 21:00:00 | 64.10 | 58.00 | 77.80 | 47.40 | 36.90 |
| 2022-02-28 22:00:00 | 63.10 | 56.40 | 73.20 | 51.60 | 43.00 |
| 2022-02-28 23:00:00 | 56.70 | 61.80 | 63.80 | 52.10 | 52.80 |
2160 rows × 5 columns
df.index.freq = 'h'
df.index
DatetimeIndex(['2021-12-01 00:00:00', '2021-12-01 01:00:00',
'2021-12-01 02:00:00', '2021-12-01 03:00:00',
'2021-12-01 04:00:00', '2021-12-01 05:00:00',
'2021-12-01 06:00:00', '2021-12-01 07:00:00',
'2021-12-01 08:00:00', '2021-12-01 09:00:00',
...
'2022-02-28 14:00:00', '2022-02-28 15:00:00',
'2022-02-28 16:00:00', '2022-02-28 17:00:00',
'2022-02-28 18:00:00', '2022-02-28 19:00:00',
'2022-02-28 20:00:00', '2022-02-28 21:00:00',
'2022-02-28 22:00:00', '2022-02-28 23:00:00'],
dtype='datetime64[ns]', name='Date', length=2160, freq='H')
lag = 6
plt.figure(figsize=(20,10))
plt.plot(df.index[0+lag*72:72+lag*72], df['MzWarAlNiepo'][0+lag*72:72+lag*72])
plt.show()
Wartości zmieniają się w ciągu doby. Najlepszym rozwiązaniem byłoby wzięcie maksymalnej wartości zanieczyszczenia, wtedy symulujemy najgroźniejszy przypadek w ciągu dnia
df_daily = df.groupby(df.index.date).max()
df_daily
| MzWarAlNiepo | MzWarBajkowa | MzWarChrosci | MzWarTolstoj | MzWarWokalna | |
|---|---|---|---|---|---|
| 2021-12-01 | 26.42 | 23.47 | 24.65 | 35.08 | 18.23 |
| 2021-12-02 | 29.29 | 20.64 | 13.68 | 16.31 | 14.63 |
| 2021-12-03 | 34.22 | 18.49 | 18.44 | 24.60 | 14.20 |
| 2021-12-04 | 49.70 | 53.54 | 48.02 | 52.07 | 38.31 |
| 2021-12-05 | 43.01 | 43.71 | 42.53 | 58.29 | 38.78 |
| ... | ... | ... | ... | ... | ... |
| 2022-02-24 | 51.10 | 35.80 | 41.70 | 25.60 | 25.80 |
| 2022-02-25 | 90.20 | 37.10 | 33.60 | 23.80 | 31.20 |
| 2022-02-26 | 79.20 | 69.60 | 72.20 | 57.50 | 43.80 |
| 2022-02-27 | 44.20 | 88.00 | 47.80 | 44.70 | 35.80 |
| 2022-02-28 | 64.10 | 61.80 | 101.80 | 52.10 | 52.80 |
90 rows × 5 columns
plt.figure(figsize=(20,10))
cols = df.columns
for c in cols:
plt.plot(df_daily[c], label=c)
plt.legend()
plt.show()
stacje = df.columns
fig, ax = plt.subplots(3,2, figsize=(15,15))
params = [{"K":11,"loc":18,"scale":4},
{"K":7.5,"loc":19,"scale":4.2},
{"K":4,"loc":17.5,"scale":6},
{"K":3.9,"loc":13,"scale":5.5},
{"K":4,"loc":15.5,"scale":6.5}]
values = []
for idx,(col, axis) in enumerate(zip(stacje, ax.reshape(-1))):
srednia = np.mean(df_daily[col])
x_w = np.arange(min(df_daily[col])-25,max(df_daily[col])+20,0.01)
y_w = sp.stats.exponnorm.pdf(x_w,**params[idx])
values.append(*sp.stats.exponnorm.rvs(**params[idx],size=1,random_state=31))
sp.stats.exponnorm.rvs(**params[idx],size=1,random_state=31)
axis.hist(df_daily[col],density=True,bins=20)
axis.plot(x_w,y_w)
axis.set_title(col)
plt.show()
Trend w osobnym notatniku (folder Sprawdzanie_trendu). Ostatecznie chyba wartość -2 należy odjąć.
Sprawdzenie maksymalnej rozbieżności między stacjami:
df_daily.head(5)
| MzWarAlNiepo | MzWarBajkowa | MzWarChrosci | MzWarTolstoj | MzWarWokalna | |
|---|---|---|---|---|---|
| 2021-12-01 | 26.42 | 23.47 | 24.65 | 35.08 | 18.23 |
| 2021-12-02 | 29.29 | 20.64 | 13.68 | 16.31 | 14.63 |
| 2021-12-03 | 34.22 | 18.49 | 18.44 | 24.60 | 14.20 |
| 2021-12-04 | 49.70 | 53.54 | 48.02 | 52.07 | 38.31 |
| 2021-12-05 | 43.01 | 43.71 | 42.53 | 58.29 | 38.78 |
df_daily.max(axis=1) - df_daily.min(axis=1)
2021-12-01 16.85
2021-12-02 15.61
2021-12-03 20.02
2021-12-04 15.23
2021-12-05 19.51
...
2022-02-24 25.50
2022-02-25 66.40
2022-02-26 35.40
2022-02-27 52.20
2022-02-28 49.70
Length: 90, dtype: float64
np.max(df_daily.max(axis=1) - df_daily.min(axis=1))
175.26
np.mean(df_daily.max(axis=1) - df_daily.min(axis=1))
32.16477777777778
values = np.array(values)
# trend:
values -= 2
values
array([27.34527877, 23.95941152, 18.36611476, 13.44195062, 16.60495765])
values[values<0] = 0
values[values>200] = 200
# jeżeli ta różnica między min a max jest większa od 50,
# to zmniejszaj największą wartość
while (np.max(values) - np.min(values)) > 50:
values[values.index(np.max(values))] -= 5
values
array([27.34527877, 23.95941152, 18.36611476, 13.44195062, 16.60495765])
dane = pd.read_csv('DATA/dane.tsv', sep='\t', header=None)
dane
| 0 | 1 | 2 | 3 | |
|---|---|---|---|---|
| 0 | 21.004724 | 52.219298 | 0 | 29.345279 |
| 1 | 21.176233 | 52.188474 | 0 | 25.959412 |
| 2 | 20.906073 | 52.207742 | 0 | 20.366115 |
| 3 | 20.933018 | 52.285073 | 0 | 15.441951 |
| 4 | 21.033819 | 52.160772 | 0 | 18.604958 |
dane.iloc[:,3] = values
dane
| 0 | 1 | 2 | 3 | |
|---|---|---|---|---|
| 0 | 21.004724 | 52.219298 | 0 | 29.345279 |
| 1 | 21.176233 | 52.188474 | 0 | 25.959412 |
| 2 | 20.906073 | 52.207742 | 0 | 20.366115 |
| 3 | 20.933018 | 52.285073 | 0 | 15.441951 |
| 4 | 21.033819 | 52.160772 | 0 | 18.604958 |
dane.to_csv('DATA/dane.tsv', sep='\t', index=False, header=False)
"""
Created on Fri Jan 15 09:43:50 2021
@author: dariograna
Reference: Grana and de Figueiredo, 2021, SeReMpy
"""
from scipy.io import loadmat
import scipy.spatial.distance
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime
import pandas as pd
from context import SeReMpy
from SeReMpy.Geostats import *
xr = 21.176233 - 20.906073
yr = 52.285073 - 52.160772
nx = 100
ny = 100
x = np.linspace(20.9, 21.2, nx)
y = np.linspace(52.1, 52.3, ny)
X, Y = np.meshgrid(x,y)
d = np.loadtxt('DATA/dane.tsv')
dx = d[:,0].reshape(-1, 1)
dy = d[:,1].reshape(-1, 1)
dz = d[:,2].reshape(-1, 1)
dt = d[:,3].reshape(-1, 1)
# # available data (100 measurements)
dcoords = np.hstack([dx,dy])
nd = dcoords.shape[0]
# # grid of coordinates of the location to be estimated
xcoords = np.transpose(np.vstack([X.reshape(-1), Y.reshape(-1)]))
n = xcoords.shape[0]
print('Dane wczytane.')
Dane wczytane.
now = datetime.now()
# parameters random variable
tmean = 10
tvar = 5
l = 3
krigtype = 'exp'
# kriging
xsk = np.zeros((n, 1))
for i in range(n):
# simple kiging
xsk[i,0] = SimpleKriging(xcoords[i,:], dcoords, dt, tmean, tvar, l, krigtype)[0]
xsk = np.reshape(xsk, X.shape)
print(f'Kriging skończony.')
# Sequential Gaussian Simulation
krig = 1
nsim = 5
sgsim = np.zeros((X.shape[0], X.shape[1], nsim))
print(sgsim.shape)
for i in range(nsim):
print(xcoords.shape)
print(dcoords.shape)
print(dt.shape)
sim = SeqGaussianSimulation(xcoords, dcoords, dt, tmean, tvar, l, krigtype, krig)
sgsim[:,:,i] = np.reshape(sim, (X.shape[0], X.shape[1]))
print(f'SGS skończone.')
Kriging skończony. (100, 100, 5) (10000, 2) (5, 2) (5, 1) (10000, 2) (5, 2) (5, 1) (10000, 2) (5, 2) (5, 1) (10000, 2) (5, 2) (5, 1) (10000, 2) (5, 2) (5, 1) SGS skończone.
# plot results
# # plot
plt.figure(4, figsize=(14,12))
plt.suptitle('Kriging and SGS realization.', size=24)
plt.subplot(221)
plt.pcolor(X,Y, xsk, cmap='summer')
plt.xlabel('X')
plt.ylabel('Y')
cbar = plt.colorbar()
cbar.set_label('PM10', rotation=270)
plt.title(f'Simple Kriging.')
plt.scatter(dx,dy, s=2, color='red')
plt.subplot(223)
plt.pcolor(X,Y, sgsim[:,:,0], cmap='summer')
plt.xlabel('X')
plt.ylabel('Y')
cbar = plt.colorbar()
cbar.set_label('PM10', rotation=270)
plt.title(f'SGS Realization.')
plt.subplot(222)
plt.hist(xsk.flatten())#, bins=20, range=(3,10))
minx = np.floor(np.min(xsk.flatten()))
maxx = np.ceil(np.max(xsk.flatten()))
plt.xlim([minx,maxx])
plt.ylim([0,3500])
plt.title('Simple kriging histogram')
plt.subplot(224)
plt.hist(sgsim[:,:,0].flatten())#, bins=20, range=(3,10))
plt.xlim([minx,maxx])
plt.ylim([0,3500])
plt.title('SGS histogram')
plt.tight_layout()
s = f'wyniki/w_{now.strftime("%Y_%m_%d-%H_%M")}_{krigtype}_mean{tmean:.2f}_tvar{tvar:.2f}.jpg'
plt.savefig(s, dpi=200)