Search
Tuulivarenka project
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

Opening Zeppelin hourly data from 2010 and 2011

# Hourly Zeppelin size-distribution files, one CSV per year.
filenam1 = '/home/2daa7756-2d5725-2d4dfb-2db0ff-2d5e0a6858a009/shared-ns1000k/inputs//Aerosol_sizedist_obs/Zeppelin_2010_hourly.csv'
filenam2 = '/home/2daa7756-2d5725-2d4dfb-2db0ff-2d5e0a6858a009/shared-ns1000k/inputs//Aerosol_sizedist_obs/Zeppelin_2011_hourly.csv'
flist = [filenam1, filenam2]

# The CSV columns labelled '0', '0.1', '0.2', '0.3', '0.4' hold, in that
# order, year, month, day, hour and minute (same order as "%Y %m %d %H %M").
_DATE_COLUMNS = ['0', '0.1', '0.2', '0.3', '0.4']
_DATE_PARTS = ['year', 'month', 'day', 'hour', 'minute']


def mydateparser(x):
    """Parse a 'YYYY MM DD HH MM' string into a timestamp.

    Retained so that any code still calling ``mydateparser`` keeps working;
    ``pd.datetime`` (used by the earlier version) no longer exists in
    current pandas releases.
    """
    return pd.to_datetime(x, format="%Y %m %d %H %M")


def _read_hourly_csv(path):
    """Read one hourly CSV and return it indexed by a ``date`` timestamp.

    The five date-part columns are combined into a single datetime with
    ``pd.to_datetime`` and then removed; every other column is kept in its
    original order.
    """
    frame = pd.read_csv(path)
    parts = frame[_DATE_COLUMNS].copy()
    parts.columns = _DATE_PARTS
    frame['date'] = pd.to_datetime(parts)
    return frame.drop(columns=_DATE_COLUMNS).set_index('date')


# Read every yearly file and stack them row-wise into one frame.
ldf = [_read_hourly_csv(f) for f in flist]
data = pd.concat(ldf, axis=0)
#data.head()
#data.info()

# remove last column
# errors='ignore': the '0.6' column is not always present (dropping it
# unconditionally raised KeyError), so a missing label is simply skipped.
data.drop(labels='0.6', axis=1, inplace=True, errors='ignore')
    
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-124-dd6d76ed93db> in <module>
      1 #remove last column
----> 2 data.drop(labels='0.6', axis=1, inplace=True)
      3 

/opt/conda/lib/python3.7/site-packages/pandas/core/frame.py in drop(self, labels, axis, index, columns, level, inplace, errors)
   4100             level=level,
   4101             inplace=inplace,
-> 4102             errors=errors,
   4103         )
   4104 

/opt/conda/lib/python3.7/site-packages/pandas/core/generic.py in drop(self, labels, axis, index, columns, level, inplace, errors)
   3912         for axis, labels in axes.items():
   3913             if labels is not None:
-> 3914                 obj = obj._drop_axis(labels, axis, level=level, errors=errors)
   3915 
   3916         if inplace:

/opt/conda/lib/python3.7/site-packages/pandas/core/generic.py in _drop_axis(self, labels, axis, level, errors)
   3944                 new_axis = axis.drop(labels, level=level, errors=errors)
   3945             else:
-> 3946                 new_axis = axis.drop(labels, errors=errors)
   3947             result = self.reindex(**{axis_name: new_axis})
   3948 

/opt/conda/lib/python3.7/site-packages/pandas/core/indexes/base.py in drop(self, labels, errors)
   5338         if mask.any():
   5339             if errors != "ignore":
-> 5340                 raise KeyError("{} not found in axis".format(labels[mask]))
   5341             indexer = indexer[~mask]
   5342         return self.delete(indexer)

KeyError: "['0.6'] not found in axis"
# One wide figure (number 1) holding a single axes for the heatmap.
fig = plt.figure(num=1, figsize=[20, 5])
ax = plt.subplot(111)

# Transpose so every DataFrame column becomes a heatmap row, with the column
# order reversed so the first column ends up at the bottom. The colour scale
# is limited to the range 0-200.
sns.heatmap(data.iloc[:, ::-1].T, vmin=0, vmax=200, cmap='jet')
<matplotlib.axes._subplots.AxesSubplot at 0x7fe2be952e10>
# Show the column at position 1 (i.e. the second column) for every row.
data.iloc[:, 1]
date
2010-01-01 00:00:00      9.2141
2010-01-01 01:00:00      7.4015
2010-01-01 02:00:00      8.2293
2010-01-01 03:00:00      6.0217
2010-01-01 04:00:00      9.8351
                         ...   
2010-12-31 19:00:00   -999.0000
2010-12-31 20:00:00   -999.0000
2010-12-31 21:00:00   -999.0000
2010-12-31 22:00:00   -999.0000
2010-12-31 23:00:00     15.7530
Name: 20, Length: 8760, dtype: float64
# Replace every -999 value in the real data with NaN
# (-999 is presumably the missing-value marker of the files; confirm).
data = data.replace(to_replace=-999, value=np.nan)
data
0.5 20 22.44 25.179 28.251 31.698 35.566 39.905 44.774 50.238 ... 178.25 200 224.4 251.79 282.51 316.98 355.66 399.05 447.74 502.38
date
2010-01-01 00:00:00 127.010 9.2141 13.3240 18.261 23.245 30.351 39.4690 47.835 47.8340 42.7440 ... 112.370 115.400 168.740 298.220 358.390 247.520 125.5700 54.1360 22.3210 9.6607
2010-01-01 01:00:00 110.150 7.4015 12.8350 18.326 23.133 26.412 31.9880 38.002 38.1110 38.6730 ... 107.270 101.910 140.920 244.340 294.450 218.560 115.0300 49.6960 20.9440 9.4524
2010-01-01 02:00:00 98.864 8.2293 10.1860 14.526 19.346 23.288 30.8280 32.256 33.3850 34.3440 ... 90.091 88.761 121.910 207.210 267.870 196.080 101.8700 45.4790 20.8550 9.9242
2010-01-01 03:00:00 103.960 6.0217 10.0140 14.795 21.470 26.444 28.6210 31.333 31.4070 35.1930 ... 99.499 93.949 133.170 231.840 290.350 209.660 110.8000 50.5580 23.2000 11.1620
2010-01-01 04:00:00 109.060 9.8351 12.5260 15.748 19.991 24.453 30.3280 33.429 36.5330 36.4650 ... 102.890 106.060 142.540 243.290 307.780 215.960 115.7900 54.9650 24.6220 12.0210
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2011-12-31 19:00:00 33.237 4.5237 8.2171 12.178 13.667 15.023 15.4340 10.998 9.2830 8.3625 ... 66.587 74.784 73.260 58.339 35.515 19.784 12.6390 5.4085 3.2198 4.0654
2011-12-31 20:00:00 36.031 7.5069 11.4000 15.513 16.042 16.272 15.7030 12.395 11.8270 12.5400 ... 65.547 67.752 62.612 48.068 31.817 18.368 8.8337 7.7784 6.9098 5.9238
2011-12-31 21:00:00 38.550 4.4545 9.5881 15.145 18.117 17.796 13.6140 11.582 9.2801 8.4581 ... 81.728 91.344 84.708 53.480 37.032 22.753 10.9250 9.4918 7.4926 5.1997
2011-12-31 22:00:00 38.347 4.6390 8.8948 13.311 12.494 11.177 9.7762 11.722 10.7130 9.8151 ... 72.779 84.278 82.020 57.439 37.294 20.893 9.2273 6.8520 7.2738 8.6812
2011-12-31 23:00:00 46.968 11.0710 12.0850 13.415 18.010 20.073 18.3820 13.677 13.1990 14.4220 ... 91.955 91.564 84.006 65.885 39.213 21.230 13.9460 10.3320 8.6885 8.1243

14702 rows × 30 columns

# Select only the columns at positions 1..8 (slice 1:9, end exclusive);
# per the original notes these are the 20-50 nm size bins.
_small_bins = data.iloc[:, 1:9]

# Row-wise total over the selected columns.
small_particle_data = _small_bins.sum(axis=1)
# Row-wise mean over the same columns.
small_particle_data_mean = _small_bins.mean(axis=1)

# Plot the row means in the upper slot of a two-row figure.
fig = plt.figure()
ax = fig.add_subplot(211)
(line,) = ax.plot(small_particle_data_mean, color='blue', linewidth=2)

# Optional: switch the y-axis to a logarithmic scale.
#ax.set_yscale('log')
small_particle_data_mean
date
2010-01-01 00:00:00    28.691637
2010-01-01 01:00:00    24.526063
2010-01-01 02:00:00    21.505537
2010-01-01 03:00:00    21.263212
2010-01-01 04:00:00    22.855387
                         ...    
2010-12-31 19:00:00          NaN
2010-12-31 20:00:00          NaN
2010-12-31 21:00:00          NaN
2010-12-31 22:00:00          NaN
2010-12-31 23:00:00    33.761125
Length: 8760, dtype: float64
# Average the row means within each calendar month ('M' resampling) and
# draw the resulting monthly series as a single line.
_monthly_mean = small_particle_data_mean.resample('M').mean()
_monthly_mean.plot()
<matplotlib.axes._subplots.AxesSubplot at 0x7fe2c2efe748>
# Monthly means of the small-particle series.
_se = small_particle_data_mean.resample('M').mean()

# Column labels used for the year, the month and the value column.
Y = 'y'
M = 'm'
var = 'spdm'

# Wrap the monthly means in a one-column frame, add the month and the year
# taken from the datetime index, then make the month the new index.
_df = pd.DataFrame(_se, columns=[var])
_df = _df.assign(**{M: _df.index.month, Y: _df.index.year}).set_index(M)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-202-bf7276411ff0> in <module>
----> 1 _df = pd.DataFrame(_se,columns=[var])
      2 _df[M] = _df.index.month
      3 _df[Y] = _df.index.year
      4 
      5 _df= _df.set_index(M)

NameError: name 'var' is not defined
# Split the frame by the 'y' (year) column and plot 'spdm' once per group.
_df.groupby(by='y').plot(y='spdm', subplots=True)
y
2010    [AxesSubplot(0.125,0.2;0.775x0.68)]
2011    [AxesSubplot(0.125,0.2;0.775x0.68)]
dtype: object
# Plot 'spdm' against the frame's index.
# NOTE: color='y' is matplotlib's shorthand for yellow; it does not colour
# the line by the 'y' (year) column.
_df.plot(y='spdm', color='y')
<matplotlib.axes._subplots.AxesSubplot at 0x7fe21ef6c6a0>
# Plot 'spdm' against the month number.
# When the cells run top to bottom, 'm' has already been made the index of
# _df (set_index(M)), and DataFrame.plot(x=...) only accepts column labels,
# so x='m' raises KeyError. reset_index() turns 'm' back into a column first.
_df.reset_index().plot(x='m', y='spdm')
<matplotlib.axes._subplots.AxesSubplot at 0x7fe21f46e080>
# Overlay the monthly 'spdm' curves of 2010 and 2011 on one shared axes.
ax = plt.axes()

# 'm' is the index of _df after set_index(M); plot(x='m') needs it as a
# regular column, otherwise it raises KeyError.
_by_month = _df.reset_index()
_by_month[_by_month['y'] == 2010].plot(x='m', y='spdm', ax=ax, label='2010')
_by_month[_by_month['y'] == 2011].plot(x='m', y='spdm', ax=ax, label='2011')
<matplotlib.axes._subplots.AxesSubplot at 0x7fe21f0b7160>
# Display the frame with its index moved back into an ordinary column
# (returns a new frame; _df itself is left unchanged).
_df.reset_index()
m spdm y
0 1 34.447331 2010
1 2 65.437523 2010
2 3 56.889464 2010
3 4 158.139479 2010
4 5 557.717221 2010
5 6 564.419334 2010
6 7 457.328173 2010
7 8 297.233322 2010
8 9 65.094502 2010
9 10 52.024606 2010
10 11 23.716328 2010
11 12 33.761125 2010
12 1 NaN 2011
13 2 NaN 2011
14 3 NaN 2011
15 4 245.573784 2011
16 5 170.079196 2011
17 6 546.593581 2011
18 7 738.481650 2011
19 8 351.219401 2011
20 9 241.201983 2011
21 10 26.122856 2011
22 11 21.797788 2011
23 12 43.021865 2011
# Flat copy of the monthly table with the index turned back into a column.
_df1 = _df.reset_index()

# Wide layout: one row per month 'm', one 'spdm' column per year 'y'.
_df2 = _df1.set_index(['m', 'y']).unstack('y')

# _gr was used without ever being defined (NameError on a fresh run).
# NOTE(review): defined here as the per-year grouping of _df, which matches
# the per-year axes output recorded for this cell; confirm this was the intent.
_gr = _df.groupby('y')
_gr.plot(y='spdm', subplots=True)
y
2010    [AxesSubplot(0.125,0.2;0.775x0.68)]
2011    [AxesSubplot(0.125,0.2;0.775x0.68)]
dtype: object