#!/usr/bin/env python
# coding: utf-8

# # wetterdienst - A simple example
# 
# pip install wetterdienst

# ## Import modules necessary for general functioning

# In[1]:


import warnings
warnings.filterwarnings("ignore")

from wetterdienst import metadata_for_climate_observations, \
    collect_climate_observations_data, get_nearby_stations, \
    discover_climate_observations
from wetterdienst import PeriodType, TimeResolution, Parameter

# Enable inline plotting when executed inside IPython/Jupyter. `get_ipython`
# only exists there, so a plain `python` run skips the magic instead of
# aborting with a NameError.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib import cm


# Which parameters are available?

# In[2]:


# No filters: print whatever discover_climate_observations reports by default.
print("All available combinations")
all_combinations = discover_climate_observations()
print(all_combinations)

# Restrict the overview to daily resolution and the historical period.
print("Selection of daily historical data")
daily_historical = discover_climate_observations(
    time_resolution=TimeResolution.DAILY,
    period_type=PeriodType.HISTORICAL,
)
print(daily_historical)


# ## 1. First check the metadata to inform yourself of available stations
# (here we pick historical daily precipitation - hdp)

# In[3]:


# Station metadata for historical daily precipitation ("hdp").
metadata_hdp = metadata_for_climate_observations(
    Parameter.PRECIPITATION_MORE,
    TimeResolution.DAILY,
    PeriodType.HISTORICAL,
)

# Summing the HAS_FILE column gives the number reported below.
stations_with_file = metadata_hdp["HAS_FILE"].sum()
print("Number of stations with available data: ", stations_with_file)
print("Some of the stations:")
metadata_hdp.head()


# The metadata includes the station id, the date range of the measurements, the
# position (including height), the place and state of the station, and whether a
# file is available for it. The following plot shows a map of those stations:

# In[4]:


# Colour the stations by height, with colour boundaries at the height quartiles.
# plt.get_cmap is used because matplotlib.cm.get_cmap was deprecated in
# matplotlib 3.7 and removed in 3.9.
cmap = plt.get_cmap('viridis')
bounds = metadata_hdp.STATION_HEIGHT.quantile([0, 0.25, 0.5, 0.75, 1]).values
norm = mpl.colors.BoundaryNorm(bounds, cmap.N)

# Scatter plot of station positions (longitude on x, latitude on y).
fig, ax = plt.subplots(figsize=(10, 10))
plot = metadata_hdp.plot.scatter(
    x="LON", y="LAT", c="STATION_HEIGHT", cmap=cmap, norm=norm, ax=ax)
plot.set_title("Map of daily precipitation stations in Germany\n"
               "Color refers to height of station")
plt.show()


# ## 2. The usual way of retrieving data

# Usually there are three steps to follow:
# - select indexed files based on
#     - its station_id
#         - "1048" for Dresden, Germany
#     - its parameter
#         - "kl" for climate
#     - its time_resolution
#         - "daily" for daily data
#     - its period_type
#         - "historical" for data up to the end of the last year
# - download the resulting list of files
# - parse it into pandas.DataFrames
# 
# We have summarized those steps into one:
# - collect_climate_observations_data
# 
# Let's try it out for the above selection:

# In[5]:


print("Receiving historical daily climate data for Dresden-Klotzsche (1048)")

# Positional request arguments, in the order the collector is called with:
# station ids, parameter, time resolution, period type.
dresden_request = (
    [1048],
    Parameter.CLIMATE_SUMMARY,
    TimeResolution.DAILY,
    PeriodType.HISTORICAL,
)
station_data = collect_climate_observations_data(*dresden_request, tidy_data=False)

station_data


# See that DATE is already parsed, so we can easily get some nice graphs with matplotlib,
# which we will do in the next part.

# ## 3. Let's create some plots

# First, to make the data easier to handle, we want to transform it from wide (tabular)
# to long (tidy) format: instead of one column per element plus a date column, each row
# holds the date of the measurement, the element and the measured value. We will
# therefore use the tidy_data option.

# In[6]:


print("Receiving historical daily climate data for Dresden-Klotzsche (1048), this time tidied.")

# Same request as for the untidied data; only the tidy_data flag is switched on.
tidy_request = (
    [1048],
    Parameter.CLIMATE_SUMMARY,
    TimeResolution.DAILY,
    PeriodType.HISTORICAL,
)
station_data = collect_climate_observations_data(*tidy_request, tidy_data=True)

station_data.head()


# We can create a time series/histogram of some elements to compare the distribution of
# the values, here for example precipitation and mean temperature:

# In[7]:


# Elements to compare (the surrounding text names precipitation and mean temperature).
elements_to_plot = ["RSK", "TMK"]

# Keep only rows of the selected elements, ordered by station, element and date.
is_selected = station_data["ELEMENT"].isin(elements_to_plot)
station_data_filtered = station_data.loc[is_selected].sort_values(
    by=["STATION_ID", "ELEMENT", "DATE"]
)

# One group per element, reduced to the date and value columns.
station_data_grouped = station_data_filtered.groupby("ELEMENT")[["DATE", "VALUE"]]


# In[8]:


colors = ("blue", "orange")

# One column per element; row 0 holds the time series, row 1 the histogram.
# The row count is fixed at 2 (two plot kinds) rather than tied to the number
# of elements, and squeeze=False keeps `axes` two-dimensional even when only
# a single element is plotted.
fig, axes = plt.subplots(
    2, len(elements_to_plot), figsize=(10, 10), squeeze=False)

# axes.T iterates column-wise, yielding the (time series, histogram) axes pair
# of each element.
for (element, values), (ax_series, ax_hist), color in zip(
        station_data_grouped, axes.T, colors):
    values.plot(x="DATE", y="VALUE", label=element, alpha=.75, ax=ax_series, color=color)
    values.plot(y="VALUE", kind="hist", label=element, alpha=.75, ax=ax_hist, color=color)

# Leave room at the top of the figure for the overall title.
plt.tight_layout()
plt.subplots_adjust(top=0.9)
plt.suptitle("Precipitation/Mean temperature time series of Dresden, Germany")

plt.show()


# We can see here that the precipitation is strongly right-skewed and not normally
# distributed, while the temperature is almost normally distributed! The time series
# also gives a glimpse of how much data is available. A sad detail here is the gap
# around WW2.

# ## 4. Create yearly values

# In[9]:


# Aggregate each element per calendar year: "RSK" is summed, every other
# element is averaged.
for element, frame in station_data_filtered.groupby("ELEMENT")[["DATE", "VALUE"]]:
    yearly_values = frame.groupby(frame["DATE"].dt.year)["VALUE"]
    print(yearly_values.sum() if element == "RSK" else yearly_values.mean())


# ## 5. Find a station
# 
# We may want to find stations near a certain location. To do so, simply call get_nearby_stations

# In[10]:


# Positional query arguments: two coordinates (presumably latitude and
# longitude; confirm against the wetterdienst API), two dates, then the
# parameter, time resolution and period type.
nearby_query = (
    51.05089,
    13.73832,
    "2000-01-01",
    "2010-01-01",
    Parameter.CLIMATE_SUMMARY,
    TimeResolution.DAILY,
    PeriodType.HISTORICAL,
)
get_nearby_stations(*nearby_query, num_stations_nearby=5)