#!/usr/bin/env python
# coding: utf-8

# # wetterdienst - A simple example
#
# pip install wetterdienst

# ## Import modules necessary for general functioning

# In[1]:

import warnings

# Installed before the remaining imports, so warnings raised while importing
# are ignored too.
warnings.filterwarnings("ignore")

from wetterdienst import metadata_for_climate_observations, \
    collect_climate_observations_data, get_nearby_stations, \
    discover_climate_observations
from wetterdienst import PeriodType, TimeResolution, Parameter

# `get_ipython` only exists inside IPython/Jupyter. Guard the magic so the
# exported script can also be run with a plain Python interpreter.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass

import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib import cm

# Which parameters are available?

# In[2]:

# all
print("All available combinations")
print(
    discover_climate_observations()
)

# selection
print("Selection of daily historical data")
print(
    discover_climate_observations(
        time_resolution=TimeResolution.DAILY,
        period_type=PeriodType.HISTORICAL
    )
)

# ## 1. First check the metadata to inform yourself of available stations
# (here we pick historical daily precipitation - hdp)

# In[3]:

metadata_hdp = metadata_for_climate_observations(
    Parameter.PRECIPITATION_MORE, TimeResolution.DAILY, PeriodType.HISTORICAL)

print("Number of stations with available data: ", metadata_hdp["HAS_FILE"].sum())
print("Some of the stations:")
metadata_hdp.head()

# The metadata includes an id, the range of the measurements, the position
# (including height) as well as the place and state of the station and whether
# it has a file. With the following plot we want to show a map of those stations:

# In[4]:

# `plt.get_cmap` is used instead of `cm.get_cmap`, which newer matplotlib
# releases no longer provide.
cmap = plt.get_cmap('viridis')

# Colour classes are delimited by the quartiles of the station heights.
bounds = metadata_hdp.STATION_HEIGHT.quantile([0, 0.25, 0.5, 0.75, 1]).values
norm = mpl.colors.BoundaryNorm(bounds, cmap.N)

fig, ax = plt.subplots(figsize=(10, 10))
plot = metadata_hdp.plot.scatter(
    x="LON", y="LAT", c="STATION_HEIGHT", cmap=cmap, norm=norm, ax=ax)
plot.set_title("Map of daily precipitation stations in Germany\n"
               "Color refers to height of station")
plt.show()

# ## 2. The usual way of retrieving data

# Usually there are three steps to follow:
# - select indexed files based on
#     - its station_id
#         - "1048" for Dresden, Germany
#     - its parameter
#         - "kl" for climate
#     - its time_resolution
#         - "daily" for daily data
#     - its period_type
#         - "historical" for data up to the end of the last year
# - download the resulting list of files
# - parse it into pandas.DataFrames
#
# We have summarized those steps into one:
# - collect_climate_observations_data
#
# Let's try it out for the above selection:

# In[5]:

print("Receiving historical daily climate data for Dresden-Klotzsche (1048)")
station_data = collect_climate_observations_data(
    [1048], Parameter.CLIMATE_SUMMARY, TimeResolution.DAILY, PeriodType.HISTORICAL,
    tidy_data=False
)

station_data

# See that DATE is already parsed, so we can easily get some nice graphs with
# matplotlib, which we will do in the next part.

# ## 3. Let's create some plots

# First, to handle the data more easily, we want it to be transformed from
# tabular to column data: instead of having the data in several columns with an
# additional date column, we want columns where each row defines the date of the
# measurement, the element and the exact value. We will therefore use the
# tidy_data option.

# In[6]:

print("Receiving historical daily climate data for Dresden-Klotzsche (1048), this time tidied.")
station_data = collect_climate_observations_data(
    [1048], Parameter.CLIMATE_SUMMARY, TimeResolution.DAILY, PeriodType.HISTORICAL,
    tidy_data=True
)

station_data.head()

# We can create a time series/histogram of some elements to compare the
# distribution of the values, here for example precipitation and mean temperature:

# In[7]:

elements_to_plot = ["RSK", "TMK"]
station_data_filtered = station_data[
    station_data["ELEMENT"].isin(elements_to_plot)
].sort_values(["STATION_ID", "ELEMENT", "DATE"])
station_data_grouped = station_data_filtered.groupby("ELEMENT")[["DATE", "VALUE"]]

# In[8]:

colors = ("blue", "orange")

# One column per element, two rows per column: the time series on top and the
# histogram below. `squeeze=False` keeps `axes` two-dimensional even for a
# single element, so `axes.T` always yields (top, bottom) pairs.
fig, axes = plt.subplots(
    2, len(elements_to_plot), squeeze=False, figsize=(10, 10))

for (k, v), (ax1, ax2), color in zip(station_data_grouped, axes.T, colors):
    v.plot(x="DATE", y="VALUE", label=k, alpha=.75, ax=ax1, c=color)
    v.plot(y="VALUE", kind="hist", label=k, alpha=.75, ax=ax2, color=color)

plt.tight_layout()
# Leave room at the top of the figure for the suptitle.
plt.subplots_adjust(top=0.9)
plt.suptitle("Precipitation/Mean temperature time series of Dresden, Germany")

plt.show()

# We can see here that the precipitation is strongly right-skewed (long tail of
# high values) and not normally distributed, while the temperature is almost
# normally distributed! The time series also gives a glimpse of how much data is
# available. A sad notice here is the gap during WW2.

# ## 4. Create yearly values

# In[9]:

# "RSK" values are summed per year; every other element is averaged per year.
for parameter, group in station_data_grouped:
    if parameter == "RSK":
        print(group.groupby(group["DATE"].dt.year)["VALUE"].sum())
    else:
        print(group.groupby(group["DATE"].dt.year)["VALUE"].mean())

# ## 5. Find a station
#
# We may want to find a station near a certain area. Therefore simply call
# get_nearby_stations

# In[10]:

get_nearby_stations(
    51.05089, 13.73832,
    "2000-01-01", "2010-01-01",
    Parameter.CLIMATE_SUMMARY,
    TimeResolution.DAILY,
    PeriodType.HISTORICAL,
    num_stations_nearby=5
)