Récupération des stations quotidiennes depuis les données Météo France
import json
import urllib.request
from glob import glob
import pandas as pd
import geopandas
# Query the data.gouv.fr dataset API and collect the download URL of every
# resource whose URL contains 'RR-T-Vent' and whose type is not 'documentation'.
dataset_id = '6569b51ae64326786e4e8e1a'
url = f'https://www.data.gouv.fr/api/1/datasets/{dataset_id}/'
with urllib.request.urlopen(url) as resp:
    json_content = json.load(resp)

urls = []
for resource in json_content.get('resources') or []:
    # A resource without a 'url' entry is skipped instead of raising
    # TypeError on the substring test.
    resource_url = resource.get('url') or ''
    if 'RR-T-Vent' in resource_url and resource.get('type') != 'documentation':
        urls.append(resource_url)

# Group the URLs by the second '_'-separated field of the file name
# (named `dep` here -- presumably the department code; confirm against the
# actual resource names). Result: {dep: [url, ...]}.
mydict = {}
for resource_url in urls:
    dep = resource_url.split('/')[-1].split('_')[1]
    mydict.setdefault(dep, []).append(resource_url)
# For each group of URLs in `mydict`, build one CSV listing every station once,
# together with the first and last date found for it in the downloaded files.
#
# Only these columns are needed, so nothing else is parsed from the archives.
# AAAAMMJJ is parsed as a %Y%m%d date and only serves to derive
# MIN_DATE / MAX_DATE.
# NOTE(review): NUM_POSTE is left to pandas' type inference; if station ids can
# start with '0', reading them as integers drops the leading zeros -- confirm.
station_columns = ['NUM_POSTE', 'NOM_USUEL', 'LAT', 'LON', 'ALTI', 'AAAAMMJJ']

for dep, values in mydict.items():
    frames = [
        pd.read_csv(url, compression='gzip', sep=';', quotechar='"', usecols=station_columns)
        for url in values
    ]
    df = pd.concat(frames, ignore_index=True)

    # Explicit copy: the columns added below must not be written onto a
    # selection of `df` (avoids SettingWithCopyWarning on pandas < 3).
    stations = df[station_columns].copy()
    stations['AAAAMMJJ'] = pd.to_datetime(stations['AAAAMMJJ'], format='%Y%m%d')

    # transform() broadcasts the per-station min/max back onto every row, so
    # the row kept by drop_duplicates() carries the full date range.
    dates_by_station = stations.groupby('NUM_POSTE')['AAAAMMJJ']
    stations['MIN_DATE'] = dates_by_station.transform('min')
    stations['MAX_DATE'] = dates_by_station.transform('max')

    stations = stations.drop(columns=['AAAAMMJJ']).drop_duplicates('NUM_POSTE')
    stations.to_csv(f'stations-RR-T-Vent-dep-{dep}.csv', index=False)
# Merge every 'stations-RR-T-Vent-dep-*.csv' file of the working directory into
# a single point layer (LON/LAT as EPSG:4326 coordinates) and write it as GeoJSON.
#
# glob() returns paths in arbitrary order; sorting keeps the feature order of
# the output identical from one run (or machine) to the next.
# NOTE(review): the pattern also matches files left over from earlier runs.
files_stations_rr_t_vent = sorted(glob('stations-RR-T-Vent-dep-*.csv'))
frames_stations_rr_t_vent = [pd.read_csv(input_file) for input_file in files_stations_rr_t_vent]

# ignore_index gives the merged frame a unique 0..n-1 index instead of
# repeating each file's own row numbers.
df_stations_rr_t_vent = pd.concat(frames_stations_rr_t_vent, ignore_index=True)

gdf_stations_rr_t_vent = geopandas.GeoDataFrame(
    df_stations_rr_t_vent,
    geometry=geopandas.points_from_xy(df_stations_rr_t_vent.LON, df_stations_rr_t_vent.LAT),
    crs="EPSG:4326",
)
gdf_stations_rr_t_vent.to_file('stations_rr_t_vent.geojson', driver='GeoJSON')