Convert a set of CSVs to a NetCDF file
[ ]:
import pandas as pd
import xarray as xr
import glob
# Combine a folder of CSV files into a single gridded NetCDF file.
# Every CSV is expected to contain the coordinate columns named in
# `coord_cols`; all remaining columns become data variables.

# 1. Get a list of all CSV files in your folder, assuming they all start with 'data_'
csv_files = sorted(glob.glob("data_*.csv"))
if not csv_files:
    # Fail early with an actionable message; otherwise pd.concat() raises a
    # far less helpful "No objects to concatenate".
    raise ValueError("No files matching 'data_*.csv' were found in the current directory")

# 2. Read and concatenate all CSVs
df_list = [pd.read_csv(f) for f in csv_files]
df_all = pd.concat(df_list, ignore_index=True)

# 3. Convert time to datetime BEFORE building the index.
# The index values become the coordinate axes of the Dataset, so parsing first
# means the time axis is ordered chronologically instead of by raw string value
# (e.g. "2020-1-10" sorts before "2020-1-5" as text).
# Pass format=... to pd.to_datetime if your timestamps need an explicit format.
coord_cols = ['time', 'lat', 'lon']
df_all['time'] = pd.to_datetime(df_all['time'])

# 4. Make sure every (time, lat, lon) combination appears only once.
# Overlapping CSVs would otherwise fail inside to_xarray() with an error about a
# non-unique MultiIndex, without saying how many rows are affected.
duplicate_rows = df_all.duplicated(subset=coord_cols, keep=False)
if duplicate_rows.any():
    raise ValueError(
        f"{int(duplicate_rows.sum())} rows share the same (time, lat, lon) coordinates; "
        "remove or aggregate the duplicates before converting"
    )

# 5. Convert to xarray Dataset
# The columns in coord_cols become the coordinates/dimensions.
# Anything that should be considered a variable (like 'temperature', 'salinity', etc)
# should not be included in coord_cols.
ds = df_all.set_index(coord_cols).to_xarray()

# 6. Write to NetCDF
ds.to_netcdf("combined_data.nc")