Search
Search CMIP6 historical data
import xarray as xr
import os,glob
import pandas as pd
from pathlib import Path
# Column names used for the DataFrame built by search_cmip6_hist.
# Path and file-name derived columns:
FILES = 'FILE_PATH'
MODEL = 'MODEL'
# NOTE(review): VAR is not part of the column order returned by the visible
# search function - confirm whether it is still needed elsewhere.
VAR = 'VAR_NAME'
NAME  = 'filename'
FRQ   = 'FREQUENCY'
# NOTE(review): RIPF is likewise not part of the returned column order.
RIPF  = 'RIPF'
# Numeric pieces of the r<R>i<I>p<P>f<F> label found in the file name:
RR   = 'REALIZATION'
II    = 'INDEX'
PP    = 'PHYSICS'
FF    = 'FORCING'
# Full r..i..p..f.. string, and MODEL concatenated with LABEL:
LABEL = 'LABEL'
ID    = 'ID'
# Start / end stamps taken from the "<start>-<end>.nc" tail of the file name:
TS    = 'TIME START'
TE    = 'TIME END'
from os.path import expanduser
def _as_int_column(values: pd.Series) -> pd.Series:
    '''
    Convert a column of extracted digit strings to integers.

    Rows without a match (NaN) do not raise: in that case the column is
    returned with the nullable ``Int64`` dtype. When every row matched, the
    plain integer dtype is returned.
    '''
    numbers = pd.to_numeric(values, errors='coerce')
    if numbers.isna().any():
        # A plain integer column cannot hold NaN; the nullable dtype keeps
        # the matched rows integral instead of failing the whole search.
        return numbers.astype('Int64')
    return numbers.astype(int)


def search_cmip6_hist(
        wildcard: str = '*',
        model: str = '*',
        label: str = '*',
) -> pd.DataFrame:
    '''
    Search the historical CMIP6 folder at NIRD and return a dataframe
    with the results.

    The glob pattern searched is
    ``~/shared-cmip6-for-ns1000k/historical/<model>/<label>/<wildcard>``.
    Everything except the model column (the name of the directory two levels
    above each file) is parsed from the file name.

    Parameters
    ----------
    wildcard
        glob pattern for the file name. default is *
    model
        glob pattern or name for the model directory. default is *
    label
        glob pattern or name for the label directory (the r..i..p..f..
        string: realization, index, physics, forcing). default is *

    Returns
    -------
    df: pd.DataFrame
        one row per matching file with the columns MODEL, NAME, FRQ, FILES,
        TS, TE, RR, II, PP, FF, LABEL and ID (in that order). An empty frame
        with the same columns is returned when nothing matches. Fields that
        cannot be parsed from a file name are left missing.

    Example
    -------
    >>> search_cmip6_hist(wildcard='tas*')  # doctest: +SKIP

    '''
    home_path = expanduser("~")
    shared_path = 'shared-cmip6-for-ns1000k/historical'
    historical_path = os.path.join(home_path, shared_path, model, label, wildcard)
    files = glob.glob(historical_path)

    ORDER = [MODEL, NAME, FRQ, FILES, TS, TE, RR, II, PP, FF, LABEL, ID]
    if not files:
        return pd.DataFrame([], columns=ORDER)

    df = pd.DataFrame(files, columns=[FILES])

    # Directory layout is .../<model>/<label>/<file>, so the model is the
    # name of the grandparent directory.
    df[MODEL] = df[FILES].apply(lambda f: Path(f).parents[1].name)
    df[NAME] = df[FILES].apply(lambda f: Path(f).name)
    names = df[NAME]

    # Lower-case tail of the second name field, e.g. 'mon' from 'Amon'.
    df[FRQ] = names.str.extract(r'^.*?_[A-Z]*([a-z]*).*_', expand=False)

    # "<start>-<end>.nc" at the end of the name; missing for files without
    # a time range in their name.
    period = names.str.extract(r'_(\d+)-(\d+)\.nc')
    df[TS] = period[0]
    df[TE] = period[1]

    # One pass over the r<R>i<I>p<P>f<F> label; digits only, so the integer
    # conversion cannot be handed arbitrary text.
    ripf = names.str.extract(r'_r(\d+)i(\d+)p(\d+)f(\d+)_')
    for position, column in enumerate((RR, II, PP, FF)):
        df[column] = _as_int_column(ripf[position])

    df[LABEL] = names.str.extract(r'_(r\d+i\d+p\d+f\d+)_', expand=False)
    # Model and label are concatenated without a separator.
    df[ID] = df[MODEL] + df[LABEL]

    return df[ORDER]
# search all models and labels for files whose name starts with "tas"
df = search_cmip6_hist(wildcard='tas*')
# uncomment the next line to display the whole table
#df

# display five randomly chosen rows instead of the whole table
df.sample(5)
MODEL filename FREQUENCY FILE_PATH TIME START TIME END REALIZATION INDEX PHYSICS FORCING LABEL ID
369 SAM0-UNICON tasmin_day_SAM0-UNICON_historical_r1i1p1f1_gn_... day /home/28f6ea40-2d3059-2d4f6b-2d8429-2deb8e9564... 19140101 19141231 1 1 1 1 r1i1p1f1 SAM0-UNICONr1i1p1f1
2114 EC-Earth3-Veg tas_Amon_EC-Earth3-Veg_historical_r1i1p1f1_gr_... mon /home/28f6ea40-2d3059-2d4f6b-2d8429-2deb8e9564... 186201 186212 1 1 1 1 r1i1p1f1 EC-Earth3-Vegr1i1p1f1
676 CanESM5 tas_day_CanESM5_historical_r13i1p2f1_gn_185001... day /home/28f6ea40-2d3059-2d4f6b-2d8429-2deb8e9564... 18500101 20141231 13 1 2 1 r13i1p2f1 CanESM5r13i1p2f1
2517 FGOALS-g3 tas_Amon_FGOALS-g3_historical_r1i1p1f1_gn_1930... mon /home/28f6ea40-2d3059-2d4f6b-2d8429-2deb8e9564... 193001 193912 1 1 1 1 r1i1p1f1 FGOALS-g3r1i1p1f1
2447 FGOALS-g3 tasmax_day_FGOALS-g3_historical_r1i1p1f1_gn_18... day /home/28f6ea40-2d3059-2d4f6b-2d8429-2deb8e9564... 18560101 18561231 1 1 1 1 r1i1p1f1 FGOALS-g3r1i1p1f1
# show every column of the row at position 3
df.iloc[3]
MODEL                                               IPSL-CM6A-LR
filename       tasmin_Amon_IPSL-CM6A-LR_historical_r1i1p1f1_g...
FREQUENCY                                                    mon
FILE_PATH      /home/28f6ea40-2d3059-2d4f6b-2d8429-2deb8e9564...
TIME START                                                185001
TIME END                                                  201412
REALIZATION                                                    1
INDEX                                                          1
PHYSICS                                                        1
FORCING                                                        1
LABEL                                                   r1i1p1f1
ID                                          IPSL-CM6A-LRr1i1p1f1
Name: 3, dtype: object
# take the file path from the row at position 3
_path = df.iloc[3]['FILE_PATH']
# open that file as an xarray dataset
ds = xr.open_dataset(_path)
# restrict the search to a single model
df = search_cmip6_hist(wildcard='tas*',model='IPSL-CM6A-LR')

# restrict the search to a single model and a single label
df = search_cmip6_hist(wildcard='tas*',model='IPSL-CM6A-LR',label='r5i1p1f1')
df
MODEL filename FREQUENCY FILE_PATH TIME START TIME END REALIZATION INDEX PHYSICS FORCING LABEL ID
0 IPSL-CM6A-LR tas_Amon_IPSL-CM6A-LR_historical_r5i1p1f1_gr_1... mon /home/28f6ea40-2d3059-2d4f6b-2d8429-2deb8e9564... 185001 201412 5 1 1 1 r5i1p1f1 IPSL-CM6A-LRr5i1p1f1
# glob wildcards (*) can be used in the model and/or the label
df = search_cmip6_hist(wildcard='tas*',model='IPSL-CM6*',label='r*i1p1f1')
df
MODEL filename FREQUENCY FILE_PATH TIME START TIME END REALIZATION INDEX PHYSICS FORCING LABEL ID
0 IPSL-CM6A-LR tas_Amon_IPSL-CM6A-LR_historical_r2i1p1f1_gr_1... mon /home/28f6ea40-2d3059-2d4f6b-2d8429-2deb8e9564... 185001 201412 2 1 1 1 r2i1p1f1 IPSL-CM6A-LRr2i1p1f1
1 IPSL-CM6A-LR tas_Amon_IPSL-CM6A-LR_historical_r5i1p1f1_gr_1... mon /home/28f6ea40-2d3059-2d4f6b-2d8429-2deb8e9564... 185001 201412 5 1 1 1 r5i1p1f1 IPSL-CM6A-LRr5i1p1f1
2 IPSL-CM6A-LR tas_ImonGre_IPSL-CM6A-LR_historical_r1i1p1f1_g... mon /home/28f6ea40-2d3059-2d4f6b-2d8429-2deb8e9564... 185001 201412 1 1 1 1 r1i1p1f1 IPSL-CM6A-LRr1i1p1f1
3 IPSL-CM6A-LR tasmin_Amon_IPSL-CM6A-LR_historical_r1i1p1f1_g... mon /home/28f6ea40-2d3059-2d4f6b-2d8429-2deb8e9564... 185001 201412 1 1 1 1 r1i1p1f1 IPSL-CM6A-LRr1i1p1f1
4 IPSL-CM6A-LR tasmin_day_IPSL-CM6A-LR_r1i1p1f1_historical_IP... day /home/28f6ea40-2d3059-2d4f6b-2d8429-2deb8e9564... NaN NaN 1 1 1 1 r1i1p1f1 IPSL-CM6A-LRr1i1p1f1
5 IPSL-CM6A-LR tasmin_day_IPSL-CM6A-LR_historical_r1i1p1f1_gr... day /home/28f6ea40-2d3059-2d4f6b-2d8429-2deb8e9564... 18500101 20141231 1 1 1 1 r1i1p1f1 IPSL-CM6A-LRr1i1p1f1
6 IPSL-CM6A-LR tas_day_IPSL-CM6A-LR_historical_r1i1p1f1_gr_18... day /home/28f6ea40-2d3059-2d4f6b-2d8429-2deb8e9564... 18500101 20141231 1 1 1 1 r1i1p1f1 IPSL-CM6A-LRr1i1p1f1
7 IPSL-CM6A-LR tas_ImonAnt_IPSL-CM6A-LR_historical_r1i1p1f1_g... mon /home/28f6ea40-2d3059-2d4f6b-2d8429-2deb8e9564... 185001 201412 1 1 1 1 r1i1p1f1 IPSL-CM6A-LRr1i1p1f1
8 IPSL-CM6A-LR tasmax_Amon_IPSL-CM6A-LR_historical_r1i1p1f1_g... mon /home/28f6ea40-2d3059-2d4f6b-2d8429-2deb8e9564... 185001 201412 1 1 1 1 r1i1p1f1 IPSL-CM6A-LRr1i1p1f1
9 IPSL-CM6A-LR tas_Amon_IPSL-CM6A-LR_historical_r1i1p1f1_gr_1... mon /home/28f6ea40-2d3059-2d4f6b-2d8429-2deb8e9564... 185001 201412 1 1 1 1 r1i1p1f1 IPSL-CM6A-LRr1i1p1f1
10 IPSL-CM6A-LR tas_day_IPSL-CM6A-LR_r1i1p1f1_historical_IPSL-... day /home/28f6ea40-2d3059-2d4f6b-2d8429-2deb8e9564... NaN NaN 1 1 1 1 r1i1p1f1 IPSL-CM6A-LRr1i1p1f1
11 IPSL-CM6A-LR tasmax_day_IPSL-CM6A-LR_historical_r1i1p1f1_gr... day /home/28f6ea40-2d3059-2d4f6b-2d8429-2deb8e9564... 18500101 20141231 1 1 1 1 r1i1p1f1 IPSL-CM6A-LRr1i1p1f1
12 IPSL-CM6A-LR tasmax_day_IPSL-CM6A-LR_r1i1p1f1_historical_IP... day /home/28f6ea40-2d3059-2d4f6b-2d8429-2deb8e9564... NaN NaN 1 1 1 1 r1i1p1f1 IPSL-CM6A-LRr1i1p1f1
13 IPSL-CM6A-LR tas_Amon_IPSL-CM6A-LR_r1i1p1f1_historical_IPSL... mon /home/28f6ea40-2d3059-2d4f6b-2d8429-2deb8e9564... NaN NaN 1 1 1 1 r1i1p1f1 IPSL-CM6A-LRr1i1p1f1
14 IPSL-CM6A-LR tas_Amon_IPSL-CM6A-LR_historical_r4i1p1f1_gr_1... mon /home/28f6ea40-2d3059-2d4f6b-2d8429-2deb8e9564... 185001 201412 4 1 1 1 r4i1p1f1 IPSL-CM6A-LRr4i1p1f1
15 IPSL-CM6A-LR tas_Amon_IPSL-CM6A-LR_historical_r3i1p1f1_gr_1... mon /home/28f6ea40-2d3059-2d4f6b-2d8429-2deb8e9564... 185001 201412 3 1 1 1 r3i1p1f1 IPSL-CM6A-LRr3i1p1f1
16 IPSL-CM6A-LR tas_Amon_IPSL-CM6A-LR_historical_r9i1p1f1_gr_1... mon /home/28f6ea40-2d3059-2d4f6b-2d8429-2deb8e9564... 185001 201412 9 1 1 1 r9i1p1f1 IPSL-CM6A-LRr9i1p1f1
17 IPSL-CM6A-LR tas_Amon_IPSL-CM6A-LR_historical_r8i1p1f1_gr_1... mon /home/28f6ea40-2d3059-2d4f6b-2d8429-2deb8e9564... 185001 201412 8 1 1 1 r8i1p1f1 IPSL-CM6A-LRr8i1p1f1
18 IPSL-CM6A-LR tas_Amon_IPSL-CM6A-LR_historical_r10i1p1f1_gr_... mon /home/28f6ea40-2d3059-2d4f6b-2d8429-2deb8e9564... 185001 201412 10 1 1 1 r10i1p1f1 IPSL-CM6A-LRr10i1p1f1
19 IPSL-CM6A-LR tas_Amon_IPSL-CM6A-LR_historical_r7i1p1f1_gr_1... mon /home/28f6ea40-2d3059-2d4f6b-2d8429-2deb8e9564... 185001 201412 7 1 1 1 r7i1p1f1 IPSL-CM6A-LRr7i1p1f1
20 IPSL-CM6A-LR tas_Amon_IPSL-CM6A-LR_historical_r6i1p1f1_gr_1... mon /home/28f6ea40-2d3059-2d4f6b-2d8429-2deb8e9564... 185001 201412 6 1 1 1 r6i1p1f1 IPSL-CM6A-LRr6i1p1f1