Skip to content
Snippets Groups Projects
Commit 2c0390b1 authored by Fabian Wachsmann's avatar Fabian Wachsmann
Browse files

Added packems to intake notebook

parent 27d59c5d
No related branches found
No related tags found
No related merge requests found
%% Cell type:code id:d8f066a2-2da6-4366-9d0c-5de4d3e1df1d tags:
``` python
import intake
```
%% Cell type:code id:c01445bb-83d4-4110-a53d-64a8c0da7200 tags:
``` python
import pandas as pd
```
%% Cell type:code id:40148755-29b4-4cf4-8b4f-6a02ec1560b3 tags:
``` python
# Put a local checkout of the builder code at the front of the module search
# path. The directory lives in one user's home, so it has to be adapted when
# the notebook is run by anyone else.
# NOTE(review): presumably this is what makes `core` and `cmip` importable in
# a later cell — confirm.
import sys
sys.path.insert(0, '/home/k/k204210/volume/data-infrastructure-services/intake-esm/builder/ncar-builder/builders/')
```
%% Cell type:code id:50d151a4-11fc-40ce-837e-fa83d0349013 tags:
``` python
# Load the tape index listing: the first three lines are skipped and every
# remaining line is split on single spaces into eight named fields.
indexes = pd.read_csv(
    "/work/ik1017/CMIP6/meta/cmip6_tape_index_paths_adjusted_2.txt",
    names=["permissions", "owner", "size", "date", "time", "path", "temp", "tar"],
    sep=' ',
    skiprows=3,
)
```
%% Cell type:code id:c4d11ef2-a53a-47df-b5f5-0bf62294050a tags:
``` python
# Display the parsed listing to eyeball how the lines were split into columns.
indexes
```
%% Output
permissions owner size date time \
0 -r--r--r-- k204145/esgf 2.508455e+09 2019-09-30 23:11
1 -r--r--r-- k204145/esgf 2.370545e+09 2019-09-29 22:50
2 -r--r--r-- k204145/esgf 1.806243e+09 2019-09-29 22:50
3 -r--r--r-- k204145/esgf 2.491425e+09 2019-09-29 22:50
4 -r--r--r-- k204145/esgf 2.527008e+09 2019-09-29 22:50
... ... ... ... ... ...
1626476 -r--r--r-- k204210/esgf 7.618832e+07 2021-01-18 20:00
1626477 -r--r--r-- k204210/esgf 7.614116e+07 2021-01-18 20:00
1626478 -r--r--r-- k204210/esgf 7.609798e+07 2021-01-18 20:00
1626479 -r--r--r-- k204210/esgf 7.611006e+07 2021-01-18 20:00
1626480 -r--r--r-- k204210/esgf 2.439534e+07 2021-01-18 19:58
path temp \
0 ./DCPP/MPI-M/MPI-ESM1-2-HR/dcppA-hindcast/s196... ->
1 ./DCPP/MPI-M/MPI-ESM1-2-HR/dcppA-hindcast/s196... ->
2 ./DCPP/MPI-M/MPI-ESM1-2-HR/dcppA-hindcast/s196... ->
3 ./DCPP/MPI-M/MPI-ESM1-2-HR/dcppA-hindcast/s196... ->
4 ./DCPP/MPI-M/MPI-ESM1-2-HR/dcppA-hindcast/s196... ->
... ... ...
1626476 ./ScenarioMIP/MPI-M/MPI-ESM1-2-LR/ssp585/r9i1p... ->
1626477 ./ScenarioMIP/MPI-M/MPI-ESM1-2-LR/ssp585/r9i1p... ->
1626478 ./ScenarioMIP/MPI-M/MPI-ESM1-2-LR/ssp585/r9i1p... ->
1626479 ./ScenarioMIP/MPI-M/MPI-ESM1-2-LR/ssp585/r9i1p... ->
1626480 ./ScenarioMIP/MPI-M/MPI-ESM1-2-LR/ssp585/r9i1p... ->
tar
0 DCPP_001.tar
1 DCPP_001.tar
2 DCPP_001.tar
3 DCPP_001.tar
4 DCPP_001.tar
... ...
1626476 Update_2021-02-16_1206.tar
1626477 Update_2021-02-16_1206.tar
1626478 Update_2021-02-16_1206.tar
1626479 Update_2021-02-16_1206.tar
1626480 Update_2021-02-16_1206.tar
[1626481 rows x 8 columns]
%% Cell type:code id:138da396-255a-437b-88f6-1cc95986fdbb tags:
``` python
# Rows whose "path" field contains a colon take their path from "temp" instead.
# NOTE(review): presumably these are listing lines whose fields are shifted by
# one position (a clock time landing in "path") — confirm against the raw file.
# The colon is matched literally (regex=False); the former pattern '\:' relied
# on an invalid string escape, which newer Python versions warn about.
indexes.loc[indexes["path"].str.contains(':', regex=False), "path"] = indexes["temp"]
```
%% Cell type:code id:cdaa94c7-0478-42d1-b987-6ba63ce06b3f tags:
``` python
# Rows whose "path" contains a 2019-, 2020- or 2021- date fragment but no
# ".nc" take their path from the "tar" column instead.
# NOTE(review): presumably a date ended up in "path" for these rows — confirm.
# One alternation replaces the three per-year statements; the result is the
# same because each of them wrote the identical "tar" value. Patterns are raw
# strings so that '\.' is a regex escape rather than an invalid string escape.
# The year list is hard-coded and must be extended for newer listings.
indexes.loc[
    indexes["path"].str.contains(r'2019-|2020-|2021-')
    & ~indexes["path"].str.contains(r'\.nc'),
    "path",
] = indexes["tar"]
```
%% Cell type:code id:dc84a8b5-b704-480a-83a5-05746ffb7c6b tags:
``` python
# Keep only rows with a usable tar name.
# Missing values are removed first: `~` on a str.contains() result that still
# holds NaN fails, so the NaN filter has to run before the pattern filter.
indexes = indexes[indexes["tar"].notna()]
# Drop rows whose "tar" field still contains the literal arrow "->".
indexes = indexes[~indexes["tar"].str.contains('->', regex=False)]
```
%% Cell type:code id:7e8a0556-a4f6-4af8-9db3-9c85cf7a662f tags:
``` python
# Project-local helpers (not on PyPI); presumably resolved through the
# directory added to sys.path earlier in the notebook.
from core import (
    Builder,
    extract_attr_with_regex,
    get_asset_list,
    reverse_filename_format,
)
from cmip import cmip6_parser

# Column names handed to the Builder.
cmip_columns = [
    'activity_id', 'institution_id', 'source_id', 'experiment_id',
    'member_id', 'table_id', 'variable_id', 'grid_label',
    'dcpp_init_year', 'version', 'time_range', 'path',
]

# Run the CMIP6 parser over every path string of the cleaned listing.
b = Builder(cmip_columns)
df = b(indexes["path"].tolist(), cmip6_parser)
```
%% Output
Parsing list of assets...
Done...
%% Cell type:code id:64cede18-6817-40cb-aa73-273fda8cf525 tags:
``` python
# Attach the full archive location of the tar file to every catalog row.
#
# `indexes` has been row-filtered, so its index labels have gaps, while `df`
# was produced from a plain list of paths and therefore cannot share those
# labels. Assigning the Series directly would align on labels and silently
# attach wrong or missing tar names, so the values are assigned by position.
# NOTE(review): this assumes the Builder returns exactly one row per input
# path, in input order — confirm; the check below fails loudly otherwise.
if len(df) != len(indexes):
    raise ValueError(
        f"catalog has {len(df)} rows but the tape index has {len(indexes)}; "
        "cannot assign tar files by position"
    )
df["tar"] = ("/arch/ik1017/cmip6/CMIP6/" + indexes["tar"]).to_numpy()
```
%% Cell type:code id:ae4d7bff-969a-4943-900d-5fbe0c11c74e tags:
``` python
# Write the catalog as CSV, leaving out the DataFrame's index column.
df.to_csv(
    "/work/ik1017/CMIP6/meta/dkrz_cmip6_archive.csv",
    index=False,
)
```
%% Cell type:code id:e589b159-2853-4deb-a77e-7134fd46c588 tags:
``` python
```
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment