Skip to content
Snippets Groups Projects

Hacky way to convert Zarr 2 to Zarr 3

  • Clone with SSH
  • Clone with HTTPS
  • Embed
  • Share
    The snippet can be accessed without any authentication.
    Authored by Manuel Reis

    This snippet copies a Zarr v2 dataset into a new Zarr v3 dataset.

    Edited
    zarr2 1.32 KiB
    import zarr
    def convert_attrs(z2,z3):
        """Copy all attributes of ``z2`` onto ``z3``.

        Both arguments only need an ``attrs`` member: the source one must
        offer ``asdict()`` and the destination one ``update()``. Keys already
        present on ``z3`` are overwritten by the values from ``z2``; keys that
        exist only on ``z3`` are left in place.
        """
        source_attributes = z2.attrs.asdict()
        z3.attrs.update(source_attributes)
    
    def _convert_compressor(v2):
        """Translate the compressor of a Zarr v2 array into a Zarr v3 codec.

        Returns ``None`` for an uncompressed source array, so that the copy is
        created without compression as well, and a ``zarr.codecs.BloscCodec``
        carrying the same settings for a Blosc-compressed one.

        Raises:
            NotImplementedError: if the source compressor does not expose the
                Blosc settings (``cname``, ``clevel``, ``shuffle``,
                ``blocksize``) this converter knows how to translate.
        """
        compressor = v2.compressor
        if compressor is None:
            return None

        blosc_settings = ('cname', 'clevel', 'shuffle', 'blocksize')
        if not all(hasattr(compressor, setting) for setting in blosc_settings):
            raise NotImplementedError(
                'Cannot convert {!r}: only Blosc-compressed or uncompressed '
                'arrays are supported, got {!r}'.format(v2.name, compressor)
            )

        if compressor.shuffle == -1:
            # numcodecs AUTOSHUFFLE (-1) has no integer counterpart accepted by
            # BloscShuffle.from_int. numcodecs documents it as bit-shuffle for
            # itemsize 1 and byte-shuffle otherwise, so resolve it explicitly.
            if v2.dtype.itemsize == 1:
                shuffle = zarr.codecs.BloscShuffle.bitshuffle
            else:
                shuffle = zarr.codecs.BloscShuffle.shuffle
        else:
            shuffle = zarr.codecs.BloscShuffle.from_int(compressor.shuffle)

        return zarr.codecs.BloscCodec(
            cname=compressor.cname,
            clevel=compressor.clevel,
            shuffle=shuffle,
            blocksize=compressor.blocksize,
        )

    def convert_variable(v2,z3):
        """Copy one Zarr v2 array, with its data and attributes, into ``z3``.

        The new array is created under ``z3`` with the name reported by
        ``v2.name`` and the same shape, dtype, chunks, shards, order and fill
        value as the source. Dimension names are taken from the
        ``_ARRAY_DIMENSIONS`` attribute when it is present. An existing array
        of the same name is overwritten.

        Args:
            v2: source array; may be Blosc-compressed or uncompressed.
            z3: destination group in which the copy is created.

        Raises:
            NotImplementedError: if the source uses a compressor other than
                Blosc.
        """
        v3 = z3.create_array(
            name=v2.name,
            shape=v2.shape,
            dtype=v2.dtype,
            compressors=_convert_compressor(v2),
            chunks=v2.chunks,
            shards=v2.shards,
            order=v2.order,
            fill_value=v2.fill_value,
            dimension_names=v2.attrs.get('_ARRAY_DIMENSIONS'),
            overwrite=True,
        )
        # Ellipsis selects the whole array whatever its rank, including
        # zero-dimensional arrays for which a ``:`` slice is one index too many.
        # NOTE: the source array is read into memory in one piece.
        v3[...] = v2[...]
        convert_attrs(v2, v3)
    
    def convert_groups(g2,g3):
        """Recursively copy a Zarr v2 group hierarchy into a Zarr v3 hierarchy.

        Attributes of ``g2`` are copied to the group at the same path below
        ``g3``, every array directly inside ``g2`` is converted, and the
        function then recurses into each sub-group of ``g2``.

        Args:
            g2: source group (or sub-group during recursion).
            g3: root of the output hierarchy. It is handed down unchanged
                through the recursion because ``convert_variable`` positions
                each array by the full path reported in its ``name``.
        """
        # Only the source root maps onto ``g3`` itself. A nested group gets
        # its own output group, so its attributes do not overwrite the root's.
        target = g3.require_group(g2.path) if g2.path else g3
        convert_attrs(g2, target)
        for _, v2 in g2.arrays():
            convert_variable(v2, g3)
        # ``groups()`` yields (name, group) pairs; recurse on the group itself.
        for _, subgroup in g2.groups():
            convert_groups(subgroup, g3)
    
    def convert_zarr(z2,z3):
        """Convert the hierarchy ``z2`` into ``z3`` and consolidate its metadata.

        The whole tree is copied with ``convert_groups`` first; afterwards the
        metadata of the store backing ``z3`` is consolidated.
        """
        convert_groups(z2, z3)
        output_store = z3.store
        zarr.consolidate_metadata(output_store)
    
    
    # Command-line entry point: <input.zarr> [<output.zarr>]
    if __name__ == '__main__':
        import sys
        if len(sys.argv) not in (2, 3):
            print('Usage: {} <input.zarr> [<output.zarr>]'.format(sys.argv[0]))
            sys.exit(1)

        input_zarr = sys.argv[1]
        if len(sys.argv) == 3:
            output_zarr = sys.argv[2]
        else:
            # Strip trailing path separators (shell completion often adds one)
            # so that 'data.zarr/' defaults to 'data.zarr3' next to the input
            # instead of 'data.zarr/3' inside the input store.
            output_zarr = input_zarr.rstrip('/\\') + '3'
        z2 = zarr.open(input_zarr, mode='r')
        # Request format 3 explicitly rather than relying on the configured
        # default; mode 'w' replaces whatever already exists at the output path.
        z3 = zarr.open_group(output_zarr, mode='w', zarr_format=3)
        convert_zarr(z2, z3)
    0% Loading or .
    You are about to add 0 people to the discussion. Proceed with caution.
    Finish editing this message first!
    Please register or sign in to comment