Page tree

Creating an Extendible Dataset

An extendible dataset is one whose dimensions can grow. HDF5 allows you to define a dataset to have certain initial dimensions, then to later increase the size of any of the initial dimensions.

HDF5 requires you to use chunking to define extendible datasets. This makes it possible to extend datasets efficiently without having to excessively reorganize storage. (To use chunking efficiently, be sure to see the advanced topic, Chunking in HDF5.)

The following operations are required in order to extend a dataset:

  1. Declare the dataspace of the dataset to have unlimited dimensions for all dimensions that might eventually be extended.
  2. Set dataset creation properties to enable chunking.
  3. Create the dataset.
  4. Extend the size of the dataset.

Programming Example

Description

This example shows how to create a 3 x 3 extendible dataset, write to that dataset, extend the dataset to 10x3, and write to the dataset again:

 C

/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 * Copyright by The HDF Group.                                               *
 * Copyright by the Board of Trustees of the University of Illinois.         *
 * All rights reserved.                                                      *
 *                                                                           *
 * This file is part of HDF5.  The full HDF5 copyright notice, including     *
 * terms governing use, modification, and redistribution, is contained in    *
 * the files COPYING and Copyright.html.  COPYING can be found at the root   *
 * of the source code distribution tree; Copyright.html can be found at the  *
 * root level of an installed copy of the electronic HDF5 document set and   *
 * is linked from the top-level documents page.  It can also be found at     *
 * http://hdfgroup.org/HDF5/doc/Copyright.html.  If you do not have          *
 * access to either file, you may request a copy from help@hdfgroup.org.     *
 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

/*
 *  This example shows how to work with extendible datasets. The dataset 
 *  must be chunked in order to be extendible.
 * 
 *  It is used in the HDF5 Tutorial.
 */


#include <stdio.h>

#include "hdf5.h"

#define FILENAME    "extend.h5"
#define DATASETNAME "ExtendibleArray"
#define RANK         2

int
main (void)
{
    hid_t        file;                          /* handles */
    hid_t        dataspace, dataset;  
    hid_t        filespace, memspace;
    hid_t        prop;                     

    hsize_t      dims[2]  = {3, 3};           /* dataset dimensions at creation time */		
    hsize_t      maxdims[2] = {H5S_UNLIMITED, H5S_UNLIMITED};
    herr_t       status;                             
    hsize_t      chunk_dims[2] = {2, 5};
    int          data[3][3] = { {1, 1, 1},    /* data to write */
                                {1, 1, 1},
                                {1, 1, 1} };      

    /* Variables used in extending and writing to the extended portion of dataset */
    hsize_t      size[2];
    hsize_t      offset[2];
    hsize_t      dimsext[2] = {7, 3};         /* extend dimensions */
    int          dataext[7][3] = { {2, 3, 4}, 
                                   {2, 3, 4}, 
                                   {2, 3, 4}, 
                                   {2, 3, 4}, 
                                   {2, 3, 4}, 
                                   {2, 3, 4}, 
                                   {2, 3, 4} };

    /* Variables used in reading data back */
    hsize_t      chunk_dimsr[2];
    hsize_t      dimsr[2];
    hsize_t      i, j;
    int          rdata[10][3];
    herr_t       status_n;                             
    int          rank, rank_chunk;

    /* Create the data space with unlimited dimensions. */
    dataspace = H5Screate_simple (RANK, dims, maxdims); 

    /* Create a new file. If file exists its contents will be overwritten. */
    file = H5Fcreate (FILENAME, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);

    /* Modify dataset creation properties, i.e. enable chunking  */
    prop = H5Pcreate (H5P_DATASET_CREATE);
    status = H5Pset_chunk (prop, RANK, chunk_dims);

    /* Create a new dataset within the file using chunk 
       creation properties.  */
    dataset = H5Dcreate2 (file, DATASETNAME, H5T_NATIVE_INT, dataspace,
                         H5P_DEFAULT, prop, H5P_DEFAULT);

    /* Write data to dataset */
    status = H5Dwrite (dataset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL,
                       H5P_DEFAULT, data);

    /* Extend the dataset. Dataset becomes 10 x 3  */
    size[0] = dims[0]+ dimsext[0];
    size[1] = dims[1];
    status = H5Dset_extent (dataset, size);

    /* Select a hyperslab in extended portion of dataset  */
    filespace = H5Dget_space (dataset);
    offset[0] = 3;
    offset[1] = 0;
    status = H5Sselect_hyperslab (filespace, H5S_SELECT_SET, offset, NULL,
                                  dimsext, NULL);  

    /* Define memory space */
    memspace = H5Screate_simple (RANK, dimsext, NULL); 

    /* Write the data to the extended portion of dataset  */
    status = H5Dwrite (dataset, H5T_NATIVE_INT, memspace, filespace,
                       H5P_DEFAULT, dataext);

    /* Close resources */
    status = H5Dclose (dataset);
    status = H5Pclose (prop);
    status = H5Sclose (dataspace);
    status = H5Sclose (memspace);
    status = H5Sclose (filespace);
    status = H5Fclose (file);

    /********************************************
     * Re-open the file and read the data back. *
     ********************************************/

    file = H5Fopen (FILENAME, H5F_ACC_RDONLY, H5P_DEFAULT);
    dataset = H5Dopen2 (file, DATASETNAME, H5P_DEFAULT);

    filespace = H5Dget_space (dataset);
    rank = H5Sget_simple_extent_ndims (filespace);
    status_n = H5Sget_simple_extent_dims (filespace, dimsr, NULL);

    prop = H5Dget_create_plist (dataset);

    if (H5D_CHUNKED == H5Pget_layout (prop)) 
       rank_chunk = H5Pget_chunk (prop, rank, chunk_dimsr);

    memspace = H5Screate_simple (rank, dimsr, NULL);
    status = H5Dread (dataset, H5T_NATIVE_INT, memspace, filespace,
                      H5P_DEFAULT, rdata);

    printf("\n");
    printf("Dataset: \n");
    for (j = 0; j < dimsr[0]; j++)
    {
       for (i = 0; i < dimsr[1]; i++)
           printf("%d ", rdata[j][i]);
       printf("\n");
    }

    status = H5Pclose (prop);
    status = H5Dclose (dataset);
    status = H5Sclose (filespace);
    status = H5Sclose (memspace);
    status = H5Fclose (file);
}

 Fortran

! * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
!   Copyright by The HDF Group.                                               *
!   Copyright by the Board of Trustees of the University of Illinois.         *
!   All rights reserved.                                                      *
!                                                                             *
!   This file is part of HDF5.  The full HDF5 copyright notice, including     *
!   terms governing use, modification, and redistribution, is contained in    *
!   the files COPYING and Copyright.html.  COPYING can be found at the root   *
!   of the source code distribution tree; Copyright.html can be found at the  *
!   root level of an installed copy of the electronic HDF5 document set and   *
!   is linked from the top-level documents page.  It can also be found at     *
!   http://hdfgroup.org/HDF5/doc/Copyright.html.  If you do not have          *
!   access to either file, you may request a copy from help@hdfgroup.org.     *
! * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
!
! This example extends an HDF5 dataset. It is used in the HDF5 Tutorial.

PROGRAM H5_EXTEND

  !
  ! Creates a chunked 3 x 3 integer dataset with unlimited maximum
  ! dimensions, writes to it, extends it to 3 x 10, writes the new
  ! 3 x 7 portion, then re-opens the file and prints the dataset.
  !

  USE HDF5 ! This module contains all necessary modules 

  IMPLICIT NONE

  !
  !the dataset is stored in file "extend.h5" 
  !
  CHARACTER(LEN=9), PARAMETER :: filename = "extend.h5"

  !
  !dataset rank is 2 and name is "ExtendibleArray"
  !
  CHARACTER(LEN=15), PARAMETER :: dsetname = "ExtendibleArray"
  INTEGER :: RANK = 2

  INTEGER(HID_T) :: file_id       ! File identifier 
  INTEGER(HID_T) :: dset_id       ! Dataset identifier 
  INTEGER(HID_T) :: dataspace     ! Dataspace identifier 
  INTEGER(HID_T) :: memspace      ! Memory dataspace identifier 
  INTEGER(HID_T) :: crp_list      ! Dataset creation property identifier 

  !
  !dataset dimensions at creation time
  !
  INTEGER(HSIZE_T), DIMENSION(1:2) :: dims = (/3,3/)

  !
  !chunk dimensions (dimsc) and dimensions of the extension data (dimsm)
  !
  INTEGER(HSIZE_T), DIMENSION(1:2) :: dimsc = (/2,5/)
  INTEGER(HSIZE_T), DIMENSION(1:2) :: dimsm = (/3,7/)

  !
  !Maximum dimensions
  !
  INTEGER(HSIZE_T), DIMENSION(1:2) :: maxdims 

  INTEGER(HSIZE_T), DIMENSION(1:2) :: offset
  INTEGER(HSIZE_T), DIMENSION(1:2) :: count 

  !
  ! Variables for reading and writing 
  !
  INTEGER, DIMENSION(1:3,1:3)  :: data1 
  INTEGER, DIMENSION(1:21) :: data2 = &
       (/2, 3, 4, 2, 3, 4, 2, 3, 4, 2, 3, 4, 2, 3, 4, 2, 3, 4, 2, 3, 4/)
  INTEGER(HSIZE_T), DIMENSION(1:2) :: data_dims

  !
  !Size of data in the file 
  !
  INTEGER(HSIZE_T), DIMENSION(1:2) :: size

  !
  !general purpose integer 
  !
  INTEGER(HSIZE_T) :: i, j

  !
  !flag to check operation success 
  !
  INTEGER :: error 

  !
  !Variables used in reading data back
  !  
  INTEGER(HSIZE_T), DIMENSION(1:2) :: dimsr, maxdimsr
  INTEGER :: rankr
  INTEGER, DIMENSION(1:3,1:10)  :: rdata 

  !
  !Initialize FORTRAN predefined datatypes
  !
  CALL h5open_f(error) 

  !
  !Create a new file using default properties.
  ! 
  CALL h5fcreate_f(filename, H5F_ACC_TRUNC_F, file_id, error)

  !
  !Create the data space with unlimited dimensions.
  !
  maxdims = (/H5S_UNLIMITED_F, H5S_UNLIMITED_F/)

  CALL h5screate_simple_f(RANK, dims, dataspace, error, maxdims)

  !
  !Modify dataset creation properties, i.e. enable chunking
  !
  CALL h5pcreate_f(H5P_DATASET_CREATE_F, crp_list, error)

  CALL h5pset_chunk_f(crp_list, RANK, dimsc, error)

  !
  !Create a dataset with 3x3 dimensions using the crp_list creation
  !property list.
  !
  CALL h5dcreate_f(file_id, dsetname, H5T_NATIVE_INTEGER, dataspace, &
       dset_id, error, crp_list )
  CALL h5sclose_f(dataspace, error)

  !
  !Fill data array with 1's 
  !
  DO i = 1, dims(1)
     DO j = 1, dims(2)
        data1(i,j) = 1
     END DO
  END DO

  !
  !Write data array to dataset
  !
  data_dims(1:2) = (/3,3/) 
  CALL h5dwrite_f(dset_id, H5T_NATIVE_INTEGER, data1, data_dims, error)

  !
  !Extend the dataset. Its size becomes (/3,10/) in Fortran index order.
  !
  size(1:2)   = (/3,10/)
  CALL h5dset_extent_f(dset_id, size, error)

  offset(1:2) = (/0,3/)
  count(1:2)  = (/3,7/)

  CALL h5screate_simple_f (2, dimsm, memspace, error)

  !
  !Write to 3x7 extended part of dataset
  !   
  CALL h5dget_space_f(dset_id, dataspace, error)
  CALL h5sselect_hyperslab_f(dataspace, H5S_SELECT_SET_F, &
       offset, count, error)

  data_dims(1:2) = (/3,7/)
  CALL H5dwrite_f(dset_id, H5T_NATIVE_INTEGER, data2, data_dims, error, &
       memspace, dataspace)

  !
  !Close the objects that were opened.  The memory dataspace is closed
  !here as well because memspace is assigned a new dataspace further down.
  !
  CALL h5sclose_f(dataspace, error)
  CALL h5sclose_f(memspace, error)
  CALL h5pclose_f(crp_list, error)
  CALL h5dclose_f(dset_id, error)
  CALL h5fclose_f(file_id, error)

  !
  !read the data back
  !
  !Open the file.
  !
  CALL h5fopen_f (filename, H5F_ACC_RDONLY_F, file_id, error)

  !
  !Open the  dataset.
  !
  CALL h5dopen_f(file_id, dsetname, dset_id, error)

  !
  !Get dataset's dataspace handle.
  !
  CALL h5dget_space_f(dset_id, dataspace, error)

  !
  !Get dataspace's rank.
  !
  CALL h5sget_simple_extent_ndims_f(dataspace, rankr, error)

  !
  !Get dataspace's dimensions.
  ! 
  CALL h5sget_simple_extent_dims_f(dataspace, dimsr, maxdimsr, error)

  !
  !Get creation property list.
  !
  CALL h5dget_create_plist_f(dset_id, crp_list, error)

  !
  ! Fill read buffer with zeroes
  !
  rdata(1:dimsr(1),1:dimsr(2)) = 0

  !
  !create memory dataspace
  !
  CALL h5screate_simple_f(rankr, dimsr, memspace, error)

  !
  !Read data 
  !
  data_dims(1:2) = (/3,10/)
  CALL H5dread_f(dset_id, H5T_NATIVE_INTEGER, rdata, data_dims, &
       error, memspace, dataspace)

  WRITE(*,'(A)') "Dataset:" 
  DO i = 1, dimsr(1)
     WRITE(*,'(100(I0,1X))') rdata(i,1:dimsr(2))    
  END DO

  !
  !Close the objects that were opened.
  !
  CALL h5sclose_f(dataspace, error)
  CALL h5sclose_f(memspace, error)
  CALL h5pclose_f(crp_list, error)
  CALL h5dclose_f(dset_id, error)
  CALL h5fclose_f(file_id, error)

  !Close FORTRAN predefined datatypes
  !
  CALL h5close_f(error)

END PROGRAM H5_EXTEND

 C++

/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 * Copyright by The HDF Group.						     *
 * Copyright by the Board of Trustees of the University of Illinois.	     *
 * All rights reserved.							     *
 *	                                                                     *
 * This file is part of HDF5.  The full HDF5 copyright notice, including     *
 * terms governing use, modification, and redistribution, is contained in    *
 * the files COPYING and Copyright.html.  COPYING can be found at the root   *
 * of the source code distribution tree; Copyright.html can be found at the  *
 * root level of an installed copy of the electronic HDF5 document set and   *
 * is linked from the top-level documents page.  It can also be found at     *
 * http://hdfgroup.org/HDF5/doc/Copyright.html.  If you do not have	     *
 * access to either file, you may request a copy from help@hdfgroup.org.     *
 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

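/*
 *  This example illustrates how to work with an extendible dataset: it
 *  creates a chunked 3 x 3 dataset, extends it to 10 x 3, writes to the
 *  extended portion and reads the data back. It is used in the HDF5 Tutorial.
 */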

#include <iostream>
#include <string>

#include "H5Cpp.h"

#ifndef H5_NO_NAMESPACE
     using namespace H5;
#ifndef H5_NO_STD
     using std::cout;
     using std::endl;
#endif  // H5_NO_STD
#endif

const H5std_string	FILE_NAME("h5tutr_extend.h5");
const H5std_string	DATASETNAME("ExtendibleArray");

int main (void)
{
     hsize_t dims[2] = {3,3};	        // dataset dimensions at creation
     hsize_t maxdims[2] = {H5S_UNLIMITED, H5S_UNLIMITED}; 
     hsize_t chunk_dims[2] ={2, 5};
     int	   data[3][3] = { {1, 1, 1},    // data to write 
	                          {1, 1, 1},
	                          {1, 1, 1} };

     // Variables used in extending and writing to the extended portion of dataset 

     hsize_t size[2];
     hsize_t offset[2];
     hsize_t dimsext[2] = {7, 3};         // extend dimensions 
     int     dataext[7][3] = { {2, 3, 4},
	                              {2, 3, 4},
	                              {2, 3, 4},
	                              {2, 3, 4},
	                              {2, 3, 4},
	                              {2, 3, 4},
	                              {2, 3, 4} };

    // Try block to detect exceptions raised by any of the calls inside it
    try
    {
	// Turn off the auto-printing when failure occurs so that we can
	// handle the errors appropriately
	Exception::dontPrint();

	// Create a new file using the default property lists. 
	H5File file(FILE_NAME, H5F_ACC_TRUNC);

	// Create the data space for the dataset.  Note the use of pointer
	// for the instance 'dataspace'.  It can be deleted and used again
	// later for another dataspace.  An HDF5 identifier can be closed
	// by the destructor or the method 'close()'.
	DataSpace *dataspace = new DataSpace (2, dims, maxdims);

	// Modify dataset creation property to enable chunking
	DSetCreatPropList prop;
	prop.setChunk(2, chunk_dims);

	// Create the chunked dataset.  Note the use of pointer.
	DataSet *dataset = new DataSet(file.createDataSet( DATASETNAME, 
	                         PredType::STD_I32BE, *dataspace, prop) );
 
	// Write data to dataset.
	dataset->write(data, PredType::NATIVE_INT);

	// Extend the dataset. Dataset becomes 10 x 3.
	size[0] = dims[0] + dimsext[0];
	size[1] = dims[1];
	dataset->extend(size); 

	// Select a hyperslab in extended portion of the dataset.
	DataSpace *filespace = new DataSpace(dataset->getSpace ());
	offset[0] = 3;
	offset[1] = 0;
	filespace->selectHyperslab(H5S_SELECT_SET, dimsext, offset);
	
	// Define memory space.
	DataSpace *memspace = new DataSpace(2, dimsext, NULL);

	// Write data to the extended portion of the dataset.
	dataset->write(dataext, PredType::NATIVE_INT, *memspace, *filespace);

	// Close all objects and file.
	prop.close();
	delete filespace;
	delete memspace;
	delete dataspace;
	delete dataset;
	file.close();

	// --------------------------------------- 
	// Re-open the file and read the data back
	// --------------------------------------- 

	int        rdata[10][3];
	int        i,j, rank, rank_chunk;
	hsize_t    chunk_dimsr[2], dimsr[2];

	// Open the file and dataset.
	file.openFile(FILE_NAME, H5F_ACC_RDONLY);
	dataset = new DataSet(file.openDataSet( DATASETNAME)); 

	// Get the dataset's dataspace and creation property list.
	filespace = new DataSpace(dataset->getSpace());
	prop = dataset->getCreatePlist();

	// Get information to obtain memory dataspace.
	rank = filespace->getSimpleExtentNdims();
	herr_t status_n = filespace->getSimpleExtentDims(dimsr);

	if (H5D_CHUNKED == prop.getLayout())
	     rank_chunk = prop.getChunk(rank, chunk_dimsr);
	cout << "rank chunk = " << rank_chunk << endl;;

	memspace = new DataSpace(rank, dimsr, NULL);
	dataset->read(rdata, PredType::NATIVE_INT, *memspace, *filespace);
      
	cout << endl;
	for (j = 0; j < dimsr[0]; j++) {
	    for (i = 0; i < dimsr[1]; i++)
	       cout << " " <<  rdata[j][i];
	    cout << endl;
	}

	// Close all objects and file.
	prop.close();
	delete filespace;
	delete memspace;
	delete dataset;
	file.close();
      
    }  // end of try block

    // catch failure caused by the H5File operations
    catch(FileIException error)
    {
	error.printError();
	return -1;
    }

    // catch failure caused by the DataSet operations
    catch(DataSetIException error)
    {
	error.printError();
	return -1;
    }

    // catch failure caused by the DataSpace operations
    catch(DataSpaceIException error)
    {
	error.printError();
	return -1;
    }

    return 0;  // successfully terminated
}

For details on compiling an HDF5 application: Compiling HDF5 Applications

Remarks

  • An unlimited dimension dataspace is specified with the H5S_CREATE_SIMPLE call, by passing in H5S_UNLIMITED as an element of the maxdims array.

  • The H5P_CREATE call creates a new property as an instance of a property list class. For creating an extendible array dataset, pass in H5P_DATASET_CREATE for the property list class.

  • The H5P_SET_CHUNK call modifies a Dataset Creation Property List instance to store a chunked layout dataset and sets the size of the chunks used.

  • To extend an unlimited dimension dataset use the H5D_SET_EXTENT call. Please be aware that after this call, the dataset's dataspace must be refreshed with H5D_GET_SPACE before more data can be accessed.

  • The H5P_GET_CHUNK call retrieves the size of chunks for the raw data of a chunked layout dataset.

  • Once there is no longer a need for a Property List instance, it should be closed with the H5P_CLOSE call.

--- Last Modified: December 18, 2017 | 02:20 PM