Creating a Compressed Dataset

HDF5 requires the use of chunked storage to create a compressed dataset. (To use chunking efficiently, be sure to see the advanced topic, Chunking in HDF5.)

The following operations are required in order to create a compressed dataset:

  • Create a dataset creation property list.
  • Modify the dataset creation property list instance to enable chunking and compression.
  • Create the dataset.
  • Close the dataset creation property list and dataset.

For more information on compression, see the FAQ question on Using Compression in HDF5.
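
A minimal C sketch of these four operations, with error checking omitted, is shown below; the file name, dataset name, and chunk sizes are just placeholders, and the complete example (which also inspects the filters and reads the data back) follows in the Programming Example section.

#include "hdf5.h"

int main(void)
{
    hsize_t dims[2]  = {100, 20};        /* dataset dimensions */
    hsize_t cdims[2] = {20, 20};         /* chunk dimensions   */

    hid_t file_id  = H5Fcreate ("cmprss.h5", H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
    hid_t space_id = H5Screate_simple (2, dims, NULL);

    /* 1. Create a dataset creation property list. */
    hid_t dcpl_id = H5Pcreate (H5P_DATASET_CREATE);

    /* 2. Enable chunking and compression on the property list. */
    H5Pset_chunk (dcpl_id, 2, cdims);
    H5Pset_deflate (dcpl_id, 6);         /* ZLIB / DEFLATE, compression level 6 */

    /* 3. Create the dataset with the modified property list. */
    hid_t dset_id = H5Dcreate2 (file_id, "Compressed_Data", H5T_STD_I32BE,
                                space_id, H5P_DEFAULT, dcpl_id, H5P_DEFAULT);

    /* 4. Close the property list, the dataset, and the other objects. */
    H5Pclose (dcpl_id);
    H5Dclose (dset_id);
    H5Sclose (space_id);
    H5Fclose (file_id);
    return 0;
}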

Programming Example

Description

This example creates a chunked, ZLIB-compressed dataset. It also includes comments describing what would need to change to create an SZIP-compressed dataset instead. The example then reopens the dataset, prints the filter information, and reads the data back:

 C

/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 * Copyright by The HDF Group.                                               *
 * Copyright by the Board of Trustees of the University of Illinois.         *
 * All rights reserved.                                                      *
 *                                                                           *
 * This file is part of HDF5.  The full HDF5 copyright notice, including     *
 * terms governing use, modification, and redistribution, is contained in    *
 * the files COPYING and Copyright.html.  COPYING can be found at the root   *
 * of the source code distribution tree; Copyright.html can be found at the  *
 * root level of an installed copy of the electronic HDF5 document set and   *
 * is linked from the top-level documents page.  It can also be found at     *
 * http://hdfgroup.org/HDF5/doc/Copyright.html.  If you do not have          *
 * access to either file, you may request a copy from help@hdfgroup.org.     *
 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

/* 
 *  This example illustrates how to create a compressed dataset.
 *  It is used in the HDF5 Tutorial.
 */ 

#include "hdf5.h"

#define FILE    "cmprss.h5"
#define RANK    2
#define DIM0    100
#define DIM1    20
 
int main () {

    hid_t    file_id, dataset_id, dataspace_id; /* identifiers */
    hid_t    plist_id; 

    size_t   nelmts;
    unsigned flags, filter_info;
    H5Z_filter_t filter_type;

    herr_t   status;
    hsize_t  dims[2];
    hsize_t  cdims[2];
 
    int      i,j, numfilt;
    int      buf[DIM0][DIM1];
    int      rbuf [DIM0][DIM1];

    /* Uncomment these variables to use SZIP compression 
    unsigned szip_options_mask;
    unsigned szip_pixels_per_block;
    */

    /* Create a file.  */
    file_id = H5Fcreate (FILE, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);


    /* Create dataset "Compressed Data" in the group using absolute name.  */
    dims[0] = DIM0;
    dims[1] = DIM1;
    dataspace_id = H5Screate_simple (RANK, dims, NULL);

    plist_id  = H5Pcreate (H5P_DATASET_CREATE);

    /* Dataset must be chunked for compression */
    cdims[0] = 20;
    cdims[1] = 20;
    status = H5Pset_chunk (plist_id, 2, cdims);

    /* Set ZLIB / DEFLATE compression using compression level 6.
     * To use SZIP compression instead, comment out this call and
     * uncomment the SZIP lines below.
     */
    status = H5Pset_deflate (plist_id, 6);

    /* Uncomment these lines to set SZIP Compression 
    szip_options_mask = H5_SZIP_NN_OPTION_MASK;
    szip_pixels_per_block = 16;
    status = H5Pset_szip (plist_id, szip_options_mask, szip_pixels_per_block);
    */
    
    dataset_id = H5Dcreate2 (file_id, "Compressed_Data", H5T_STD_I32BE, 
                            dataspace_id, H5P_DEFAULT, plist_id, H5P_DEFAULT); 

    for (i = 0; i< DIM0; i++) 
        for (j=0; j<DIM1; j++) 
           buf[i][j] = i+j;

    status = H5Dwrite (dataset_id, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, buf);

    status = H5Sclose (dataspace_id);
    status = H5Dclose (dataset_id);
    status = H5Pclose (plist_id);
    status = H5Fclose (file_id);

    /* Now reopen the file and dataset in the file. */
    file_id = H5Fopen (FILE, H5F_ACC_RDWR, H5P_DEFAULT);
    dataset_id = H5Dopen2 (file_id, "Compressed_Data", H5P_DEFAULT);

    /* Retrieve filter information. */
    plist_id = H5Dget_create_plist (dataset_id);
    
    numfilt = H5Pget_nfilters (plist_id);
    printf ("Number of filters associated with dataset: %i\n", numfilt);
     
    for (i=0; i<numfilt; i++) {
       nelmts = 0;
       /* Query the i-th filter attached to the dataset. */
       filter_type = H5Pget_filter2 (plist_id, (unsigned) i, &flags, &nelmts, NULL, 0, NULL,
                     &filter_info);
       printf ("Filter Type: ");
       switch (filter_type) {
         case H5Z_FILTER_DEFLATE:
              printf ("H5Z_FILTER_DEFLATE\n");
              break;
         case H5Z_FILTER_SZIP:
              printf ("H5Z_FILTER_SZIP\n");
              break;
         default:
              printf ("Other filter type included.\n");
         }
    }

    status = H5Dread (dataset_id, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, 
                      H5P_DEFAULT, rbuf); 
    
    status = H5Dclose (dataset_id);
    status = H5Pclose (plist_id);
    status = H5Fclose (file_id);

    return 0;
}

 Fortran

! * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
!   Copyright by The HDF Group.                                               *
!   Copyright by the Board of Trustees of the University of Illinois.         *
!   All rights reserved.                                                      *
!                                                                             *
!   This file is part of HDF5.  The full HDF5 copyright notice, including     *
!   terms governing use, modification, and redistribution, is contained in    *
!   the files COPYING and Copyright.html.  COPYING can be found at the root   *
!   of the source code distribution tree; Copyright.html can be found at the  *
!   root level of an installed copy of the electronic HDF5 document set and   *
!   is linked from the top-level documents page.  It can also be found at     *
!   http://hdfgroup.org/HDF5/doc/Copyright.html.  If you do not have          *
!   access to either file, you may request a copy from help@hdfgroup.org.     *
! * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
! 
!  This example illustrates how to create a compressed dataset.
!  It is used in the HDF5 Tutorial.
! 
PROGRAM h5_cmprss

  USE HDF5 ! This module contains all necessary modules 

  IMPLICIT NONE
  !
  ! The dataset is stored in file "h5_cmprss.h5" 
  !
  CHARACTER(LEN=12), PARAMETER :: filename = "h5_cmprss.h5"
  INTEGER, PARAMETER  :: rank = 2   ! Rank of the data set
  INTEGER, PARAMETER  :: dim0 = 100 ! Data set sizes
  INTEGER, PARAMETER  :: dim1 = 20

  INTEGER(hid_t) :: file_id, dataset_id, dataspace_id ! Identifiers
  INTEGER(hid_t) :: plist_id ! Property list identifier

  INTEGER :: error
  INTEGER(hsize_t), DIMENSION(1:rank) :: dims ! dimensions of data
  INTEGER(hsize_t), DIMENSION(1:rank) :: cdims ! sizes of chunked data
 
  INTEGER :: i,j, numfilt
  INTEGER, DIMENSION(1:dim0,1:dim1) :: buf ! write buffer
  INTEGER, DIMENSION(1:dim0,1:dim1) :: rbuf ! read buffer
  INTEGER(HSIZE_T), DIMENSION(1:rank) :: data_dims ! dimensions of data buffers

  INTEGER, DIMENSION(1:1) :: cd_values ! Auxiliary data for the filter
  INTEGER(size_t) :: nelmts            ! Number of elements in cd_values
  INTEGER :: flags ! Bit vector specifying certain general properties of the filter
  INTEGER(SIZE_T) :: namelen = 180 ! Anticipated number of characters in name
  CHARACTER(LEN=180) :: name ! Name of the filter
  INTEGER :: filter_id ! Filter identification number

  ! Uncomment these variables to use SZIP compression
  !INTEGER :: szip_options_mask
  !INTEGER :: szip_pixels_per_block

  !
  !Initialize FORTRAN predefined datatypes
  !
  CALL h5open_f(error)
  !
  ! Create a file
  CALL h5fcreate_f(filename, H5F_ACC_TRUNC_F, file_id, error)
  !
  ! Create dataset "Compressed Data" in the group using absolute name.
  dims(1:2) = (/dim0, dim1/)
  CALL h5screate_simple_f(rank, dims, dataspace_id, error)
  CALL h5pcreate_f(H5P_DATASET_CREATE_F, plist_id, error)
  !
  ! Dataset must be chunked for compression 
  cdims(1:2) = 20
  CALL h5pset_chunk_f(plist_id, 2, cdims, error)

  ! Set ZLIB / DEFLATE Compression using compression level 6.
  ! To use SZIP Compression comment out these lines. 
  CALL h5pset_deflate_f(plist_id, 6, error)

  ! Uncomment these lines to set SZIP Compression 
  !szip_options_mask = H5_SZIP_NN_OM_F
  !szip_pixels_per_block = 16
  !CALL H5Pset_szip_f(plist_id, szip_options_mask, szip_pixels_per_block, error)

  ! Create data set
  CALL h5dcreate_f(file_id, "Compressed_Data", H5T_NATIVE_INTEGER, dataspace_id, &
       dataset_id, error, dcpl_id=plist_id)

  DO j = 1, dim1
     DO i = 1, dim0
        buf(i,j) = i+j
     ENDDO
  ENDDO

  data_dims(1:2) = (/dim0,dim1/) 
  CALL h5dwrite_f(dataset_id, H5T_NATIVE_INTEGER, buf, data_dims, error)

  ! Close resources
  CALL h5sclose_f(dataspace_id, error)
  CALL h5pclose_f(plist_id, error)
  CALL h5dclose_f(dataset_id, error)
  CALL h5fclose_f(file_id, error)

  ! Now reopen the file and dataset in the file.
  CALL h5fopen_f(filename, H5F_ACC_RDONLY_F, file_id, error)
  CALL h5dopen_f(file_id, "Compressed_Data", dataset_id, error)

  ! Retrieve filter information. 
  CALL h5dget_create_plist_f(dataset_id, plist_id, error)
    
  CALL h5pget_nfilters_f(plist_id, numfilt, error)
  WRITE(*,'(A, I0)') "Number of filters associated with dataset: ", numfilt
     
  DO i = 1, numfilt
     nelmts = 1
     ! Query the (i-1)-th filter attached to the dataset (filter indices are zero-based).
     CALL h5pget_filter_f(plist_id, i-1, flags, nelmts, cd_values, &
          namelen, name, filter_id, error)

      WRITE(*,'(30X,A)', ADVANCE='NO')"Filter Type: "
      IF(filter_id.EQ.H5Z_FILTER_DEFLATE_F)THEN
         WRITE(*,'(A)') "H5Z_FILTER_DEFLATE"
      ELSEIF (filter_id.EQ.H5Z_FILTER_SZIP_F)THEN
         WRITE(*,'(A)') "H5Z_FILTER_SZIP"
      ELSE
         WRITE(*,'(A)') "Other filter type included"
      ENDIF
   ENDDO
  data_dims(1:2) = (/dim0,dim1/)
  CALL h5dread_f(dataset_id, H5T_NATIVE_INTEGER, rbuf, data_dims, error)
    
  CALL h5dclose_f(dataset_id, error)
  CALL h5pclose_f(plist_id, error)
  CALL h5fclose_f(file_id, error)

  ! Close the FORTRAN interface.
  CALL h5close_f(error)

END PROGRAM h5_cmprss

 C++

/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 * Copyright by The HDF Group.						     *
 * Copyright by the Board of Trustees of the University of Illinois.	     *
 * All rights reserved.							     *
 *	                                                                     *
 * This file is part of HDF5.  The full HDF5 copyright notice, including     *
 * terms governing use, modification, and redistribution, is contained in    *
 * the files COPYING and Copyright.html.  COPYING can be found at the root   *
 * of the source code distribution tree; Copyright.html can be found at the  *
 * root level of an installed copy of the electronic HDF5 document set and   *
 * is linked from the top-level documents page.  It can also be found at     *
 * http://hdfgroup.org/HDF5/doc/Copyright.html.  If you do not have	     *
 * access to either file, you may request a copy from help@hdfgroup.org.     *
 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

/*
 *  This example illustrates how to create a compressed dataset.
 *  It is used in the HDF5 Tutorial.
 */

#include <iostream>
#include <string>

#include "H5Cpp.h"

#ifndef H5_NO_NAMESPACE
    using namespace H5;
#ifndef H5_NO_STD
    using std::cout;
    using std::endl;
#endif  // H5_NO_STD
#endif

const H5std_string	FILE_NAME("h5tutr_cmprss.h5");
const H5std_string	DATASET_NAME("Compressed_Data");
const int	DIM0 = 100;
const int	DIM1 = 20;

int main (void)
{
    hsize_t dims[2] = { DIM0, DIM1 };	// dataset dimensions
    hsize_t chunk_dims[2] = { 20, 20 };	// chunk dimensions
    int     i,j, buf[DIM0][DIM1];

    // Try block to detect exceptions raised by any of the calls inside it
    try
    {
	// Turn off the auto-printing when failure occurs so that we can
	// handle the errors appropriately
	Exception::dontPrint();

	// Create a new file using the default property lists. 
	H5File file(FILE_NAME, H5F_ACC_TRUNC);

	// Create the data space for the dataset.
	DataSpace *dataspace = new DataSpace(2, dims);

	// Modify dataset creation property to enable chunking
	DSetCreatPropList  *plist = new  DSetCreatPropList;
	plist->setChunk(2, chunk_dims);

	// Set ZLIB (DEFLATE) Compression using level 6.
	// To use SZIP compression comment out this line.
	plist->setDeflate(6);

	// Uncomment these lines to set SZIP Compression
	// unsigned szip_options_mask = H5_SZIP_NN_OPTION_MASK;
	// unsigned szip_pixels_per_block = 16;
	// plist->setSzip(szip_options_mask, szip_pixels_per_block);
     
	// Create the dataset.      
	DataSet *dataset = new DataSet(file.createDataSet( DATASET_NAME, 
	                        PredType::STD_I32BE, *dataspace, *plist) );

	for (i = 0; i< DIM0; i++)
	  for (j=0; j<DIM1; j++)
	      buf[i][j] = i+j;

	// Write data to dataset.
	dataset->write(buf, PredType::NATIVE_INT);

	// Close objects and file.  Either approach will close the HDF5 item.
	delete dataspace;
	delete dataset;
	delete plist;
	file.close();

	// -----------------------------------------------
	// Re-open the file and dataset, retrieve filter 
	// information for dataset and read the data back.
	// -----------------------------------------------
	
	int        rbuf[DIM0][DIM1];
	int        numfilt;
	size_t     nelmts={1}, namelen={1};
	unsigned  flags, filter_info, cd_values[1], idx;
	char       name[1];
	H5Z_filter_t filter_type;

	// Open the file and the dataset in the file.
	file.openFile(FILE_NAME, H5F_ACC_RDONLY);
	dataset = new DataSet(file.openDataSet( DATASET_NAME));

	// Get the create property list of the dataset.
	plist = new DSetCreatPropList(dataset->getCreatePlist ());

	// Get the number of filters associated with the dataset.
	numfilt = plist->getNfilters();
	cout << "Number of filters associated with dataset: " << numfilt << endl;

	for (idx=0; idx < numfilt; idx++) {
	    nelmts = 0;

	    filter_type = plist->getFilter(idx, flags, nelmts, cd_values, namelen, name , filter_info);

	    cout << "Filter Type: ";

	    switch (filter_type) {
	      case H5Z_FILTER_DEFLATE:
	           cout << "H5Z_FILTER_DEFLATE" << endl;
	           break;
	      case H5Z_FILTER_SZIP:
	           cout << "H5Z_FILTER_SZIP" << endl; 
	           break;
	      default:
	           cout << "Other filter type included." << endl;
	      }
	}

	// Read data.
	dataset->read(rbuf, PredType::NATIVE_INT);

	delete plist; 
	delete dataset;
	file.close();	// not strictly necessary; the destructor also closes the file

    }  // end of try block

    // catch failure caused by the H5File operations
    catch(FileIException &error)
    {
	error.printError();
	return -1;
    }

    // catch failure caused by the DataSet operations
    catch(DataSetIException &error)
    {
	error.printError();
	return -1;
    }

    // catch failure caused by the DataSpace operations
    catch(DataSpaceIException &error)
    {
	error.printError();
	return -1;
    }

    return 0;  // successfully terminated
}

For details on compiling an HDF5 application, see Compiling HDF5 Applications.

Remarks

  • The H5P_SET_CHUNK call modifies the dataset creation property list instance to use a chunked storage layout and sets the size of the chunks.

  • The H5P_SET_DEFLATE call modifies the dataset creation property list instance to use ZLIB (DEFLATE) compression; the H5P_SET_SZIP call modifies it to use SZIP compression. Each compression method requires its own parameters. (A sketch showing how to check at run time whether a compression filter is available appears after these remarks.)

  • SZIP Limitations:

    • SZIP compression can only be used with atomic integer, float, or char datatypes. It cannot be applied to compound, array, variable-length, enumeration, or other user-defined datatypes. A call to H5D_CREATE will fail if you attempt to create an SZIP-compressed dataset with a disallowed datatype; the conflict is detected only when the property list is used.

    • There are restrictions for use of SZIP by commercial users. See the documents at SZIP Compression in HDF5 for further information on SZIP, including the SZIP copyright notice.
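
Whether a particular compression filter is available in a given HDF5 installation, and whether it can encode (write) as well as decode (read), can be checked at run time before the filter is set on a property list. The following sketch is an illustration that is not part of the tutorial example above; it uses the H5Zfilter_avail and H5Zget_filter_info calls with the predefined DEFLATE and SZIP filter identifiers.

#include <stdio.h>
#include "hdf5.h"

/* Return 1 if the filter is available and able to encode, 0 otherwise. */
static int filter_can_encode (H5Z_filter_t filter)
{
    unsigned filter_info = 0;

    if (H5Zfilter_avail (filter) <= 0)
        return 0;                    /* filter not present in this installation */

    H5Zget_filter_info (filter, &filter_info);
    return (filter_info & H5Z_FILTER_CONFIG_ENCODE_ENABLED) != 0;
}

int main (void)
{
    if (filter_can_encode (H5Z_FILTER_DEFLATE))
        printf ("DEFLATE is available for writing compressed data.\n");

    if (!filter_can_encode (H5Z_FILTER_SZIP))
        printf ("SZIP encoding is not available; use DEFLATE instead.\n");

    return 0;
}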
