diff --git a/ompi/mca/fbtl/directio/Makefile.am b/ompi/mca/fbtl/directio/Makefile.am new file mode 100644 index 00000000000..ee7bc9e3b7e --- /dev/null +++ b/ompi/mca/fbtl/directio/Makefile.am @@ -0,0 +1,50 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2008-2011 University of Houston. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# Make the output library in this directory, and name it either +# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la +# (for static builds). + +if MCA_BUILD_ompi_fbtl_directio_DSO +component_noinst = +component_install = mca_fbtl_directio.la +else +component_noinst = libmca_fbtl_directio.la +component_install = +endif + +mcacomponentdir = $(ompilibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_fbtl_directio_la_SOURCES = $(sources) +mca_fbtl_directio_la_LDFLAGS = -module -avoid-version + +noinst_LTLIBRARIES = $(component_noinst) +libmca_fbtl_directio_la_SOURCES = $(sources) +libmca_fbtl_directio_la_LDFLAGS = -module -avoid-version + +# Source files + +sources = \ + fbtl_directio.h \ + fbtl_directio.c \ + fbtl_directio_component.c \ + fbtl_directio_preadv.c \ + fbtl_directio_ipreadv.c \ + fbtl_directio_pwritev.c \ + fbtl_directio_ipwritev.c diff --git a/ompi/mca/fbtl/directio/fbtl_directio.c b/ompi/mca/fbtl/directio/fbtl_directio.c new file mode 100644 index 00000000000..6732c856aa1 --- /dev/null +++ b/ompi/mca/fbtl/directio/fbtl_directio.c @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana 
University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2015 University of Houston. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * These symbols are in a file by themselves to provide nice linker + * semantics. Since linkers generally pull in symbols by object files, + * keeping these symbols as the only symbols in this file prevents + * utility programs such as "ompi_info" from having to import entire + * modules just to query their version and parameters + */ + +#include "ompi_config.h" +#include "mpi.h" + +#include +#include + +#include "ompi/mca/fbtl/fbtl.h" +#include "ompi/mca/fbtl/directio/fbtl_directio.h" + +/* + * ******************************************************************* + * ************************ actions structure ************************ + * ******************************************************************* + */ +static mca_fbtl_base_module_1_0_0_t directio = { + mca_fbtl_directio_module_init, /* initialise after being selected */ + mca_fbtl_directio_module_finalize, /* close a module on a communicator */ + mca_fbtl_directio_preadv, /* blocking read */ + NULL, /* non-blocking read: not provided */ + mca_fbtl_directio_pwritev, /* blocking write */ + NULL, /* non-blocking write: not provided */ + NULL, /* module specific progress: not provided */ + NULL /* free module specific data items on the request: not provided */ +}; +/* + * ******************************************************************* + * ************************* structure ends ************************** + * 
*******************************************************************
+ */
+
+/* Component-level query; there is no per-process state to prepare. */
+int mca_fbtl_directio_component_init_query(bool enable_progress_threads,
+                                           bool enable_mpi_threads)
+{
+    return OMPI_SUCCESS;
+}
+
+/* Offer this module for fh; the priority becomes 100 when fh->f_fs_ptr holds
+ * a positive descriptor value and the file system type is not PVFS2. */
+struct mca_fbtl_base_module_1_0_0_t *
+mca_fbtl_directio_component_file_query (mca_io_ompio_file_t *fh, int *priority)
+{
+    int direct_fd;
+    *priority = mca_fbtl_directio_priority;
+    memcpy (&direct_fd, &fh->f_fs_ptr, sizeof(int));
+    if ( direct_fd > 0 && fh->f_fstype != PVFS2 ) {
+        *priority = 100;
+    }
+    return &directio;
+}
+
+/* The file query sets nothing up, so there is nothing to undo when this
+ * module is not selected. */
+int mca_fbtl_directio_component_file_unquery (mca_io_ompio_file_t *file)
+{
+    return OMPI_SUCCESS;
+}
+
+/* Module start-up and shutdown keep no state; both just report success. */
+int mca_fbtl_directio_module_init (mca_io_ompio_file_t *file)
+{
+    return OMPI_SUCCESS;
+}
+
+int mca_fbtl_directio_module_finalize (mca_io_ompio_file_t *file)
+{
+    return OMPI_SUCCESS;
+}
+
diff --git a/ompi/mca/fbtl/directio/fbtl_directio.h b/ompi/mca/fbtl/directio/fbtl_directio.h
new file mode 100644
index 00000000000..3531f1425d7
--- /dev/null
+++ b/ompi/mca/fbtl/directio/fbtl_directio.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ * University Research and Technology
+ * Corporation. All rights reserved.
+ * Copyright (c) 2004-2006 The University of Tennessee and The University
+ * of Tennessee Research Foundation. All rights
+ * reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ * University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ * All rights reserved.
+ * Copyright (c) 2008-2015 University of Houston. All rights reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef MCA_FBTL_DIRECTIO_H +#define MCA_FBTL_DIRECTIO_H + +#include "ompi_config.h" +#include "ompi/mca/mca.h" +#include "ompi/mca/fbtl/fbtl.h" +#include "ompi/mca/io/ompio/io_ompio.h" +#include "ompi/mca/io/ompio/io_ompio_request.h" + +extern int mca_fbtl_directio_priority; /* default priority reported by the file query */ +#define FBTL_DIRECTIO_BLOCK_SIZE 4096 /* file offsets and lengths are split at multiples of this */ +#define FBTL_DIRECTIO_MEMALIGN_SIZE 4096 /* alignment required of buffers passed to the direct-I/O descriptor */ + +BEGIN_C_DECLS + +int mca_fbtl_directio_component_init_query(bool enable_progress_threads, + bool enable_mpi_threads); +struct mca_fbtl_base_module_1_0_0_t * +mca_fbtl_directio_component_file_query (mca_io_ompio_file_t *file, int *priority); +int mca_fbtl_directio_component_file_unquery (mca_io_ompio_file_t *file); + +int mca_fbtl_directio_module_init (mca_io_ompio_file_t *file); +int mca_fbtl_directio_module_finalize (mca_io_ompio_file_t *file); + +OMPI_MODULE_DECLSPEC extern mca_fbtl_base_component_2_0_0_t mca_fbtl_directio_component; +/* + * ****************************************************************** + * ********* functions which are implemented in this module ********* + * ****************************************************************** + */ + +ssize_t mca_fbtl_directio_preadv (mca_io_ompio_file_t *file ); +ssize_t mca_fbtl_directio_pwritev (mca_io_ompio_file_t *file ); +ssize_t mca_fbtl_directio_ipreadv (mca_io_ompio_file_t *file, + ompi_request_t *request); +ssize_t mca_fbtl_directio_ipwritev (mca_io_ompio_file_t *file, + ompi_request_t *request); + +bool mca_fbtl_directio_progress ( mca_ompio_request_t *req); /* NOTE(review): declared only; no definition is visible in this patch -- confirm */ +void mca_fbtl_directio_request_free ( mca_ompio_request_t *req); /* NOTE(review): declared only; no definition is visible in this patch -- confirm */ + +/* + * ****************************************************************** + * ************ functions implemented in this module end ************ + * ****************************************************************** + */ + +END_C_DECLS + +#endif /* MCA_FBTL_DIRECTIO_H */ diff --git a/ompi/mca/fbtl/directio/fbtl_directio_component.c 
b/ompi/mca/fbtl/directio/fbtl_directio_component.c new file mode 100644 index 00000000000..9ef794cdf48 --- /dev/null +++ b/ompi/mca/fbtl/directio/fbtl_directio_component.c @@ -0,0 +1,65 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * These symbols are in a file by themselves to provide nice linker + * semantics. Since linkers generally pull in symbols by object + * files, keeping these symbols as the only symbols in this file + * prevents utility programs such as "ompi_info" from having to import + * entire components just to query their version and parameters. 
+ */ + +#include "ompi_config.h" +#include "fbtl_directio.h" +#include "mpi.h" + +/* + * Public string showing the fbtl directio component version number + */ +const char *mca_fbtl_directio_component_version_string = + "OMPI/MPI directio FBTL MCA component version " OMPI_VERSION; + +int mca_fbtl_directio_priority = 10; /* default priority; the file query reports 100 instead when a direct-I/O descriptor is present. NOTE(review): no MCA variable registration for this value is visible in this component -- confirm */ + +/* + * Instantiate the public struct with all of our public information + * and pointers to our public functions in it + */ +mca_fbtl_base_component_2_0_0_t mca_fbtl_directio_component = { + + /* First, the mca_component_t struct containing meta information + about the component itself */ + + .fbtlm_version = { + MCA_FBTL_BASE_VERSION_2_0_0, + + /* Component name and version */ + .mca_component_name = "directio", + MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION, + OMPI_RELEASE_VERSION), + }, + .fbtlm_data = { + /* This component is checkpointable */ + MCA_BASE_METADATA_PARAM_CHECKPOINT + }, + .fbtlm_init_query = mca_fbtl_directio_component_init_query, /* component-level query; currently has nothing to do */ + .fbtlm_file_query = mca_fbtl_directio_component_file_query, /* get priority and actions */ + .fbtlm_file_unquery = mca_fbtl_directio_component_file_unquery, /* undo what was done by previous function */ +}; diff --git a/ompi/mca/fbtl/directio/fbtl_directio_ipreadv.c b/ompi/mca/fbtl/directio/fbtl_directio_ipreadv.c new file mode 100644 index 00000000000..1bd19d07683 --- /dev/null +++ b/ompi/mca/fbtl/directio/fbtl_directio_ipreadv.c @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2011 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. 
+ * All rights reserved. + * Copyright (c) 2008-2015 University of Houston. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + + +#include "ompi_config.h" +#include "fbtl_directio.h" + +#include + +#include "mpi.h" +#include "ompi/constants.h" +#include "ompi/mca/fbtl/fbtl.h" + +ssize_t mca_fbtl_directio_ipreadv (mca_io_ompio_file_t *fh, + ompi_request_t *request) +{ + return OMPI_SUCCESS; /* placeholder: no I/O is issued and request is left untouched */ +} diff --git a/ompi/mca/fbtl/directio/fbtl_directio_ipwritev.c b/ompi/mca/fbtl/directio/fbtl_directio_ipwritev.c new file mode 100644 index 00000000000..58b44f6d12d --- /dev/null +++ b/ompi/mca/fbtl/directio/fbtl_directio_ipwritev.c @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2011 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2015 University of Houston. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+#include "fbtl_directio.h"
+
+#include
+
+#include "mpi.h"
+#include "ompi/constants.h"
+#include "ompi/mca/fbtl/fbtl.h"
+
+ssize_t mca_fbtl_directio_ipwritev (mca_io_ompio_file_t *fh,
+                                    ompi_request_t *request)
+{
+    return OMPI_SUCCESS;
+}
diff --git a/ompi/mca/fbtl/directio/fbtl_directio_preadv.c b/ompi/mca/fbtl/directio/fbtl_directio_preadv.c
new file mode 100644
index 00000000000..2d26d1af8cd
--- /dev/null
+++ b/ompi/mca/fbtl/directio/fbtl_directio_preadv.c
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ * University Research and Technology
+ * Corporation. All rights reserved.
+ * Copyright (c) 2004-2011 The University of Tennessee and The University
+ * of Tennessee Research Foundation. All rights
+ * reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ * University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ * All rights reserved.
+ * Copyright (c) 2008-2015 University of Houston. All rights reserved.
+ * Copyright (c) 2015 Research Organization for Information Science
+ * and Technology (RIST). All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+#include "fbtl_directio.h"
+
+#include "mpi.h"
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include "ompi/constants.h"
+#include "ompi/mca/fbtl/fbtl.h"
+
+/*
+ * Read the block-aligned part of one request through the direct-I/O
+ * descriptor.  When buf is not aligned to FBTL_DIRECTIO_MEMALIGN_SIZE the
+ * data is staged in an aligned bounce buffer and only the bytes actually
+ * read are copied out.  If the bounce buffer cannot be allocated the read
+ * is issued on the buffered descriptor instead.
+ * Returns the pread() result: bytes read, or -1 on error.
+ */
+static ssize_t directio_read_aligned (int fd_direct, int fd_buffered,
+                                      char *buf, size_t nbytes, off_t offset)
+{
+    void *bounce = NULL;
+    ssize_t nread;
+
+    if ( 0 == ((uintptr_t) buf % FBTL_DIRECTIO_MEMALIGN_SIZE) ) {
+        return pread (fd_direct, buf, nbytes, offset);
+    }
+
+    /* posix_memalign reports failure through its return value only */
+    if ( 0 != posix_memalign (&bounce, FBTL_DIRECTIO_MEMALIGN_SIZE, nbytes) ) {
+        opal_output(1, "fbtl_directio:preadv: memalign failed ");
+        return pread (fd_buffered, buf, nbytes, offset);
+    }
+
+    nread = pread (fd_direct, bounce, nbytes, offset);
+    if ( 0 < nread ) {
+        memcpy (buf, bounce, (size_t) nread);
+    }
+    free (bounce);
+
+    return nread;
+}
+
+/*
+ * Blocking read of every entry in fh->f_io_array.
+ *
+ * Each entry is split into up to three pieces: an unaligned head up to the
+ * next FBTL_DIRECTIO_BLOCK_SIZE boundary (buffered descriptor fh->fd), a
+ * body that is a multiple of the block size (direct-I/O descriptor stored in
+ * fh->f_fs_ptr), and the remaining tail (buffered descriptor again).  The
+ * entries of fh->f_io_array are not modified.
+ *
+ * Returns the total number of bytes read over all entries, or OMPI_ERROR if
+ * fh->f_io_array is NULL or any pread() fails.
+ */
+ssize_t mca_fbtl_directio_preadv (mca_io_ompio_file_t *fh )
+{
+    int i;
+    int fd_direct = 0;
+    ssize_t nread, total_bytes_read = 0;
+
+    if (NULL == fh->f_io_array) {
+        return OMPI_ERROR;
+    }
+
+    memcpy ( &fd_direct, &fh->f_fs_ptr, sizeof(int));
+    if ( 0 >= fd_direct ) {
+        /* no direct-I/O descriptor was opened: use the buffered one throughout */
+        fd_direct = fh->fd;
+    }
+
+    for (i = 0 ; i < (int) fh->f_num_of_io_entries ; i++) {
+        char *buf = (char *) fh->f_io_array[i].memory_address;
+        off_t offset = (off_t) (intptr_t) fh->f_io_array[i].offset;
+        size_t length = fh->f_io_array[i].length;
+        size_t head = 0, body, tail;
+
+        /* Step 1: bring the file offset up to a block boundary */
+        if ( 0 != (offset % FBTL_DIRECTIO_BLOCK_SIZE) ) {
+            head = FBTL_DIRECTIO_BLOCK_SIZE - (size_t) (offset % FBTL_DIRECTIO_BLOCK_SIZE);
+            if ( length < head ) {
+                head = length;
+            }
+        }
+        if ( 0 < head ) {
+            nread = pread (fh->fd, buf, head, offset);
+            if ( 0 > nread ) {
+                opal_output (1, "fbtl_directio_preadv: could not read\n" );
+                return OMPI_ERROR;
+            }
+            total_bytes_read += nread;
+            buf += head;
+            offset += (off_t) head;
+            length -= head;
+        }
+
+        /* Step 2: largest multiple of the block size goes through direct I/O */
+        tail = length % FBTL_DIRECTIO_BLOCK_SIZE;
+        body = length - tail;
+        if ( 0 < body ) {
+            /* Step 3 (buffer alignment) is handled inside the helper */
+            nread = directio_read_aligned (fd_direct, fh->fd, buf, body, offset);
+            if ( 0 > nread ) {
+                opal_output (1, "fbtl_directio_preadv: could not read on directio descriptor\n" );
+                return OMPI_ERROR;
+            }
+            total_bytes_read += nread;
+        }
+
+        /* Read the remaining partial block using buffered I/O */
+        if ( 0 < tail ) {
+            nread = pread (fh->fd, buf + body, tail, offset + (off_t) body);
+            if ( 0 > nread ) {
+                opal_output (1, "fbtl_directio_preadv: could not read\n" );
+                return OMPI_ERROR;
+            }
+            total_bytes_read += nread;
+        }
+    }
+
+    return total_bytes_read;
+}
diff --git a/ompi/mca/fbtl/directio/fbtl_directio_pwritev.c b/ompi/mca/fbtl/directio/fbtl_directio_pwritev.c
new file mode 100644
index 00000000000..da7b8bd88e2
--- /dev/null
+++ b/ompi/mca/fbtl/directio/fbtl_directio_pwritev.c
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ * University Research and Technology
+ * Corporation. All rights reserved.
+ * Copyright (c) 2004-2011 The University of Tennessee and The University
+ * of Tennessee Research Foundation. All rights
+ * reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ * University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ * All rights reserved.
+ * Copyright (c) 2008-2015 University of Houston. All rights reserved.
+ * Copyright (c) 2015 Research Organization for Information Science
+ * and Technology (RIST). All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+#include "fbtl_directio.h"
+
+#include "mpi.h"
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include "ompi/constants.h"
+#include "ompi/mca/fbtl/fbtl.h"
+
+/*
+ * Write the block-aligned part of one request through the direct-I/O
+ * descriptor.  When buf is not aligned to FBTL_DIRECTIO_MEMALIGN_SIZE the
+ * data is first copied into an aligned bounce buffer.  If that buffer
+ * cannot be allocated the write is issued on the buffered descriptor.
+ * Returns the pwrite() result: bytes written, or -1 on error.
+ */
+static ssize_t directio_write_aligned (int fd_direct, int fd_buffered,
+                                       const char *buf, size_t nbytes, off_t offset)
+{
+    void *bounce = NULL;
+    ssize_t nwritten;
+
+    if ( 0 == ((uintptr_t) buf % FBTL_DIRECTIO_MEMALIGN_SIZE) ) {
+        return pwrite (fd_direct, buf, nbytes, offset);
+    }
+    if ( 0 != posix_memalign (&bounce, FBTL_DIRECTIO_MEMALIGN_SIZE, nbytes) ) {
+        opal_output(1, "fbtl_directio_pwritev: memalign failed ");
+        return pwrite (fd_buffered, buf, nbytes, offset);
+    }
+
+    memcpy (bounce, buf, nbytes);
+    nwritten = pwrite (fd_direct, bounce, nbytes, offset);
+    free (bounce);
+    return nwritten;
+}
+
+/*
+ * Blocking write of every entry in fh->f_io_array.
+ *
+ * Each entry is split into up to three pieces: an unaligned head up to the
+ * next FBTL_DIRECTIO_BLOCK_SIZE boundary (buffered descriptor fh->fd), a
+ * body that is a multiple of the block size (direct-I/O descriptor stored in
+ * fh->f_fs_ptr), and the remaining tail (buffered descriptor again).  The
+ * entries of fh->f_io_array are not modified.
+ *
+ * Returns the total number of bytes written over all entries, or OMPI_ERROR
+ * if fh->f_io_array is NULL or any pwrite() fails.
+ */
+ssize_t mca_fbtl_directio_pwritev(mca_io_ompio_file_t *fh )
+{
+    int i;
+    int fd_direct = 0;
+    ssize_t nwritten, total_bytes_written = 0;
+
+    if (NULL == fh->f_io_array) {
+        return OMPI_ERROR;
+    }
+
+    memcpy ( &fd_direct, &fh->f_fs_ptr, sizeof(int));
+    if ( 0 >= fd_direct ) {
+        /* no direct-I/O descriptor was opened: use the buffered one throughout */
+        fd_direct = fh->fd;
+    }
+
+    for (i = 0 ; i < (int) fh->f_num_of_io_entries ; i++) {
+        const char *buf = (const char *) fh->f_io_array[i].memory_address;
+        off_t offset = (off_t) (intptr_t) fh->f_io_array[i].offset;
+        size_t length = fh->f_io_array[i].length;
+        size_t head = 0, body, tail;
+
+        /* Step 1: bring the file offset up to a block boundary */
+        if ( 0 != (offset % FBTL_DIRECTIO_BLOCK_SIZE) ) {
+            head = FBTL_DIRECTIO_BLOCK_SIZE - (size_t) (offset % FBTL_DIRECTIO_BLOCK_SIZE);
+            if ( length < head ) {
+                head = length;
+            }
+        }
+        if ( 0 < head ) {
+            nwritten = pwrite (fh->fd, buf, head, offset);
+            if ( 0 > nwritten ) {
+                opal_output (1, "fbtl_directio_pwritev: could not write\n" );
+                return OMPI_ERROR;
+            }
+            total_bytes_written += nwritten;
+            buf += head;
+            offset += (off_t) head;
+            length -= head;
+        }
+
+        /* Step 2: largest multiple of the block size goes through direct I/O */
+        tail = length % FBTL_DIRECTIO_BLOCK_SIZE;
+        body = length - tail;
+        if ( 0 < body ) {
+            /* Step 3 (buffer alignment) is handled inside the helper */
+            nwritten = directio_write_aligned (fd_direct, fh->fd, buf, body, offset);
+            if ( 0 > nwritten ) {
+                opal_output (1, "fbtl_directio_pwritev: could not write on direct I/O descriptor \n" );
+                return OMPI_ERROR;
+            }
+            total_bytes_written += nwritten;
+        }
+
+        /* Write the remaining partial block using buffered I/O */
+        if ( 0 < tail ) {
+            nwritten = pwrite (fh->fd, buf + body, tail, offset + (off_t) body);
+            if ( 0 > nwritten ) {
+                opal_output (1, "fbtl_directio_pwritev: could not write\n" );
+                return OMPI_ERROR;
+            }
+            total_bytes_written += nwritten;
+        }
+    }
+
+    return total_bytes_written;
+}
diff --git a/ompi/mca/fbtl/directio/owner.txt b/ompi/mca/fbtl/directio/owner.txt
new file mode 100644
index 00000000000..2e9726c28a4
--- /dev/null
+++ b/ompi/mca/fbtl/directio/owner.txt
@@ -0,0 +1,7 @@
+#
+# owner/status file
+# owner: institution that is responsible for this package
+# status: e.g. active, maintenance, unmaintained
+#
+owner: UH
+status: active
diff --git a/ompi/mca/fs/lustre/fs_lustre.h b/ompi/mca/fs/lustre/fs_lustre.h
index ad4844c618c..7fa6c9f76f6 100644
--- a/ompi/mca/fs/lustre/fs_lustre.h
+++ b/ompi/mca/fs/lustre/fs_lustre.h
@@ -30,6 +30,7 @@
 extern int mca_fs_lustre_priority;
 extern int mca_fs_lustre_stripe_size;
 extern int mca_fs_lustre_stripe_width;
+extern int mca_fs_lustre_use_directio;
 
 BEGIN_C_DECLS
 
diff --git a/ompi/mca/fs/lustre/fs_lustre_component.c b/ompi/mca/fs/lustre/fs_lustre_component.c
index d8392af482d..3b82d046bcb 100644
--- a/ompi/mca/fs/lustre/fs_lustre_component.c
+++ b/ompi/mca/fs/lustre/fs_lustre_component.c
@@ -45,6 +45,7 @@ int mca_fs_lustre_priority = 20;
                                      runtime also*/
 int mca_fs_lustre_stripe_size = 0;
 int mca_fs_lustre_stripe_width = 0;
+int mca_fs_lustre_use_directio = 0;
 /*
  * Instantiate the public struct with all of our public information
  * and pointers to our public functions in it
@@ -94,5 +95,12 @@ lustre_register(void)
                                            OPAL_INFO_LVL_9,
                                            MCA_BASE_VAR_SCOPE_READONLY, &mca_fs_lustre_stripe_width);
 
+    mca_fs_lustre_use_directio = 0;
+    (void) mca_base_component_var_register(&mca_fs_lustre_component.fsm_version,
+                                           "use_directio", "whether to use direct I/O",
+                                           MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
+                                           OPAL_INFO_LVL_9,
+                                           MCA_BASE_VAR_SCOPE_READONLY, &mca_fs_lustre_use_directio);
+
     return OMPI_SUCCESS;
 }
diff --git a/ompi/mca/fs/lustre/fs_lustre_file_close.c b/ompi/mca/fs/lustre/fs_lustre_file_close.c
index a62e371d040..3445d55327b 100644
--- a/ompi/mca/fs/lustre/fs_lustre_file_close.c
+++ b/ompi/mca/fs/lustre/fs_lustre_file_close.c
@@ -40,5 +40,12 @@
mca_fs_lustre_file_close (mca_io_ompio_file_t *fh)
     fh->f_comm->c_coll.coll_barrier (fh->f_comm,
                                      fh->f_comm->c_coll.coll_barrier_module);
     close (fh->fd);
+
+    if (0 != fh->f_fs_ptr ) {
+        int fs_ptr;
+        memcpy ( &fs_ptr, &fh->f_fs_ptr, sizeof(int));
+//        fsync ( fs_ptr );
+        close (fs_ptr );
+    }
     return OMPI_SUCCESS;
 }
diff --git a/ompi/mca/fs/lustre/fs_lustre_file_open.c b/ompi/mca/fs/lustre/fs_lustre_file_open.c
index b873564bc86..81143bf8558 100644
--- a/ompi/mca/fs/lustre/fs_lustre_file_open.c
+++ b/ompi/mca/fs/lustre/fs_lustre_file_open.c
@@ -62,13 +62,14 @@ mca_fs_lustre_file_open (struct ompi_communicator_t *comm,
                          struct ompi_info_t *info,
                          mca_io_ompio_file_t *fh)
 {
-    int amode;
+    int amode, amode_direct;
     int old_mask, perm;
     int rc;
     int flag;
     int fs_lustre_stripe_size = -1;
     int fs_lustre_stripe_width = -1;
     char char_stripe[MPI_MAX_INFO_KEY];
+    int fd_direct;
 
     struct lov_user_md *lump=NULL;
 
@@ -93,6 +94,10 @@ mca_fs_lustre_file_open (struct ompi_communicator_t *comm,
     if (access_mode & MPI_MODE_EXCL)
         amode = amode | O_EXCL;
 
+    /* Second descriptor for direct I/O: drop O_CREAT/O_EXCL so that reopening
+     * a file created by the regular open cannot fail with EEXIST.
+     * NOTE(review): assumes the regular open precedes the direct one -- confirm */
+    amode_direct = (amode & ~(O_CREAT | O_EXCL)) | O_DIRECT;
 
     ompi_info_get (info, "stripe_size", MPI_MAX_INFO_VAL, char_stripe, &flag);
     if ( flag ) {
@@ -159,5 +164,14 @@ mca_fs_lustre_file_open (struct ompi_communicator_t *comm,
 //        free ( lump );
 //    }
     }
+
+    if ( mca_fs_lustre_use_directio ) {
+        fd_direct = open ( filename, amode_direct, perm);
+        if ( -1 != fd_direct ) {
+            memcpy ( &fh->f_fs_ptr, &fd_direct, sizeof(int));
+        }
+    }
+
+
     return OMPI_SUCCESS;
 }
diff --git a/ompi/mca/fs/ufs/fs_ufs.h b/ompi/mca/fs/ufs/fs_ufs.h
index daebc5d505b..6801f3effa7 100644
--- a/ompi/mca/fs/ufs/fs_ufs.h
+++ b/ompi/mca/fs/ufs/fs_ufs.h
@@ -28,6 +28,7 @@
 #include "ompi/mca/io/ompio/io_ompio.h"
 
 extern int mca_fs_ufs_priority;
+extern int mca_fs_ufs_use_directio;
 
 BEGIN_C_DECLS
 
diff --git a/ompi/mca/fs/ufs/fs_ufs_component.c b/ompi/mca/fs/ufs/fs_ufs_component.c
index d5f3c157daf..8037279510f 100644
--- a/ompi/mca/fs/ufs/fs_ufs_component.c
+++ b/ompi/mca/fs/ufs/fs_ufs_component.c
@@ -31,6 +31,9 @@
#include "mpi.h" int mca_fs_ufs_priority = 10; +int mca_fs_ufs_use_directio=0; /* non-zero: file_open additionally opens an O_DIRECT descriptor */ + +static int ufs_register(void); /* * Public string showing the fs ufs component version number @@ -54,6 +57,7 @@ mca_fs_base_component_2_0_0_t mca_fs_ufs_component = { .mca_component_name = "ufs", MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION, OMPI_RELEASE_VERSION), + .mca_register_component_params = ufs_register, }, .fsm_data = { /* This component is checkpointable */ @@ -63,3 +67,21 @@ mca_fs_base_component_2_0_0_t mca_fs_ufs_component = { .fsm_file_query = mca_fs_ufs_component_file_query, /* get priority and actions */ .fsm_file_unquery = mca_fs_ufs_component_file_unquery, /* undo what was done by previous function */ }; + +static int ufs_register(void) /* registers the "priority" and "use_directio" variables of this component */ +{ + mca_fs_ufs_priority = 10; /* default value */ + (void) mca_base_component_var_register (&mca_fs_ufs_component.fsm_version, + "priority", "Priority of the ufs2 fs component", + MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, &mca_fs_ufs_priority); + mca_fs_ufs_use_directio = 0; /* default value: direct I/O off */ + (void) mca_base_component_var_register (&mca_fs_ufs_component.fsm_version, + "use_directio", "whether to use direct I/O", + MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, &mca_fs_ufs_use_directio); + + return OMPI_SUCCESS; /* the results of both registrations are discarded (cast to void) */ +} diff --git a/ompi/mca/fs/ufs/fs_ufs_file_close.c b/ompi/mca/fs/ufs/fs_ufs_file_close.c index 4eb9938c953..659761812a7 100644 --- a/ompi/mca/fs/ufs/fs_ufs_file_close.c +++ b/ompi/mca/fs/ufs/fs_ufs_file_close.c @@ -46,5 +46,13 @@ mca_fs_ufs_file_close (mca_io_ompio_file_t *fh) free (fh->fd); fh->fd = NULL; }*/ + + if (0 != fh->f_fs_ptr ) { + int fs_ptr; + memcpy ( &fs_ptr, &fh->f_fs_ptr, sizeof(int)); +// fsync ( fs_ptr ); + close (fs_ptr ); + } + return OMPI_SUCCESS; } diff --git a/ompi/mca/fs/ufs/fs_ufs_file_close.c b/ompi/mca/fs/ufs/fs_ufs_file_open.c index 1f9fe5c1234..2cb33a1bddf 100644 --- a/ompi/mca/fs/ufs/fs_ufs_file_open.c +++ 
b/ompi/mca/fs/ufs/fs_ufs_file_open.c @@ -45,9 +45,10 @@ mca_fs_ufs_file_open (struct ompi_communicator_t *comm, struct ompi_info_t *info, mca_io_ompio_file_t *fh) { - int amode; + int amode, amode_direct; int old_mask, perm; int rank, ret; + int fd_direct; rank = ompi_comm_rank ( comm ); @@ -69,6 +70,8 @@ mca_fs_ufs_file_open (struct ompi_communicator_t *comm, if (access_mode & MPI_MODE_RDWR) amode = amode | O_RDWR; + amode_direct = amode | O_DIRECT; + if ( 0 == rank ) { /* MODE_CREATE and MODE_EXCL can only be set by one process */ if ( access_mode & MPI_MODE_CREATE ) @@ -91,5 +94,13 @@ mca_fs_ufs_file_open (struct ompi_communicator_t *comm, } } + + if ( mca_fs_ufs_use_directio ) { + fd_direct = open ( filename, amode_direct, perm); + if ( -1 != fd_direct ) { + memcpy ( &fh->f_fs_ptr, &fd_direct, sizeof(int)); + } + } + return OMPI_SUCCESS; } diff --git a/ompi/mca/io/ompio/io_ompio_file_open.c b/ompi/mca/io/ompio/io_ompio_file_open.c index b2ff58b4199..3242d692aac 100644 --- a/ompi/mca/io/ompio/io_ompio_file_open.c +++ b/ompi/mca/io/ompio/io_ompio_file_open.c @@ -169,6 +169,22 @@ ompio_io_ompio_file_open (ompi_communicator_t *comm, opal_output(1, "mca_fs_base_file_select() failed\n"); goto fn_fail; } + + ret = ompio_fh->f_fs->fs_file_open (comm, + filename, + amode, + info, + ompio_fh); + + + + + if ( OMPI_SUCCESS != ret ) { + ret = MPI_ERR_FILE; + goto fn_fail; + } + + if (OMPI_SUCCESS != (ret = mca_fbtl_base_file_select (ompio_fh, NULL))) { opal_output(1, "mca_fbtl_base_file_select() failed\n"); @@ -231,19 +247,6 @@ ompio_io_ompio_file_open (ompi_communicator_t *comm, } } - ret = ompio_fh->f_fs->fs_file_open (comm, - filename, - amode, - info, - ompio_fh); - - - - - if ( OMPI_SUCCESS != ret ) { - ret = MPI_ERR_FILE; - goto fn_fail; - } /* If file has been opened in the append mode, move the internal