diff --git a/dpctl-capi/CMakeLists.txt b/dpctl-capi/CMakeLists.txt
index 77891d6ee7..60156f721a 100644
--- a/dpctl-capi/CMakeLists.txt
+++ b/dpctl-capi/CMakeLists.txt
@@ -86,6 +86,7 @@ elseif(UNIX)
         "-Wuninitialized "
         "-Wmissing-declarations "
         "-fdiagnostics-color=auto "
+        "-Wno-deprecated-declarations "
     )
     string(CONCAT SDL_FLAGS
         "-fstack-protector "
diff --git a/dpctl-capi/dbg_build.sh b/dpctl-capi/dbg_build.sh
index 06055fad84..958a4f8ee3 100755
--- a/dpctl-capi/dbg_build.sh
+++ b/dpctl-capi/dbg_build.sh
@@ -13,7 +13,7 @@ cmake                                                       \
     -DCMAKE_CXX_COMPILER=dpcpp                              \
     -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX}                \
     -DCMAKE_PREFIX_PATH=${INSTALL_PREFIX}                   \
-    -DDPCTL_ENABLE_LO_PROGRAM_CREATION=${USE_LO_HEADERS}    \
+    -DDPCTL_ENABLE_LO_PROGRAM_CREATION=ON                   \
     -DDPCTL_BUILD_CAPI_TESTS=ON                             \
     -DDPCTL_GENERATE_COVERAGE=ON                            \
     ..
diff --git a/dpctl-capi/tests/CMakeLists.txt b/dpctl-capi/tests/CMakeLists.txt
index 9ac78a2c19..1000634794 100644
--- a/dpctl-capi/tests/CMakeLists.txt
+++ b/dpctl-capi/tests/CMakeLists.txt
@@ -35,11 +35,12 @@ if(DPCTL_GENERATE_COVERAGE)
     list(REMOVE_ITEM
         dpctl_sources
         "${CMAKE_CURRENT_SOURCE_DIR}/../source/dpctl_vector_templ.cpp"
+        "${CMAKE_CURRENT_SOURCE_DIR}/../source/dpcpp_kernels.cpp"
     )
 
     # Add profiling flags
     set(CMAKE_CXX_FLAGS
-        "${CMAKE_CXX_FLAGS} -fprofile-instr-generate -fcoverage-mapping"
+        "${CMAKE_CXX_FLAGS} -fprofile-instr-generate -fcoverage-mapping -DDPCTL_COVERAGE"
     )
 
     # Add all dpctl sources into a single executable so that we can run coverage
@@ -89,6 +90,10 @@ if(DPCTL_GENERATE_COVERAGE)
         WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
     )
 else()
+    add_library(dpcpp_kernels
+        STATIC
+        ${CMAKE_CURRENT_SOURCE_DIR}/dpcpp_kernels.cpp
+    )
     file(GLOB_RECURSE sources ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp)
     add_executable(dpctl_c_api_tests EXCLUDE_FROM_ALL ${sources})
     target_link_libraries(dpctl_c_api_tests
@@ -96,6 +101,7 @@ else()
         GTest::GTest
         DPCTLSyclInterface
         ${LEVEL_ZERO_LIBRARY}
+        dpcpp_kernels
     )
 endif()
 
diff --git a/dpctl-capi/tests/dpcpp_kernels.cpp b/dpctl-capi/tests/dpcpp_kernels.cpp
new file mode 100644
index 0000000000..897aa86de4
--- /dev/null
+++ b/dpctl-capi/tests/dpcpp_kernels.cpp
@@ -0,0 +1,89 @@
+#include "dpcpp_kernels.hpp"
+#include <CL/sycl.hpp>
+#include <cstddef>
+
+template sycl::kernel
+dpcpp_kernels::get_fill_kernel<int>(sycl::queue &, size_t, int *, int);
+
+template sycl::kernel
+dpcpp_kernels::get_fill_kernel<unsigned int>(sycl::queue &,
+                                             size_t,
+                                             unsigned int *,
+                                             unsigned int);
+
+template sycl::kernel
+dpcpp_kernels::get_fill_kernel<double>(sycl::queue &, size_t, double *, double);
+
+template sycl::kernel
+dpcpp_kernels::get_fill_kernel<float>(sycl::queue &, size_t, float *, float);
+
+template sycl::kernel
+dpcpp_kernels::get_range_kernel<int>(sycl::queue &, size_t, int *);
+
+template sycl::kernel
+dpcpp_kernels::get_range_kernel<unsigned int>(sycl::queue &,
+                                              size_t,
+                                              unsigned int *);
+
+template sycl::kernel
+dpcpp_kernels::get_range_kernel<float>(sycl::queue &, size_t, float *);
+
+template sycl::kernel
+dpcpp_kernels::get_range_kernel<double>(sycl::queue &, size_t, double *);
+
+template sycl::kernel dpcpp_kernels::get_mad_kernel<int, int>(sycl::queue &,
+                                                              size_t,
+                                                              int *,
+                                                              int *,
+                                                              int *,
+                                                              int);
+
+template sycl::kernel
+dpcpp_kernels::get_mad_kernel<unsigned int, unsigned int>(sycl::queue &,
+                                                          size_t,
+                                                          unsigned int *,
+                                                          unsigned int *,
+                                                          unsigned int *,
+                                                          unsigned int);
+
+template sycl::kernel dpcpp_kernels::get_local_sort_kernel<int>(sycl::queue &,
+                                                                size_t,
+                                                                size_t,
+                                                                int *,
+                                                                size_t);
+
+template sycl::kernel
+dpcpp_kernels::get_local_count_exceedance_kernel<int>(sycl::queue &,
+                                                      size_t,
+                                                      size_t,
+                                                      int *,
+                                                      size_t,
+                                                      int,
+                                                      int *);
+
+template sycl::kernel
+dpcpp_kernels::get_local_count_exceedance_kernel<unsigned int>(sycl::queue &,
+                                                               size_t,
+                                                               size_t,
+                                                               unsigned int *,
+                                                               size_t,
+                                                               unsigned int,
+                                                               int *);
+
+template sycl::kernel
+dpcpp_kernels::get_local_count_exceedance_kernel<float>(sycl::queue &,
+                                                        size_t,
+                                                        size_t,
+                                                        float *,
+                                                        size_t,
+                                                        float,
+                                                        int *);
+
+template sycl::kernel
+dpcpp_kernels::get_local_count_exceedance_kernel<double>(sycl::queue &,
+                                                         size_t,
+                                                         size_t,
+                                                         double *,
+                                                         size_t,
+                                                         double,
+                                                         int *);
diff --git a/dpctl-capi/tests/dpcpp_kernels.hpp b/dpctl-capi/tests/dpcpp_kernels.hpp
new file mode 100644
index 0000000000..099ea1a2da
--- /dev/null
+++ b/dpctl-capi/tests/dpcpp_kernels.hpp
@@ -0,0 +1,299 @@
+#pragma once
+#include <CL/sycl.hpp>
+
+namespace dpcpp_kernels
+{
+
+namespace
+{
+template <typename T> class populate_a;
+
+template <typename T> class populate_b;
+
+template <typename T, typename scT> class mad_kern;
+
+template <typename name, class kernelFunc>
+auto make_cgh_function(int n, kernelFunc func)
+{
+    auto Kernel = [&](sycl::handler &cgh) {
+        cgh.parallel_for<name>(sycl::range<1>(n), func);
+    };
+    return Kernel;
+};
+
+template <typename Ty, typename scT> struct MadFunc
+{
+    const Ty *in1, *in2;
+    Ty *out;
+    scT val;
+    MadFunc(const Ty *a, const Ty *b, Ty *c, scT d)
+        : in1(a), in2(b), out(c), val(d)
+    {
+    }
+    void operator()(sycl::id<1> myId) const
+    {
+        auto gid = myId[0];
+        out[gid] = in1[gid] + val * in2[gid];
+        return;
+    }
+};
+
+template <typename T> struct FillFunc
+{
+    T *out;
+    T val;
+    FillFunc(T *a, T val) : out(a), val(val) {}
+    void operator()(sycl::id<1> myId) const
+    {
+        auto gid = myId[0];
+        out[gid] = val;
+        return;
+    };
+};
+
+template <typename T> struct RangeFunc
+{
+    T *out;
+    RangeFunc(T *b) : out(b) {}
+    void operator()(sycl::id<1> myId) const
+    {
+        auto gid = myId[0];
+        out[gid] = T(gid);
+        return;
+    };
+};
+
+} // namespace
+
+template <typename T>
+sycl::kernel get_fill_kernel(sycl::queue &q, size_t n, T *out, T fill_val)
+{
+    // out[i] = fill_val
+    sycl::program program(q.get_context());
+
+    [[maybe_unused]] auto cgh_fn =
+        make_cgh_function<class populate_a<T>>(n, FillFunc<T>(out, fill_val));
+
+    program.build_with_kernel_type<populate_a<T>>();
+    return program.get_kernel<populate_a<T>>();
+};
+
+template <typename T>
+sycl::kernel get_range_kernel(sycl::queue &q, size_t n, T *b)
+{
+    // b[i] = i
+    sycl::program program(q.get_context());
+
+    [[maybe_unused]] auto cgh_fn =
+        make_cgh_function<class populate_b<T>>(n, RangeFunc<T>(b));
+
+    program.build_with_kernel_type<populate_b<T>>();
+    return program.get_kernel<populate_b<T>>();
+};
+
+template <typename T, typename scT>
+sycl::kernel
+get_mad_kernel(sycl::queue &q, size_t n, T *in1, T *in2, T *out, scT val)
+{
+    // c[i] = a[i] + b[i] * val
+    sycl::program program(q.get_context());
+
+    [[maybe_unused]] auto cgh_fn = make_cgh_function<class mad_kern<T, scT>>(
+        n, MadFunc<T, scT>(in1, in2, out, val));
+
+    program.build_with_kernel_type<mad_kern<T, scT>>();
+    return program.get_kernel<mad_kern<T, scT>>();
+};
+
+template <typename name,
+          typename localAccessorT,
+          class KernelFuncArgs,
+          class KernelFunctor>
+auto make_cgh_nd_function_with_local_memory(const sycl::nd_range<1> &nd_range,
+                                            size_t slm_size,
+                                            KernelFuncArgs kern_params)
+{
+    auto Kernel = [&](sycl::handler &cgh) {
+        localAccessorT lm(slm_size, cgh);
+        cgh.parallel_for<name>(nd_range, KernelFunctor(kern_params, lm));
+    };
+    return Kernel;
+};
+
+template <typename name, class KernelFunctor>
+auto make_cgh_nd_function(const sycl::nd_range<1> &nd_range, KernelFunctor kern)
+{
+    auto Kernel = [&](sycl::handler &cgh) {
+        cgh.parallel_for<name>(nd_range, kern);
+    };
+    return Kernel;
+};
+
+template <typename T> struct LocalSortArgs
+{
+    T *arr;
+    size_t global_array_size;
+    size_t wg_chunk_size;
+    LocalSortArgs(T *arr, size_t arr_len, size_t wg_len)
+        : arr(arr), global_array_size(arr_len), wg_chunk_size(wg_len)
+    {
+    }
+    ~LocalSortArgs() {}
+
+    T *get_array_pointer() const
+    {
+        return arr;
+    }
+    size_t get_array_size() const
+    {
+        return global_array_size;
+    }
+    size_t get_chunk_size() const
+    {
+        return wg_chunk_size;
+    }
+};
+
+template <typename T, typename localAccessorT> struct LocalSortFunc
+{
+    /*
+
+     */
+    T *arr;
+    size_t global_array_size;
+    size_t wg_chunk_size;
+    localAccessorT lm;
+    LocalSortFunc(T *arr, size_t arr_len, size_t wg_len, localAccessorT lm)
+        : arr(arr), global_array_size(arr_len), wg_chunk_size(wg_len), lm(lm)
+    {
+    }
+    template <class paramsT>
+    LocalSortFunc(paramsT params, localAccessorT lm)
+        : arr(params.get_array_pointer()),
+          global_array_size(params.get_array_size()),
+          wg_chunk_size(params.get_chunk_size()), lm(lm)
+    {
+    }
+    ~LocalSortFunc() {}
+    void operator()(sycl::nd_item<1> item) const
+    {
+        /* Use odd-even merge sort to sort lws chunk of array */
+        size_t group_id = item.get_group_linear_id();
+        size_t chunk_size =
+            sycl::min((group_id + 1) * wg_chunk_size, global_array_size) -
+            group_id * wg_chunk_size;
+
+        // compute the greatest power of 2 less than chunk_size
+        size_t sp2 = 1;
+        while (sp2 < chunk_size) {
+            sp2 <<= 1;
+        }
+        sp2 >>= 1;
+
+        size_t gid = item.get_global_linear_id();
+        size_t lid = item.get_local_linear_id();
+
+        if (gid < global_array_size) {
+            lm[lid] = arr[gid];
+        }
+        item.barrier(sycl::access::fence_space::local_space);
+
+        for (size_t p = sp2; p > 0; p >>= 1) {
+            size_t q = sp2;
+            size_t r = 0;
+            for (size_t d = p; d > 0; d = q - p, q >>= 1, r = p) {
+                if ((lid < chunk_size - d) && (lid & p) == r) {
+                    size_t i = lid;
+                    size_t j = i + d;
+                    T v1 = lm[i];
+                    T v2 = lm[j];
+                    if (v1 > v2) {
+                        lm[i] = v2;
+                        lm[j] = v1;
+                    }
+                }
+                item.barrier(sycl::access::fence_space::local_space);
+            }
+        }
+        if (gid < global_array_size) {
+            arr[gid] = lm[lid];
+        }
+    };
+};
+
+template <typename T> class local_sort_kern;
+
+template <typename T>
+sycl::kernel get_local_sort_kernel(sycl::queue &q,
+                                   size_t gws,
+                                   size_t lws,
+                                   T *arr,
+                                   size_t arr_len)
+{
+    sycl::program program(q.get_context());
+
+    using local_accessor_t =
+        sycl::accessor<T, 1, sycl::access::mode::read_write,
+                       sycl::access::target::local>;
+
+    [[maybe_unused]] auto cgh_fn = make_cgh_nd_function_with_local_memory<
+        local_sort_kern<T>, local_accessor_t, LocalSortArgs<T>,
+        LocalSortFunc<T, local_accessor_t>>(
+        sycl::nd_range<1>(gws, lws), lws, LocalSortArgs<T>(arr, arr_len, lws));
+
+    program.build_with_kernel_type<local_sort_kern<T>>();
+    return program.get_kernel<local_sort_kern<T>>();
+};
+
+template <typename T> struct LocalCountExceedanceFunc
+{
+    T *arr;
+    size_t arr_len;
+    T threshold_val;
+    int *count_arr;
+    LocalCountExceedanceFunc(T *arr,
+                             size_t arr_len,
+                             T threshold_val,
+                             int *count_arr)
+        : arr(arr), arr_len(arr_len), threshold_val(threshold_val),
+          count_arr(count_arr)
+    {
+    }
+
+    void operator()(sycl::nd_item<1> item) const
+    {
+        /* count number of array elements in group chunk that
+           exceeds the threshold value */
+        size_t gid = item.get_global_linear_id();
+        int partial_sum = sycl::ONEAPI::reduce(
+            item.get_group(),
+            (gid < arr_len) ? int(arr[gid] > threshold_val) : int(0),
+            std::plus<int>());
+        count_arr[item.get_group_linear_id()] = partial_sum;
+    }
+};
+
+template <typename T> class local_exceedance_kern;
+
+template <typename T>
+sycl::kernel get_local_count_exceedance_kernel(sycl::queue &q,
+                                               size_t gws,
+                                               size_t lws,
+                                               T *arr,
+                                               size_t arr_len,
+                                               T threshold_val,
+                                               int *counts)
+{
+    sycl::program program(q.get_context());
+
+    [[maybe_unused]] auto cgh_fn =
+        make_cgh_nd_function<local_exceedance_kern<T>,
+                             LocalCountExceedanceFunc<T>>(
+            sycl::nd_range<1>(gws, lws),
+            LocalCountExceedanceFunc<T>(arr, arr_len, threshold_val, counts));
+
+    program.build_with_kernel_type<local_exceedance_kern<T>>();
+    return program.get_kernel<local_exceedance_kern<T>>();
+};
+
+} // namespace dpcpp_kernels
diff --git a/dpctl-capi/tests/test_sycl_queue_submit.cpp b/dpctl-capi/tests/test_sycl_queue_submit.cpp
index c94089aeef..ca9fb3f233 100644
--- a/dpctl-capi/tests/test_sycl_queue_submit.cpp
+++ b/dpctl-capi/tests/test_sycl_queue_submit.cpp
@@ -24,6 +24,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "Support/CBindingWrapping.h"
+#ifndef DPCTL_COVERAGE
+#include "dpcpp_kernels.hpp"
+#endif
 #include "dpctl_sycl_context_interface.h"
 #include "dpctl_sycl_device_interface.h"
 #include "dpctl_sycl_device_selector_interface.h"
@@ -123,3 +126,234 @@ TEST_F(TestQueueSubmit, CheckSubmitRange_saxpy)
     DPCTLDevice_Delete(DRef);
     DPCTLDeviceSelector_Delete(DSRef);
 }
+
+#ifndef DPCTL_COVERAGE
+namespace
+{
+
+template <typename T,
+          DPCTLKernelArgType katT,
+          typename scT,
+          DPCTLKernelArgType katscT>
+bool common_submit_range_fn(sycl::queue &q, size_t n, scT val)
+{
+    T *a = sycl::malloc_device<T>(n, q);
+    T *b = sycl::malloc_device<T>(n, q);
+    T *c = sycl::malloc_device<T>(n, q);
+    T fill_val = 1;
+    size_t Range[] = {n};
+
+    auto popA_kernel = dpcpp_kernels::get_fill_kernel<T>(q, n, a, fill_val);
+    auto popB_kernel = dpcpp_kernels::get_range_kernel<T>(q, n, b);
+    auto mad_kernel = dpcpp_kernels::get_mad_kernel<T, scT>(q, n, a, b, c, val);
+
+    DPCTLSyclKernelRef popAKernRef =
+        reinterpret_cast<DPCTLSyclKernelRef>(&popA_kernel);
+    DPCTLSyclKernelRef popBKernRef =
+        reinterpret_cast<DPCTLSyclKernelRef>(&popB_kernel);
+    DPCTLSyclKernelRef madKernRef =
+        reinterpret_cast<DPCTLSyclKernelRef>(&mad_kernel);
+
+    DPCTLSyclQueueRef QRef = reinterpret_cast<DPCTLSyclQueueRef>(&q);
+    void *popAArgs[] = {reinterpret_cast<void *>(a),
+                        reinterpret_cast<void *>(&fill_val)};
+    DPCTLKernelArgType popAKernelArgTypes[] = {DPCTL_VOID_PTR, katT};
+
+    DPCTLSyclEventRef popAERef =
+        DPCTLQueue_SubmitRange(popAKernRef, QRef, popAArgs, popAKernelArgTypes,
+                               2, Range, 1, nullptr, 0);
+
+    void *popBArgs[] = {reinterpret_cast<void *>(b)};
+    DPCTLKernelArgType popBKernelArgTypes[] = {DPCTL_VOID_PTR};
+
+    DPCTLSyclEventRef popBERef =
+        DPCTLQueue_SubmitRange(popBKernRef, QRef, popBArgs, popBKernelArgTypes,
+                               1, Range, 1, nullptr, 0);
+
+    void *madArgs[] = {reinterpret_cast<void *>(a), reinterpret_cast<void *>(b),
+                       reinterpret_cast<void *>(c),
+                       reinterpret_cast<void *>(&val)};
+    DPCTLKernelArgType madKernelArgTypes[] = {DPCTL_VOID_PTR, DPCTL_VOID_PTR,
+                                              DPCTL_VOID_PTR, katscT};
+
+    DPCTLSyclEventRef deps[2] = {popAERef, popBERef};
+    DPCTLSyclEventRef madRef = DPCTLQueue_SubmitRange(
+        madKernRef, QRef, madArgs, madKernelArgTypes, 4, Range, 1, deps, 2);
+
+    DPCTLQueue_Wait(QRef);
+    DPCTLEvent_Delete(madRef);
+    DPCTLEvent_Delete(popBERef);
+    DPCTLEvent_Delete(popAERef);
+
+    bool worked = true;
+    T *host_data = new T[n];
+    q.memcpy(host_data, c, n * sizeof(T)).wait();
+    for (size_t i = 0; i < n; ++i) {
+        worked = worked && (host_data[i] == T(fill_val) + T(i) * T(val));
+    }
+    delete[] host_data;
+
+    sycl::free(c, q);
+    sycl::free(b, q);
+    sycl::free(a, q);
+
+    return worked;
+};
+
+template <typename T, DPCTLKernelArgType katT>
+bool common_submit_ndrange_fn(sycl::queue &q, size_t n)
+{
+    size_t lws = 64;
+    size_t n_groups = (n + lws - 1) / lws;
+    size_t gws = n_groups * lws;
+
+    T *a = sycl::malloc_device<T>(n, q);
+    int *counts = sycl::malloc_device<int>(n_groups, q);
+    size_t Range[] = {n};
+    size_t gRange[] = {gws};
+    size_t lRange[] = {lws};
+
+    auto popA_kernel = dpcpp_kernels::get_range_kernel<T>(q, n, a);
+    T threshold_val = T(n / 2);
+
+    auto count_kernel = dpcpp_kernels::get_local_count_exceedance_kernel<T>(
+        q, gws, lws, a, n, threshold_val, counts);
+
+    DPCTLSyclKernelRef countKernRef =
+        reinterpret_cast<DPCTLSyclKernelRef>(&count_kernel);
+    DPCTLSyclKernelRef popAKernRef =
+        reinterpret_cast<DPCTLSyclKernelRef>(&popA_kernel);
+
+    DPCTLSyclQueueRef QRef = reinterpret_cast<DPCTLSyclQueueRef>(&q);
+    void *popAArgs[] = {reinterpret_cast<void *>(a)};
+    DPCTLKernelArgType popAKernelArgTypes[] = {DPCTL_VOID_PTR};
+
+    DPCTLSyclEventRef popAERef =
+        DPCTLQueue_SubmitRange(popAKernRef, QRef, popAArgs, popAKernelArgTypes,
+                               1, Range, 1, nullptr, 0);
+
+    void *countArgs[] = {reinterpret_cast<void *>(a),
+                         reinterpret_cast<void *>(&n),
+                         reinterpret_cast<void *>(&threshold_val),
+                         reinterpret_cast<void *>(counts)};
+    DPCTLKernelArgType countKernelArgTypes[] = {DPCTL_VOID_PTR, DPCTL_SIZE_T,
+                                                katT, DPCTL_VOID_PTR};
+    DPCTLSyclEventRef deps[1] = {popAERef};
+
+    DPCTLSyclEventRef aggregERef = DPCTLQueue_SubmitNDRange(
+        countKernRef, QRef, countArgs, countKernelArgTypes, 4, gRange, lRange,
+        1, deps, 1);
+
+    DPCTLEvent_Wait(aggregERef);
+    DPCTLEvent_Delete(popAERef);
+    DPCTLEvent_Delete(aggregERef);
+
+    bool worked = true;
+    T *host_a = new T[n];
+    int *host_counts = new int[n_groups];
+    q.memcpy(host_a, a, n * sizeof(T));
+    q.memcpy(host_counts, counts, n_groups * sizeof(int));
+    q.wait_and_throw();
+
+    sycl::free(a, q);
+    sycl::free(counts, q);
+
+    for (size_t group_id = 0, gid = 0; group_id < n_groups; ++group_id) {
+        int count = 0;
+        for (size_t lid = 0; lid < lws; ++lid, ++gid) {
+            if (gid < n) {
+                count += int(host_a[gid] > threshold_val);
+            }
+        }
+        worked = worked && (count == host_counts[group_id]);
+    }
+
+    delete[] host_counts;
+    delete[] host_a;
+
+    return worked;
+}
+
+} // namespace
+
+struct TestQueueSubmitRange : public ::testing::Test
+{
+    sycl::queue q;
+    size_t n_elems = 512;
+
+    TestQueueSubmitRange() : q(sycl::default_selector{}) {}
+    ~TestQueueSubmitRange() {}
+};
+
+TEST_F(TestQueueSubmitRange, ChkSubmitRangeInt)
+{
+    bool worked = false;
+    worked = common_submit_range_fn<int, DPCTL_INT, int, DPCTL_INT>(q, n_elems,
+                                                                    int(-1));
+    EXPECT_TRUE(worked);
+}
+
+TEST_F(TestQueueSubmitRange, ChkSubmitRangeUnsignedInt)
+{
+    bool worked = false;
+    worked =
+        common_submit_range_fn<unsigned int, DPCTL_UNSIGNED_INT, unsigned int,
+                               DPCTL_UNSIGNED_INT>(q, n_elems, int(2));
+    EXPECT_TRUE(worked);
+}
+
+TEST_F(TestQueueSubmitRange, ChkSubmitRangeFloat)
+{
+    bool worked = false;
+    worked = common_submit_range_fn<float, DPCTL_FLOAT, float, DPCTL_FLOAT>(
+        q, n_elems, float(0.5));
+    EXPECT_TRUE(worked);
+}
+
+TEST_F(TestQueueSubmitRange, ChkSubmitRangeDouble)
+{
+    bool worked = false;
+    worked = common_submit_range_fn<double, DPCTL_DOUBLE, double, DPCTL_DOUBLE>(
+        q, n_elems, double(-0.5));
+    EXPECT_TRUE(worked);
+}
+
+struct TestQueueSubmitNDRange : public ::testing::Test
+{
+    sycl::queue q;
+    size_t n_elems = 512;
+
+    TestQueueSubmitNDRange() : q(sycl::default_selector{}) {}
+    ~TestQueueSubmitNDRange() {}
+};
+
+TEST_F(TestQueueSubmitNDRange, ChkSubmitNDRangeInt)
+{
+    bool worked = false;
+    worked = common_submit_ndrange_fn<int, DPCTL_INT>(q, n_elems);
+    EXPECT_TRUE(worked);
+}
+
+TEST_F(TestQueueSubmitNDRange, ChkSubmitNDRangeUnsignedInt)
+{
+    bool worked = false;
+    worked =
+        common_submit_ndrange_fn<unsigned int, DPCTL_UNSIGNED_INT>(q, n_elems);
+    EXPECT_TRUE(worked);
+}
+
+TEST_F(TestQueueSubmitNDRange, ChkSubmitNDRangeFloat)
+{
+    bool worked = false;
+    worked = common_submit_ndrange_fn<float, DPCTL_FLOAT>(q, n_elems);
+    EXPECT_TRUE(worked);
+}
+
+TEST_F(TestQueueSubmitNDRange, ChkSubmitNDRangeDouble)
+{
+    bool worked = false;
+    worked = common_submit_ndrange_fn<double, DPCTL_DOUBLE>(q, n_elems);
+    EXPECT_TRUE(worked);
+}
+
+#endif