diff --git a/sycl/include/CL/sycl/ONEAPI/reduction.hpp b/sycl/include/CL/sycl/ONEAPI/reduction.hpp
index 26e05b56f4263..0153d8086a20c 100644
--- a/sycl/include/CL/sycl/ONEAPI/reduction.hpp
+++ b/sycl/include/CL/sycl/ONEAPI/reduction.hpp
@@ -8,7 +8,7 @@
 
 #pragma once
 
-#include "CL/sycl/ONEAPI/accessor_property_list.hpp"
+#include <CL/sycl/ONEAPI/accessor_property_list.hpp>
 #include <CL/sycl/accessor.hpp>
 #include <CL/sycl/atomic.hpp>
 #include <CL/sycl/handler.hpp>
@@ -250,78 +250,100 @@ class reducer
+  template <access::address_space Space>
+  static constexpr memory_scope getMemoryScope() {
+    return Space == access::address_space::local_space
+               ? memory_scope::work_group
+               : memory_scope::device;
+  }
+
+public:
   /// Atomic ADD operation: *ReduVarPtr += MValue;
-  template <typename _T = T, class _BinaryOperation = BinaryOperation>
+  template <access::address_space Space = access::address_space::global_space,
+            typename _T = T, class _BinaryOperation = BinaryOperation>
   enable_if_t<std::is_same<typename remove_AS<_T>::type, T>::value &&
-              IsReduOptForFastAtomicFetch<T, _BinaryOperation>::value &&
-              sycl::detail::IsPlus<T, _BinaryOperation>::value>
+              (IsReduOptForFastAtomicFetch<T, _BinaryOperation>::value ||
+               IsReduOptForAtomic64Add<T, _BinaryOperation>::value) &&
+              sycl::detail::IsPlus<T, _BinaryOperation>::value &&
+              (Space == access::address_space::global_space ||
+               Space == access::address_space::local_space)>
   atomic_combine(_T *ReduVarPtr) const {
-    atomic<T, access::address_space::global_space>(global_ptr<T>(ReduVarPtr))
+    atomic_ref<T, memory_order::relaxed, getMemoryScope<Space>(), Space>(
+        *multi_ptr<T, Space>(ReduVarPtr))
         .fetch_add(MValue);
   }
 
   /// Atomic BITWISE OR operation: *ReduVarPtr |= MValue;
-  template <typename _T = T, class _BinaryOperation = BinaryOperation>
+  template <access::address_space Space = access::address_space::global_space,
+            typename _T = T, class _BinaryOperation = BinaryOperation>
   enable_if_t<std::is_same<typename remove_AS<_T>::type, T>::value &&
               IsReduOptForFastAtomicFetch<T, _BinaryOperation>::value &&
-              sycl::detail::IsBitOR<T, _BinaryOperation>::value>
+              sycl::detail::IsBitOR<T, _BinaryOperation>::value &&
+              (Space == access::address_space::global_space ||
+               Space == access::address_space::local_space)>
   atomic_combine(_T *ReduVarPtr) const {
-    atomic<T, access::address_space::global_space>(global_ptr<T>(ReduVarPtr))
+    atomic_ref<T, memory_order::relaxed, getMemoryScope<Space>(), Space>(
+        *multi_ptr<T, Space>(ReduVarPtr))
         .fetch_or(MValue);
   }
 
   /// Atomic BITWISE XOR operation: *ReduVarPtr ^= MValue;
-  template <typename _T = T, class _BinaryOperation = BinaryOperation>
+  template <access::address_space Space = access::address_space::global_space,
+            typename _T = T, class _BinaryOperation = BinaryOperation>
   enable_if_t<std::is_same<typename remove_AS<_T>::type, T>::value &&
               IsReduOptForFastAtomicFetch<T, _BinaryOperation>::value &&
-              sycl::detail::IsBitXOR<T, _BinaryOperation>::value>
+              sycl::detail::IsBitXOR<T, _BinaryOperation>::value &&
+              (Space == access::address_space::global_space ||
+               Space == access::address_space::local_space)>
   atomic_combine(_T *ReduVarPtr) const {
-    atomic<T, access::address_space::global_space>(global_ptr<T>(ReduVarPtr))
+    atomic_ref<T, memory_order::relaxed, getMemoryScope<Space>(), Space>(
+        *multi_ptr<T, Space>(ReduVarPtr))
        .fetch_xor(MValue);
   }
 
   /// Atomic BITWISE AND operation: *ReduVarPtr &= MValue;
-  template <typename _T = T, class _BinaryOperation = BinaryOperation>
+  template <access::address_space Space = access::address_space::global_space,
+            typename _T = T, class _BinaryOperation = BinaryOperation>
   enable_if_t<std::is_same<typename remove_AS<_T>::type, T>::value &&
               IsReduOptForFastAtomicFetch<T, _BinaryOperation>::value &&
-              sycl::detail::IsBitAND<T, _BinaryOperation>::value>
+              sycl::detail::IsBitAND<T, _BinaryOperation>::value &&
+              (Space == access::address_space::global_space ||
+               Space == access::address_space::local_space)>
   atomic_combine(_T *ReduVarPtr) const {
-    atomic<T, access::address_space::global_space>(global_ptr<T>(ReduVarPtr))
+    atomic_ref<T, memory_order::relaxed, getMemoryScope<Space>(), Space>(
+        *multi_ptr<T, Space>(ReduVarPtr))
         .fetch_and(MValue);
   }
 
   /// Atomic MIN operation: *ReduVarPtr = ONEAPI::minimum(*ReduVarPtr, MValue);
-  template <typename _T = T, class _BinaryOperation = BinaryOperation>
+  template <access::address_space Space = access::address_space::global_space,
+            typename _T = T, class _BinaryOperation = BinaryOperation>
   enable_if_t<std::is_same<typename remove_AS<_T>::type, T>::value &&
               IsReduOptForFastAtomicFetch<T, _BinaryOperation>::value &&
-              sycl::detail::IsMinimum<T, _BinaryOperation>::value>
+              sycl::detail::IsMinimum<T, _BinaryOperation>::value &&
+              (Space == access::address_space::global_space ||
+               Space == access::address_space::local_space)>
   atomic_combine(_T *ReduVarPtr) const {
-    atomic<T, access::address_space::global_space>(global_ptr<T>(ReduVarPtr))
+    atomic_ref<T, memory_order::relaxed, getMemoryScope<Space>(), Space>(
+        *multi_ptr<T, Space>(ReduVarPtr))
         .fetch_min(MValue);
   }
 
   /// Atomic MAX operation: *ReduVarPtr = ONEAPI::maximum(*ReduVarPtr, MValue);
-  template <typename _T = T, class _BinaryOperation = BinaryOperation>
+  template <access::address_space Space = access::address_space::global_space,
+            typename _T = T, class _BinaryOperation = BinaryOperation>
   enable_if_t<std::is_same<typename remove_AS<_T>::type, T>::value &&
               IsReduOptForFastAtomicFetch<T, _BinaryOperation>::value &&
-              sycl::detail::IsMaximum<T, _BinaryOperation>::value>
+              sycl::detail::IsMaximum<T, _BinaryOperation>::value &&
+              (Space == access::address_space::global_space ||
+               Space == access::address_space::local_space)>
   atomic_combine(_T *ReduVarPtr) const {
-    atomic<T, access::address_space::global_space>(global_ptr<T>(ReduVarPtr))
+    atomic_ref<T, memory_order::relaxed, getMemoryScope<Space>(), Space>(
+        *multi_ptr<T, Space>(ReduVarPtr))
         .fetch_max(MValue);
   }
 
-  /// Atomic ADD operation: for floating point using atomic_ref
-  template <typename _T = T, class _BinaryOperation = BinaryOperation>
-  enable_if_t<std::is_same<typename remove_AS<_T>::type, T>::value &&
-              IsReduOptForAtomic64Add<T, _BinaryOperation>::value>
-  atomic_combine(_T *ReduVarPtr) const {
-
-    atomic_ref<T, sycl::ONEAPI::memory_order::relaxed,
-               sycl::ONEAPI::memory_scope::device,
-               access::address_space::global_space>(
-        *global_ptr<T>(ReduVarPtr)) += MValue;
-  }
-
   T MValue;
 };
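The hunk above folds the old floating-point `atomic_ref` overload into the generic `atomic_combine` and parameterizes it by destination address space: a combine into work-group local memory only needs `memory_scope::work_group`, while a combine into global memory needs `memory_scope::device`. A minimal standalone sketch of that mapping, assuming the `ONEAPI::atomic_ref` interface used in the diff (the helper name `relaxed_add` is hypothetical and not part of the patch):

```cpp
#include <CL/sycl.hpp>
using namespace cl::sycl;

// Device-side helper: relaxed atomic += into local or global memory,
// picking the narrowest memory scope that still covers the destination.
template <access::address_space Space, typename T>
void relaxed_add(T *Slot, T Val) {
  constexpr auto Scope = (Space == access::address_space::local_space)
                             ? ONEAPI::memory_scope::work_group
                             : ONEAPI::memory_scope::device;
  ONEAPI::atomic_ref<T, ONEAPI::memory_order::relaxed, Scope, Space>(*Slot)
      .fetch_add(Val);
}
```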
@@ -361,8 +383,6 @@ class reduction_impl : private reduction_impl_base {
                ONEAPI::accessor_property_list<>>;
   static constexpr int accessor_dim = Dims;
   static constexpr int buffer_dim = (Dims == 0) ? 1 : Dims;
-  using local_accessor_type =
-      accessor<T, buffer_dim, access::mode::read_write, access::target::local>;
 
   static constexpr bool has_atomic_add_float64 =
       IsReduOptForAtomic64Add<T, BinaryOperation>::value;
@@ -612,8 +632,15 @@ class reduction_impl : private reduction_impl_base {
 #endif
   }
 
-  static local_accessor_type getReadWriteLocalAcc(size_t Size, handler &CGH) {
-    return local_accessor_type(Size, CGH);
+  /// Creates and returns a local accessor with the \p Size elements.
+  /// By default the local accessor elements have the same type as the
+  /// elements processed by the reduction, but that may be altered by
+  /// specifying \p _T explicitly if a different element type is needed.
+  template <typename _T = result_type>
+  static accessor<_T, buffer_dim, access::mode::read_write,
+                  access::target::local>
+  getReadWriteLocalAcc(size_t Size, handler &CGH) {
+    return {Size, CGH};
   }
 
   accessor<T, buffer_dim, access::mode::read_write,
@@ -686,6 +713,16 @@ class reduction_impl : private reduction_impl_base {
     return createHandlerWiredReadWriteAccessor(CGH, *MOutBufPtr);
   }
 
+  accessor<int, 1, access::mode::read_write, access::target::global_buffer,
+           access::placeholder::false_t>
+  getReadWriteAccessorToInitializedGroupsCounter(handler &CGH) {
+    auto CounterMem = std::make_shared<int>(0);
+    CGH.addReduction(CounterMem);
+    auto CounterBuf = std::make_shared<buffer<int, 1>>(CounterMem.get(), 1);
+    CGH.addReduction(CounterBuf);
+    return {*CounterBuf, CGH};
+  }
+
   bool hasUserDiscardWriteAccessor() { return MDWAcc != nullptr; }
 
   template <typename BufferT>
@@ -780,6 +817,219 @@ struct get_reduction_aux_kernel_name_t
 };
 
+template <int Dims, typename ReducerT, typename KernelFunc>
+void reductionLoop(const range<Dims> &Range, ReducerT &Reducer,
+                   const nd_item<1> &NdId, KernelFunc &F) {
+  size_t Start = NdId.get_global_id(0);
+  size_t End = Range.size();
+  size_t Stride = NdId.get_global_range(0);
+  for (size_t I = Start; I < End; I += Stride)
+    F(sycl::detail::getDelinearizedId(Range, I), Reducer);
+}
+
+template <typename KernelName, typename KernelType, int Dims, class Reduction>
+std::enable_if_t<Reduction::has_fast_atomics>
+reduCGFuncImpl(handler &CGH, KernelType KernelFunc, const range<Dims> &Range,
+               const nd_range<1> &NDRange, Reduction &Redu) {
+  auto Out = Redu.getReadWriteAccessorToInitializedMem(CGH);
+  auto GroupSum = Reduction::getReadWriteLocalAcc(1, CGH);
+  using Name =
+      typename get_reduction_main_kernel_name_t<
+          KernelName, KernelType, Reduction::is_usm,
+          Reduction::has_atomic_add_float64, decltype(Out)>::name;
+  CGH.parallel_for<Name>(NDRange, [=](nd_item<1> NDId) {
+    // Call user's functions. Reducer.MValue gets initialized there.
+    typename Reduction::reducer_type Reducer;
+    reductionLoop(Range, Reducer, NDId, KernelFunc);
+
+    auto LID = NDId.get_local_id(0);
+    if (LID == 0)
+      GroupSum[0] = Reducer.getIdentity();
+    sycl::detail::workGroupBarrier();
+    Reducer.template atomic_combine<access::address_space::local_space>(
+        &GroupSum[0]);
+
+    sycl::detail::workGroupBarrier();
+    if (LID == 0) {
+      Reducer.MValue = GroupSum[0];
+      Reducer.template atomic_combine(Reduction::getOutPointer(Out));
+    }
+  });
+}
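`reductionLoop` decouples the ND-range size from the number of elements: it walks the linearized index space with a grid-stride loop and delinearizes each index before invoking the user's lambda. The fast-atomics kernel above then funnels the whole work-group through a single local-memory cell so that only one global atomic is issued per group. A host-side sketch of the traversal (not part of the patch; sizes are hypothetical):

```cpp
#include <cstdio>

// Four work-items (global range 4) covering ten elements: work-item W
// processes linear indices W, W+4, W+8, ... exactly as reductionLoop does.
int main() {
  const unsigned End = 10, Stride = 4; // Range.size(), get_global_range(0)
  for (unsigned WI = 0; WI < Stride; ++WI) {
    std::printf("work-item %u:", WI);
    for (unsigned I = WI; I < End; I += Stride)
      std::printf(" %u", I);
    std::printf("\n");
  }
}
```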
+
+template <typename KernelName, typename KernelType, int Dims, class Reduction>
+std::enable_if_t<Reduction::has_fast_reduce && !Reduction::has_fast_atomics>
+reduCGFuncImpl(handler &CGH, KernelType KernelFunc, const range<Dims> &Range,
+               const nd_range<1> &NDRange, Reduction &Redu) {
+  size_t WGSize = NDRange.get_local_range().size();
+  size_t NWorkGroups = NDRange.get_group_range().size();
+
+  bool IsUpdateOfUserVar = !Reduction::is_usm && !Redu.initializeToIdentity();
+  auto PartialSums = Redu.getWriteAccForPartialReds(NWorkGroups, CGH);
+  auto Out =
+      (NWorkGroups == 1) ? PartialSums : Redu.getWriteAccForPartialReds(1, CGH);
+  auto NWorkGroupsFinished =
+      Redu.getReadWriteAccessorToInitializedGroupsCounter(CGH);
+  auto DoReducePartialSumsInLastWG =
+      Reduction::template getReadWriteLocalAcc<int>(1, CGH);
+
+  using Name =
+      typename get_reduction_main_kernel_name_t<
+          KernelName, KernelType, Reduction::is_usm, false,
+          decltype(Out)>::name;
+  CGH.parallel_for<Name>(NDRange, [=](nd_item<1> NDId) {
+    // Call user's functions. Reducer.MValue gets initialized there.
+    typename Reduction::reducer_type Reducer;
+    reductionLoop(Range, Reducer, NDId, KernelFunc);
+
+    typename Reduction::binary_operation BOp;
+    auto Group = NDId.get_group();
+    Reducer.MValue = reduce_over_group(Group, Reducer.MValue, BOp);
+
+    size_t LID = NDId.get_local_id(0);
+    if (LID == 0) {
+      if (NWorkGroups == 1 && IsUpdateOfUserVar)
+        Reducer.MValue = BOp(Reducer.MValue, *Reduction::getOutPointer(Out));
+      // If NWorkGroups == 1, then PartialSums and Out point to the same memory.
+      Reduction::getOutPointer(PartialSums)[NDId.get_group_linear_id()] =
+          Reducer.MValue;
+
+      auto NFinished =
+          atomic_ref<int, memory_order::relaxed, memory_scope::device,
+                     access::address_space::global_space>(
+              NWorkGroupsFinished[0]);
+      DoReducePartialSumsInLastWG[0] =
+          ++NFinished == NWorkGroups && NWorkGroups > 1;
+    }
+
+    sycl::detail::workGroupBarrier();
+    if (DoReducePartialSumsInLastWG[0]) {
+      auto LocalSum = Reducer.getIdentity();
+      for (size_t I = LID; I < NWorkGroups; I += WGSize)
+        LocalSum = BOp(LocalSum, PartialSums[I]);
+      Reducer.MValue = reduce_over_group(Group, LocalSum, BOp);
+
+      if (LID == 0) {
+        if (IsUpdateOfUserVar)
+          Reducer.MValue = BOp(Reducer.MValue, *Reduction::getOutPointer(Out));
+        Reduction::getOutPointer(Out)[0] = Reducer.MValue;
+      }
+    }
+  });
+}
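This kernel makes the reduction single-pass via a cross-work-group completion counter: each group publishes its partial sum, atomically bumps `NWorkGroupsFinished`, and the group whose increment reaches `NWorkGroups` reduces all partial sums in place. The same idiom, modeled with `std::thread` for illustration only (names like `NWorkers` are hypothetical; the default sequentially consistent atomics make the partials visible to the last finisher):

```cpp
#include <atomic>
#include <cstdio>
#include <numeric>
#include <thread>
#include <vector>

int main() {
  constexpr int NWorkers = 4;
  std::vector<int> Partials(NWorkers);
  std::atomic<int> NFinished{0};
  int Result = 0;

  std::vector<std::thread> Pool;
  for (int W = 0; W < NWorkers; ++W)
    Pool.emplace_back([&, W] {
      Partials[W] = (W + 1) * 10;  // stand-in for this group's partial sum
      if (++NFinished == NWorkers) // only the last finisher runs the tail
        Result = std::accumulate(Partials.begin(), Partials.end(), 0);
    });
  for (auto &T : Pool)
    T.join();
  std::printf("%d\n", Result); // prints 100
}
```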
+
+template <typename KernelName, typename KernelType, int Dims, class Reduction>
+std::enable_if_t<!Reduction::has_fast_reduce && !Reduction::has_fast_atomics>
+reduCGFuncImpl(handler &CGH, KernelType KernelFunc, const range<Dims> &Range,
+               const nd_range<1> &NDRange, Reduction &Redu) {
+  size_t WGSize = NDRange.get_local_range().size();
+  size_t NWorkGroups = NDRange.get_group_range().size();
+
+  bool IsUpdateOfUserVar = !Reduction::is_usm && !Redu.initializeToIdentity();
+  auto PartialSums = Redu.getWriteAccForPartialReds(NWorkGroups, CGH);
+  auto Out =
+      (NWorkGroups == 1) ? PartialSums : Redu.getWriteAccForPartialReds(1, CGH);
+  auto LocalReds = Reduction::getReadWriteLocalAcc(WGSize + 1, CGH);
+  auto NWorkGroupsFinished =
+      Redu.getReadWriteAccessorToInitializedGroupsCounter(CGH);
+  auto DoReducePartialSumsInLastWG =
+      Reduction::template getReadWriteLocalAcc<int>(1, CGH);
+
+  auto Identity = Redu.getIdentity();
+  auto BOp = Redu.getBinaryOperation();
+  using Name =
+      typename get_reduction_main_kernel_name_t<
+          KernelName, KernelType, Reduction::is_usm, false,
+          decltype(Out)>::name;
+  CGH.parallel_for<Name>(NDRange, [=](nd_item<1> NDId) {
+    // Call user's functions. Reducer.MValue gets initialized there.
+    typename Reduction::reducer_type Reducer(Identity, BOp);
+    reductionLoop(Range, Reducer, NDId, KernelFunc);
+
+    // Copy the element to local memory to prepare it for tree-reduction.
+    size_t LID = NDId.get_local_linear_id();
+    LocalReds[LID] = Reducer.MValue;
+    if (LID == 0)
+      LocalReds[WGSize] = Identity;
+    sycl::detail::workGroupBarrier();
+
+    // Tree-reduction: reduce the local array LocalReds[:] to LocalReds[0].
+    // LocalReds[WGSize] accumulates the last/odd elements when the step
+    // of the tree-reduction loop is not even.
+    size_t PrevStep = WGSize;
+    for (size_t CurStep = PrevStep >> 1; CurStep > 0; CurStep >>= 1) {
+      if (LID < CurStep)
+        LocalReds[LID] = BOp(LocalReds[LID], LocalReds[LID + CurStep]);
+      else if (LID == CurStep && (PrevStep & 0x1))
+        LocalReds[WGSize] = BOp(LocalReds[WGSize], LocalReds[PrevStep - 1]);
+      sycl::detail::workGroupBarrier();
+      PrevStep = CurStep;
+    }
+
+    if (LID == 0) {
+      auto V = BOp(LocalReds[0], LocalReds[WGSize]);
+      if (NWorkGroups == 1 && IsUpdateOfUserVar)
+        V = BOp(V, *Reduction::getOutPointer(Out));
+      // If NWorkGroups == 1, then PartialSums and Out point to the same memory.
+      Reduction::getOutPointer(PartialSums)[NDId.get_group_linear_id()] = V;
+
+      auto NFinished =
+          atomic_ref<int, memory_order::relaxed, memory_scope::device,
+                     access::address_space::global_space>(
+              NWorkGroupsFinished[0]);
+      DoReducePartialSumsInLastWG[0] =
+          ++NFinished == NWorkGroups && NWorkGroups > 1;
+    }
+
+    sycl::detail::workGroupBarrier();
+    if (DoReducePartialSumsInLastWG[0]) {
+      auto LocalSum = Identity;
+      for (size_t I = LID; I < NWorkGroups; I += WGSize)
+        LocalSum = BOp(LocalSum, Reduction::getOutPointer(PartialSums)[I]);
+
+      LocalReds[LID] = LocalSum;
+      if (LID == 0)
+        LocalReds[WGSize] = Identity;
+      sycl::detail::workGroupBarrier();
+
+      size_t PrevStep = WGSize;
+      for (size_t CurStep = PrevStep >> 1; CurStep > 0; CurStep >>= 1) {
+        if (LID < CurStep)
+          LocalReds[LID] = BOp(LocalReds[LID], LocalReds[LID + CurStep]);
+        else if (LID == CurStep && (PrevStep & 0x1))
+          LocalReds[WGSize] = BOp(LocalReds[WGSize], LocalReds[PrevStep - 1]);
+        sycl::detail::workGroupBarrier();
+        PrevStep = CurStep;
+      }
+      if (LID == 0) {
+        auto V = BOp(LocalReds[0], LocalReds[WGSize]);
+        if (IsUpdateOfUserVar)
+          V = BOp(V, *Reduction::getOutPointer(Out));
+        Reduction::getOutPointer(Out)[0] = V;
+      }
+    }
+  });
+}
+
+template <typename KernelName, typename KernelType, int Dims, class Reduction>
+void reduCGFunc(handler &CGH, KernelType KernelFunc, const range<Dims> &Range,
+                size_t MaxWGSize, uint32_t NumConcurrentWorkGroups,
+                Reduction &Redu) {
+  size_t NWorkItems = Range.size();
+  size_t WGSize = std::min(NWorkItems, MaxWGSize);
+  size_t NWorkGroups = NWorkItems / WGSize;
+  if (NWorkItems % WGSize)
+    NWorkGroups++;
+  size_t MaxNWorkGroups = NumConcurrentWorkGroups;
+  NWorkGroups = std::min(NWorkGroups, MaxNWorkGroups);
+  size_t NDRItems = NWorkGroups * WGSize;
+  nd_range<1> NDRange{range<1>{NDRItems}, range<1>{WGSize}};
+
+  reduCGFuncImpl<KernelName>(CGH, KernelFunc, Range, NDRange, Redu);
+}
+
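`reduCGFunc` rounds the work-group count up (ceiling division) and then clamps it to the concurrency estimate, so the launch never oversubscribes the device and the per-item grid-stride loop absorbs the remaining elements. Worked through with hypothetical inputs (not taken from the patch):

```cpp
#include <algorithm>
#include <cstdint>
#include <cstdio>

int main() {
  size_t NWorkItems = 1000000, MaxWGSize = 256; // hypothetical values
  uint32_t NumConcurrentWorkGroups = 512;       // e.g. 64 EUs * 8 threads
  size_t WGSize = std::min(NWorkItems, MaxWGSize);         // 256
  size_t NWorkGroups = (NWorkItems + WGSize - 1) / WGSize; // 3907
  NWorkGroups = std::min<size_t>(NWorkGroups, NumConcurrentWorkGroups); // 512
  // nd_range<1>{512 * 256, 256}: each work-item covers ~8 elements.
  std::printf("nd_range{%zu, %zu}\n", NWorkGroups * WGSize, WGSize);
}
```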
 /// Implements a command group function that enqueues a kernel that calls
 /// user's lambda function KernelFunc and also does one iteration of reduction
 /// of elements computed in user's lambda function.
diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp
index 1fd0bfe122453..001cdeec3bdfd 100644
--- a/sycl/include/CL/sycl/handler.hpp
+++ b/sycl/include/CL/sycl/handler.hpp
@@ -209,6 +209,11 @@ class reduction_impl;
 using cl::sycl::detail::enable_if_t;
 using cl::sycl::detail::queue_impl;
 
+template <typename KernelName, typename KernelType, int Dims, class Reduction>
+void reduCGFunc(handler &CGH, KernelType KernelFunc, const range<Dims> &Range,
+                size_t MaxWGSize, uint32_t NumConcurrentWorkGroups,
+                Reduction &Redu);
+
 template <typename KernelName, typename KernelType, int Dims, class Reduction>
 enable_if_t<Reduction::has_atomic_add_float64>
 reduCGFuncAtomic64(handler &CGH, KernelType KernelFunc,
@@ -262,6 +267,9 @@
 reduSaveFinalResultToUserMemHelper(std::vector<event> &Events,
                                    std::shared_ptr<queue_impl> Queue,
                                    bool IsHost, Reduction &Redu, RestT... Rest);
 
+__SYCL_EXPORT uint32_t
+reduGetMaxNumConcurrentWorkGroups(std::shared_ptr<queue_impl> Queue);
+
 __SYCL_EXPORT size_t reduGetMaxWGSize(std::shared_ptr<queue_impl> Queue,
                                       size_t LocalMemBytesPerWorkItem);
@@ -501,23 +509,6 @@ class __SYCL_EXPORT handler {
     }
   }
 
-  static id<1> getDelinearizedIndex(const range<1>, const size_t Index) {
-    return {Index};
-  }
-
-  static id<2> getDelinearizedIndex(const range<2> Range, const size_t Index) {
-    size_t x = Index % Range[1];
-    size_t y = Index / Range[1];
-    return {y, x};
-  }
-
-  static id<3> getDelinearizedIndex(const range<3> Range, const size_t Index) {
-    size_t z = Index / (Range[1] * Range[2]);
-    size_t y = (Index / Range[2]) % Range[1];
-    size_t x = Index % Range[2];
-    return {z, y, x};
-  }
-
   /// Stores lambda to the template-free object
   ///
   /// Also initializes kernel name, list of arguments and requirements using
@@ -593,9 +584,9 @@ class __SYCL_EXPORT handler {
                                      IsPHSrc, IsPHDst>>
         (LinearizedRange, [=](id<1> Id) {
       size_t Index = Id[0];
-      id<DimSrc> SrcIndex = getDelinearizedIndex(Src.get_range(), Index);
-      id<DimDst> DstIndex = getDelinearizedIndex(Dst.get_range(), Index);
-      Dst[DstIndex] = Src[SrcIndex];
+      id<DimSrc> SrcId = detail::getDelinearizedId(Src.get_range(), Index);
+      id<DimDst> DstId = detail::getDelinearizedId(Dst.get_range(), Index);
+      Dst[DstId] = Src[SrcId];
     });
     return true;
   }
@@ -1360,6 +1351,50 @@ class __SYCL_EXPORT handler {
 #endif
   }
 
+  /// Defines and invokes a SYCL kernel function for the specified range.
+  ///
+  /// The SYCL kernel function is defined as a lambda function or a named
+  /// function object type and is given an id for indexing in the index
+  /// space defined by the range \p Range.
+  /// The parameter \p Redu contains the object created by the reduction()
+  /// function and defines the type and operation used in the corresponding
+  /// argument of 'reducer' type passed to the lambda/functor function.
+  template <typename KernelName = detail::auto_name, typename KernelType,
+            int Dims, typename Reduction>
+  void parallel_for(range<Dims> Range, Reduction Redu,
+                    _KERNELFUNCPARAM(KernelFunc)) {
+    shared_ptr_class<detail::queue_impl> QueueCopy = MQueue;
+
+    // Before running the kernels, check that device has enough local memory
+    // to hold local arrays required for the tree-reduction algorithm.
+    constexpr bool IsTreeReduction =
+        !Reduction::has_fast_reduce && !Reduction::has_fast_atomics;
+    size_t OneElemSize =
+        IsTreeReduction ? sizeof(typename Reduction::result_type) : 0;
+    uint32_t NumConcurrentWorkGroups =
+#ifdef __SYCL_REDUCTION_NUM_CONCURRENT_WORKGROUPS
+        __SYCL_REDUCTION_NUM_CONCURRENT_WORKGROUPS;
+#else
+        ONEAPI::detail::reduGetMaxNumConcurrentWorkGroups(MQueue);
+#endif
+    // TODO: currently the maximal work group size is determined for the given
+    // queue/device, while it is safer to use queries to the kernel pre-compiled
+    // for the device.
+    size_t MaxWGSize = ONEAPI::detail::reduGetMaxWGSize(MQueue, OneElemSize);
+    ONEAPI::detail::reduCGFunc<KernelName>(*this, KernelFunc, Range, MaxWGSize,
+                                           NumConcurrentWorkGroups, Redu);
+    if (Reduction::is_usm ||
+        (Reduction::has_fast_atomics && Redu.initializeToIdentity()) ||
+        (!Reduction::has_fast_atomics && Redu.hasUserDiscardWriteAccessor())) {
+      this->finalize();
+      handler CopyHandler(QueueCopy, MIsHost);
+      CopyHandler.saveCodeLoc(MCodeLoc);
+      ONEAPI::detail::reduSaveFinalResultToUserMem<KernelName>(CopyHandler,
+                                                               Redu);
+      MLastEvent = CopyHandler.finalize();
+    }
+  }
+
   /// Implements parallel_for() accepting nd_range \p Range and one reduction
   /// object. This version uses fast sycl::atomic operations to update reduction
   /// variable at the end of each work-group work.
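With this overload in place, a reduction can be launched directly over a `range` instead of a hand-built `nd_range`; the runtime picks the work-group size and count itself. A usage sketch under the ONEAPI reduction API visible in this diff (buffer names and the kernel name `SumRange` are illustrative, not from the patch):

```cpp
#include <CL/sycl.hpp>
#include <vector>
using namespace cl::sycl;

int main() {
  constexpr size_t N = 1024;
  std::vector<int> Data(N, 1);
  int Sum = 0;
  {
    queue Q;
    buffer<int> In{Data.data(), range<1>{N}};
    buffer<int> Out{&Sum, range<1>{1}};
    Q.submit([&](handler &CGH) {
      auto InAcc = In.get_access<access::mode::read>(CGH);
      auto OutAcc = Out.get_access<access::mode::read_write>(CGH);
      // The kernel takes the delinearized id plus a reducer reference.
      CGH.parallel_for<class SumRange>(
          range<1>{N}, ONEAPI::reduction(OutAcc, ONEAPI::plus<int>()),
          [=](id<1> I, auto &Reducer) { Reducer += InAcc[I]; });
    });
  } // buffers synchronize on destruction; Sum == 1024
}
```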
diff --git a/sycl/include/CL/sycl/id.hpp b/sycl/include/CL/sycl/id.hpp
index 151657aa661e8..8184d661e5551 100644
--- a/sycl/include/CL/sycl/id.hpp
+++ b/sycl/include/CL/sycl/id.hpp
@@ -254,6 +254,25 @@ size_t getOffsetForId(range<Dims> Range, id<Dims> Id,
     offset = offset * Range[i] + Offset[i] + Id[i];
   return offset;
 }
+
+inline id<1> getDelinearizedId(const range<1> &, size_t Index) {
+  return {Index};
+}
+
+inline id<2> getDelinearizedId(const range<2> &Range, size_t Index) {
+  size_t X = Index % Range[1];
+  size_t Y = Index / Range[1];
+  return {Y, X};
+}
+
+inline id<3> getDelinearizedId(const range<3> &Range, size_t Index) {
+  size_t D1D2 = Range[1] * Range[2];
+  size_t Z = Index / D1D2;
+  size_t ZRest = Index % D1D2;
+  size_t Y = ZRest / Range[2];
+  size_t X = ZRest % Range[2];
+  return {Z, Y, X};
+}
 } // namespace detail
 
 // C++ feature test macros are supported by all supported compilers
diff --git a/sycl/source/detail/reduction.cpp b/sycl/source/detail/reduction.cpp
index d603d80125995..bb1d19441f9a4 100644
--- a/sycl/source/detail/reduction.cpp
+++ b/sycl/source/detail/reduction.cpp
@@ -48,6 +48,20 @@ __SYCL_EXPORT size_t reduComputeWGSize(size_t NWorkItems, size_t MaxWGSize,
   return WGSize;
 }
 
+// Returns the estimated number of physical threads on the device associated
+// with the given queue.
+__SYCL_EXPORT uint32_t reduGetMaxNumConcurrentWorkGroups(
+    std::shared_ptr<sycl::detail::queue_impl> Queue) {
+  device Dev = Queue->get_device();
+  uint32_t NumThreads = Dev.get_info<info::device::max_compute_units>();
+  // The heuristics require additional tuning for various devices and vendors.
+  // For now, assuming that each execution unit has about 8 working threads
+  // gives good results on some known/supported GPU devices.
+  if (Dev.is_gpu())
+    NumThreads *= 8;
+  return NumThreads;
+}
+
 __SYCL_EXPORT size_t
 reduGetMaxWGSize(std::shared_ptr<queue_impl> Queue,
                  size_t LocalMemBytesPerWorkItem) {
diff --git a/sycl/test/abi/layout_array.cpp b/sycl/test/abi/layout_array.cpp
index 22e2c893ae9fb..f28d3af04fb30 100644
--- a/sycl/test/abi/layout_array.cpp
+++ b/sycl/test/abi/layout_array.cpp
@@ -9,9 +9,9 @@
 
 #include <CL/sycl.hpp>
 
-SYCL_EXTERNAL void range(sycl::range<2>) {}
+SYCL_EXTERNAL void id(sycl::id<2>) {}
 
-// CHECK: 0 | class sycl::range<2>
+// CHECK: 0 | class sycl::id<2>
 // CHECK-NEXT: 0 | class sycl::detail::array<2> (base)
 // CHECK-NEXT: 0 | size_t [2] common_array
 // CHECK-NEXT: | [sizeof=16, dsize=16, align=8,
@@ -19,9 +19,9 @@ SYCL_EXTERNAL void range(sycl::range<2>) {}
 
 //----------------------------
 
-SYCL_EXTERNAL void id(sycl::id<2>) {}
+SYCL_EXTERNAL void range(sycl::range<2>) {}
 
-// CHECK: 0 | class sycl::id<2>
+// CHECK: 0 | class sycl::range<2>
 // CHECK-NEXT: 0 | class sycl::detail::array<2> (base)
 // CHECK-NEXT: 0 | size_t [2] common_array
 // CHECK-NEXT: | [sizeof=16, dsize=16, align=8,
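A quick arithmetic check of the 3-D `getDelinearizedId` added to id.hpp above, using `range<3>{4, 5, 6}` and linear index 100 (values chosen purely for illustration):

```cpp
#include <cassert>
#include <cstddef>

int main() {
  const size_t Range[3] = {4, 5, 6};
  const size_t Index = 100;
  size_t D1D2 = Range[1] * Range[2]; // 30
  size_t Z = Index / D1D2;           // 3
  size_t ZRest = Index % D1D2;       // 10
  size_t Y = ZRest / Range[2];       // 1
  size_t X = ZRest % Range[2];       // 4
  // Linearizing id<3>{3, 1, 4} again recovers the original index.
  assert((Z * Range[1] + Y) * Range[2] + X == Index);
  (void)X;
  return 0;
}
```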
diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump
index 0683df8dcc193..bb37bb2c1af37 100644
--- a/sycl/test/abi/sycl_symbols_linux.dump
+++ b/sycl/test/abi/sycl_symbols_linux.dump
@@ -3674,6 +3674,7 @@
 _ZN2cl4sycl5queueC2ERKNS0_7contextERKNS0_6deviceERKNS0_13property_listE
 _ZN2cl4sycl5queueC2ERKNS0_7contextERKNS0_6deviceERKSt8functionIFvNS0_14exception_listEEERKNS0_13property_listE
 _ZN2cl4sycl6ONEAPI15filter_selectorC1ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
 _ZN2cl4sycl6ONEAPI15filter_selectorC2ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
+_ZN2cl4sycl6ONEAPI6detail33reduGetMaxNumConcurrentWorkGroupsESt10shared_ptrINS0_6detail10queue_implEE
 _ZN2cl4sycl6ONEAPI6detail16reduGetMaxWGSizeESt10shared_ptrINS0_6detail10queue_implEEm
 _ZN2cl4sycl6ONEAPI6detail17reduComputeWGSizeEmmRm
 _ZN2cl4sycl6detail10build_implERKNS0_13kernel_bundleILNS0_12bundle_stateE0EEERKSt6vectorINS0_6deviceESaIS8_EERKNS0_13property_listE