Added Cilk runtime library (libcilkrts) into GCC.
From-SVN: r204173
This commit is contained in:
parent
939b37da6d
commit
3038054c68
112 changed files with 58908 additions and 1 deletions
|
@ -1,3 +1,12 @@
|
|||
2013-10-29 Balaji V. Iyer <balaji.v.iyer@intel.com>
|
||||
|
||||
* Makefile.def: Add libcilkrts to target_modules. Make libcilkrts
|
||||
depend on libstdc++ and libgcc.
|
||||
* configure: Regenerate.
|
||||
* configure.ac: Added libcilkrts to target binaries. Also, restrict
|
||||
libcilkrts for POSIX and i*86, and x86_64 architectures.
|
||||
* Makefile.in: Added libcilkrts related fields to support building it.
|
||||
|
||||
2013-10-26 Jeff Law <law@redhat.com>
|
||||
|
||||
* Makefile.def (target_modules): Remove libmudflap
|
||||
|
|
|
@ -125,6 +125,8 @@ target_modules = { module= libvtv;
|
|||
bootstrap=true;
|
||||
lib_path=.libs;
|
||||
raw_cxx=true; };
|
||||
target_modules = { module= libcilkrts;
|
||||
lib_path=.libs; };
|
||||
target_modules = { module= libssp; lib_path=.libs; };
|
||||
target_modules = { module= newlib; };
|
||||
target_modules = { module= libgcc; bootstrap=true; no_check=true; };
|
||||
|
@ -491,6 +493,7 @@ dependencies = { module=all-m4; on=all-build-texinfo; };
|
|||
// on libgcc and newlib/libgloss.
|
||||
lang_env_dependencies = { module=libjava; cxx=true; };
|
||||
lang_env_dependencies = { module=libitm; cxx=true; };
|
||||
lang_env_dependencies = { module=libcilkrts; cxx=true; };
|
||||
lang_env_dependencies = { module=newlib; no_c=true; };
|
||||
lang_env_dependencies = { module=libgloss; no_c=true; };
|
||||
lang_env_dependencies = { module=libgcc; no_gcc=true; no_c=true; };
|
||||
|
@ -531,6 +534,8 @@ dependencies = { module=install-target-libsanitizer; on=install-target-libstdc++
|
|||
dependencies = { module=install-target-libsanitizer; on=install-target-libgcc; };
|
||||
dependencies = { module=install-target-libvtv; on=install-target-libstdc++-v3; };
|
||||
dependencies = { module=install-target-libvtv; on=install-target-libgcc; };
|
||||
dependencies = { module=install-target-libcilkrts; on=install-target-libstdc++-v3; };
|
||||
dependencies = { module=install-target-libcilkrts; on=install-target-libgcc; };
|
||||
dependencies = { module=install-target-libjava; on=install-target-libgcc; };
|
||||
dependencies = { module=install-target-libitm; on=install-target-libgcc; };
|
||||
dependencies = { module=install-target-libobjc; on=install-target-libgcc; };
|
||||
|
|
1000
Makefile.in
1000
Makefile.in
File diff suppressed because it is too large
Load diff
20
configure
vendored
20
configure
vendored
|
@ -2772,6 +2772,7 @@ target_libraries="target-libgcc \
|
|||
target-libgloss \
|
||||
target-newlib \
|
||||
target-libgomp \
|
||||
target-libcilkrts \
|
||||
target-libatomic \
|
||||
target-libitm \
|
||||
target-libstdc++-v3 \
|
||||
|
@ -3164,6 +3165,25 @@ $as_echo "yes" >&6; }
|
|||
fi
|
||||
fi
|
||||
|
||||
# Disable libcilkrts on unsupported systems.
|
||||
if test -d ${srcdir}/libcilkrts; then
|
||||
if test x$enable_libcilkrts = x; then
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for libcilkrts support" >&5
|
||||
$as_echo_n "checking for libcilkrts support... " >&6; }
|
||||
if (srcdir=${srcdir}/libcilkrts; \
|
||||
. ${srcdir}/configure.tgt; \
|
||||
test -n "$UNSUPPORTED")
|
||||
then
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
|
||||
$as_echo "no" >&6; }
|
||||
noconfigdirs="$noconfigdirs target-libcilkrts"
|
||||
else
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
|
||||
$as_echo "yes" >&6; }
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
# Disable libitm on unsupported systems.
|
||||
if test -d ${srcdir}/libitm; then
|
||||
if test x$enable_libitm = x; then
|
||||
|
|
17
configure.ac
17
configure.ac
|
@ -156,6 +156,7 @@ target_libraries="target-libgcc \
|
|||
target-libgloss \
|
||||
target-newlib \
|
||||
target-libgomp \
|
||||
target-libcilkrts \
|
||||
target-libatomic \
|
||||
target-libitm \
|
||||
target-libstdc++-v3 \
|
||||
|
@ -506,6 +507,22 @@ if test -d ${srcdir}/libatomic; then
|
|||
fi
|
||||
fi
|
||||
|
||||
# Disable libcilkrts on unsupported systems.
|
||||
if test -d ${srcdir}/libcilkrts; then
|
||||
if test x$enable_libcilkrts = x; then
|
||||
AC_MSG_CHECKING([for libcilkrts support])
|
||||
if (srcdir=${srcdir}/libcilkrts; \
|
||||
. ${srcdir}/configure.tgt; \
|
||||
test -n "$UNSUPPORTED")
|
||||
then
|
||||
AC_MSG_RESULT([no])
|
||||
noconfigdirs="$noconfigdirs target-libcilkrts"
|
||||
else
|
||||
AC_MSG_RESULT([yes])
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
# Disable libitm on unsupported systems.
|
||||
if test -d ${srcdir}/libitm; then
|
||||
if test x$enable_libitm = x; then
|
||||
|
|
105
libcilkrts/ChangeLog
Normal file
105
libcilkrts/ChangeLog
Normal file
|
@ -0,0 +1,105 @@
|
|||
2013-10-23 Balaji V. Iyer <balaji.v.iyer@intel.com>
|
||||
|
||||
* libcilkrts/Makefile.am: New file. Libcilkrts version 3902.
|
||||
* libcilkrts/Makefile.in: Likewise
|
||||
* libcilkrts/README: Likewise
|
||||
* libcilkrts/aclocal.m4: Likewise
|
||||
* libcilkrts/configure: Likewise
|
||||
* libcilkrts/configure.ac: Likewise
|
||||
* libcilkrts/include/cilk/cilk.h: Likewise
|
||||
* libcilkrts/include/cilk/cilk_api.h: Likewise
|
||||
* libcilkrts/include/cilk/cilk_api_linux.h: Likewise
|
||||
* libcilkrts/include/cilk/cilk_stub.h: Likewise
|
||||
* libcilkrts/include/cilk/cilk_undocumented.h: Likewise
|
||||
* libcilkrts/include/cilk/common.h: Likewise
|
||||
* libcilkrts/include/cilk/holder.h: Likewise
|
||||
* libcilkrts/include/cilk/hyperobject_base.h: Likewise
|
||||
* libcilkrts/include/cilk/metaprogramming.h: Likewise
|
||||
* libcilkrts/include/cilk/reducer.h: Likewise
|
||||
* libcilkrts/include/cilk/reducer_file.h: Likewise
|
||||
* libcilkrts/include/cilk/reducer_list.h: Likewise
|
||||
* libcilkrts/include/cilk/reducer_max.h: Likewise
|
||||
* libcilkrts/include/cilk/reducer_min.h: Likewise
|
||||
* libcilkrts/include/cilk/reducer_min_max.h: Likewise
|
||||
* libcilkrts/include/cilk/reducer_opadd.h: Likewise
|
||||
* libcilkrts/include/cilk/reducer_opand.h: Likewise
|
||||
* libcilkrts/include/cilk/reducer_opmul.h: Likewise
|
||||
* libcilkrts/include/cilk/reducer_opor.h: Likewise
|
||||
* libcilkrts/include/cilk/reducer_opxor.h: Likewise
|
||||
* libcilkrts/include/cilk/reducer_ostream.h: Likewise
|
||||
* libcilkrts/include/cilk/reducer_string.h: Likewise
|
||||
* libcilkrts/include/cilktools/cilkscreen.h: Likewise
|
||||
* libcilkrts/include/cilktools/cilkview.h: Likewise
|
||||
* libcilkrts/include/cilktools/fake_mutex.h: Likewise
|
||||
* libcilkrts/include/cilktools/lock_guard.h: Likewise
|
||||
* libcilkrts/include/internal/abi.h: Likewise
|
||||
* libcilkrts/include/internal/cilk_fake.h: Likewise
|
||||
* libcilkrts/include/internal/cilk_version.h: Likewise
|
||||
* libcilkrts/include/internal/inspector-abi.h: Likewise
|
||||
* libcilkrts/include/internal/metacall.h: Likewise
|
||||
* libcilkrts/include/internal/rev.mk: Likewise
|
||||
* libcilkrts/mk/cilk-version.mk: Likewise
|
||||
* libcilkrts/mk/unix-common.mk: Likewise
|
||||
* libcilkrts/runtime/acknowledgements.dox: Likewise
|
||||
* libcilkrts/runtime/bug.cpp: Likewise
|
||||
* libcilkrts/runtime/bug.h: Likewise
|
||||
* libcilkrts/runtime/c_reducers.c: Likewise
|
||||
* libcilkrts/runtime/cilk-abi-cilk-for.cpp: Likewise
|
||||
* libcilkrts/runtime/cilk-abi-vla-internal.c: Likewise
|
||||
* libcilkrts/runtime/cilk-abi-vla-internal.h: Likewise
|
||||
* libcilkrts/runtime/cilk-abi-vla.c: Likewise
|
||||
* libcilkrts/runtime/cilk-abi.c: Likewise
|
||||
* libcilkrts/runtime/cilk-ittnotify.h: Likewise
|
||||
* libcilkrts/runtime/cilk-tbb-interop.h: Likewise
|
||||
* libcilkrts/runtime/cilk_api.c: Likewise
|
||||
* libcilkrts/runtime/cilk_fiber-unix.cpp: Likewise
|
||||
* libcilkrts/runtime/cilk_fiber-unix.h: Likewise
|
||||
* libcilkrts/runtime/cilk_fiber.cpp: Likewise
|
||||
* libcilkrts/runtime/cilk_fiber.h: Likewise
|
||||
* libcilkrts/runtime/cilk_malloc.c: Likewise
|
||||
* libcilkrts/runtime/cilk_malloc.h: Likewise
|
||||
* libcilkrts/runtime/component.h: Likewise
|
||||
* libcilkrts/runtime/doxygen-layout.xml: Likewise
|
||||
* libcilkrts/runtime/doxygen.cfg: Likewise
|
||||
* libcilkrts/runtime/except-gcc.cpp: Likewise
|
||||
* libcilkrts/runtime/except-gcc.h: Likewise
|
||||
* libcilkrts/runtime/except.h: Likewise
|
||||
* libcilkrts/runtime/frame_malloc.c: Likewise
|
||||
* libcilkrts/runtime/frame_malloc.h: Likewise
|
||||
* libcilkrts/runtime/full_frame.c: Likewise
|
||||
* libcilkrts/runtime/full_frame.h: Likewise
|
||||
* libcilkrts/runtime/global_state.cpp: Likewise
|
||||
* libcilkrts/runtime/global_state.h: Likewise
|
||||
* libcilkrts/runtime/jmpbuf.c: Likewise
|
||||
* libcilkrts/runtime/jmpbuf.h: Likewise
|
||||
* libcilkrts/runtime/local_state.c: Likewise
|
||||
* libcilkrts/runtime/local_state.h: Likewise
|
||||
* libcilkrts/runtime/metacall_impl.c: Likewise
|
||||
* libcilkrts/runtime/metacall_impl.h: Likewise
|
||||
* libcilkrts/runtime/os-unix.c: Likewise
|
||||
* libcilkrts/runtime/os.h: Likewise
|
||||
* libcilkrts/runtime/os_mutex-unix.c: Likewise
|
||||
* libcilkrts/runtime/os_mutex.h: Likewise
|
||||
* libcilkrts/runtime/pedigrees.c: Likewise
|
||||
* libcilkrts/runtime/pedigrees.h: Likewise
|
||||
* libcilkrts/runtime/record-replay.cpp: Likewise
|
||||
* libcilkrts/runtime/record-replay.h: Likewise
|
||||
* libcilkrts/runtime/reducer_impl.cpp: Likewise
|
||||
* libcilkrts/runtime/reducer_impl.h: Likewise
|
||||
* libcilkrts/runtime/rts-common.h: Likewise
|
||||
* libcilkrts/runtime/scheduler.c: Likewise
|
||||
* libcilkrts/runtime/scheduler.h: Likewise
|
||||
* libcilkrts/runtime/signal_node.c: Likewise
|
||||
* libcilkrts/runtime/signal_node.h: Likewise
|
||||
* libcilkrts/runtime/spin_mutex.c: Likewise
|
||||
* libcilkrts/runtime/spin_mutex.h: Likewise
|
||||
* libcilkrts/runtime/stacks.h: Likewise
|
||||
* libcilkrts/runtime/stats.c: Likewise
|
||||
* libcilkrts/runtime/stats.h: Likewise
|
||||
* libcilkrts/runtime/symbol_test.c: Likewise
|
||||
* libcilkrts/runtime/sysdep-unix.c: Likewise
|
||||
* libcilkrts/runtime/sysdep.h: Likewise
|
||||
* libcilkrts/runtime/unix_symbols.t: Likewise
|
||||
* libcilkrts/runtime/worker_mutex.c: Likewise
|
||||
* libcilkrts/runtime/worker_mutex.h: Likewise
|
||||
|
173
libcilkrts/Makefile.am
Normal file
173
libcilkrts/Makefile.am
Normal file
|
@ -0,0 +1,173 @@
|
|||
# @copyright
|
||||
# Copyright (C) 2011, 2013, Intel Corporation
|
||||
# All rights reserved.
|
||||
#
|
||||
# @copyright
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in
|
||||
# the documentation and/or other materials provided with the
|
||||
# distribution.
|
||||
# * Neither the name of Intel Corporation nor the names of its
|
||||
# contributors may be used to endorse or promote products derived
|
||||
# from this software without specific prior written permission.
|
||||
#
|
||||
# @copyright
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
AUTOMAKE_OPTIONS = foreign
|
||||
|
||||
# Use when building GCC
|
||||
ACLOCAL_AMFLAGS = -I .. -I ../config
|
||||
|
||||
# Compiler and linker flags.
|
||||
GENERAL_FLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/runtime -I$(top_srcdir)/runtime/config/$(config_dir) -DIN_CILK_RUNTIME=1
|
||||
GENERAL_FLAGS += -D_Cilk_spawn="" -D_Cilk_sync="" -D_Cilk_for=for
|
||||
|
||||
# Enable Intel Cilk Plus extension
|
||||
GENERAL_FLAGS += -fcilkplus
|
||||
|
||||
AM_CFLAGS = $(GENERAL_FLAGS) -std=c99
|
||||
AM_CPPFLAGS = $(GENERAL_FLAGS)
|
||||
AM_LDFLAGS = -lpthread -ldl
|
||||
|
||||
# Target list.
|
||||
toolexeclib_LTLIBRARIES = libcilkrts.la
|
||||
|
||||
libcilkrts_la_SOURCES = \
|
||||
runtime/config/$(config_dir)/cilk-abi-vla.c \
|
||||
runtime/config/$(config_dir)/os-unix-sysdep.c \
|
||||
runtime/bug.cpp \
|
||||
runtime/cilk-abi.c \
|
||||
runtime/cilk-abi-cilk-for.cpp \
|
||||
runtime/cilk-abi-vla-internal.c \
|
||||
runtime/cilk_api.c \
|
||||
runtime/cilk_fiber.cpp \
|
||||
runtime/cilk_fiber-unix.cpp \
|
||||
runtime/cilk_malloc.c \
|
||||
runtime/c_reducers.c \
|
||||
runtime/except-gcc.cpp \
|
||||
runtime/frame_malloc.c \
|
||||
runtime/full_frame.c \
|
||||
runtime/global_state.cpp \
|
||||
runtime/jmpbuf.c \
|
||||
runtime/local_state.c \
|
||||
runtime/metacall_impl.c \
|
||||
runtime/os_mutex-unix.c \
|
||||
runtime/os-unix.c \
|
||||
runtime/pedigrees.c \
|
||||
runtime/record-replay.cpp \
|
||||
runtime/reducer_impl.cpp \
|
||||
runtime/scheduler.c \
|
||||
runtime/signal_node.c \
|
||||
runtime/spin_mutex.c \
|
||||
runtime/stats.c \
|
||||
runtime/symbol_test.c \
|
||||
runtime/sysdep-unix.c \
|
||||
runtime/worker_mutex.c
|
||||
|
||||
|
||||
# Load the $(REVISION) value.
|
||||
include include/internal/rev.mk
|
||||
|
||||
#libcilkrts_la_LDFLAGS = -rpath '$(libdir)'
|
||||
libcilkrts_la_LDFLAGS = -version-info 5:0:0
|
||||
libcilkrts_la_LDFLAGS += -lpthread -ldl
|
||||
|
||||
# If we're building on Linux, use the Linux version script
|
||||
if LINUX_LINKER_SCRIPT
|
||||
libcilkrts_la_LDFLAGS += -Wl,--version-script,$(srcdir)/runtime/linux-symbols.ver
|
||||
endif
|
||||
|
||||
# If we're building on MacOS, use the Mac versioning
|
||||
if MAC_LINKER_SCRIPT
|
||||
libcilkrts_la_LDFLAGS += -Wl,-exported_symbols_list,$(srcdir)/runtime/mac-symbols.txt
|
||||
endif
|
||||
|
||||
|
||||
# Hack for Cygwin
|
||||
libcilkrts_la_LDFLAGS += -no-undefined
|
||||
|
||||
# C/C++ header files for Cilk.
|
||||
cilkincludedir = $(includedir)/cilk
|
||||
cilkinclude_HEADERS = \
|
||||
include/cilk/cilk_api.h \
|
||||
include/cilk/cilk_api_linux.h \
|
||||
include/cilk/cilk.h \
|
||||
include/cilk/cilk_stub.h \
|
||||
include/cilk/cilk_undocumented.h \
|
||||
include/cilk/common.h \
|
||||
include/cilk/holder.h \
|
||||
include/cilk/hyperobject_base.h \
|
||||
include/cilk/metaprogramming.h \
|
||||
include/cilk/reducer_file.h \
|
||||
include/cilk/reducer.h \
|
||||
include/cilk/reducer_list.h \
|
||||
include/cilk/reducer_max.h \
|
||||
include/cilk/reducer_min.h \
|
||||
include/cilk/reducer_min_max.h \
|
||||
include/cilk/reducer_opadd.h \
|
||||
include/cilk/reducer_opand.h \
|
||||
include/cilk/reducer_opmul.h \
|
||||
include/cilk/reducer_opor.h \
|
||||
include/cilk/reducer_opxor.h \
|
||||
include/cilk/reducer_ostream.h \
|
||||
include/cilk/reducer_string.h
|
||||
|
||||
|
||||
# Work around what appears to be a GNU make bug handling MAKEFLAGS
|
||||
# values defined in terms of make variables, as is the case for CC and
|
||||
# friends when we are called from the top level Makefile.
|
||||
AM_MAKEFLAGS = \
|
||||
"AR_FLAGS=$(AR_FLAGS)" \
|
||||
"CC_FOR_BUILD=$(CC_FOR_BUILD)" \
|
||||
"CFLAGS=$(CFLAGS)" \
|
||||
"CXXFLAGS=$(CXXFLAGS)" \
|
||||
"CFLAGS_FOR_BUILD=$(CFLAGS_FOR_BUILD)" \
|
||||
"CFLAGS_FOR_TARGET=$(CFLAGS_FOR_TARGET)" \
|
||||
"INSTALL=$(INSTALL)" \
|
||||
"INSTALL_DATA=$(INSTALL_DATA)" \
|
||||
"INSTALL_PROGRAM=$(INSTALL_PROGRAM)" \
|
||||
"INSTALL_SCRIPT=$(INSTALL_SCRIPT)" \
|
||||
"JC1FLAGS=$(JC1FLAGS)" \
|
||||
"LDFLAGS=$(LDFLAGS)" \
|
||||
"LIBCFLAGS=$(LIBCFLAGS)" \
|
||||
"LIBCFLAGS_FOR_TARGET=$(LIBCFLAGS_FOR_TARGET)" \
|
||||
"MAKE=$(MAKE)" \
|
||||
"MAKEINFO=$(MAKEINFO) $(MAKEINFOFLAGS)" \
|
||||
"PICFLAG=$(PICFLAG)" \
|
||||
"PICFLAG_FOR_TARGET=$(PICFLAG_FOR_TARGET)" \
|
||||
"SHELL=$(SHELL)" \
|
||||
"RUNTESTFLAGS=$(RUNTESTFLAGS)" \
|
||||
"exec_prefix=$(exec_prefix)" \
|
||||
"infodir=$(infodir)" \
|
||||
"libdir=$(libdir)" \
|
||||
"prefix=$(prefix)" \
|
||||
"includedir=$(includedir)" \
|
||||
"AR=$(AR)" \
|
||||
"AS=$(AS)" \
|
||||
"LD=$(LD)" \
|
||||
"LIBCFLAGS=$(LIBCFLAGS)" \
|
||||
"NM=$(NM)" \
|
||||
"PICFLAG=$(PICFLAG)" \
|
||||
"RANLIB=$(RANLIB)" \
|
||||
"DESTDIR=$(DESTDIR)"
|
||||
|
||||
MAKEOVERRIDES=
|
||||
|
1045
libcilkrts/Makefile.in
Normal file
1045
libcilkrts/Makefile.in
Normal file
File diff suppressed because it is too large
Load diff
84
libcilkrts/README
Normal file
84
libcilkrts/README
Normal file
|
@ -0,0 +1,84 @@
|
|||
Intel(R) Cilk(TM) Plus runtime library
|
||||
|
||||
Index:
|
||||
1. BUILDING
|
||||
2. USING
|
||||
3. DOXYGEN DOCUMENTATION
|
||||
4. QUESTIONS OR BUGS
|
||||
5. CONTRIBUTIONS
|
||||
|
||||
#
|
||||
# 1. BUILDING:
|
||||
#
|
||||
|
||||
To distribute applications that use the Intel Cilk Plus language
|
||||
extensions to non-development systems, you need to build the Intel
|
||||
Cilk Plus runtime library and distribute it with your application.
|
||||
|
||||
To build the libcilkrts.so runtime library component, you need the
|
||||
autoconf and automake packages, which are available through your
|
||||
favorite package manager. You also need a C/C++ compiler that
|
||||
supports the Intel Cilk Plus language extensions, since the runtime
|
||||
uses Intel Cilk Plus features internally. Use either the Intel(R)
|
||||
C++ Compiler (icc command) v12.1 or later, or GCC 4.9 or later
|
||||
(gcc command).
|
||||
|
||||
Once you have the necessary prerequisites installed, you can use the
|
||||
following commands to create the library:
|
||||
|
||||
% libtoolize
|
||||
% aclocal
|
||||
% automake --add-missing
|
||||
% autoconf
|
||||
% ./configure
|
||||
% make
|
||||
% make install
|
||||
|
||||
This will produce the libcilkrts.so shared object. To install the
|
||||
library in a custom location, set the prefix while running the
|
||||
configure script:
|
||||
|
||||
% ./configure --prefix=/your/path/to/lib
|
||||
|
||||
#
|
||||
# 2. USING:
|
||||
#
|
||||
|
||||
The Intel(R) C++ Compiler will automatically try to bring in the
|
||||
Intel Cilk Plus runtime in any program that uses the relevant
|
||||
features. GCC requires explicit linking of both the library and
|
||||
its dependencies (libpthread, libdl). For example:
|
||||
|
||||
% gcc foo.c -lcilkrts -lpthread -ldl
|
||||
|
||||
#
|
||||
# 3. DOXYGEN DOCUMENTATION:
|
||||
#
|
||||
|
||||
The library source has Doxygen markup. Generate HTML documentation
|
||||
based on the markup by changing directory into runtime and running:
|
||||
|
||||
% doxygen doxygen.cfg
|
||||
|
||||
#
|
||||
# 4. QUESTIONS OR BUGS:
|
||||
#
|
||||
|
||||
Issues with the Intel Cilk Plus runtime can be addressed in the Intel
|
||||
Cilk Plus forums:
|
||||
http://software.intel.com/en-us/forums/intel-cilk-plus/
|
||||
|
||||
#
|
||||
# 5. CONTRIBUTIONS:
|
||||
#
|
||||
|
||||
The Intel Cilk Plus runtime library is dual licensed. The upstream copy
|
||||
of the library is maintained via the BSD-licensed version available at:
|
||||
http://cilkplus.org/
|
||||
|
||||
Changes to the Intel Cilk Plus runtime are welcome and should be
|
||||
contributed to the upstream version via http://cilkplus.org/.
|
||||
|
||||
------------------------
|
||||
Intel and Cilk are trademarks of Intel Corporation in the U.S. and/or
|
||||
other countries.
|
939
libcilkrts/aclocal.m4
vendored
Normal file
939
libcilkrts/aclocal.m4
vendored
Normal file
|
@ -0,0 +1,939 @@
|
|||
# generated automatically by aclocal 1.11.1 -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
|
||||
# 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
|
||||
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
|
||||
# PARTICULAR PURPOSE.
|
||||
|
||||
m4_ifndef([AC_AUTOCONF_VERSION],
|
||||
[m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
|
||||
m4_if(m4_defn([AC_AUTOCONF_VERSION]), [2.64],,
|
||||
[m4_warning([this file was generated for autoconf 2.64.
|
||||
You have another version of autoconf. It may work, but is not guaranteed to.
|
||||
If you have problems, you may need to regenerate the build system entirely.
|
||||
To do so, use the procedure documented by the package, typically `autoreconf'.])])
|
||||
|
||||
# Copyright (C) 2002, 2003, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
# AM_AUTOMAKE_VERSION(VERSION)
|
||||
# ----------------------------
|
||||
# Automake X.Y traces this macro to ensure aclocal.m4 has been
|
||||
# generated from the m4 files accompanying Automake X.Y.
|
||||
# (This private macro should not be called outside this file.)
|
||||
AC_DEFUN([AM_AUTOMAKE_VERSION],
|
||||
[am__api_version='1.11'
|
||||
dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to
|
||||
dnl require some minimum version. Point them to the right macro.
|
||||
m4_if([$1], [1.11.1], [],
|
||||
[AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl
|
||||
])
|
||||
|
||||
# _AM_AUTOCONF_VERSION(VERSION)
|
||||
# -----------------------------
|
||||
# aclocal traces this macro to find the Autoconf version.
|
||||
# This is a private macro too. Using m4_define simplifies
|
||||
# the logic in aclocal, which can simply ignore this definition.
|
||||
m4_define([_AM_AUTOCONF_VERSION], [])
|
||||
|
||||
# AM_SET_CURRENT_AUTOMAKE_VERSION
|
||||
# -------------------------------
|
||||
# Call AM_AUTOMAKE_VERSION and _AM_AUTOCONF_VERSION so they can be traced.
|
||||
# This function is AC_REQUIREd by AM_INIT_AUTOMAKE.
|
||||
AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION],
|
||||
[AM_AUTOMAKE_VERSION([1.11.1])dnl
|
||||
m4_ifndef([AC_AUTOCONF_VERSION],
|
||||
[m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
|
||||
_AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))])
|
||||
|
||||
# AM_AUX_DIR_EXPAND -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 2001, 2003, 2005 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
# For projects using AC_CONFIG_AUX_DIR([foo]), Autoconf sets
|
||||
# $ac_aux_dir to `$srcdir/foo'. In other projects, it is set to
|
||||
# `$srcdir', `$srcdir/..', or `$srcdir/../..'.
|
||||
#
|
||||
# Of course, Automake must honor this variable whenever it calls a
|
||||
# tool from the auxiliary directory. The problem is that $srcdir (and
|
||||
# therefore $ac_aux_dir as well) can be either absolute or relative,
|
||||
# depending on how configure is run. This is pretty annoying, since
|
||||
# it makes $ac_aux_dir quite unusable in subdirectories: in the top
|
||||
# source directory, any form will work fine, but in subdirectories a
|
||||
# relative path needs to be adjusted first.
|
||||
#
|
||||
# $ac_aux_dir/missing
|
||||
# fails when called from a subdirectory if $ac_aux_dir is relative
|
||||
# $top_srcdir/$ac_aux_dir/missing
|
||||
# fails if $ac_aux_dir is absolute,
|
||||
# fails when called from a subdirectory in a VPATH build with
|
||||
# a relative $ac_aux_dir
|
||||
#
|
||||
# The reason of the latter failure is that $top_srcdir and $ac_aux_dir
|
||||
# are both prefixed by $srcdir. In an in-source build this is usually
|
||||
# harmless because $srcdir is `.', but things will break when you
|
||||
# start a VPATH build or use an absolute $srcdir.
|
||||
#
|
||||
# So we could use something similar to $top_srcdir/$ac_aux_dir/missing,
|
||||
# iff we strip the leading $srcdir from $ac_aux_dir. That would be:
|
||||
# am_aux_dir='\$(top_srcdir)/'`expr "$ac_aux_dir" : "$srcdir//*\(.*\)"`
|
||||
# and then we would define $MISSING as
|
||||
# MISSING="\${SHELL} $am_aux_dir/missing"
|
||||
# This will work as long as MISSING is not called from configure, because
|
||||
# unfortunately $(top_srcdir) has no meaning in configure.
|
||||
# However there are other variables, like CC, which are often used in
|
||||
# configure, and could therefore not use this "fixed" $ac_aux_dir.
|
||||
#
|
||||
# Another solution, used here, is to always expand $ac_aux_dir to an
|
||||
# absolute PATH. The drawback is that using absolute paths prevent a
|
||||
# configured tree to be moved without reconfiguration.
|
||||
|
||||
AC_DEFUN([AM_AUX_DIR_EXPAND],
|
||||
[dnl Rely on autoconf to set up CDPATH properly.
|
||||
AC_PREREQ([2.50])dnl
|
||||
# expand $ac_aux_dir to an absolute path
|
||||
am_aux_dir=`cd $ac_aux_dir && pwd`
|
||||
])
|
||||
|
||||
# AM_CONDITIONAL -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 1997, 2000, 2001, 2003, 2004, 2005, 2006, 2008
|
||||
# Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
# serial 9
|
||||
|
||||
# AM_CONDITIONAL(NAME, SHELL-CONDITION)
|
||||
# -------------------------------------
|
||||
# Define a conditional.
|
||||
AC_DEFUN([AM_CONDITIONAL],
|
||||
[AC_PREREQ(2.52)dnl
|
||||
ifelse([$1], [TRUE], [AC_FATAL([$0: invalid condition: $1])],
|
||||
[$1], [FALSE], [AC_FATAL([$0: invalid condition: $1])])dnl
|
||||
AC_SUBST([$1_TRUE])dnl
|
||||
AC_SUBST([$1_FALSE])dnl
|
||||
_AM_SUBST_NOTMAKE([$1_TRUE])dnl
|
||||
_AM_SUBST_NOTMAKE([$1_FALSE])dnl
|
||||
m4_define([_AM_COND_VALUE_$1], [$2])dnl
|
||||
if $2; then
|
||||
$1_TRUE=
|
||||
$1_FALSE='#'
|
||||
else
|
||||
$1_TRUE='#'
|
||||
$1_FALSE=
|
||||
fi
|
||||
AC_CONFIG_COMMANDS_PRE(
|
||||
[if test -z "${$1_TRUE}" && test -z "${$1_FALSE}"; then
|
||||
AC_MSG_ERROR([[conditional "$1" was never defined.
|
||||
Usually this means the macro was only invoked conditionally.]])
|
||||
fi])])
|
||||
|
||||
# Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2009
|
||||
# Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
# serial 10
|
||||
|
||||
# There are a few dirty hacks below to avoid letting `AC_PROG_CC' be
|
||||
# written in clear, in which case automake, when reading aclocal.m4,
|
||||
# will think it sees a *use*, and therefore will trigger all its
|
||||
# C support machinery. Also note that it means that autoscan, seeing
|
||||
# CC etc. in the Makefile, will ask for an AC_PROG_CC use...
|
||||
|
||||
|
||||
# _AM_DEPENDENCIES(NAME)
|
||||
# ----------------------
|
||||
# See how the compiler implements dependency checking.
|
||||
# NAME is "CC", "CXX", "GCJ", or "OBJC".
|
||||
# We try a few techniques and use that to set a single cache variable.
|
||||
#
|
||||
# We don't AC_REQUIRE the corresponding AC_PROG_CC since the latter was
|
||||
# modified to invoke _AM_DEPENDENCIES(CC); we would have a circular
|
||||
# dependency, and given that the user is not expected to run this macro,
|
||||
# just rely on AC_PROG_CC.
|
||||
AC_DEFUN([_AM_DEPENDENCIES],
|
||||
[AC_REQUIRE([AM_SET_DEPDIR])dnl
|
||||
AC_REQUIRE([AM_OUTPUT_DEPENDENCY_COMMANDS])dnl
|
||||
AC_REQUIRE([AM_MAKE_INCLUDE])dnl
|
||||
AC_REQUIRE([AM_DEP_TRACK])dnl
|
||||
|
||||
ifelse([$1], CC, [depcc="$CC" am_compiler_list=],
|
||||
[$1], CXX, [depcc="$CXX" am_compiler_list=],
|
||||
[$1], OBJC, [depcc="$OBJC" am_compiler_list='gcc3 gcc'],
|
||||
[$1], UPC, [depcc="$UPC" am_compiler_list=],
|
||||
[$1], GCJ, [depcc="$GCJ" am_compiler_list='gcc3 gcc'],
|
||||
[depcc="$$1" am_compiler_list=])
|
||||
|
||||
AC_CACHE_CHECK([dependency style of $depcc],
|
||||
[am_cv_$1_dependencies_compiler_type],
|
||||
[if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then
|
||||
# We make a subdir and do the tests there. Otherwise we can end up
|
||||
# making bogus files that we don't know about and never remove. For
|
||||
# instance it was reported that on HP-UX the gcc test will end up
|
||||
# making a dummy file named `D' -- because `-MD' means `put the output
|
||||
# in D'.
|
||||
mkdir conftest.dir
|
||||
# Copy depcomp to subdir because otherwise we won't find it if we're
|
||||
# using a relative directory.
|
||||
cp "$am_depcomp" conftest.dir
|
||||
cd conftest.dir
|
||||
# We will build objects and dependencies in a subdirectory because
|
||||
# it helps to detect inapplicable dependency modes. For instance
|
||||
# both Tru64's cc and ICC support -MD to output dependencies as a
|
||||
# side effect of compilation, but ICC will put the dependencies in
|
||||
# the current directory while Tru64 will put them in the object
|
||||
# directory.
|
||||
mkdir sub
|
||||
|
||||
am_cv_$1_dependencies_compiler_type=none
|
||||
if test "$am_compiler_list" = ""; then
|
||||
am_compiler_list=`sed -n ['s/^#*\([a-zA-Z0-9]*\))$/\1/p'] < ./depcomp`
|
||||
fi
|
||||
am__universal=false
|
||||
m4_case([$1], [CC],
|
||||
[case " $depcc " in #(
|
||||
*\ -arch\ *\ -arch\ *) am__universal=true ;;
|
||||
esac],
|
||||
[CXX],
|
||||
[case " $depcc " in #(
|
||||
*\ -arch\ *\ -arch\ *) am__universal=true ;;
|
||||
esac])
|
||||
|
||||
for depmode in $am_compiler_list; do
|
||||
# Setup a source with many dependencies, because some compilers
|
||||
# like to wrap large dependency lists on column 80 (with \), and
|
||||
# we should not choose a depcomp mode which is confused by this.
|
||||
#
|
||||
# We need to recreate these files for each test, as the compiler may
|
||||
# overwrite some of them when testing with obscure command lines.
|
||||
# This happens at least with the AIX C compiler.
|
||||
: > sub/conftest.c
|
||||
for i in 1 2 3 4 5 6; do
|
||||
echo '#include "conftst'$i'.h"' >> sub/conftest.c
|
||||
# Using `: > sub/conftst$i.h' creates only sub/conftst1.h with
|
||||
# Solaris 8's {/usr,}/bin/sh.
|
||||
touch sub/conftst$i.h
|
||||
done
|
||||
echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf
|
||||
|
||||
# We check with `-c' and `-o' for the sake of the "dashmstdout"
|
||||
# mode. It turns out that the SunPro C++ compiler does not properly
|
||||
# handle `-M -o', and we need to detect this. Also, some Intel
|
||||
# versions had trouble with output in subdirs
|
||||
am__obj=sub/conftest.${OBJEXT-o}
|
||||
am__minus_obj="-o $am__obj"
|
||||
case $depmode in
|
||||
gcc)
|
||||
# This depmode causes a compiler race in universal mode.
|
||||
test "$am__universal" = false || continue
|
||||
;;
|
||||
nosideeffect)
|
||||
# after this tag, mechanisms are not by side-effect, so they'll
|
||||
# only be used when explicitly requested
|
||||
if test "x$enable_dependency_tracking" = xyes; then
|
||||
continue
|
||||
else
|
||||
break
|
||||
fi
|
||||
;;
|
||||
msvisualcpp | msvcmsys)
|
||||
# This compiler won't grok `-c -o', but also, the minuso test has
|
||||
# not run yet. These depmodes are late enough in the game, and
|
||||
# so weak that their functioning should not be impacted.
|
||||
am__obj=conftest.${OBJEXT-o}
|
||||
am__minus_obj=
|
||||
;;
|
||||
none) break ;;
|
||||
esac
|
||||
if depmode=$depmode \
|
||||
source=sub/conftest.c object=$am__obj \
|
||||
depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \
|
||||
$SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \
|
||||
>/dev/null 2>conftest.err &&
|
||||
grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 &&
|
||||
grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 &&
|
||||
grep $am__obj sub/conftest.Po > /dev/null 2>&1 &&
|
||||
${MAKE-make} -s -f confmf > /dev/null 2>&1; then
|
||||
# icc doesn't choke on unknown options, it will just issue warnings
|
||||
# or remarks (even with -Werror). So we grep stderr for any message
|
||||
# that says an option was ignored or not supported.
|
||||
# When given -MP, icc 7.0 and 7.1 complain thusly:
|
||||
# icc: Command line warning: ignoring option '-M'; no argument required
|
||||
# The diagnosis changed in icc 8.0:
|
||||
# icc: Command line remark: option '-MP' not supported
|
||||
if (grep 'ignoring option' conftest.err ||
|
||||
grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else
|
||||
am_cv_$1_dependencies_compiler_type=$depmode
|
||||
break
|
||||
fi
|
||||
fi
|
||||
done
|
||||
|
||||
cd ..
|
||||
rm -rf conftest.dir
|
||||
else
|
||||
am_cv_$1_dependencies_compiler_type=none
|
||||
fi
|
||||
])
|
||||
AC_SUBST([$1DEPMODE], [depmode=$am_cv_$1_dependencies_compiler_type])
|
||||
AM_CONDITIONAL([am__fastdep$1], [
|
||||
test "x$enable_dependency_tracking" != xno \
|
||||
&& test "$am_cv_$1_dependencies_compiler_type" = gcc3])
|
||||
])
|
||||
|
||||
|
||||
# AM_SET_DEPDIR
|
||||
# -------------
|
||||
# Choose a directory name for dependency files.
|
||||
# This macro is AC_REQUIREd in _AM_DEPENDENCIES
|
||||
AC_DEFUN([AM_SET_DEPDIR],
|
||||
[AC_REQUIRE([AM_SET_LEADING_DOT])dnl
|
||||
AC_SUBST([DEPDIR], ["${am__leading_dot}deps"])dnl
|
||||
])
|
||||
|
||||
|
||||
# AM_DEP_TRACK
|
||||
# ------------
|
||||
AC_DEFUN([AM_DEP_TRACK],
|
||||
[AC_ARG_ENABLE(dependency-tracking,
|
||||
[ --disable-dependency-tracking speeds up one-time build
|
||||
--enable-dependency-tracking do not reject slow dependency extractors])
|
||||
if test "x$enable_dependency_tracking" != xno; then
|
||||
am_depcomp="$ac_aux_dir/depcomp"
|
||||
AMDEPBACKSLASH='\'
|
||||
fi
|
||||
AM_CONDITIONAL([AMDEP], [test "x$enable_dependency_tracking" != xno])
|
||||
AC_SUBST([AMDEPBACKSLASH])dnl
|
||||
_AM_SUBST_NOTMAKE([AMDEPBACKSLASH])dnl
|
||||
])
|
||||
|
||||
# Generate code to set up dependency tracking. -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2008
|
||||
# Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
#serial 5
|
||||
|
||||
# _AM_OUTPUT_DEPENDENCY_COMMANDS
|
||||
# ------------------------------
|
||||
AC_DEFUN([_AM_OUTPUT_DEPENDENCY_COMMANDS],
|
||||
[{
|
||||
# For every Automake-generated Makefile listed in CONFIG_FILES, create
|
||||
# a placeholder for each dependency file the Makefile includes, so that
|
||||
# the include directives do not fail before the first compilation.
|
||||
# Autoconf 2.62 quotes --file arguments for eval, but not when files
|
||||
# are listed without --file. Let's play safe and only enable the eval
|
||||
# if we detect the quoting.
|
||||
case $CONFIG_FILES in
|
||||
*\'*) eval set x "$CONFIG_FILES" ;;
|
||||
*) set x $CONFIG_FILES ;;
|
||||
esac
|
||||
shift
|
||||
for mf
|
||||
do
|
||||
# Strip MF so we end up with the name of the file.
|
||||
mf=`echo "$mf" | sed -e 's/:.*$//'`
|
||||
# Check whether this is an Automake generated Makefile or not.
|
||||
# We used to match only the files named `Makefile.in', but
|
||||
# some people rename them; so instead we look at the file content.
|
||||
# Grep'ing the first line is not enough: some people post-process
|
||||
# each Makefile.in and add a new line on top of each file to say so.
|
||||
# Grep'ing the whole file is not good either: AIX grep has a line
|
||||
# limit of 2048, but all sed's we know understand at least 4000.
|
||||
if sed -n 's,^#.*generated by automake.*,X,p' "$mf" | grep X >/dev/null 2>&1; then
|
||||
dirpart=`AS_DIRNAME("$mf")`
|
||||
else
|
||||
continue
|
||||
fi
|
||||
# Extract the definition of DEPDIR, am__include, and am__quote
|
||||
# from the Makefile without running `make'.
|
||||
DEPDIR=`sed -n 's/^DEPDIR = //p' < "$mf"`
|
||||
test -z "$DEPDIR" && continue
|
||||
am__include=`sed -n 's/^am__include = //p' < "$mf"`
|
||||
# Skip Makefiles that do not define am__include; the value must be
|
||||
# tested, not the literal variable name.
|
||||
test -z "$am__include" && continue
|
||||
am__quote=`sed -n 's/^am__quote = //p' < "$mf"`
|
||||
# When using ansi2knr, U may be empty or an underscore; expand it
|
||||
U=`sed -n 's/^U = //p' < "$mf"`
|
||||
# Find all dependency output files, they are included files with
|
||||
# $(DEPDIR) in their names. We invoke sed twice because it is the
|
||||
# simplest approach to changing $(DEPDIR) to its actual value in the
|
||||
# expansion.
|
||||
for file in `sed -n "
|
||||
s/^$am__include $am__quote\(.*(DEPDIR).*\)$am__quote"'$/\1/p' <"$mf" | \
|
||||
sed -e 's/\$(DEPDIR)/'"$DEPDIR"'/g' -e 's/\$U/'"$U"'/g'`; do
|
||||
# Make sure the directory exists.
|
||||
test -f "$dirpart/$file" && continue
|
||||
fdir=`AS_DIRNAME(["$file"])`
|
||||
AS_MKDIR_P([$dirpart/$fdir])
|
||||
# echo "creating $dirpart/$file"
|
||||
echo '# dummy' > "$dirpart/$file"
|
||||
done
|
||||
done
|
||||
}
|
||||
])# _AM_OUTPUT_DEPENDENCY_COMMANDS
|
||||
|
||||
|
||||
# AM_OUTPUT_DEPENDENCY_COMMANDS
|
||||
# -----------------------------
|
||||
# This macro should only be invoked once -- use via AC_REQUIRE.
|
||||
#
|
||||
# This code is only required when automatic dependency tracking
|
||||
# is enabled. FIXME. This creates each `.P' file that we will
|
||||
# need in order to bootstrap the dependency handling code.
|
||||
AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS],
|
||||
[AC_CONFIG_COMMANDS([depfiles],
|
||||
[test x"$AMDEP_TRUE" != x"" || _AM_OUTPUT_DEPENDENCY_COMMANDS],
|
||||
[AMDEP_TRUE="$AMDEP_TRUE" ac_aux_dir="$ac_aux_dir"])
|
||||
])
|
||||
|
||||
# Do all the work for Automake. -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
|
||||
# 2005, 2006, 2008, 2009 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
# serial 16
|
||||
|
||||
# This macro actually does too much. Some checks are only needed if
|
||||
# your package does certain things. But this isn't really a big deal.
|
||||
|
||||
# AM_INIT_AUTOMAKE(PACKAGE, VERSION, [NO-DEFINE])
|
||||
# AM_INIT_AUTOMAKE([OPTIONS])
|
||||
# -----------------------------------------------
|
||||
# The call with PACKAGE and VERSION arguments is the old style
|
||||
# call (pre autoconf-2.50), which is being phased out. PACKAGE
|
||||
# and VERSION should now be passed to AC_INIT and removed from
|
||||
# the call to AM_INIT_AUTOMAKE.
|
||||
# We support both call styles for the transition. After
|
||||
# the next Automake release, Autoconf can make the AC_INIT
|
||||
# arguments mandatory, and then we can depend on a new Autoconf
|
||||
# release and drop the old call support.
|
||||
AC_DEFUN([AM_INIT_AUTOMAKE],
|
||||
[AC_PREREQ([2.62])dnl
|
||||
dnl Autoconf wants to disallow AM_ names. We explicitly allow
|
||||
dnl the ones we care about.
|
||||
m4_pattern_allow([^AM_[A-Z]+FLAGS$])dnl
|
||||
AC_REQUIRE([AM_SET_CURRENT_AUTOMAKE_VERSION])dnl
|
||||
AC_REQUIRE([AC_PROG_INSTALL])dnl
|
||||
if test "`cd $srcdir && pwd`" != "`pwd`"; then
|
||||
# Use -I$(srcdir) only when $(srcdir) != ., so that make's output
|
||||
# is not polluted with repeated "-I."
|
||||
AC_SUBST([am__isrc], [' -I$(srcdir)'])_AM_SUBST_NOTMAKE([am__isrc])dnl
|
||||
# test to see if srcdir already configured
|
||||
if test -f $srcdir/config.status; then
|
||||
AC_MSG_ERROR([source directory already configured; run "make distclean" there first])
|
||||
fi
|
||||
fi
|
||||
|
||||
# test whether we have cygpath
|
||||
if test -z "$CYGPATH_W"; then
|
||||
if (cygpath --version) >/dev/null 2>/dev/null; then
|
||||
CYGPATH_W='cygpath -w'
|
||||
else
|
||||
CYGPATH_W=echo
|
||||
fi
|
||||
fi
|
||||
AC_SUBST([CYGPATH_W])
|
||||
|
||||
# Define the identity of the package.
|
||||
dnl Distinguish between old-style and new-style calls.
|
||||
m4_ifval([$2],
|
||||
[m4_ifval([$3], [_AM_SET_OPTION([no-define])])dnl
|
||||
AC_SUBST([PACKAGE], [$1])dnl
|
||||
AC_SUBST([VERSION], [$2])],
|
||||
[_AM_SET_OPTIONS([$1])dnl
|
||||
dnl Diagnose old-style AC_INIT with new-style AM_AUTOMAKE_INIT.
|
||||
m4_if(m4_ifdef([AC_PACKAGE_NAME], 1)m4_ifdef([AC_PACKAGE_VERSION], 1), 11,,
|
||||
[m4_fatal([AC_INIT should be called with package and version arguments])])dnl
|
||||
AC_SUBST([PACKAGE], ['AC_PACKAGE_TARNAME'])dnl
|
||||
AC_SUBST([VERSION], ['AC_PACKAGE_VERSION'])])dnl
|
||||
|
||||
_AM_IF_OPTION([no-define],,
|
||||
[AC_DEFINE_UNQUOTED(PACKAGE, "$PACKAGE", [Name of package])
|
||||
AC_DEFINE_UNQUOTED(VERSION, "$VERSION", [Version number of package])])dnl
|
||||
|
||||
# Some tools Automake needs.
|
||||
AC_REQUIRE([AM_SANITY_CHECK])dnl
|
||||
AC_REQUIRE([AC_ARG_PROGRAM])dnl
|
||||
AM_MISSING_PROG(ACLOCAL, aclocal-${am__api_version})
|
||||
AM_MISSING_PROG(AUTOCONF, autoconf)
|
||||
AM_MISSING_PROG(AUTOMAKE, automake-${am__api_version})
|
||||
AM_MISSING_PROG(AUTOHEADER, autoheader)
|
||||
AM_MISSING_PROG(MAKEINFO, makeinfo)
|
||||
AC_REQUIRE([AM_PROG_INSTALL_SH])dnl
|
||||
AC_REQUIRE([AM_PROG_INSTALL_STRIP])dnl
|
||||
AC_REQUIRE([AM_PROG_MKDIR_P])dnl
|
||||
# We need awk for the "check" target. The system "awk" is bad on
|
||||
# some platforms.
|
||||
AC_REQUIRE([AC_PROG_AWK])dnl
|
||||
AC_REQUIRE([AC_PROG_MAKE_SET])dnl
|
||||
AC_REQUIRE([AM_SET_LEADING_DOT])dnl
|
||||
_AM_IF_OPTION([tar-ustar], [_AM_PROG_TAR([ustar])],
|
||||
[_AM_IF_OPTION([tar-pax], [_AM_PROG_TAR([pax])],
|
||||
[_AM_PROG_TAR([v7])])])
|
||||
_AM_IF_OPTION([no-dependencies],,
|
||||
[AC_PROVIDE_IFELSE([AC_PROG_CC],
|
||||
[_AM_DEPENDENCIES(CC)],
|
||||
[define([AC_PROG_CC],
|
||||
defn([AC_PROG_CC])[_AM_DEPENDENCIES(CC)])])dnl
|
||||
AC_PROVIDE_IFELSE([AC_PROG_CXX],
|
||||
[_AM_DEPENDENCIES(CXX)],
|
||||
[define([AC_PROG_CXX],
|
||||
defn([AC_PROG_CXX])[_AM_DEPENDENCIES(CXX)])])dnl
|
||||
AC_PROVIDE_IFELSE([AC_PROG_OBJC],
|
||||
[_AM_DEPENDENCIES(OBJC)],
|
||||
[define([AC_PROG_OBJC],
|
||||
defn([AC_PROG_OBJC])[_AM_DEPENDENCIES(OBJC)])])dnl
|
||||
])
|
||||
_AM_IF_OPTION([silent-rules], [AC_REQUIRE([AM_SILENT_RULES])])dnl
|
||||
dnl The `parallel-tests' driver may need to know about EXEEXT, so add the
|
||||
dnl `am__EXEEXT' conditional if _AM_COMPILER_EXEEXT was seen. This macro
|
||||
dnl is hooked onto _AC_COMPILER_EXEEXT early, see below.
|
||||
AC_CONFIG_COMMANDS_PRE(dnl
|
||||
[m4_provide_if([_AM_COMPILER_EXEEXT],
|
||||
[AM_CONDITIONAL([am__EXEEXT], [test -n "$EXEEXT"])])])dnl
|
||||
])
|
||||
|
||||
dnl Hook into `_AC_COMPILER_EXEEXT' early to learn its expansion. Do not
|
||||
dnl add the conditional right here, as _AC_COMPILER_EXEEXT may be further
|
||||
dnl mangled by Autoconf and run in a shell conditional statement.
|
||||
m4_define([_AC_COMPILER_EXEEXT],
|
||||
m4_defn([_AC_COMPILER_EXEEXT])[m4_provide([_AM_COMPILER_EXEEXT])])
|
||||
|
||||
|
||||
# When config.status generates a header, we must update the stamp-h file.
|
||||
# This file resides in the same directory as the config header
|
||||
# that is generated. The stamp files are numbered to have different names.
|
||||
|
||||
# Autoconf calls _AC_AM_CONFIG_HEADER_HOOK (when defined) in the
|
||||
# loop where config.status creates the headers, so we can generate
|
||||
# our stamp files there.
|
||||
AC_DEFUN([_AC_AM_CONFIG_HEADER_HOOK],
|
||||
[# Compute $1's index in $config_headers.
|
||||
_am_arg=$1
|
||||
_am_stamp_count=1
|
||||
for _am_header in $config_headers :; do
|
||||
case $_am_header in
|
||||
$_am_arg | $_am_arg:* )
|
||||
break ;;
|
||||
* )
|
||||
_am_stamp_count=`expr $_am_stamp_count + 1` ;;
|
||||
esac
|
||||
done
|
||||
echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_count])
|
||||
|
||||
# Copyright (C) 2001, 2003, 2005, 2008 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
# AM_PROG_INSTALL_SH
|
||||
# ------------------
|
||||
# Define $install_sh.
|
||||
AC_DEFUN([AM_PROG_INSTALL_SH],
|
||||
[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl
|
||||
dnl Only compute a default when install_sh is unset, so that a value
|
||||
dnl supplied by the user or the environment is honored.  The +set
|
||||
dnl expansion tests whether the variable is set, not what it contains.
|
||||
if test x"${install_sh+set}" != xset; then
|
||||
case $am_aux_dir in
|
||||
*\ * | *\ *)
|
||||
install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;;
|
||||
*)
|
||||
install_sh="\${SHELL} $am_aux_dir/install-sh"
|
||||
esac
|
||||
fi
|
||||
AC_SUBST(install_sh)])
|
||||
|
||||
# Check to see how 'make' treats includes. -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 2001, 2002, 2003, 2005, 2009 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
# serial 4
|
||||
|
||||
# AM_MAKE_INCLUDE()
|
||||
# -----------------
|
||||
# Check to see how make treats includes.
|
||||
AC_DEFUN([AM_MAKE_INCLUDE],
|
||||
[am_make=${MAKE-make}
|
||||
cat > confinc << 'END'
|
||||
am__doit:
|
||||
@echo this is the am__doit target
|
||||
.PHONY: am__doit
|
||||
END
|
||||
# If we don't find an include directive, just comment out the code.
|
||||
AC_MSG_CHECKING([for style of include used by $am_make])
|
||||
am__include="#"
|
||||
am__quote=
|
||||
_am_result=none
|
||||
# First try GNU make style include.
|
||||
echo "include confinc" > confmf
|
||||
# Ignore all kinds of additional output from `make'.
|
||||
case `$am_make -s -f confmf 2> /dev/null` in #(
|
||||
*the\ am__doit\ target*)
|
||||
am__include=include
|
||||
am__quote=
|
||||
_am_result=GNU
|
||||
;;
|
||||
esac
|
||||
# Now try BSD make style include.
|
||||
if test "$am__include" = "#"; then
|
||||
echo '.include "confinc"' > confmf
|
||||
case `$am_make -s -f confmf 2> /dev/null` in #(
|
||||
*the\ am__doit\ target*)
|
||||
am__include=.include
|
||||
am__quote="\""
|
||||
_am_result=BSD
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
AC_SUBST([am__include])
|
||||
AC_SUBST([am__quote])
|
||||
AC_MSG_RESULT([$_am_result])
|
||||
rm -f confinc confmf
|
||||
])
|
||||
|
||||
# Fake the existence of programs that GNU maintainers use. -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 1997, 1999, 2000, 2001, 2003, 2004, 2005, 2008
|
||||
# Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
# serial 6
|
||||
|
||||
# AM_MISSING_PROG(NAME, PROGRAM)
|
||||
# ------------------------------
|
||||
AC_DEFUN([AM_MISSING_PROG],
|
||||
[AC_REQUIRE([AM_MISSING_HAS_RUN])
|
||||
$1=${$1-"${am_missing_run}$2"}
|
||||
AC_SUBST($1)])
|
||||
|
||||
|
||||
# AM_MISSING_HAS_RUN
|
||||
# ------------------
|
||||
# Define MISSING if not defined so far and test if it supports --run.
|
||||
# If it does, set am_missing_run to use it, otherwise, to nothing.
|
||||
AC_DEFUN([AM_MISSING_HAS_RUN],
|
||||
[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl
|
||||
AC_REQUIRE_AUX_FILE([missing])dnl
|
||||
if test x"${MISSING+set}" != xset; then
|
||||
case $am_aux_dir in
|
||||
*\ * | *\ *)
|
||||
MISSING="\${SHELL} \"$am_aux_dir/missing\"" ;;
|
||||
*)
|
||||
MISSING="\${SHELL} $am_aux_dir/missing" ;;
|
||||
esac
|
||||
fi
|
||||
# Use eval to expand $SHELL
|
||||
if eval "$MISSING --run true"; then
|
||||
am_missing_run="$MISSING --run "
|
||||
else
|
||||
am_missing_run=
|
||||
AC_MSG_WARN([`missing' script is too old or missing])
|
||||
fi
|
||||
])
|
||||
|
||||
# Copyright (C) 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
# AM_PROG_MKDIR_P
|
||||
# ---------------
|
||||
# Check for `mkdir -p'.
|
||||
AC_DEFUN([AM_PROG_MKDIR_P],
|
||||
[AC_PREREQ([2.60])dnl
|
||||
AC_REQUIRE([AC_PROG_MKDIR_P])dnl
|
||||
dnl Automake 1.8 to 1.9.6 used to define mkdir_p. We now use MKDIR_P,
|
||||
dnl while keeping a definition of mkdir_p for backward compatibility.
|
||||
dnl @MKDIR_P@ is magic: AC_OUTPUT adjusts its value for each Makefile.
|
||||
dnl However we cannot define mkdir_p as $(MKDIR_P) for the sake of
|
||||
dnl Makefile.ins that do not define MKDIR_P, so we do our own
|
||||
dnl adjustment using top_builddir (which is defined more often than
|
||||
dnl MKDIR_P).
|
||||
AC_SUBST([mkdir_p], ["$MKDIR_P"])dnl
|
||||
case $mkdir_p in
|
||||
[[\\/$]]* | ?:[[\\/]]*) ;;
|
||||
*/*) mkdir_p="\$(top_builddir)/$mkdir_p" ;;
|
||||
esac
|
||||
])
|
||||
|
||||
# Helper functions for option handling. -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 2001, 2002, 2003, 2005, 2008 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
# serial 4
|
||||
|
||||
# _AM_MANGLE_OPTION(NAME)
|
||||
# -----------------------
|
||||
AC_DEFUN([_AM_MANGLE_OPTION],
|
||||
[[_AM_OPTION_]m4_bpatsubst($1, [[^a-zA-Z0-9_]], [_])])
|
||||
|
||||
# _AM_SET_OPTION(NAME)
|
||||
# ------------------------------
|
||||
# Set option NAME. Presently that only means defining a flag for this option.
|
||||
AC_DEFUN([_AM_SET_OPTION],
|
||||
[m4_define(_AM_MANGLE_OPTION([$1]), 1)])
|
||||
|
||||
# _AM_SET_OPTIONS(OPTIONS)
|
||||
# ----------------------------------
|
||||
# OPTIONS is a space-separated list of Automake options.
|
||||
AC_DEFUN([_AM_SET_OPTIONS],
|
||||
[m4_foreach_w([_AM_Option], [$1], [_AM_SET_OPTION(_AM_Option)])])
|
||||
|
||||
# _AM_IF_OPTION(OPTION, IF-SET, [IF-NOT-SET])
|
||||
# -------------------------------------------
|
||||
# Execute IF-SET if OPTION is set, IF-NOT-SET otherwise.
|
||||
AC_DEFUN([_AM_IF_OPTION],
|
||||
[m4_ifset(_AM_MANGLE_OPTION([$1]), [$2], [$3])])
|
||||
|
||||
# Check to make sure that the build environment is sane. -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 1996, 1997, 2000, 2001, 2003, 2005, 2008
|
||||
# Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
# serial 5
|
||||
|
||||
# AM_SANITY_CHECK
|
||||
# ---------------
|
||||
AC_DEFUN([AM_SANITY_CHECK],
|
||||
[AC_MSG_CHECKING([whether build environment is sane])
|
||||
# Just in case
|
||||
sleep 1
|
||||
echo timestamp > conftest.file
|
||||
# Reject unsafe characters in $srcdir or the absolute working directory
|
||||
# name. Accept space and tab only in the latter.
|
||||
am_lf='
|
||||
'
|
||||
case `pwd` in
|
||||
*[[\\\"\#\$\&\'\`$am_lf]]*)
|
||||
AC_MSG_ERROR([unsafe absolute working directory name]);;
|
||||
esac
|
||||
case $srcdir in
|
||||
*[[\\\"\#\$\&\'\`$am_lf\ \ ]]*)
|
||||
AC_MSG_ERROR([unsafe srcdir value: `$srcdir']);;
|
||||
esac
|
||||
|
||||
# Do `set' in a subshell so we don't clobber the current shell's
|
||||
# arguments. Must try -L first in case configure is actually a
|
||||
# symlink; some systems play weird games with the mod time of symlinks
|
||||
# (eg FreeBSD returns the mod time of the symlink's containing
|
||||
# directory).
|
||||
if (
|
||||
set X `ls -Lt "$srcdir/configure" conftest.file 2> /dev/null`
|
||||
if test "$[*]" = "X"; then
|
||||
# -L didn't work.
|
||||
set X `ls -t "$srcdir/configure" conftest.file`
|
||||
fi
|
||||
rm -f conftest.file
|
||||
if test "$[*]" != "X $srcdir/configure conftest.file" \
|
||||
&& test "$[*]" != "X conftest.file $srcdir/configure"; then
|
||||
|
||||
# If neither matched, then we have a broken ls. This can happen
|
||||
# if, for instance, CONFIG_SHELL is bash and it inherits a
|
||||
# broken ls alias from the environment. This has actually
|
||||
# happened. Such a system could not be considered "sane".
|
||||
AC_MSG_ERROR([ls -t appears to fail. Make sure there is not a broken
|
||||
alias in your environment])
|
||||
fi
|
||||
|
||||
test "$[2]" = conftest.file
|
||||
)
|
||||
then
|
||||
# Ok.
|
||||
:
|
||||
else
|
||||
AC_MSG_ERROR([newly created file is older than distributed files!
|
||||
Check your system clock])
|
||||
fi
|
||||
AC_MSG_RESULT(yes)])
|
||||
|
||||
# Copyright (C) 2001, 2003, 2005 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
# AM_PROG_INSTALL_STRIP
|
||||
# ---------------------
|
||||
# One issue with vendor `install' (even GNU) is that you can't
|
||||
# specify the program used to strip binaries. This is especially
|
||||
# annoying in cross-compiling environments, where the build's strip
|
||||
# is unlikely to handle the host's binaries.
|
||||
# Fortunately install-sh will honor a STRIPPROG variable, so we
|
||||
# always use install-sh in `make install-strip', and initialize
|
||||
# STRIPPROG with the value of the STRIP variable (set by the user).
|
||||
AC_DEFUN([AM_PROG_INSTALL_STRIP],
|
||||
[AC_REQUIRE([AM_PROG_INSTALL_SH])dnl
|
||||
# Installed binaries are usually stripped using `strip' when the user
|
||||
# runs `make install-strip'. However `strip' might not be the right
|
||||
# tool to use in cross-compilation environments, therefore Automake
|
||||
# will honor the `STRIP' environment variable to overrule this program.
|
||||
dnl Don't test for $cross_compiling = yes, because it might be `maybe'.
|
||||
if test "$cross_compiling" != no; then
|
||||
AC_CHECK_TOOL([STRIP], [strip], :)
|
||||
fi
|
||||
INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s"
|
||||
AC_SUBST([INSTALL_STRIP_PROGRAM])])
|
||||
|
||||
# Copyright (C) 2006, 2008 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
# serial 2
|
||||
|
||||
# _AM_SUBST_NOTMAKE(VARIABLE)
|
||||
# ---------------------------
|
||||
# Prevent Automake from outputting VARIABLE = @VARIABLE@ in Makefile.in.
|
||||
# This macro is traced by Automake.
|
||||
AC_DEFUN([_AM_SUBST_NOTMAKE])
|
||||
|
||||
# AM_SUBST_NOTMAKE(VARIABLE)
|
||||
# ---------------------------
|
||||
# Public sister of _AM_SUBST_NOTMAKE.
|
||||
AC_DEFUN([AM_SUBST_NOTMAKE], [_AM_SUBST_NOTMAKE($@)])
|
||||
|
||||
# Check how to create a tarball. -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 2004, 2005 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
# serial 2
|
||||
|
||||
# _AM_PROG_TAR(FORMAT)
|
||||
# --------------------
|
||||
# Check how to create a tarball in format FORMAT.
|
||||
# FORMAT should be one of `v7', `ustar', or `pax'.
|
||||
#
|
||||
# Substitute a variable $(am__tar) that is a command
|
||||
# writing to stdout a FORMAT-tarball containing the directory
|
||||
# $tardir.
|
||||
# tardir=directory && $(am__tar) > result.tar
|
||||
#
|
||||
# Substitute a variable $(am__untar) that extracts such
|
||||
# a tarball read from stdin.
|
||||
# $(am__untar) < result.tar
|
||||
AC_DEFUN([_AM_PROG_TAR],
|
||||
[# Always define AMTAR for backward compatibility.
|
||||
AM_MISSING_PROG([AMTAR], [tar])
|
||||
m4_if([$1], [v7],
|
||||
[am__tar='${AMTAR} chof - "$$tardir"'; am__untar='${AMTAR} xf -'],
|
||||
[m4_case([$1], [ustar],, [pax],,
|
||||
[m4_fatal([Unknown tar format])])
|
||||
AC_MSG_CHECKING([how to create a $1 tar archive])
|
||||
# Loop over all known methods to create a tar archive until one works.
|
||||
_am_tools='gnutar m4_if([$1], [ustar], [plaintar]) pax cpio none'
|
||||
_am_tools=${am_cv_prog_tar_$1-$_am_tools}
|
||||
# Do not fold the above two lines into one, because Tru64 sh and
|
||||
# Solaris sh will not grok spaces in the rhs of `-'.
|
||||
for _am_tool in $_am_tools
|
||||
do
|
||||
case $_am_tool in
|
||||
gnutar)
|
||||
for _am_tar in tar gnutar gtar;
|
||||
do
|
||||
AM_RUN_LOG([$_am_tar --version]) && break
|
||||
done
|
||||
am__tar="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$$tardir"'
|
||||
am__tar_="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$tardir"'
|
||||
am__untar="$_am_tar -xf -"
|
||||
;;
|
||||
plaintar)
|
||||
# Must skip GNU tar: if it does not support --format= it doesn't create
|
||||
# ustar tarball either.
|
||||
(tar --version) >/dev/null 2>&1 && continue
|
||||
am__tar='tar chf - "$$tardir"'
|
||||
am__tar_='tar chf - "$tardir"'
|
||||
am__untar='tar xf -'
|
||||
;;
|
||||
pax)
|
||||
am__tar='pax -L -x $1 -w "$$tardir"'
|
||||
am__tar_='pax -L -x $1 -w "$tardir"'
|
||||
am__untar='pax -r'
|
||||
;;
|
||||
cpio)
|
||||
am__tar='find "$$tardir" -print | cpio -o -H $1 -L'
|
||||
am__tar_='find "$tardir" -print | cpio -o -H $1 -L'
|
||||
am__untar='cpio -i -H $1 -d'
|
||||
;;
|
||||
none)
|
||||
am__tar=false
|
||||
am__tar_=false
|
||||
am__untar=false
|
||||
;;
|
||||
esac
|
||||
|
||||
# If the value was cached, stop now. We just wanted to have am__tar
|
||||
# and am__untar set.
|
||||
test -n "${am_cv_prog_tar_$1}" && break
|
||||
|
||||
# tar/untar a dummy directory, and stop if the command works
|
||||
rm -rf conftest.dir
|
||||
mkdir conftest.dir
|
||||
echo GrepMe > conftest.dir/file
|
||||
AM_RUN_LOG([tardir=conftest.dir && eval $am__tar_ >conftest.tar])
|
||||
rm -rf conftest.dir
|
||||
if test -s conftest.tar; then
|
||||
AM_RUN_LOG([$am__untar <conftest.tar])
|
||||
grep GrepMe conftest.dir/file >/dev/null 2>&1 && break
|
||||
fi
|
||||
done
|
||||
rm -rf conftest.dir
|
||||
|
||||
AC_CACHE_VAL([am_cv_prog_tar_$1], [am_cv_prog_tar_$1=$_am_tool])
|
||||
AC_MSG_RESULT([$am_cv_prog_tar_$1])])
|
||||
AC_SUBST([am__tar])
|
||||
AC_SUBST([am__untar])
|
||||
]) # _AM_PROG_TAR
|
||||
|
||||
m4_include([../config/depstand.m4])
|
||||
m4_include([../config/lead-dot.m4])
|
||||
m4_include([../config/multi.m4])
|
||||
m4_include([../config/override.m4])
|
||||
m4_include([../libtool.m4])
|
||||
m4_include([../ltoptions.m4])
|
||||
m4_include([../ltsugar.m4])
|
||||
m4_include([../ltversion.m4])
|
||||
m4_include([../lt~obsolete.m4])
|
16649
libcilkrts/configure
vendored
Normal file
16649
libcilkrts/configure
vendored
Normal file
File diff suppressed because it is too large
Load diff
148
libcilkrts/configure.ac
Normal file
148
libcilkrts/configure.ac
Normal file
|
@ -0,0 +1,148 @@
|
|||
# @copyright
|
||||
# Copyright (C) 2011-2013, Intel Corporation
|
||||
# All rights reserved.
|
||||
#
|
||||
# @copyright
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in
|
||||
# the documentation and/or other materials provided with the
|
||||
# distribution.
|
||||
# * Neither the name of Intel Corporation nor the names of its
|
||||
# contributors may be used to endorse or promote products derived
|
||||
# from this software without specific prior written permission.
|
||||
#
|
||||
# @copyright
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
AC_INIT([Cilk Runtime Library], [2.0], [cilk@intel.com])
|
||||
AC_PREREQ([2.64])
|
||||
|
||||
# Needed to define ${target}. Needs to be very early to avoid annoying
|
||||
# warning about calling AC_ARG_PROGRAM before AC_CANONICAL_SYSTEM
|
||||
AC_CANONICAL_SYSTEM
|
||||
AM_INIT_AUTOMAKE(foreign no-dist)
|
||||
|
||||
# Build a DLL on Windows
|
||||
# AC_LIBTOOL_WIN32_DLL
|
||||
AC_PROG_CXX
|
||||
AC_PROG_CC
|
||||
# AC_PROG_LIBTOOL
|
||||
# AC_CONFIG_MACRO_DIR([..])
|
||||
AC_CONFIG_FILES([Makefile])
|
||||
AM_ENABLE_MULTILIB(, ..)
|
||||
|
||||
# Get target configury.
|
||||
. ${srcdir}/configure.tgt
|
||||
if test -n "$UNSUPPORTED"; then
|
||||
AC_MSG_ERROR([Configuration ${target} is unsupported.])
|
||||
fi
|
||||
|
||||
if test "${multilib}" = "yes"; then
|
||||
multilib_arg="--enable-multilib"
|
||||
else
|
||||
multilib_arg=
|
||||
fi
|
||||
|
||||
# Let the user request that the runtime libraries be installed in a
|
||||
# compiler-specific directory; the option defaults to "no".
|
||||
AC_MSG_CHECKING([for --enable-version-specific-runtime-libs])
|
||||
AC_ARG_ENABLE([version-specific-runtime-libs],
|
||||
AC_HELP_STRING([--enable-version-specific-runtime-libs],
|
||||
[Specify that runtime libraries should be installed in a compiler-specific directory]),
|
||||
[case "$enableval" in
|
||||
yes) enable_version_specific_runtime_libs=yes ;;
|
||||
no) enable_version_specific_runtime_libs=no ;;
|
||||
*) AC_MSG_ERROR([Unknown argument to enable/disable version-specific libs]);;
|
||||
esac],
|
||||
[enable_version_specific_runtime_libs=no])
|
||||
AC_MSG_RESULT($enable_version_specific_runtime_libs)
|
||||
|
||||
|
||||
# Calculate toolexeclibdir
|
||||
# Also toolexecdir, though it's only used in toolexeclibdir
|
||||
case ${enable_version_specific_runtime_libs} in
|
||||
yes)
|
||||
# Need the gcc compiler version to know where to install libraries
|
||||
# and header files if --enable-version-specific-runtime-libs option
|
||||
# is selected.
|
||||
toolexecdir='$(libdir)/gcc/$(target_alias)'
|
||||
toolexeclibdir='$(toolexecdir)/$(gcc_version)$(MULTISUBDIR)'
|
||||
;;
|
||||
no)
|
||||
if test -n "$with_cross_host" &&
|
||||
test x"$with_cross_host" != x"no"; then
|
||||
# Install a library built with a cross compiler in tooldir, not libdir.
|
||||
toolexecdir='$(exec_prefix)/$(target_alias)'
|
||||
toolexeclibdir='$(toolexecdir)/lib'
|
||||
else
|
||||
toolexecdir='$(libdir)/gcc-lib/$(target_alias)'
|
||||
toolexeclibdir='$(libdir)'
|
||||
fi
|
||||
multi_os_directory=`$CC -print-multi-os-directory`
|
||||
case $multi_os_directory in
|
||||
.) ;; # Avoid trailing /.
|
||||
*) toolexeclibdir=$toolexeclibdir/$multi_os_directory ;;
|
||||
esac
|
||||
;;
|
||||
esac
|
||||
|
||||
# Set config_dir based on the target. config_dir specifies where to get
|
||||
# target-specific files. The generic implementation is incomplete, but
|
||||
# contains information on what's needed.
|
||||
# The 32-bit x86 pattern is spelled i?86, as in configure.tgt, instead of
|
||||
# using a bracket expression: m4 strips one level of square brackets from
|
||||
# unquoted text in configure.ac, which would leave a pattern that matches
|
||||
# no real target and silently selects the generic directory.
|
||||
case "${target}" in
|
||||
|
||||
i?86-*-* | x86_64-*-*)
|
||||
config_dir="x86"
|
||||
;;
|
||||
|
||||
*)
|
||||
config_dir="generic"
|
||||
;;
|
||||
|
||||
esac
|
||||
AC_SUBST(config_dir)
|
||||
|
||||
# We have linker scripts for appropriate operating systems
|
||||
linux_linker_script=no
|
||||
case "${host}" in
|
||||
*-*-linux*)
|
||||
linux_linker_script=yes
|
||||
;;
|
||||
esac
|
||||
AM_CONDITIONAL(LINUX_LINKER_SCRIPT, test "$linux_linker_script" = "yes")
|
||||
|
||||
# Enable the Mac linker script on Darwin hosts.  Canonical Darwin triplets
|
||||
# carry "apple" in the vendor field and "darwin" in the OS field (for
|
||||
# example x86_64-apple-darwin13), so the OS field is what must be matched;
|
||||
# the historical *-*-apple* pattern is kept for compatibility.
|
||||
mac_linker_script=no
|
||||
case "${host}" in
|
||||
*-*-darwin* | *-*-apple*)
|
||||
mac_linker_script=yes
|
||||
;;
|
||||
esac
|
||||
AM_CONDITIONAL(MAC_LINKER_SCRIPT, test "$mac_linker_script" = "yes")
|
||||
|
||||
AM_PROG_LIBTOOL
|
||||
AC_SUBST(toolexecdir)
|
||||
AC_SUBST(toolexeclibdir)
|
||||
|
||||
# Must be last
|
||||
AC_OUTPUT
|
61
libcilkrts/configure.tgt
Normal file
61
libcilkrts/configure.tgt
Normal file
|
@ -0,0 +1,61 @@
|
|||
# @copyright
|
||||
# Copyright (C) 2011-2013, Intel Corporation
|
||||
# All rights reserved.
|
||||
#
|
||||
# @copyright
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in
|
||||
# the documentation and/or other materials provided with the
|
||||
# distribution.
|
||||
# * Neither the name of Intel Corporation nor the names of its
|
||||
# contributors may be used to endorse or promote products derived
|
||||
# from this software without specific prior written permission.
|
||||
#
|
||||
# @copyright
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
# Disable Cilk Runtime library for non x86 architecture...for now.
|
||||
case "${target}" in
|
||||
x86_64-*-*)
|
||||
;;
|
||||
i?86-*-*)
|
||||
;;
|
||||
*-*-*)
|
||||
UNSUPPORTED=1
|
||||
;;
|
||||
esac
|
||||
|
||||
# Disable libcilkrts on non POSIX hosted systems.
|
||||
if test x$enable_libcilkrts = x ; then
|
||||
# Enable libcilkrts by default on hosted POSIX systems.
|
||||
case "${target}" in
|
||||
*-*-linux* | *-*-gnu* | *-*-k*bsd*-gnu | *-*-kopensolaris*-gnu)
|
||||
;;
|
||||
*-*-netbsd* | *-*-freebsd* | *-*-openbsd* | *-*-dragonfly*)
|
||||
;;
|
||||
*-*-solaris2* | *-*-hpux11*)
|
||||
;;
|
||||
*-*-darwin* | *-*-aix*)
|
||||
;;
|
||||
*)
|
||||
UNSUPPORTED=1
|
||||
;;
|
||||
esac
|
||||
fi
|
71
libcilkrts/include/cilk/cilk.h
Normal file
71
libcilkrts/include/cilk/cilk.h
Normal file
|
@ -0,0 +1,71 @@
|
|||
/* cilk.h -*-C++-*-
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2010-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** @file cilk.h
|
||||
*
|
||||
* @brief Provides convenient aliases for the Cilk language keywords.
|
||||
*
|
||||
* @details
|
||||
* Since Cilk is a nonstandard extension to both C and C++, the Cilk
|
||||
* language keywords all begin with “`_Cilk_`”, which guarantees that they
|
||||
* will not conflict with user-defined identifiers in properly written
|
||||
* programs, so that “standard” C and C++ programs can safely be
|
||||
* compiled with a Cilk-enabled C or C++ compiler.
|
||||
*
|
||||
* However, this means that the keywords _look_ like something grafted on to
|
||||
* the base language. Therefore, you can include this header:
|
||||
*
|
||||
* #include "cilk/cilk.h"
|
||||
*
|
||||
* and then write the Cilk keywords with a “`cilk_`” prefix instead of
|
||||
* “`_Cilk_`”.
|
||||
*
|
||||
* @ingroup language
|
||||
*/
|
||||
|
||||
|
||||
/** @defgroup language Language Keywords
|
||||
* Definitions having to do with the Cilk language.
|
||||
* @{
|
||||
*/
|
||||
|
||||
#ifndef cilk_spawn
|
||||
# define cilk_spawn _Cilk_spawn ///< Spawn a task that can execute in parallel.
|
||||
# define cilk_sync _Cilk_sync ///< Wait for spawned tasks to complete.
|
||||
# define cilk_for _Cilk_for ///< Execute iterations of a for loop in parallel.
|
||||
#endif
|
||||
|
||||
/// @}
|
424
libcilkrts/include/cilk/cilk_api.h
Normal file
424
libcilkrts/include/cilk/cilk_api.h
Normal file
|
@ -0,0 +1,424 @@
|
|||
/* cilk_api.h
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2009-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** @file cilk_api.h
|
||||
*
|
||||
* @brief Defines the documented API exposed by the Cilk Plus runtime for use
|
||||
* by applications.
|
||||
*
|
||||
* @ingroup api
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_CILK_API_H
|
||||
#define INCLUDED_CILK_API_H
|
||||
|
||||
/** @defgroup api Runtime API
|
||||
* API to allow user programs to interact with the Cilk runtime.
|
||||
* @{
|
||||
*/
|
||||
|
||||
#ifndef CILK_STUB /* Real (non-stub) definitions */
|
||||
|
||||
#if ! defined(__cilk) && ! defined(USE_CILK_API)
|
||||
# ifdef _WIN32
|
||||
# error Cilk API is being used with non-Cilk compiler (or Cilk is disabled)
|
||||
# else
|
||||
# warning Cilk API is being used with non-Cilk compiler (or Cilk is disabled)
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#include <cilk/common.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
# include <cstddef> /* Defines size_t */
|
||||
#else
|
||||
# include <stddef.h> /* Defines size_t */
|
||||
#endif
|
||||
|
||||
#ifdef _WIN32
|
||||
# ifndef IN_CILK_RUNTIME
|
||||
/* Ensure the library is brought in if any of these functions are being called. */
|
||||
# pragma comment(lib, "cilkrts")
|
||||
# endif
|
||||
|
||||
# ifndef __cplusplus
|
||||
# include <wchar.h>
|
||||
# endif
|
||||
#endif /* _WIN32 */
|
||||
|
||||
__CILKRTS_BEGIN_EXTERN_C
|
||||
|
||||
/** Return values from __cilkrts_set_param() and __cilkrts_set_param_w()
|
||||
*/
|
||||
enum __cilkrts_set_param_status {
|
||||
__CILKRTS_SET_PARAM_SUCCESS = 0, /**< Success - parameter set */
|
||||
__CILKRTS_SET_PARAM_UNIMP = 1, /**< Unimplemented parameter */
|
||||
__CILKRTS_SET_PARAM_XRANGE = 2, /**< Parameter value out of range */
|
||||
__CILKRTS_SET_PARAM_INVALID = 3, /**< Invalid parameter value */
|
||||
__CILKRTS_SET_PARAM_LATE = 4 /**< Too late to change parameter value */
|
||||
};
|
||||
|
||||
/** Set user controllable runtime parameters
|
||||
*
|
||||
* Call this function to set runtime parameters that control the behavior
|
||||
* of the Cilk scheduler.
|
||||
*
|
||||
* @param param A string specifying the parameter to be set. One of:
|
||||
* - `"nworkers"`
|
||||
* - `"force reduce"`
|
||||
* @param value A string specifying the parameter value.
|
||||
* @returns A value from the @ref __cilkrts_set_param_status
|
||||
* enumeration indicating the result of the operation.
|
||||
*
|
||||
* @par The "nworkers" parameter
|
||||
*
|
||||
* This parameter specifies the number of worker threads to be created by the
|
||||
* Cilk runtime. @a Value must be a string of digits to be parsed by
|
||||
* `strtol()`.
|
||||
*
|
||||
* The number of worker threads is:
|
||||
* 1. the value set with `__cilkrts_set_param("nworkers")`, if it is
|
||||
* positive; otherwise,
|
||||
* 2. the value of the CILK_NWORKERS environment variable, if it is
|
||||
* defined; otherwise
|
||||
* 3. the number of cores available, as reported by the operating system.
|
||||
*
|
||||
* @note
|
||||
* Technically, Cilk distinguishes between the _user thread_ (the thread that
|
||||
* the user code was executing on when the Cilk runtime started), and
|
||||
* _worker threads_ (new threads created by the Cilk runtime to support
|
||||
* Cilk parallelism). `nworkers` actually includes both the user thread and
|
||||
* the worker threads; that is, it is one greater than the number of true
|
||||
* “worker threads”.
|
||||
*
|
||||
* @note
|
||||
* Setting `nworkers = 1` produces serial behavior. Cilk spawns and syncs will
|
||||
* be executed, but with only one worker, continuations will never be stolen,
|
||||
* so all code will execute in serial.
|
||||
*
|
||||
* @warning
|
||||
* The number of worker threads can only be set *before* the runtime has
|
||||
* started. Attempting to set it when the runtime is running will have no
|
||||
* effect, and will return an error code. You can call __cilkrts_end_cilk()
|
||||
* to shut down the runtime to change the number of workers.
|
||||
*
|
||||
* @warning
|
||||
* The default Cilk scheduler behavior is usually pretty good. The ability
|
||||
* to override `nworkers` can be useful for experimentation, but it won’t
|
||||
* usually be necessary for getting good performance.
|
||||
*
|
||||
* @par The "force reduce" parameter
|
||||
*
|
||||
* This parameter controls whether the runtime should allocate a new view
|
||||
* for a reducer for every parallel strand that it is accessed on. (See
|
||||
* @ref pagereducers.) @a Value must be `"1"` or `"true"` to enable the
|
||||
* “force reduce” behavior, or `"0"` or `"false"` to disable it.
|
||||
*
|
||||
* “Force reduce” behavior will also be enabled if
|
||||
* `__cilkrts_set_param("force reduce")` is not called, but the
|
||||
* `CILK_FORCE_REDUCE` environment variable is defined.
|
||||
*
|
||||
* @warning
|
||||
* When this option is enabled, `nworkers` should be set to `1`. Using “force
|
||||
* reduce” with more than one worker may result in runtime errors.
|
||||
*
|
||||
* @warning
|
||||
* Enabling this option can significantly reduce performance. It should
|
||||
* _only_ be used as a debugging tool.
|
||||
*/
|
||||
CILK_API(int) __cilkrts_set_param(const char *param, const char *value);
|
||||
|
||||
#ifdef _WIN32
|
||||
/**
|
||||
* Set user controllable parameters using wide strings
|
||||
*
|
||||
* @note This variant of __cilkrts_set_param() is only available
|
||||
* on Windows.
|
||||
*
|
||||
* @copydetails __cilkrts_set_param
|
||||
*/
|
||||
CILK_API(int) __cilkrts_set_param_w(const wchar_t *param, const wchar_t *value);
|
||||
#endif
|
||||
|
||||
/** Shut down and deallocate all Cilk state. The runtime will abort the
|
||||
* application if Cilk is still in use by this thread. Otherwise the runtime
|
||||
* will wait for all other threads using Cilk to exit.
|
||||
*/
|
||||
CILK_API(void) __cilkrts_end_cilk(void);
|
||||
|
||||
/** Initialize the Cilk data structures and start the runtime.
|
||||
*/
|
||||
CILK_API(void) __cilkrts_init(void);
|
||||
|
||||
/** Return the runtime `nworkers` parameter. (See the discussion of `nworkers`
|
||||
* in the documentation for __cilkrts_set_param().)
|
||||
*/
|
||||
CILK_API(int) __cilkrts_get_nworkers(void);
|
||||
|
||||
/** Return the number of thread data structures.
|
||||
*
|
||||
* This function returns the number of data structures that have been
|
||||
* allocated by the runtime to hold information about user and worker threads.
|
||||
*
|
||||
* If you don’t already know what this is good for, then you probably don’t
|
||||
* need it.
|
||||
*/
|
||||
CILK_API(int) __cilkrts_get_total_workers(void);
|
||||
|
||||
/** What thread is the function running on?
|
||||
*
|
||||
* Return a small integer identifying the current thread. Each worker thread
|
||||
* started by the Cilk runtime library has a unique worker number in the range
|
||||
* `1 .. nworkers - 1`.
|
||||
*
|
||||
* All _user_ threads (threads started by the user, or by other libraries) are
|
||||
* identified as worker number 0. Therefore, the worker number is not unique
|
||||
* across multiple user threads.
|
||||
*/
|
||||
CILK_API(int) __cilkrts_get_worker_number(void);
|
||||
|
||||
/** Test whether “force reduce” behavior is enabled.
|
||||
*
|
||||
* @return Non-zero if force-reduce mode is on, zero if it is off.
|
||||
*/
|
||||
CILK_API(int) __cilkrts_get_force_reduce(void);
|
||||
|
||||
/** Interact with tools
|
||||
*/
|
||||
CILK_API(void)
|
||||
__cilkrts_metacall(unsigned int tool, unsigned int code, void *data);
|
||||
|
||||
#ifdef _WIN32
|
||||
/// Windows exception description record.
|
||||
typedef struct _EXCEPTION_RECORD _EXCEPTION_RECORD;
|
||||
|
||||
/** Function signature for Windows exception notification callbacks.
|
||||
*/
|
||||
typedef void (*__cilkrts_pfn_seh_callback)(const _EXCEPTION_RECORD *exception);
|
||||
|
||||
/** Specify a function to call when a non-C++ exception is caught.
|
||||
*
|
||||
* Cilk Plus parallelism plays nicely with C++ exception handling, but the
|
||||
* Cilk Plus runtime has no way to unwind the stack across a strand boundary
|
||||
* for Microsoft SEH (“Structured Exception Handling”) exceptions. Therefore,
|
||||
* when the runtime catches such an exception, it must abort the application.
|
||||
*
|
||||
* If an SEH callback has been set, the runtime will call it before aborting.
|
||||
*
|
||||
* @param pfn A pointer to a callback function to be called before the
|
||||
* runtime aborts the program because of an SEH exception.
|
||||
*/
|
||||
CILK_API(int) __cilkrts_set_seh_callback(__cilkrts_pfn_seh_callback pfn);
|
||||
#endif /* _WIN32 */
|
||||
|
||||
#if __CILKRTS_ABI_VERSION >= 1
|
||||
/* Pedigree API is available only for compilers that use ABI version >= 1. */
|
||||
|
||||
|
||||
/** @name Pedigrees
|
||||
*/
|
||||
//@{
|
||||
|
||||
// @cond internal
|
||||
|
||||
/** Support for __cilkrts_get_pedigree.
|
||||
*/
|
||||
CILK_API(__cilkrts_pedigree)
|
||||
__cilkrts_get_pedigree_internal(__cilkrts_worker *w);
|
||||
|
||||
/** Support for __cilkrts_bump_worker_rank.
|
||||
*/
|
||||
CILK_API(int)
|
||||
__cilkrts_bump_worker_rank_internal(__cilkrts_worker* w);
|
||||
|
||||
/// @endcond
|
||||
|
||||
|
||||
/** Get the current pedigree, in a linked list representation.
|
||||
*
|
||||
* This routine returns a copy of the last node in the pedigree list.
|
||||
* For example, if the current pedigree (in order) is <1, 2, 3, 4>,
|
||||
* then this method returns a node with rank == 4, and whose parent
|
||||
* field points to the node with rank of 3. In summary, following the
|
||||
* nodes in the chain visits the terms of the pedigree in reverse.
|
||||
*
|
||||
* The returned node is guaranteed to be valid only until the caller
|
||||
* of this routine has returned.
|
||||
*/
|
||||
__CILKRTS_INLINE
|
||||
__cilkrts_pedigree __cilkrts_get_pedigree(void)
|
||||
{
|
||||
return __cilkrts_get_pedigree_internal(__cilkrts_get_tls_worker());
|
||||
}
|
||||
|
||||
/** Context used by __cilkrts_get_pedigree_info.
|
||||
*
|
||||
* @deprecated
|
||||
* This data structure is only used by the deprecated
|
||||
* __cilkrts_get_pedigree_info function.
|
||||
*
|
||||
* Callers should initialize the `data` array to NULL and set the `size`
|
||||
* field to `sizeof(__cilkrts_pedigree_context_t)` before the first call
|
||||
* to __cilkrts_get_pedigree_info(), and should not examine or modify it
|
||||
* thereafter.
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
__STDNS size_t size; /**< Size of the struct in bytes */
|
||||
void *data[3]; /**< Opaque context data */
|
||||
} __cilkrts_pedigree_context_t;
|
||||
|
||||
/** Get pedigree information.
|
||||
*
|
||||
* @deprecated
|
||||
* Use __cilkrts_get_pedigree() instead.
|
||||
*
|
||||
* This routine allows code to walk up the stack of Cilk frames to gather
|
||||
* the pedigree.
|
||||
*
|
||||
* Initialize the pedigree walk by filling the pedigree context with NULLs
|
||||
* and setting the size field to sizeof(__cilkrts_pedigree_context_t).
|
||||
* Other than initialization to NULL to start the walk, user code should
|
||||
* consider the pedigree context data opaque and should not examine or
|
||||
* modify it.
|
||||
*
|
||||
* @returns 0 - Success - birthrank is valid
|
||||
* @returns >0 - End of pedigree walk
|
||||
* @returns -1 - Failure - No worker bound to thread
|
||||
* @returns -2 - Failure - Sanity check failed
|
||||
* @returns -3 - Failure - Invalid context size
|
||||
* @returns -4 - Failure - Internal error - walked off end of chain of frames
|
||||
*/
|
||||
CILK_API(int)
|
||||
__cilkrts_get_pedigree_info(/* In/Out */ __cilkrts_pedigree_context_t *context,
|
||||
/* Out */ uint64_t *sf_birthrank);
|
||||
|
||||
/** Get the rank of the currently executing worker.
|
||||
*
|
||||
* @deprecated
|
||||
* Use `__cilkrts_get_pedigree().rank` instead.
|
||||
*
|
||||
* @returns 0 - Success - *rank is valid
|
||||
* @returns <0 - Failure - *rank is not changed
|
||||
*/
|
||||
CILK_EXPORT_AND_INLINE
|
||||
int __cilkrts_get_worker_rank(uint64_t *rank)
|
||||
{
|
||||
*rank = __cilkrts_get_pedigree().rank;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/** Increment the pedigree rank of the currently executing worker.
|
||||
*
|
||||
* @returns 0 - Success - rank was incremented
|
||||
* @returns -1 - Failure
|
||||
*/
|
||||
CILK_EXPORT_AND_INLINE
|
||||
int __cilkrts_bump_worker_rank(void)
|
||||
{
|
||||
return __cilkrts_bump_worker_rank_internal(__cilkrts_get_tls_worker());
|
||||
}
|
||||
|
||||
/** Increment the pedigree rank for a cilk_for loop.
|
||||
* Obsolete.
|
||||
*
|
||||
* @deprecated
|
||||
* This function was provided to allow the user to manipulate the pedigree
|
||||
* rank of a `cilk_for` loop. The compiler now generates code to do that
|
||||
* manipulation automatically, so this function is now unnecessary. It may
|
||||
* be called, but will have no effect.
|
||||
*/
|
||||
CILK_EXPORT_AND_INLINE
|
||||
int __cilkrts_bump_loop_rank(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
//@}
|
||||
|
||||
#endif /* __CILKRTS_ABI_VERSION >= 1 */
|
||||
|
||||
__CILKRTS_END_EXTERN_C
|
||||
|
||||
#else /* CILK_STUB */
|
||||
|
||||
// Programs compiled with CILK_STUB are not linked with the Cilk runtime
|
||||
// library, so they should not have external references to runtime functions.
|
||||
// Therefore, the functions are replaced with stubs.
|
||||
|
||||
#ifdef _WIN32
|
||||
/* Stub for __cilkrts_set_param_w(): evaluates both arguments exactly once
 * (so side effects match a real call) and yields 0, the value of
 * __CILKRTS_SET_PARAM_SUCCESS.  The (void) casts avoid unused-value
 * warnings on the discarded comma operands. */
#define __cilkrts_set_param_w(name,value) ((void)(name), (void)(value), 0)
|
||||
#define __cilkrts_set_seh_callback(pfn) (0)
|
||||
#endif
|
||||
/* Stub for __cilkrts_set_param(): evaluates both arguments exactly once
 * (so side effects match a real call) and yields 0, the value of
 * __CILKRTS_SET_PARAM_SUCCESS.  The (void) casts avoid unused-value
 * warnings on the discarded comma operands. */
#define __cilkrts_set_param(name,value) ((void)(name), (void)(value), 0)
|
||||
#define __cilkrts_end_cilk() ((void) 0)
|
||||
#define __cilkrts_init() ((void) 0)
|
||||
#define __cilkrts_get_nworkers() (1)
|
||||
#define __cilkrts_get_total_workers() (1)
|
||||
#define __cilkrts_get_worker_number() (0)
|
||||
#define __cilkrts_get_force_reduce() (0)
|
||||
/* Stub for __cilkrts_metacall(), which is declared to return void in the
 * non-stub build: evaluates each argument exactly once for its side effects
 * and produces no value.  The (void) casts avoid unused-value warnings. */
#define __cilkrts_metacall(tool,code,data) ((void)(tool), (void)(code), (void)(data))
|
||||
|
||||
#if __CILKRTS_ABI_VERSION >= 1
|
||||
/* Pedigree stubs */
|
||||
#define __cilkrts_get_pedigree_info(context, sf_birthrank) (-1)
|
||||
#define __cilkrts_get_worker_rank(rank) (*(rank) = 0)
|
||||
#define __cilkrts_bump_worker_rank() (-1)
|
||||
/* Stub for the obsolete __cilkrts_bump_loop_rank(): the non-stub inline is a
 * no-op that always returns 0, so the serial stub returns 0 as well. */
#define __cilkrts_bump_loop_rank() (0)
|
||||
|
||||
/*
|
||||
* A stub method for __cilkrts_get_pedigree.
|
||||
* Returns an empty __cilkrts_pedigree.
|
||||
*/
|
||||
__CILKRTS_INLINE
|
||||
__cilkrts_pedigree __cilkrts_get_pedigree_stub(void)
|
||||
{
|
||||
__cilkrts_pedigree ans;
|
||||
ans.rank = 0;
|
||||
ans.parent = NULL;
|
||||
return ans;
|
||||
}
|
||||
|
||||
/* Renamed to an actual stub method. */
|
||||
#define __cilkrts_get_pedigree() __cilkrts_get_pedigree_stub()
|
||||
|
||||
#endif /* __CILKRTS_ABI_VERSION >= 1 */
|
||||
|
||||
#endif /* CILK_STUB */
|
||||
|
||||
//@}
|
||||
|
||||
#endif /* INCLUDED_CILK_API_H */
|
38
libcilkrts/include/cilk/cilk_api_linux.h
Normal file
38
libcilkrts/include/cilk/cilk_api_linux.h
Normal file
|
@ -0,0 +1,38 @@
|
|||
/*
|
||||
* @copyright
|
||||
* Copyright (C) 2009-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
*/
|
||||
|
||||
/* THIS FILE IS DEPRECATED. USE cilk_api.h INSTEAD. */
|
||||
#include <cilk/cilk_api.h>
|
55
libcilkrts/include/cilk/cilk_stub.h
Normal file
55
libcilkrts/include/cilk/cilk_stub.h
Normal file
|
@ -0,0 +1,55 @@
|
|||
/* cilk_stub.h -*-C++-*-
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2009-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_CILK_STUB_DOT_H
|
||||
#define INCLUDED_CILK_STUB_DOT_H
|
||||
|
||||
/* Definitions for creating a serialization from a Cilk program.
|
||||
* These definitions are suitable for use by a compiler that is not
|
||||
* Cilk-enabled.
|
||||
*/
|
||||
|
||||
/* Pretend we are a non-Cilk compiler */
|
||||
#undef __cilk
|
||||
#define CILK_STUB
|
||||
|
||||
/* Replace Cilk keywords with serial equivalents */
|
||||
#define _Cilk_spawn
|
||||
#define _Cilk_sync
|
||||
#define _Cilk_for for
|
||||
|
||||
#endif /* ! defined(INCLUDED_CILK_STUB_DOT_H) */
|
131
libcilkrts/include/cilk/cilk_undocumented.h
Normal file
131
libcilkrts/include/cilk/cilk_undocumented.h
Normal file
|
@ -0,0 +1,131 @@
|
|||
/*
|
||||
* @copyright
|
||||
* Copyright (C) 2009-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
******************************************************************************
|
||||
*
|
||||
* cilk_undocumented.h
|
||||
*
|
||||
* This file defines exported functions that are not included in the standard
|
||||
* documentation.
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_CILK_UNDOCUMENTED_H
|
||||
#define INCLUDED_CILK_UNDOCUMENTED_H
|
||||
|
||||
#include <cilk/common.h>
|
||||
|
||||
#ifndef CILK_STUB
|
||||
|
||||
__CILKRTS_BEGIN_EXTERN_C
|
||||
|
||||
/*
|
||||
* __cilkrts_synched
|
||||
*
|
||||
* Allows an application to determine if there are any outstanding children at
|
||||
* this instant. This function will examine the current full frame to
|
||||
* determine this. This function will return a valid result only when called
|
||||
* within a spawn continuation, within the stack frame of the continuation
|
||||
* itself.
|
||||
*/
|
||||
|
||||
CILK_EXPORT __CILKRTS_NOTHROW
|
||||
int __cilkrts_synched(void);
|
||||
|
||||
/*
|
||||
* __cilkrts_cilkscreen_puts
|
||||
*
|
||||
* Allows an application to write a string to the Cilkscreen log.
|
||||
* The standard error stream will be flushed after the write.
|
||||
*/
|
||||
|
||||
CILK_EXPORT __CILKRTS_NOTHROW
|
||||
void __cilkrts_cilkscreen_puts(const char *);
|
||||
|
||||
/*
|
||||
* __cilkrts_get_sf
|
||||
*
|
||||
* A debugging aid that allows an application to get the __cilkrts_stack_frame
|
||||
* for the current function. Only compiled into the DLL in debug builds.
|
||||
*/
|
||||
|
||||
CILK_EXPORT __CILKRTS_NOTHROW
|
||||
void *__cilkrts_get_sf(void);
|
||||
|
||||
/**
|
||||
* Returns the size of stacks created by Cilk.
|
||||
*/
|
||||
CILK_EXPORT __CILKRTS_NOTHROW
|
||||
size_t __cilkrts_get_stack_size(void);
|
||||
|
||||
/**
|
||||
* Dumps runtime statistics to stderr.
|
||||
* Undocumented API for debugging.
|
||||
*/
|
||||
CILK_EXPORT __CILKRTS_NOTHROW
|
||||
void __cilkrts_dump_stats(void);
|
||||
|
||||
CILK_EXPORT __CILKRTS_NOTHROW
|
||||
int __cilkrts_irml_version(void);
|
||||
|
||||
struct __cilk_tbb_unwatch_thunk;
|
||||
struct __cilk_tbb_stack_op_thunk;
|
||||
|
||||
CILK_EXPORT __CILKRTS_NOTHROW
|
||||
int __cilkrts_watch_stack(struct __cilk_tbb_unwatch_thunk *u,
|
||||
struct __cilk_tbb_stack_op_thunk o);
|
||||
|
||||
#ifndef IN_CILK_RUNTIME
|
||||
#ifdef _WIN32
|
||||
/* Do not use CILK_API because __cilkrts_worker_stub must be __stdcall */
|
||||
CILK_EXPORT unsigned __CILKRTS_NOTHROW __stdcall
|
||||
__cilkrts_worker_stub(void *arg);
|
||||
#else
|
||||
/* Do not use CILK_API because __cilkrts_worker_stub has default visibility */
|
||||
CILK_EXPORT void* __CILKRTS_NOTHROW
|
||||
__cilkrts_worker_stub(void *arg);
|
||||
#endif /* _WIN32 */
|
||||
#endif /* IN_CILK_RUNTIME */
|
||||
|
||||
__CILKRTS_END_EXTERN_C
|
||||
|
||||
#else /* CILK_STUB */
|
||||
|
||||
/* Stubs for the api functions */
|
||||
|
||||
#define __cilkrts_get_stack_size() (0)
|
||||
#define __cilkrts_synched() (1)
|
||||
|
||||
#endif /* CILK_STUB */
|
||||
|
||||
#endif /* INCLUDED_CILK_UNDOCUMENTED_H */
|
376
libcilkrts/include/cilk/common.h
Normal file
376
libcilkrts/include/cilk/common.h
Normal file
|
@ -0,0 +1,376 @@
|
|||
/** common.h
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2010-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** @file common.h
|
||||
*
|
||||
* @brief Defines common macros and structures used by the Intel Cilk Plus
|
||||
* runtime.
|
||||
*
|
||||
* @ingroup common
|
||||
*/
|
||||
|
||||
/** @defgroup common Common Definitions
|
||||
* Macro, structure, and class definitions used elsewhere in the runtime.
|
||||
* @{
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_CILK_COMMON
|
||||
#define INCLUDED_CILK_COMMON
|
||||
|
||||
#ifdef __cplusplus
|
||||
/** Namespace for all Cilk definitions that can be included in user code.
|
||||
*/
|
||||
namespace cilk {
|
||||
|
||||
/** Namespace for definitions that are primarily intended for use
|
||||
* in other Cilk definitions.
|
||||
*/
|
||||
namespace internal {}
|
||||
}
|
||||
#endif
|
||||
|
||||
/** Cilk library version = 1.02
|
||||
*/
|
||||
#define CILK_LIBRARY_VERSION 102
|
||||
|
||||
#ifdef __cplusplus
|
||||
# include <cassert>
|
||||
#else
|
||||
# include <assert.h>
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Prefix standard library function and type names with __STDNS in order to
|
||||
* get correct lookup in both C and C++.
|
||||
*/
|
||||
#ifdef __cplusplus
|
||||
# define __STDNS std::
|
||||
#else
|
||||
# define __STDNS
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @def CILK_EXPORT
|
||||
* Define export of runtime functions from shared library.
|
||||
* Should be exported only from cilkrts*.dll/cilkrts*.so
|
||||
* @def CILK_EXPORT_DATA
|
||||
* Define export of runtime data from shared library.
|
||||
*/
|
||||
#ifdef _WIN32
|
||||
# ifdef IN_CILK_RUNTIME
|
||||
# define CILK_EXPORT __declspec(dllexport)
|
||||
# define CILK_EXPORT_DATA __declspec(dllexport)
|
||||
# else
|
||||
# define CILK_EXPORT __declspec(dllimport)
|
||||
# define CILK_EXPORT_DATA __declspec(dllimport)
|
||||
# endif /* IN_CILK_RUNTIME */
|
||||
#elif defined(__CYGWIN__) || defined(__APPLE__) || defined(_DARWIN_C_SOURCE)
|
||||
# define CILK_EXPORT /* nothing */
|
||||
# define CILK_EXPORT_DATA /* nothing */
|
||||
#else /* Unix/gcc */
|
||||
# ifdef IN_CILK_RUNTIME
|
||||
# define CILK_EXPORT __attribute__((visibility("protected")))
|
||||
# define CILK_EXPORT_DATA __attribute__((visibility("protected")))
|
||||
# else
|
||||
# define CILK_EXPORT /* nothing */
|
||||
# define CILK_EXPORT_DATA /* nothing */
|
||||
# endif /* IN_CILK_RUNTIME */
|
||||
#endif /* Unix/gcc */
|
||||
|
||||
/**
|
||||
* @def __CILKRTS_BEGIN_EXTERN_C
|
||||
* Macro to denote the start of a section in which all names have "C" linkage.
|
||||
* That is, none of the names are to be mangled.
|
||||
* @see __CILKRTS_END_EXTERN_C
|
||||
* @see __CILKRTS_EXTERN_C
|
||||
*
|
||||
* @def __CILKRTS_END_EXTERN_C
|
||||
* Macro to denote the end of a section in which all names have "C" linkage.
|
||||
* That is, none of the names are to be mangled.
|
||||
* @see __CILKRTS_BEGIN_EXTERN_C
|
||||
* @see __CILKRTS_EXTERN_C
|
||||
*
|
||||
* @def __CILKRTS_EXTERN_C
|
||||
* Macro to prefix a single definition which has "C" linkage.
|
||||
* That is, the defined name is not to be mangled.
|
||||
* @see __CILKRTS_BEGIN_EXTERN_C
|
||||
* @see __CILKRTS_END_EXTERN_C
|
||||
*/
|
||||
#ifdef __cplusplus
|
||||
# define __CILKRTS_BEGIN_EXTERN_C extern "C" {
|
||||
# define __CILKRTS_END_EXTERN_C }
|
||||
# define __CILKRTS_EXTERN_C extern "C"
|
||||
#else
|
||||
# define __CILKRTS_BEGIN_EXTERN_C
|
||||
# define __CILKRTS_END_EXTERN_C
|
||||
# define __CILKRTS_EXTERN_C
|
||||
#endif
|
||||
|
||||
/**
|
||||
* OS-independent macro to specify a function which is known to not throw
|
||||
* an exception.
|
||||
*/
|
||||
#ifdef __cplusplus
|
||||
# ifdef _WIN32
|
||||
# define __CILKRTS_NOTHROW __declspec(nothrow)
|
||||
# else /* Unix/gcc */
|
||||
# define __CILKRTS_NOTHROW __attribute__((nothrow))
|
||||
# endif /* Unix/gcc */
|
||||
#else
|
||||
# define __CILKRTS_NOTHROW /* nothing */
|
||||
#endif /* __cplusplus */
|
||||
|
||||
/** Cache alignment. (Good enough for most architectures.)
|
||||
*/
|
||||
#define __CILKRTS_CACHE_LINE__ 64
|
||||
|
||||
/**
|
||||
* Macro to specify alignment of a data member in a structure.
|
||||
* Because of the way that gcc’s alignment attribute is defined, @a n must
|
||||
* be a numeric literal, not just a compile-time constant expression.
|
||||
*/
|
||||
#ifdef _WIN32
|
||||
# define CILK_ALIGNAS(n) __declspec(align(n))
|
||||
#else /* Unix/gcc */
|
||||
# define CILK_ALIGNAS(n) __attribute__((__aligned__(n)))
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Macro to specify cache-line alignment of a data member in a structure.
|
||||
*/
|
||||
#define __CILKRTS_CACHE_ALIGN CILK_ALIGNAS(__CILKRTS_CACHE_LINE__)
|
||||
|
||||
/**
|
||||
* Macro to specify a class as being at least as strictly aligned as some
|
||||
* type on Windows. gcc does not provide a way of doing this, so on Unix,
|
||||
* this just specifies the largest natural type alignment. Put the macro
|
||||
* between the `class` keyword and the class name:
|
||||
*
|
||||
* class CILK_ALIGNAS_TYPE(foo) bar { ... };
|
||||
*/
|
||||
#ifdef _WIN32
|
||||
# define CILK_ALIGNAS_TYPE(t) __declspec(align(__alignof(t)))
|
||||
#else /* Unix/gcc */
|
||||
# define CILK_ALIGNAS_TYPE(t) __attribute__((__aligned__))
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @def CILK_API(RET_TYPE)
|
||||
* A function called explicitly by the programmer.
|
||||
* @def CILK_ABI(RET_TYPE)
|
||||
* A function called by compiler-generated code.
|
||||
* @def CILK_ABI_THROWS(RET_TYPE)
|
||||
* An ABI function that may throw an exception
|
||||
*
|
||||
* Even when these are the same definitions, they should be separate macros so
|
||||
* that they can be easily found in the code.
|
||||
*/
|
||||
|
||||
#ifdef _WIN32
|
||||
# define CILK_API(RET_TYPE) CILK_EXPORT RET_TYPE __CILKRTS_NOTHROW __cdecl
|
||||
# define CILK_ABI(RET_TYPE) CILK_EXPORT RET_TYPE __CILKRTS_NOTHROW __cdecl
|
||||
# define CILK_ABI_THROWS(RET_TYPE) CILK_EXPORT RET_TYPE __cdecl
|
||||
#else
|
||||
# define CILK_API(RET_TYPE) CILK_EXPORT RET_TYPE __CILKRTS_NOTHROW
|
||||
# define CILK_ABI(RET_TYPE) CILK_EXPORT RET_TYPE __CILKRTS_NOTHROW
|
||||
# define CILK_ABI_THROWS(RET_TYPE) CILK_EXPORT RET_TYPE
|
||||
#endif
|
||||
|
||||
/**
|
||||
* __CILKRTS_ASSERT should be defined for debugging only, otherwise it
|
||||
* interferes with vectorization. Since NDEBUG is not reliable (it must be
|
||||
* set by the user), we must use a platform-specific detection of debug mode.
|
||||
*/
|
||||
#if defined(_WIN32) && defined(_DEBUG)
|
||||
/* Windows debug */
|
||||
# define __CILKRTS_ASSERT(e) assert(e)
|
||||
#elif (! defined(_WIN32)) && ! defined(__OPTIMIZE__)
|
||||
/* Unix non-optimized */
|
||||
# define __CILKRTS_ASSERT(e) assert(e)
|
||||
#elif defined __cplusplus
|
||||
/* C++ non-debug */
|
||||
# define __CILKRTS_ASSERT(e) static_cast<void>(0)
|
||||
#else
|
||||
/* C non-debug */
|
||||
# define __CILKRTS_ASSERT(e) ((void) 0)
|
||||
#endif
|
||||
|
||||
/**
 * OS-independent macro to specify a function that should be inlined.
 *
 * Expands to plain `inline` in C++, `static inline` in C99 and later,
 * `__inline` for pre-C99 Microsoft compilers, and `extern __inline__` for
 * any other pre-C99 compiler (assumed to be GCC-compatible).
 */
#ifdef __cplusplus
    // C++
#   define __CILKRTS_INLINE inline
#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
    // C99
#   define __CILKRTS_INLINE static inline
#elif defined(_MSC_VER)
    // C89 on Windows
#   define __CILKRTS_INLINE __inline
#else
    // C89 on GCC-compatible systems
#   define __CILKRTS_INLINE extern __inline__
#endif
|
||||
|
||||
/**
|
||||
* Functions marked as CILK_EXPORT_AND_INLINE have both
|
||||
* inline versions defined in the Cilk API, as well as
|
||||
* non-inlined versions that are exported (for
|
||||
* compatibility with previous versions that did not
|
||||
* inline the functions).
|
||||
*/
|
||||
#ifdef COMPILING_CILK_API_FUNCTIONS
|
||||
# define CILK_EXPORT_AND_INLINE CILK_EXPORT
|
||||
#else
|
||||
# define CILK_EXPORT_AND_INLINE __CILKRTS_INLINE
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Try to determine if compiler supports rvalue references.
|
||||
*/
|
||||
#if defined(__cplusplus) && !defined(__CILKRTS_RVALUE_REFERENCES)
|
||||
# if __cplusplus >= 201103L // C++11
|
||||
# define __CILKRTS_RVALUE_REFERENCES 1
|
||||
# elif defined(__GXX_EXPERIMENTAL_CXX0X__)
|
||||
# define __CILKRTS_RVALUE_REFERENCES 1
|
||||
# elif __cplusplus >= 199711L && __cplusplus < 201103L
|
||||
// Compiler recognizes a language version prior to C++11
|
||||
# elif __INTEL_COMPILER == 1200 && defined(__STDC_HOSTED__)
|
||||
// Intel compiler version 12.0
|
||||
// __cplusplus has a non-standard definition. In the absence of a
|
||||
// proper definition, look for the C++0x macro, __STDC_HOSTED__.
|
||||
# define __CILKRTS_RVALUE_REFERENCES 1
|
||||
# elif __INTEL_COMPILER > 1200 && defined(CHAR16T)
|
||||
// Intel compiler version >= 12.1
|
||||
// __cplusplus has a non-standard definition. In the absence of a
|
||||
// proper definition, look for the Intel macro, CHAR16T
|
||||
# define __CILKRTS_RVALUE_REFERENCES 1
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Include stdint.h to define the standard integer types.
|
||||
*
|
||||
* Unfortunately Microsoft doesn't provide stdint.h until Visual Studio 2010,
|
||||
* so use our own definitions until those are available
|
||||
*/
|
||||
|
||||
#if ! defined(_MSC_VER) || (_MSC_VER >= 1600)
|
||||
# include <stdint.h>
|
||||
#else
|
||||
# ifndef __MS_STDINT_TYPES_DEFINED__
|
||||
# define __MS_STDINT_TYPES_DEFINED__
|
||||
typedef signed char int8_t;
|
||||
typedef short int16_t;
|
||||
typedef int int32_t;
|
||||
typedef __int64 int64_t;
|
||||
|
||||
typedef unsigned char uint8_t;
|
||||
typedef unsigned short uint16_t;
|
||||
typedef unsigned int uint32_t;
|
||||
typedef unsigned __int64 uint64_t;
|
||||
# endif /* __MS_STDINT_TYPES_DEFINED__ */
|
||||
#endif /* ! defined(_MSC_VER) || (_MSC_VER >= 1600) */
|
||||
|
||||
/**
|
||||
* @brief Application Binary Interface version of the Cilk runtime library.
|
||||
*
|
||||
* The ABI version is determined by the compiler used. An object file
|
||||
* compiled with a higher ABI version is not compatible with a library that is
|
||||
* compiled with a lower ABI version. An object file compiled with a lower
|
||||
* ABI version, however, can be used with a library compiled with a higher ABI
|
||||
* version unless otherwise stated.
|
||||
*/
|
||||
#ifndef __CILKRTS_ABI_VERSION
|
||||
# ifdef IN_CILK_RUNTIME
|
||||
# define __CILKRTS_ABI_VERSION 1
|
||||
# elif __INTEL_COMPILER > 1200
|
||||
// Intel compiler version >= 12.1
|
||||
# define __CILKRTS_ABI_VERSION 1
|
||||
# else
|
||||
// Compiler does not support ABI version 1
|
||||
// (Non-Intel compiler or Intel compiler prior to version 12.1).
|
||||
# define __CILKRTS_ABI_VERSION 0
|
||||
# endif
|
||||
#endif
|
||||
|
||||
// These structs are exported because the inlining of
|
||||
// the internal version of API methods require a worker
|
||||
// structure as parameter.
|
||||
__CILKRTS_BEGIN_EXTERN_C
|
||||
/// Worker struct, exported for inlined API methods
|
||||
/// @ingroup api
|
||||
struct __cilkrts_worker;
|
||||
|
||||
/// Worker struct, exported for inlined API methods
|
||||
/// @ingroup api
|
||||
typedef struct __cilkrts_worker __cilkrts_worker;
|
||||
|
||||
/// Worker struct pointer, exported for inlined API methods
|
||||
/// @ingroup api
|
||||
typedef struct __cilkrts_worker *__cilkrts_worker_ptr;
|
||||
|
||||
|
||||
/// Fetch the worker out of TLS.
|
||||
CILK_ABI(__cilkrts_worker_ptr) __cilkrts_get_tls_worker(void);
|
||||
|
||||
/// void *, defined to work around complaints from the compiler
|
||||
/// about using __declspec(nothrow) after the "void *" return type
|
||||
typedef void * __cilkrts_void_ptr;
|
||||
|
||||
__CILKRTS_END_EXTERN_C
|
||||
|
||||
|
||||
#if __CILKRTS_ABI_VERSION >= 1
|
||||
// Pedigree API is available only for compilers that use ABI version >= 1.
|
||||
|
||||
/** Pedigree information kept in the worker and stack frame.
|
||||
* @ingroup api
|
||||
*/
|
||||
typedef struct __cilkrts_pedigree
|
||||
{
|
||||
/** Rank at start of spawn helper. Saved rank for spawning functions */
|
||||
uint64_t rank;
|
||||
|
||||
/** Link to next in chain */
|
||||
const struct __cilkrts_pedigree *parent;
|
||||
} __cilkrts_pedigree;
|
||||
|
||||
#endif // __CILKRTS_ABI_VERSION >= 1
|
||||
|
||||
/// @}
|
||||
|
||||
#endif /* INCLUDED_CILK_COMMON */
|
1000
libcilkrts/include/cilk/holder.h
Normal file
1000
libcilkrts/include/cilk/holder.h
Normal file
File diff suppressed because it is too large
Load diff
172
libcilkrts/include/cilk/hyperobject_base.h
Normal file
172
libcilkrts/include/cilk/hyperobject_base.h
Normal file
|
@ -0,0 +1,172 @@
|
|||
/*
|
||||
* @copyright
|
||||
* Copyright (C) 2009-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_CILK_HYPEROBJECT_BASE
|
||||
#define INCLUDED_CILK_HYPEROBJECT_BASE
|
||||
|
||||
#ifdef __cplusplus
|
||||
# include <cstdlib>
|
||||
# include <cstddef>
|
||||
#else
|
||||
# include <stdlib.h>
|
||||
# include <stddef.h>
|
||||
#endif
|
||||
|
||||
#include <cilk/common.h>
|
||||
|
||||
#if defined _WIN32 || defined _WIN64
|
||||
# if !defined CILK_STUB && !defined IN_CILK_RUNTIME
|
||||
/* bring in the Cilk library, which has definitions for some of these
|
||||
* functions. */
|
||||
# pragma comment(lib, "cilkrts")
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* The __CILKRTS_STRAND_PURE attribute tells the compiler that the value
|
||||
* returned by 'func' for a given argument to 'func' will remain valid until
|
||||
* the next strand boundary (spawn or sync) or until the next call to a
|
||||
* function with the __CILKRTS_STRAND_STALE attribute using the same function
|
||||
* argument.
|
||||
*/
|
||||
#if 0 && defined __cilk && (defined __GNUC__ && !defined _WIN32) && defined __cilkartsrev
|
||||
# define __CILKRTS_STRAND_PURE(func) \
|
||||
func __attribute__((__cilk_hyper__("lookup")))
|
||||
# define __CILKRTS_STRAND_STALE(func) \
|
||||
func __attribute__((__cilk_hyper__("flush")))
|
||||
#else
|
||||
# define __CILKRTS_STRAND_PURE(func) func
|
||||
# define __CILKRTS_STRAND_STALE(func) func
|
||||
#endif
|
||||
|
||||
/*****************************************************************************
|
||||
* C runtime interface to the hyperobject subsystem
|
||||
*****************************************************************************/
|
||||
|
||||
__CILKRTS_BEGIN_EXTERN_C
|
||||
|
||||
/* Callback function signatures. The 'r' argument always points to the
|
||||
* reducer itself and is commonly ignored. */
|
||||
typedef void (*cilk_c_reducer_reduce_fn_t)(void* r, void* lhs, void* rhs);
|
||||
typedef void (*cilk_c_reducer_identity_fn_t)(void* r, void* view);
|
||||
typedef void (*cilk_c_reducer_destroy_fn_t)(void* r, void* view);
|
||||
typedef void* (*cilk_c_reducer_allocate_fn_t)(void* r, __STDNS size_t bytes);
|
||||
typedef void (*cilk_c_reducer_deallocate_fn_t)(void* r, void* view);
|
||||
|
||||
/** Representation of the monoid */
|
||||
typedef struct cilk_c_monoid {
|
||||
cilk_c_reducer_reduce_fn_t reduce_fn;
|
||||
cilk_c_reducer_identity_fn_t identity_fn;
|
||||
cilk_c_reducer_destroy_fn_t destroy_fn;
|
||||
cilk_c_reducer_allocate_fn_t allocate_fn;
|
||||
cilk_c_reducer_deallocate_fn_t deallocate_fn;
|
||||
} cilk_c_monoid;
|
||||
|
||||
/** Base of the hyperobject */
|
||||
typedef struct __cilkrts_hyperobject_base
|
||||
{
|
||||
cilk_c_monoid __c_monoid;
|
||||
unsigned long long __flags;
|
||||
__STDNS ptrdiff_t __view_offset; /* offset (in bytes) to leftmost view */
|
||||
__STDNS size_t __view_size; /* Size of each view */
|
||||
} __cilkrts_hyperobject_base;
|
||||
|
||||
|
||||
#ifndef CILK_STUB
|
||||
|
||||
/* Library functions. */
|
||||
CILK_EXPORT
|
||||
void __cilkrts_hyper_create(__cilkrts_hyperobject_base *key);
|
||||
CILK_EXPORT void __CILKRTS_STRAND_STALE(
|
||||
__cilkrts_hyper_destroy(__cilkrts_hyperobject_base *key));
|
||||
CILK_EXPORT void* __CILKRTS_STRAND_PURE(
|
||||
__cilkrts_hyper_lookup(__cilkrts_hyperobject_base *key));
|
||||
|
||||
CILK_EXPORT
|
||||
void* __cilkrts_hyperobject_alloc(void* ignore, __STDNS size_t bytes);
|
||||
CILK_EXPORT
|
||||
void __cilkrts_hyperobject_dealloc(void* ignore, void* view);
|
||||
|
||||
/* No-op destroy function */
|
||||
CILK_EXPORT
|
||||
void __cilkrts_hyperobject_noop_destroy(void* ignore, void* ignore2);
|
||||
|
||||
|
||||
#else // CILK_STUB
|
||||
|
||||
// Programs compiled with CILK_STUB are not linked with the Cilk runtime
|
||||
// library, so they should not have external references to cilkrts functions.
|
||||
// Furthermore, they don't need the hyperobject functionality, so the
|
||||
// functions can be stubbed.
|
||||
|
||||
#define __cilkrts_hyperobject_create __cilkrts_hyperobject_create__stub
|
||||
__CILKRTS_INLINE
|
||||
void __cilkrts_hyper_create(__cilkrts_hyperobject_base *key)
|
||||
{}
|
||||
|
||||
#define __cilkrts_hyperobject_destroy __cilkrts_hyperobject_destroy__stub
|
||||
__CILKRTS_INLINE
|
||||
void __cilkrts_hyper_destroy(__cilkrts_hyperobject_base *key)
|
||||
{}
|
||||
|
||||
#define __cilkrts_hyperobject_lookup __cilkrts_hyperobject_lookup__stub
|
||||
__CILKRTS_INLINE
|
||||
void* __cilkrts_hyper_lookup(__cilkrts_hyperobject_base *key)
|
||||
{ return (char*)(key) + key->__view_offset; }
|
||||
|
||||
// Pointers to these functions are stored into monoids, so real functions
|
||||
// are needed.
|
||||
|
||||
#define __cilkrts_hyperobject_alloc __cilkrts_hyperobject_alloc__stub
|
||||
__CILKRTS_INLINE
|
||||
void* __cilkrts_hyperobject_alloc(void* ignore, __STDNS size_t bytes)
|
||||
{ assert(0); return __STDNS malloc(bytes); }
|
||||
|
||||
#define __cilkrts_hyperobject_dealloc __cilkrts_hyperobject_dealloc__stub
|
||||
__CILKRTS_INLINE
|
||||
void __cilkrts_hyperobject_dealloc(void* ignore, void* view)
|
||||
{ assert(0); __STDNS free(view); }
|
||||
|
||||
#define __cilkrts_hyperobject_noop_destroy \
|
||||
__cilkrts_hyperobject_noop_destroy__stub
|
||||
__CILKRTS_INLINE
|
||||
void __cilkrts_hyperobject_noop_destroy(void* ignore, void* ignore2)
|
||||
{}
|
||||
|
||||
#endif
|
||||
|
||||
__CILKRTS_END_EXTERN_C
|
||||
|
||||
#endif /* INCLUDED_CILK_HYPEROBJECT_BASE */
|
606
libcilkrts/include/cilk/metaprogramming.h
Normal file
606
libcilkrts/include/cilk/metaprogramming.h
Normal file
|
@ -0,0 +1,606 @@
|
|||
/* metaprogramming.h -*- C++ -*-
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2012-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** @file metaprogramming.h
|
||||
*
|
||||
* @brief Defines metaprogramming utility classes used in the Cilk library.
|
||||
*
|
||||
* @ingroup common
|
||||
*/
|
||||
|
||||
#ifndef METAPROGRAMMING_H_INCLUDED
|
||||
#define METAPROGRAMMING_H_INCLUDED
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
#include <functional>
|
||||
#include <new>
|
||||
#include <cstdlib>
|
||||
#ifdef _WIN32
|
||||
#include <malloc.h>
|
||||
#endif
|
||||
#include <algorithm>
|
||||
|
||||
namespace cilk {
|
||||
|
||||
namespace internal {
|
||||
|
||||
/** Test if a class is empty.
|
||||
*
|
||||
* If @a Class is an empty (and therefore necessarily stateless) class, then
|
||||
* the “empty base-class optimization” guarantees that
|
||||
* `sizeof(check_for_empty_class<Class>) == sizeof(char)`. Conversely, if
|
||||
* `sizeof(check_for_empty_class<Class>) > sizeof(char)`, then @a Class is not
|
||||
* empty, and we must discriminate distinct instances of @a Class.
|
||||
*
|
||||
* Typical usage:
|
||||
*
|
||||
* // General definition of A<B> for non-empty B:
|
||||
* template <typename B, bool BIsEmpty = class_is_empty<B>::value>
|
||||
* class A { ... };
|
||||
*
|
||||
* // Specialized definition of A<B> for empty B:
|
||||
* template <typename B>
|
||||
* class A<B, true> { ... };
|
||||
*
|
||||
* @tparam Class The class to be tested for emptiness.
|
||||
*
|
||||
* @result The `value` member will be `true` if @a Class is empty,
|
||||
* `false` otherwise.
|
||||
*
|
||||
* @ingroup common
|
||||
*/
|
||||
template <class Class>
|
||||
class class_is_empty {
|
||||
class check_for_empty_class : public Class
|
||||
{
|
||||
char m_data;
|
||||
public:
|
||||
// Declared but not defined
|
||||
check_for_empty_class();
|
||||
check_for_empty_class(const check_for_empty_class&);
|
||||
check_for_empty_class& operator=(const check_for_empty_class&);
|
||||
~check_for_empty_class();
|
||||
};
|
||||
public:
|
||||
|
||||
/** Constant is true if and only if @a Class is empty.
|
||||
*/
|
||||
static const bool value = (sizeof(check_for_empty_class) == sizeof(char));
|
||||
};
|
||||
|
||||
|
||||
/** Get the alignment of a type.
|
||||
*
|
||||
* For example:
|
||||
*
|
||||
* align_of<double>::value == 8
|
||||
*
|
||||
* @tparam Tp The type whose alignment is to be computed.
|
||||
*
|
||||
* @result The `value` member of an instantiation of this class template
|
||||
* will hold the integral alignment requirement of @a Tp.
|
||||
*
|
||||
* @pre @a Tp shall be a complete type.
|
||||
*
|
||||
* @ingroup common
|
||||
*/
|
||||
template <typename Tp>
|
||||
struct align_of
|
||||
{
|
||||
private:
|
||||
struct imp {
|
||||
char m_padding;
|
||||
Tp m_val;
|
||||
|
||||
// The following declarations exist to suppress compiler-generated
|
||||
// definitions, in case @a Tp does not have a public default
|
||||
// constructor, copy constructor, or destructor.
|
||||
imp(const imp&); // Declared but not defined
|
||||
~imp(); // Declared but not defined
|
||||
};
|
||||
|
||||
public:
|
||||
/// The integral alignment requirement of @a Tp.
|
||||
static const std::size_t value = (sizeof(imp) - sizeof(Tp));
|
||||
};
|
||||
|
||||
|
||||
/** A class containing raw bytes with a specified alignment and size.
|
||||
*
|
||||
* An object of type `aligned_storage<S, A>` will have alignment `A` and
|
||||
* size at least `S`. Its contents will be uninitialized bytes.
|
||||
*
|
||||
* @tparam Size The required minimum size of the resulting class.
|
||||
* @tparam Alignment The required alignment of the resulting class.
|
||||
*
|
||||
* @pre @a Alignment shall be a power of 2 no greater than 64.
|
||||
*
|
||||
* @note This is implemented using the `CILK_ALIGNAS` macro, which uses
|
||||
* the non-standard, implementation-specific features
|
||||
* `__declspec(align(N))` on Windows, and
|
||||
* `__attribute__((__aligned__(N)))` on Unix. The `gcc` implementation
|
||||
* of `__attribute__((__aligned__(N)))` requires a numeric literal `N`
|
||||
* (_not_ an arbitrary compile-time constant expression). Therefore,
|
||||
* this class is implemented using specialization on the required
|
||||
* alignment.
|
||||
*
|
||||
* @note The template class is specialized only for the supported
|
||||
* alignments. An attempt to instantiate it for an unsupported
|
||||
* alignment will result in a compilation error.
|
||||
*/
|
||||
// Primary template: declared but never defined, so that only the supported
// alignments (the specializations that follow) can be instantiated.  It uses
// the `class` key to match those specializations; mixing `struct` and `class`
// for one template draws mismatched-tag diagnostics from some compilers.
template <std::size_t Size, std::size_t Alignment>
class aligned_storage;
|
||||
|
||||
template<std::size_t Size> class aligned_storage<Size, 1>
|
||||
{ CILK_ALIGNAS( 1) char m_bytes[Size]; };
|
||||
template<std::size_t Size> class aligned_storage<Size, 2>
|
||||
{ CILK_ALIGNAS( 2) char m_bytes[Size]; };
|
||||
template<std::size_t Size> class aligned_storage<Size, 4>
|
||||
{ CILK_ALIGNAS( 4) char m_bytes[Size]; };
|
||||
template<std::size_t Size> class aligned_storage<Size, 8>
|
||||
{ CILK_ALIGNAS( 8) char m_bytes[Size]; };
|
||||
template<std::size_t Size> class aligned_storage<Size, 16>
|
||||
{ CILK_ALIGNAS(16) char m_bytes[Size]; };
|
||||
template<std::size_t Size> class aligned_storage<Size, 32>
|
||||
{ CILK_ALIGNAS(32) char m_bytes[Size]; };
|
||||
template<std::size_t Size> class aligned_storage<Size, 64>
|
||||
{ CILK_ALIGNAS(64) char m_bytes[Size]; };
|
||||
|
||||
|
||||
/** A buffer of uninitialized bytes with the same size and alignment as a
|
||||
* specified type.
|
||||
*
|
||||
* The class `storage_for_object<Type>` will have the same size and alignment
|
||||
* properties as `Type`, but it will contain only raw (uninitialized) bytes.
|
||||
* This allows the definition of a data member which can contain a `Type`
|
||||
* object which is initialized explicitly under program control, rather
|
||||
* than implicitly as part of the initialization of the containing class.
|
||||
* For example:
|
||||
*
|
||||
* class C {
|
||||
* storage_for_object<MemberClass> _member;
|
||||
* public:
|
||||
* C() ... // Does NOT initialize _member
|
||||
* void initialize(args)
|
||||
* { new (_member.pointer()) MemberClass(args); }
|
||||
* const MemberClass& member() const { return _member.object(); }
|
||||
* MemberClass& member() { return _member.object(); }
|
||||
*
|
||||
* @tparam Type The type whose size and alignment are to be reflected
|
||||
* by this class.
|
||||
*/
|
||||
template <typename Type>
|
||||
class storage_for_object :
|
||||
aligned_storage< sizeof(Type), align_of<Type>::value >
|
||||
{
|
||||
public:
|
||||
/// Return a typed reference to the buffer.
|
||||
const Type& object() const { return *reinterpret_cast<Type*>(this); }
|
||||
Type& object() { return *reinterpret_cast<Type*>(this); }
|
||||
};
|
||||
|
||||
|
||||
/** Get the functor class corresponding to a binary function type.
|
||||
*
|
||||
* The `binary_functor` template class can be instantiated with a binary
|
||||
* functor class or with a real binary function, and will yield an equivalent
|
||||
* binary functor class in either case.
|
||||
*
|
||||
* @tparam F A binary functor class, a binary function type, or a pointer to
|
||||
* binary function type.
|
||||
*
|
||||
* @result `binary_functor<F>::%type` will be the same as @a F if @a F is
|
||||
* a class. It will be a `std::pointer_to_binary_function` wrapper
|
||||
* if @a F is a binary function or binary function pointer type.
|
||||
* (It will _not_ necessarily be an `Adaptable Binary Function`
|
||||
* class, since @a F might be a non-adaptable binary functor
|
||||
* class.)
|
||||
*
|
||||
* @ingroup common
|
||||
*/
|
||||
template <typename F>
|
||||
struct binary_functor {
|
||||
/// The binary functor class equivalent to @a F.
|
||||
typedef F type;
|
||||
};
|
||||
|
||||
/// @copydoc binary_functor
|
||||
/// Specialization for binary function.
|
||||
template <typename R, typename A, typename B>
|
||||
struct binary_functor<R(A,B)> {
|
||||
/// The binary functor class equivalent to @a F.
|
||||
typedef std::pointer_to_binary_function<A, B, R> type;
|
||||
};
|
||||
|
||||
/// @copydoc binary_functor
|
||||
/// Specialization for pointer to binary function.
|
||||
template <typename R, typename A, typename B>
|
||||
struct binary_functor<R(*)(A,B)> {
|
||||
/// The binary functor class equivalent to @a F.
|
||||
typedef std::pointer_to_binary_function<A, B, R> type;
|
||||
};
|
||||
|
||||
|
||||
/** Indirect binary function class with specified types.
|
||||
*
|
||||
* `typed_indirect_binary_function<F>` is an `Adaptable Binary Function` class
|
||||
* based on an existing binary functor class or binary function type @a F. If
|
||||
* @a F is a stateless class, then this class will be empty, and its
|
||||
* `operator()` will invoke @a F’s `operator()`. Otherwise, an object of this
|
||||
* class will hold a pointer to an object of type @a F, and will refer its
|
||||
* `operator()` calls to the pointed-to @a F object.
|
||||
*
|
||||
* That is, suppose that we have the declarations:
|
||||
*
|
||||
* F *p;
|
||||
* typed_indirect_binary_function<F, int, int, bool> ibf(p);
|
||||
*
|
||||
* Then:
|
||||
*
|
||||
* - `ibf(x, y) == (*p)(x, y)`.
|
||||
* - `ibf(x, y)` will not do a pointer dereference if `F` is an empty class.
|
||||
*
|
||||
* @note Just to repeat: if `F` is an empty class, then
|
||||
* `typed_indirect_binary_function\<F\>` is also an empty class.
|
||||
* This is critical for its use in the @ref min_max::view_base
|
||||
* "min/max reducer view classes", where it allows the view to
|
||||
* call a comparison functor in the monoid without actually
|
||||
* having to allocate a pointer in the view class when the
|
||||
* comparison class is empty.
|
||||
*
|
||||
* @note If you have an `Adaptable Binary Function` class or a binary
|
||||
* function type, then you can use the
|
||||
* @ref indirect_binary_function class, which derives the
|
||||
* argument and result types from the parameter type instead of requiring
|
||||
* you to specify them as template arguments.
|
||||
*
|
||||
* @tparam F A binary functor class, a binary function type, or a pointer to
|
||||
* binary function type.
|
||||
* @tparam A1 The first argument type.
|
||||
* @tparam A2 The second argument type.
|
||||
* @tparam R The result type.
|
||||
*
|
||||
* @see min_max::comparator_base
|
||||
* @see indirect_binary_function
|
||||
*
|
||||
* @ingroup common
|
||||
*/
|
||||
template < typename F
|
||||
, typename A1
|
||||
, typename A2
|
||||
, typename R
|
||||
, typename Functor = typename binary_functor<F>::type
|
||||
, bool FunctorIsEmpty = class_is_empty<Functor>::value
|
||||
>
|
||||
class typed_indirect_binary_function : std::binary_function<A1, A2, R>
|
||||
{
|
||||
const F* f;
|
||||
public:
|
||||
/// Constructor captures a pointer to the wrapped function.
|
||||
typed_indirect_binary_function(const F* f) : f(f) {}
|
||||
|
||||
/// Return the comparator pointer, or `NULL` if the comparator is stateless.
|
||||
const F* pointer() const { return f; }
|
||||
|
||||
/// Apply the pointed-to functor to the arguments.
|
||||
R operator()(const A1& a1, const A2& a2) const { return (*f)(a1, a2); }
|
||||
};
|
||||
|
||||
|
||||
/// @copydoc typed_indirect_binary_function
|
||||
/// Specialization for an empty functor class. (This is only possible if @a F
|
||||
/// itself is an empty class. If @a F is a function or pointer-to-function
|
||||
/// type, then the functor will contain a pointer.)
|
||||
template <typename F, typename A1, typename A2, typename R, typename Functor>
|
||||
class typed_indirect_binary_function<F, A1, A2, R, Functor, true> :
|
||||
std::binary_function<A1, A2, R>
|
||||
{
|
||||
public:
|
||||
/// Return `NULL` for the comparator pointer of a stateless comparator.
|
||||
const F* pointer() const { return 0; }
|
||||
|
||||
/// Constructor discards the pointer to a stateless functor class.
|
||||
typed_indirect_binary_function(const F* f) {}
|
||||
|
||||
/// Create an instance of the stateless functor class and apply it to the arguments.
|
||||
R operator()(const A1& a1, const A2& a2) const { return F()(a1, a2); }
|
||||
};
|
||||
|
||||
|
||||
/** Indirect binary function class with inferred types.
|
||||
*
|
||||
* This is identical to @ref typed_indirect_binary_function, except that it
|
||||
* derives the binary function argument and result types from the parameter
|
||||
* type @a F instead of taking them as additional template parameters. If @a F
|
||||
* is a class type, then it must be an `Adaptable Binary Function`.
|
||||
*
|
||||
* @see typed_indirect_binary_function
|
||||
*
|
||||
* @ingroup common
|
||||
*/
|
||||
template <typename F, typename Functor = typename binary_functor<F>::type>
|
||||
class indirect_binary_function :
|
||||
typed_indirect_binary_function< F
|
||||
, typename Functor::first_argument_type
|
||||
, typename Functor::second_argument_type
|
||||
, typename Functor::result_type
|
||||
>
|
||||
{
|
||||
typedef typed_indirect_binary_function< F
|
||||
, typename Functor::first_argument_type
|
||||
, typename Functor::second_argument_type
|
||||
, typename Functor::result_type
|
||||
>
|
||||
base;
|
||||
public:
|
||||
indirect_binary_function(const F* f) : base(f) {} ///< Constructor
|
||||
};
|
||||
|
||||
|
||||
/** Choose a type based on a boolean constant.
|
||||
*
|
||||
* This metafunction is identical to C++11’s `std::conditional` metafunction.
|
||||
* It needs to be here until we can reasonably assume that users will be
|
||||
* compiling with C++11.
|
||||
*
|
||||
* @tparam Cond A boolean constant.
|
||||
* @tparam IfTrue A type.
|
||||
* @tparam IfFalse A type.
|
||||
* @result The `type` member will be a typedef of @a IfTrue if @a Cond
|
||||
* is true, and a typedef of @a IfFalse if @a Cond is false.
|
||||
*
|
||||
* @ingroup common
|
||||
*/
|
||||
// Primary template: used whenever no more specific specialization matches,
// i.e. when Cond is true. Selects the first type.
template <bool Cond, typename IfTrue, typename IfFalse>
struct condition {
    /// The type selected by the condition.
    typedef IfTrue type;
};

/// @copydoc condition
/// Specialization for @a Cond == `false`: selects the second type.
template <typename IfTrue, typename IfFalse>
struct condition<false, IfTrue, IfFalse> {
    /// The type selected by the condition.
    typedef IfFalse type;
};
|
||||
|
||||
|
||||
/** @def __CILKRTS_STATIC_ASSERT
|
||||
*
|
||||
* @brief Compile-time assertion.
|
||||
*
|
||||
* Causes a compilation error if a compile-time constant expression is false.
|
||||
*
|
||||
* @par Usage example.
|
||||
* This assertion is used in reducer_min_max.h to avoid defining
|
||||
* legacy reducer classes that would not be binary-compatible with the
|
||||
* same classes compiled with earlier versions of the reducer library.
|
||||
*
|
||||
* __CILKRTS_STATIC_ASSERT(
|
||||
* internal::class_is_empty< internal::binary_functor<Compare> >::value,
|
||||
* "cilk::reducer_max<Value, Compare> only works with an empty Compare class");
|
||||
*
|
||||
* @note In a C++11 compiler, this is just the language predefined
|
||||
* `static_assert` declaration (a keyword, not a macro).
|
||||
*
|
||||
* @note In a non-C++11 compiler, the @a Msg string is not directly included
|
||||
* in the compiler error message, but it may appear if the compiler
|
||||
* prints the source line that the error occurred on.
|
||||
*
|
||||
* @param Cond The expression to test.
|
||||
* @param Msg A string explaining the failure.
|
||||
*
|
||||
* @ingroup common
|
||||
*/
|
||||
// Use the built-in static_assert whenever the compiler announces C++11
// support, either through a vendor-specific macro or through the standard
// __cplusplus value (201103L or later).
#if defined(__INTEL_CXX11_MODE__) || defined(__GXX_EXPERIMENTAL_CXX0X__) || \
    (defined(__cplusplus) && __cplusplus >= 201103L)

#   define __CILKRTS_STATIC_ASSERT(Cond, Msg) static_assert(Cond, Msg)

#else

// Pre-C++11 emulation: declare an array typedef whose bound names the enum
// member `Success`. Only static_assert_failure<true> has that member, so a
// false condition makes the typedef ill-formed. Msg is not used here.
#   define __CILKRTS_STATIC_ASSERT(Cond, Msg) \
        typedef int __CILKRTS_STATIC_ASSERT_DUMMY_TYPE \
            [::cilk::internal::static_assert_failure<(Cond)>::Success]

/// @cond internal
    template <bool> struct static_assert_failure { };
    template <> struct static_assert_failure<true> { enum { Success = 1 }; };

// The typedef name is `__cilkrts_static_assert_` followed by the current
// line number. The two-level macro forces __LINE__ to expand before pasting.
#   define __CILKRTS_STATIC_ASSERT_DUMMY_TYPE \
        __CILKRTS_STATIC_ASSERT_DUMMY_TYPE1(__cilkrts_static_assert_, __LINE__)
#   define __CILKRTS_STATIC_ASSERT_DUMMY_TYPE1(a, b) \
        __CILKRTS_STATIC_ASSERT_DUMMY_TYPE2(a, b)
#   define __CILKRTS_STATIC_ASSERT_DUMMY_TYPE2(a, b) a ## b
/// @endcond

#endif
|
||||
|
||||
/// @cond internal
|
||||
|
||||
/** @name Aligned heap management.
|
||||
*/
|
||||
//@{
|
||||
|
||||
/** Implementation-specific aligned memory allocation function.
|
||||
*
|
||||
* @param size The minimum number of bytes to allocate.
|
||||
* @param alignment The required alignment (must be a power of 2).
|
||||
* @return The address of a block of memory of at least @a size
|
||||
* bytes. The address will be a multiple of @a alignment.
|
||||
* `NULL` if the allocation fails.
|
||||
*
|
||||
* @see deallocate_aligned()
|
||||
*/
|
||||
inline void* allocate_aligned(std::size_t size, std::size_t alignment)
{
    // One branch per platform family; exactly one is compiled in.
#if defined(_WIN32)
    return _aligned_malloc(size, alignment);
#elif defined(ANDROID) || defined(__ANDROID__)
    // The requested alignment is raised to at least the size of a pointer.
    const std::size_t effective_alignment = std::max(alignment, sizeof(void*));
    return memalign(effective_alignment, size);
#else
    // The requested alignment is raised to at least the size of a pointer
    // before it is handed to posix_memalign().
    const std::size_t effective_alignment = std::max(alignment, sizeof(void*));
    void* block;
    if (posix_memalign(&block, effective_alignment, size) != 0) {
        // A non-zero status means no memory was obtained: report null.
        return 0;
    }
    return block;
#endif
}
|
||||
|
||||
/** Implementation-specific aligned memory deallocation function.
|
||||
*
|
||||
* @param ptr A pointer which was returned by a call to allocate_aligned().
|
||||
*/
|
||||
inline void deallocate_aligned(void* ptr)
{
    // The release routine is chosen per platform: _aligned_free() on
    // Windows, std::free() everywhere else.
#if !defined(_WIN32)
    std::free(ptr);
#else
    _aligned_free(ptr);
#endif
}
|
||||
|
||||
/** Class to allocate and guard an aligned pointer.
|
||||
*
|
||||
* A new_aligned_pointer object allocates aligned heap-allocated memory when
|
||||
* it is created, and automatically deallocates it when it is destroyed
|
||||
* unless its `ok()` function is called.
|
||||
*
|
||||
* @tparam T The type of the object to allocate on the heap. The allocated
|
||||
* memory will have the size and alignment of an object of type T.
|
||||
*/
|
||||
template <typename T>
|
||||
class new_aligned_pointer {
|
||||
void* m_ptr;
|
||||
public:
|
||||
/// Constructor allocates the pointer.
|
||||
new_aligned_pointer() :
|
||||
m_ptr(allocate_aligned(sizeof(T), internal::align_of<T>::value)) {}
|
||||
/// Destructor deallocates the pointer.
|
||||
~new_aligned_pointer() { if (m_ptr) deallocate_aligned(m_ptr); }
|
||||
/// Get the pointer.
|
||||
operator void*() { return m_ptr; }
|
||||
/// Return the pointer and release the guard.
|
||||
T* ok() {
|
||||
T* ptr = static_cast<T*>(m_ptr);
|
||||
m_ptr = 0;
|
||||
return ptr;
|
||||
}
|
||||
};
|
||||
|
||||
//@}
|
||||
|
||||
/// @endcond
|
||||
|
||||
} // namespace internal
|
||||
|
||||
//@{
|
||||
|
||||
/** Allocate an aligned data structure on the heap.
|
||||
*
|
||||
* `cilk::aligned_new<T>([args])` is equivalent to `new T([args])`, except
|
||||
* that it guarantees that the returned pointer will be at least as aligned
|
||||
* as the alignment requirements of type `T`.
|
||||
*
|
||||
* @ingroup common
|
||||
*/
|
||||
template <typename T>
|
||||
T* aligned_new()
|
||||
{
|
||||
internal::new_aligned_pointer<T> ptr;
|
||||
new (ptr) T();
|
||||
return ptr.ok();
|
||||
}
|
||||
|
||||
template <typename T, typename T1>
|
||||
T* aligned_new(const T1& x1)
|
||||
{
|
||||
internal::new_aligned_pointer<T> ptr;
|
||||
new (ptr) T(x1);
|
||||
return ptr.ok();
|
||||
}
|
||||
|
||||
template <typename T, typename T1, typename T2>
|
||||
T* aligned_new(const T1& x1, const T2& x2)
|
||||
{
|
||||
internal::new_aligned_pointer<T> ptr;
|
||||
new (ptr) T(x1, x2);
|
||||
return ptr.ok();
|
||||
}
|
||||
|
||||
template <typename T, typename T1, typename T2, typename T3>
|
||||
T* aligned_new(const T1& x1, const T2& x2, const T3& x3)
|
||||
{
|
||||
internal::new_aligned_pointer<T> ptr;
|
||||
new (ptr) T(x1, x2, x3);
|
||||
return ptr.ok();
|
||||
}
|
||||
|
||||
template <typename T, typename T1, typename T2, typename T3, typename T4>
|
||||
T* aligned_new(const T1& x1, const T2& x2, const T3& x3, const T4& x4)
|
||||
{
|
||||
internal::new_aligned_pointer<T> ptr;
|
||||
new (ptr) T(x1, x2, x3, x4);
|
||||
return ptr.ok();
|
||||
}
|
||||
|
||||
template <typename T, typename T1, typename T2, typename T3, typename T4, typename T5>
|
||||
T* aligned_new(const T1& x1, const T2& x2, const T3& x3, const T4& x4, const T5& x5)
|
||||
{
|
||||
internal::new_aligned_pointer<T> ptr;
|
||||
new (ptr) T(x1, x2, x3, x4, x5);
|
||||
return ptr.ok();
|
||||
}
|
||||
|
||||
//@}
|
||||
|
||||
|
||||
/** Deallocate an aligned data structure on the heap.
|
||||
*
|
||||
* `cilk::aligned_delete(ptr)` is equivalent to `delete ptr`, except that it
|
||||
* operates on a pointer that was allocated by aligned_new().
|
||||
*
|
||||
* @ingroup common
|
||||
*/
|
||||
template <typename T>
|
||||
void aligned_delete(const T* ptr)
|
||||
{
|
||||
ptr->~T();
|
||||
internal::deallocate_aligned((void*)ptr);
|
||||
}
|
||||
|
||||
} // namespace cilk
|
||||
|
||||
#endif // __cplusplus
|
||||
|
||||
#endif // METAPROGRAMMING_H_INCLUDED
|
1900
libcilkrts/include/cilk/reducer.h
Normal file
1900
libcilkrts/include/cilk/reducer.h
Normal file
File diff suppressed because it is too large
Load diff
37
libcilkrts/include/cilk/reducer_file.h
Normal file
37
libcilkrts/include/cilk/reducer_file.h
Normal file
|
@ -0,0 +1,37 @@
|
|||
/*
|
||||
* @copyright
|
||||
* Copyright (C) 2009-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
*/
|
||||
|
||||
|
1127
libcilkrts/include/cilk/reducer_list.h
Normal file
1127
libcilkrts/include/cilk/reducer_list.h
Normal file
File diff suppressed because it is too large
Load diff
46
libcilkrts/include/cilk/reducer_max.h
Normal file
46
libcilkrts/include/cilk/reducer_max.h
Normal file
|
@ -0,0 +1,46 @@
|
|||
/* reducer_max.h -*- C++ -*-
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2009-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** @file reducer_max.h
|
||||
*
|
||||
* @brief Defines classes for doing parallel maximum reductions.
|
||||
*
|
||||
* @ingroup ReducersMinMax
|
||||
*
|
||||
* @see ReducersMinMax
|
||||
*/
|
||||
|
||||
#include "reducer_min_max.h"
|
46
libcilkrts/include/cilk/reducer_min.h
Normal file
46
libcilkrts/include/cilk/reducer_min.h
Normal file
|
@ -0,0 +1,46 @@
|
|||
/* reducer_min.h -*- C++ -*-
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2009-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** @file reducer_min.h
|
||||
*
|
||||
* @brief Defines classes for doing parallel minimum reductions.
|
||||
*
|
||||
* @ingroup ReducersMinMax
|
||||
*
|
||||
* @see ReducersMinMax
|
||||
*/
|
||||
|
||||
#include "reducer_min_max.h"
|
3606
libcilkrts/include/cilk/reducer_min_max.h
Normal file
3606
libcilkrts/include/cilk/reducer_min_max.h
Normal file
File diff suppressed because it is too large
Load diff
690
libcilkrts/include/cilk/reducer_opadd.h
Normal file
690
libcilkrts/include/cilk/reducer_opadd.h
Normal file
|
@ -0,0 +1,690 @@
|
|||
/* reducer_opadd.h -*- C++ -*-
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2009-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** @file reducer_opadd.h
|
||||
*
|
||||
* @brief Defines classes for doing parallel addition reductions.
|
||||
*
|
||||
* @ingroup ReducersAdd
|
||||
*
|
||||
* @see ReducersAdd
|
||||
*/
|
||||
|
||||
#ifndef REDUCER_OPADD_H_INCLUDED
|
||||
#define REDUCER_OPADD_H_INCLUDED
|
||||
|
||||
#include <cilk/reducer.h>
|
||||
|
||||
/** @defgroup ReducersAdd Addition Reducers
|
||||
*
|
||||
* Addition reducers allow the computation of the sum of a set of values in
|
||||
* parallel.
|
||||
*
|
||||
* @ingroup Reducers
|
||||
*
|
||||
* You should be familiar with @ref pagereducers "Cilk reducers", described in
|
||||
* file `reducers.md`, and particularly with @ref reducers_using, before trying
|
||||
* to use the information in this file.
|
||||
*
|
||||
* @section redopadd_usage Usage Example
|
||||
*
|
||||
* cilk::reducer< cilk::op_add<int> > r;
|
||||
* cilk_for (int i = 0; i != N; ++i) {
|
||||
* *r += a[i];
|
||||
* }
|
||||
* return r.get_value();
|
||||
*
|
||||
* @section redopadd_monoid The Monoid
|
||||
*
|
||||
* @subsection redopadd_monoid_values Value Set
|
||||
*
|
||||
* The value set of an addition reducer is the set of values of `Type`, which
|
||||
* is expected to be a builtin numeric type (or something like it, such as
|
||||
* `std::complex`).
|
||||
*
|
||||
* @subsection redopadd_monoid_operator Operator
|
||||
*
|
||||
* The operator of an addition reducer is the addition operator, defined by
|
||||
* the “`+`” binary operator on `Type`.
|
||||
*
|
||||
* @subsection redopadd_monoid_identity Identity
|
||||
*
|
||||
* The identity value of the reducer is the numeric value “`0`”. This is
|
||||
* expected to be the value of the default constructor `Type()`.
|
||||
*
|
||||
* @section redopadd_operations Operations
|
||||
*
|
||||
* @subsection redopadd_constructors Constructors
|
||||
*
|
||||
* reducer() // identity
|
||||
* reducer(const Type& value)
|
||||
* reducer(move_in(Type& variable))
|
||||
*
|
||||
* @subsection redopadd_get_set Set and Get
|
||||
*
|
||||
* r.set_value(const Type& value)
|
||||
* const Type& = r.get_value() const
|
||||
* r.move_in(Type& variable)
|
||||
* r.move_out(Type& variable)
|
||||
*
|
||||
* @subsection redopadd_initial Initial Values
|
||||
*
|
||||
* If an addition reducer is constructed without an explicit initial value,
|
||||
* then its initial value will be its identity value, as long as `Type`
|
||||
* satisfies the requirements of @ref redopadd_types.
|
||||
*
|
||||
* @subsection redopadd_view_ops View Operations
|
||||
*
|
||||
* *r += a
|
||||
* *r -= a
|
||||
* ++*r
|
||||
* --*r
|
||||
* (*r)++
|
||||
* (*r)--
|
||||
* *r = *r + a
|
||||
* *r = *r - a
|
||||
* *r = *r ± a1 ± a2 … ± an
|
||||
*
|
||||
* The post-increment and post-decrement operations do not return a value. (If
|
||||
* they did, they would expose the value contained in the view, which is
|
||||
* non-deterministic in the middle of a reduction.)
|
||||
*
|
||||
* Note that subtraction operations are allowed on an addition reducer because
|
||||
* subtraction is equivalent to addition with a negated operand. It is true
|
||||
* that `(x - y) - z` is not equivalent to `x - (y - z)`, but
|
||||
* `(x + (-y)) + (-z)` _is_ equivalent to `x + ((-y) + (-z))`.
|
||||
*
|
||||
* @section redopadd_floating_point Issues with Floating-Point Types
|
||||
*
|
||||
* Because of precision and round-off issues, floating-point addition is not
|
||||
* really associative. For example, `(1e30 + -1e30) + 1 == 1`, but
|
||||
* `1e30 + (-1e30 + 1) == 0`.
|
||||
*
|
||||
* In many cases, this won’t matter, but computations which have been
|
||||
* carefully ordered to control round-off errors may not deal well with
|
||||
* being reassociated. In general, you should be sure to understand the
|
||||
* floating-point behavior of your program before doing any transformation
|
||||
* that will reassociate its computations.
|
||||
*
|
||||
* @section redopadd_types Type and Operator Requirements
|
||||
*
|
||||
* `Type` must be `Copy Constructible`, `Default Constructible`, and
|
||||
* `Assignable`.
|
||||
*
|
||||
* The operator “`+=`” must be defined on `Type`, with `x += a` having the
|
||||
* same meaning as `x = x + a`. In addition, if the code uses the “`-=`”,
|
||||
* pre-increment, post-increment, pre-decrement, or post-decrement operators,
|
||||
* then the corresponding operators must be defined on `Type`.
|
||||
*
|
||||
* The expression `Type()` must be a valid expression which yields the
|
||||
* identity value (the value of `Type` whose numeric value is zero).
|
||||
*
|
||||
* @section redopadd_in_c Addition Reducers in C
|
||||
*
|
||||
* The @ref CILK_C_REDUCER_OPADD and @ref CILK_C_REDUCER_OPADD_TYPE macros can
|
||||
* be used to do addition reductions in C. For example:
|
||||
*
|
||||
* CILK_C_REDUCER_OPADD(r, double, 0);
|
||||
* CILK_C_REGISTER_REDUCER(r);
|
||||
* cilk_for(int i = 0; i != n; ++i) {
|
||||
* REDUCER_VIEW(r) += a[i];
|
||||
* }
|
||||
* CILK_C_UNREGISTER_REDUCER(r);
|
||||
* printf("The sum of the elements of a is %f\n", REDUCER_VIEW(r));
|
||||
*
|
||||
* See @ref reducers_c_predefined.
|
||||
*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
namespace cilk {
|
||||
|
||||
/** The addition reducer view class.
|
||||
*
|
||||
* This is the view class for reducers created with
|
||||
* `cilk::reducer< cilk::op_add<Type> >`. It holds the accumulator variable
|
||||
* for the reduction, and allows only addition and subtraction operations to
|
||||
* be performed on it.
|
||||
*
|
||||
* @note The reducer “dereference” operation (`reducer::operator *()`)
|
||||
* yields a reference to the view. Thus, for example, the view class’s
|
||||
* `+=` operation would be used in an expression like `*r += a`, where
|
||||
* `r` is an op_add reducer variable.
|
||||
*
|
||||
* @tparam Type The type of the contained accumulator variable. This will
|
||||
* be the value type of a monoid_with_view that is
|
||||
* instantiated with this view.
|
||||
*
|
||||
* @see ReducersAdd
|
||||
* @see op_add
|
||||
*
|
||||
* @ingroup ReducersAdd
|
||||
*/
|
||||
template <typename Type>
|
||||
class op_add_view : public scalar_view<Type>
|
||||
{
|
||||
typedef scalar_view<Type> base;
|
||||
|
||||
public:
|
||||
/** Class to represent the right-hand side of
|
||||
* `*reducer = *reducer ± value`.
|
||||
*
|
||||
* The only assignment operator for the op_add_view class takes an
|
||||
* rhs_proxy as its operand. This results in the syntactic restriction
|
||||
* that the only expressions that can be assigned to an op_add_view are
|
||||
* ones which generate an rhs_proxy — that is, expressions of the form
|
||||
* `op_add_view ± value ... ± value`.
|
||||
*
|
||||
* @warning
|
||||
* The lhs and rhs views in such an assignment must be the same;
|
||||
* otherwise, the behavior will be undefined. (I.e., `v1 = v1 + x` is
|
||||
* legal; `v1 = v2 + x` is illegal.) This condition will be checked with a
|
||||
* runtime assertion when compiled in debug mode.
|
||||
*
|
||||
* @see op_add_view
|
||||
*/
|
||||
class rhs_proxy {
|
||||
friend class op_add_view;
|
||||
|
||||
const op_add_view* m_view;
|
||||
Type m_value;
|
||||
|
||||
// Constructor is invoked only from op_add_view::operator+() and
|
||||
// op_add_view::operator-().
|
||||
//
|
||||
rhs_proxy(const op_add_view* view, const Type& value) :
|
||||
m_view(view), m_value(value) {}
|
||||
|
||||
rhs_proxy& operator=(const rhs_proxy&); // Disable assignment operator
|
||||
rhs_proxy(); // Disable default constructor
|
||||
|
||||
public:
|
||||
//@{
|
||||
/** Add or subtract an additional rhs value. If `v` is an op_add_view
|
||||
* and `a1` is a value, then the expression `v + a1` invokes the view’s
|
||||
* `operator+()` to create an rhs_proxy for `(v, a1)`; then
|
||||
* `v + a1 + a2` invokes the rhs_proxy’s `operator+()` to create a new
|
||||
* rhs_proxy for `(v, a1+a2)`. This allows the right-hand side of an
|
||||
* assignment to be not just `view ± value`, but
|
||||
* `view ± value ± value ... ± value`. The effect is that
|
||||
*
|
||||
* v = v ± a1 ± a2 ... ± an;
|
||||
*
|
||||
* is evaluated as
|
||||
*
|
||||
* v = v ± (±a1 ± a2 ... ± an);
|
||||
*/
|
||||
rhs_proxy& operator+(const Type& x) { m_value += x; return *this; }
|
||||
rhs_proxy& operator-(const Type& x) { m_value -= x; return *this; }
|
||||
//@}
|
||||
};
|
||||
|
||||
|
||||
/** Default/identity constructor. This constructor initializes the
|
||||
* contained value to `Type()`, which is expected to be the identity value
|
||||
* for addition on `Type`.
|
||||
*/
|
||||
op_add_view() : base() {}
|
||||
|
||||
/** Construct with a specified initial value.
|
||||
*/
|
||||
explicit op_add_view(const Type& v) : base(v) {}
|
||||
|
||||
/** Reduction operation.
|
||||
*
|
||||
* This function is invoked by the @ref op_add monoid to combine the views
|
||||
* of two strands when the right strand merges with the left one. It adds
|
||||
* the value contained in the right-strand view to the value contained in
|
||||
* the left-strand view, and leaves the value in the right-strand view
|
||||
* undefined.
|
||||
*
|
||||
* @param right A pointer to the right-strand view. (`this` points to
|
||||
* the left-strand view.)
|
||||
*
|
||||
* @note Used only by the @ref op_add monoid to implement the monoid
|
||||
* reduce operation.
|
||||
*/
|
||||
void reduce(op_add_view* right) { this->m_value += right->m_value; }
|
||||
|
||||
/** @name Accumulator variable updates.
|
||||
*
|
||||
* These functions support the various syntaxes for incrementing or
|
||||
* decrementing the accumulator variable contained in the view.
|
||||
*/
|
||||
//@{
|
||||
|
||||
/** Increment the accumulator variable by @a x.
|
||||
*/
|
||||
op_add_view& operator+=(const Type& x) { this->m_value += x; return *this; }
|
||||
|
||||
/** Decrement the accumulator variable by @a x.
|
||||
*/
|
||||
op_add_view& operator-=(const Type& x) { this->m_value -= x; return *this; }
|
||||
|
||||
/** Pre-increment.
|
||||
*/
|
||||
op_add_view& operator++() { ++this->m_value; return *this; }
|
||||
|
||||
/** Post-increment.
|
||||
*
|
||||
* @note Conventionally, post-increment operators return the old value
|
||||
* of the incremented variable. However, reducer views do not
|
||||
* expose their contained values, so `view++` does not have a
|
||||
* return value.
|
||||
*/
|
||||
void operator++(int) { this->m_value++; }
|
||||
|
||||
/** Pre-decrement.
|
||||
*/
|
||||
op_add_view& operator--() { --this->m_value; return *this; }
|
||||
|
||||
/** Post-decrement.
|
||||
*
|
||||
* @note Conventionally, post-decrement operators return the old value
|
||||
* of the decremented variable. However, reducer views do not
|
||||
* expose their contained values, so `view--` does not have a
|
||||
* return value.
|
||||
*/
|
||||
void operator--(int) { this->m_value--; }
|
||||
|
||||
/** Create an object representing `*this + x`.
|
||||
*
|
||||
* @see rhs_proxy
|
||||
*/
|
||||
rhs_proxy operator+(const Type& x) const { return rhs_proxy(this, x); }
|
||||
|
||||
/** Create an object representing `*this - x`.
|
||||
*
|
||||
* @see rhs_proxy
|
||||
*/
|
||||
rhs_proxy operator-(const Type& x) const { return rhs_proxy(this, -x); }
|
||||
|
||||
/** Assign the result of a `view ± value` expression to the view. Note that
|
||||
* this is the only assignment operator for this class.
|
||||
*
|
||||
* @see rhs_proxy
|
||||
*/
|
||||
op_add_view& operator=(const rhs_proxy& rhs) {
|
||||
__CILKRTS_ASSERT(this == rhs.m_view);
|
||||
this->m_value += rhs.m_value;
|
||||
return *this;
|
||||
}
|
||||
|
||||
//@}
|
||||
};
|
||||
|
||||
|
||||
/** Monoid class for addition reductions. Instantiate the cilk::reducer
|
||||
* template class with an op_add monoid to create an addition reducer class.
|
||||
* For example, to compute
|
||||
* the sum of a set of `int` values:
|
||||
*
|
||||
* cilk::reducer< cilk::op_add<int> > r;
|
||||
*
|
||||
* @tparam Type The reducer value type.
|
||||
* @tparam Align If `false` (the default), reducers instantiated on this
|
||||
* monoid will be naturally aligned (the Cilk library 1.0
|
||||
* behavior). If `true`, reducers instantiated on this monoid
|
||||
* will be cache-aligned for binary compatibility with
|
||||
* reducers in Cilk library version 0.9.
|
||||
*
|
||||
* @see ReducersAdd
|
||||
* @see op_add_view
|
||||
*
|
||||
* @ingroup ReducersAdd
|
||||
*/
|
||||
template <typename Type, bool Align = false>
|
||||
struct op_add : public monoid_with_view<op_add_view<Type>, Align> {};
|
||||
|
||||
/** **Deprecated** addition reducer wrapper class.
|
||||
*
|
||||
* reducer_opadd is the same as @ref reducer<@ref op_add>, except that
|
||||
* reducer_opadd is a proxy for the contained view, so that accumulator
|
||||
* variable update operations can be applied directly to the reducer. For
|
||||
* example, a value is added to a `reducer<%op_add>` with `*r += a`, but a
|
||||
* value can be added to a `%reducer_opadd` with `r += a`.
|
||||
*
|
||||
* @deprecated Users are strongly encouraged to use `reducer<monoid>`
|
||||
* reducers rather than the old wrappers like reducer_opadd.
|
||||
* The `reducer<monoid>` reducers show the reducer/monoid/view
|
||||
* architecture more clearly, are more consistent in their
|
||||
* implementation, and present a simpler model for new
|
||||
* user-implemented reducers.
|
||||
*
|
||||
* @note Implicit conversions are provided between `%reducer_opadd`
|
||||
* and `reducer<%op_add>`. This allows incremental code
|
||||
* conversion: old code that used `%reducer_opadd` can pass a
|
||||
* `%reducer_opadd` to a converted function that now expects a
|
||||
* pointer or reference to a `reducer<%op_add>`, and vice
|
||||
* versa.
|
||||
*
|
||||
* @tparam Type The value type of the reducer.
|
||||
*
|
||||
* @see op_add
|
||||
* @see reducer
|
||||
* @see ReducersAdd
|
||||
*
|
||||
* @ingroup ReducersAdd
|
||||
*/
|
||||
template <typename Type>
|
||||
class reducer_opadd : public reducer< op_add<Type, true> >
|
||||
{
|
||||
typedef reducer< op_add<Type, true> > base;
|
||||
using base::view;
|
||||
|
||||
public:
|
||||
/// The view type for the reducer.
|
||||
typedef typename base::view_type view_type;
|
||||
|
||||
/// The view’s rhs proxy type.
|
||||
typedef typename view_type::rhs_proxy rhs_proxy;
|
||||
|
||||
/// The view type for the reducer.
|
||||
typedef view_type View;
|
||||
|
||||
/// The monoid type for the reducer.
|
||||
typedef typename base::monoid_type Monoid;
|
||||
|
||||
/** @name Constructors
|
||||
*/
|
||||
//@{
|
||||
|
||||
/** Default (identity) constructor.
|
||||
*
|
||||
* Constructs the wrapper with the default initial value of `Type()`.
|
||||
*/
|
||||
reducer_opadd() {}
|
||||
|
||||
/** Value constructor.
|
||||
*
|
||||
* Constructs the wrapper with a specified initial value.
|
||||
*/
|
||||
explicit reducer_opadd(const Type& initial_value) : base(initial_value) {}
|
||||
|
||||
//@}
|
||||
|
||||
/** @name Forwarded functions
|
||||
* @details Functions that update the contained accumulator variable are
|
||||
* simply forwarded to the contained @ref op_add_view. */
|
||||
//@{
|
||||
|
||||
/// @copydoc op_add_view::operator+=(const Type&)
|
||||
reducer_opadd& operator+=(const Type& x) { view() += x; return *this; }
|
||||
|
||||
/// @copydoc op_add_view::operator-=(const Type&)
|
||||
reducer_opadd& operator-=(const Type& x) { view() -= x; return *this; }
|
||||
|
||||
/// @copydoc op_add_view::operator++()
|
||||
reducer_opadd& operator++() { ++view(); return *this; }
|
||||
|
||||
/// @copydoc op_add_view::operator++(int)
|
||||
void operator++(int) { view()++; }
|
||||
|
||||
/// @copydoc op_add_view::operator-\-()
|
||||
reducer_opadd& operator--() { --view(); return *this; }
|
||||
|
||||
/// @copydoc op_add_view::operator-\-(int)
|
||||
void operator--(int) { view()--; }
|
||||
|
||||
// The legacy definitions of reducer_opadd::operator+() and
|
||||
// reducer_opadd::operator-() have different behavior and a different
|
||||
// return type than this definition. The legacy version is defined as a
|
||||
// member function, so this new version is defined as a free function to
|
||||
// give it a different signature, so that they won’t end up sharing a
|
||||
// single object file entry.
|
||||
|
||||
/// @copydoc op_add_view::operator+(const Type&) const
|
||||
friend rhs_proxy operator+(const reducer_opadd& r, const Type& x)
|
||||
{
|
||||
return r.view() + x;
|
||||
}
|
||||
/// @copydoc op_add_view::operator-(const Type&) const
|
||||
friend rhs_proxy operator-(const reducer_opadd& r, const Type& x)
|
||||
{
|
||||
return r.view() - x;
|
||||
}
|
||||
/// @copydoc op_add_view::operator=(const rhs_proxy&)
|
||||
reducer_opadd& operator=(const rhs_proxy& temp)
|
||||
{
|
||||
view() = temp;
|
||||
return *this;
|
||||
}
|
||||
//@}
|
||||
|
||||
/** @name Dereference
|
||||
* @details Dereferencing a wrapper is a no-op. It simply returns the
|
||||
* wrapper. Combined with the rule that the wrapper forwards view
|
||||
* operations to its contained view, this means that view operations can
|
||||
* be written the same way on reducers and wrappers, which is convenient
|
||||
* for incrementally converting old code using wrappers to use reducers
|
||||
* instead. That is:
|
||||
*
|
||||
* reducer< op_add<int> > r;
|
||||
* *r += a; // *r returns the view
|
||||
* // operator += is a view member function
|
||||
*
|
||||
* reducer_opadd<int> w;
|
||||
* *w += a; // *w returns the wrapper
|
||||
* // operator += is a wrapper member function that
|
||||
* // calls the corresponding view function
|
||||
*/
|
||||
//@{
|
||||
reducer_opadd& operator*() { return *this; }
|
||||
reducer_opadd const& operator*() const { return *this; }
|
||||
|
||||
reducer_opadd* operator->() { return this; }
|
||||
reducer_opadd const* operator->() const { return this; }
|
||||
//@}
|
||||
|
||||
/** @name Upcast
|
||||
* @details In Cilk library 0.9, reducers were always cache-aligned. In
|
||||
* library 1.0, reducer cache alignment is optional. By default, reducers
|
||||
* are unaligned (i.e., just naturally aligned), but legacy wrappers
|
||||
* inherit from cache-aligned reducers for binary compatibility.
|
||||
*
|
||||
* This means that a wrapper will automatically be upcast to its aligned
|
||||
* reducer base class. The following conversion operators provide
|
||||
* pseudo-upcasts to the corresponding unaligned reducer class.
|
||||
*/
|
||||
//@{
|
||||
operator reducer< op_add<Type, false> >& ()
|
||||
{
|
||||
return *reinterpret_cast< reducer< op_add<Type, false> >* >(this);
|
||||
}
|
||||
operator const reducer< op_add<Type, false> >& () const
|
||||
{
|
||||
return *reinterpret_cast< const reducer< op_add<Type, false> >* >(this);
|
||||
}
|
||||
//@}
|
||||
};
|
||||
|
||||
/// @cond internal
|
||||
/** Metafunction specialization for reducer conversion.
|
||||
*
|
||||
* This specialization of the @ref legacy_reducer_downcast template class
|
||||
* defined in reducer.h causes the `reducer< op_add<Type> >` class to have an
|
||||
* `operator reducer_opadd<Type>& ()` conversion operator that statically
|
||||
* downcasts the `reducer<op_add>` to the corresponding `reducer_opadd` type.
|
||||
* (The reverse conversion, from `reducer_opadd` to `reducer<op_add>`, is just
|
||||
* an upcast, which is provided for free by the language.)
|
||||
*
|
||||
* @ingroup ReducersAdd
|
||||
*/
|
||||
template <typename Type, bool Align>
|
||||
struct legacy_reducer_downcast<reducer<op_add<Type, Align> > >
|
||||
{
|
||||
typedef reducer_opadd<Type> type;
|
||||
};
|
||||
/// @endcond
|
||||
|
||||
} // namespace cilk
|
||||
|
||||
#endif // __cplusplus
|
||||
|
||||
|
||||
/** @ingroup ReducersAdd
|
||||
*/
|
||||
//@{
|
||||
|
||||
/** @name C Language Reducer Macros
|
||||
*
|
||||
* These macros are used to declare and work with numeric op_add reducers in
|
||||
* C code.
|
||||
*
|
||||
* @see @ref page_reducers_in_c
|
||||
*/
|
||||
//@{
|
||||
|
||||
__CILKRTS_BEGIN_EXTERN_C
|
||||
|
||||
/** Opadd reducer type name.
|
||||
*
|
||||
* This macro expands into the identifier which is the name of the op_add
|
||||
* reducer type for a specified numeric type.
|
||||
*
|
||||
* @param tn The @ref reducers_c_type_names "numeric type name" specifying
|
||||
* the type of the reducer.
|
||||
*
|
||||
* @see @ref reducers_c_predefined
|
||||
* @see ReducersAdd
|
||||
*/
|
||||
#define CILK_C_REDUCER_OPADD_TYPE(tn) \
|
||||
__CILKRTS_MKIDENT(cilk_c_reducer_opadd_,tn)
|
||||
|
||||
/** Declare an op_add reducer object.
|
||||
*
|
||||
* This macro expands into a declaration of an op_add reducer object for a
|
||||
* specified numeric type. For example:
|
||||
*
|
||||
* CILK_C_REDUCER_OPADD(my_reducer, double, 0.0);
|
||||
*
|
||||
* @param obj The variable name to be used for the declared reducer object.
|
||||
* @param tn The @ref reducers_c_type_names "numeric type name" specifying
|
||||
* the type of the reducer.
|
||||
* @param v The initial value for the reducer. (A value which can be
|
||||
* assigned to the numeric type represented by @a tn.)
|
||||
*
|
||||
* @see @ref reducers_c_predefined
|
||||
* @see ReducersAdd
|
||||
*/
|
||||
#define CILK_C_REDUCER_OPADD(obj,tn,v) \
|
||||
CILK_C_REDUCER_OPADD_TYPE(tn) obj = \
|
||||
CILK_C_INIT_REDUCER(_Typeof(obj.value), \
|
||||
__CILKRTS_MKIDENT(cilk_c_reducer_opadd_reduce_,tn), \
|
||||
__CILKRTS_MKIDENT(cilk_c_reducer_opadd_identity_,tn), \
|
||||
__cilkrts_hyperobject_noop_destroy, v)
|
||||
|
||||
/// @cond internal
|
||||
|
||||
/** Declare the op_add reducer functions for a numeric type.
|
||||
*
|
||||
* This macro expands into external function declarations for functions which
|
||||
* implement the reducer functionality for the op_add reducer type for a
|
||||
* specified numeric type.
|
||||
*
|
||||
* @param t The value type of the reducer.
|
||||
* @param tn The value “type name” identifier, used to construct the reducer
|
||||
* type name, function names, etc.
|
||||
*/
|
||||
#define CILK_C_REDUCER_OPADD_DECLARATION(t,tn) \
|
||||
typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_OPADD_TYPE(tn); \
|
||||
__CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_opadd,tn,l,r); \
|
||||
__CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_opadd,tn);
|
||||
|
||||
/** Define the op_add reducer functions for a numeric type.
|
||||
*
|
||||
* This macro expands into function definitions for functions which implement
|
||||
* the reducer functionality for the op_add reducer type for a specified
|
||||
* numeric type.
|
||||
*
|
||||
* @param t The value type of the reducer.
|
||||
* @param tn The value “type name” identifier, used to construct the reducer
|
||||
* type name, function names, etc.
|
||||
*/
|
||||
#define CILK_C_REDUCER_OPADD_DEFINITION(t,tn) \
|
||||
typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_OPADD_TYPE(tn); \
|
||||
__CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_opadd,tn,l,r) \
|
||||
{ *(t*)l += *(t*)r; } \
|
||||
__CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_opadd,tn) \
|
||||
{ *(t*)v = 0; }
|
||||
|
||||
//@{
|
||||
/** @def CILK_C_REDUCER_OPADD_INSTANCE
|
||||
* @brief Declare or define implementation functions for a reducer type.
|
||||
*
|
||||
* In the runtime source file c_reducers.c, the macro `CILK_C_DEFINE_REDUCERS`
|
||||
* will be defined, and this macro will generate reducer implementation
|
||||
* functions. Everywhere else, `CILK_C_DEFINE_REDUCERS` will be undefined,
|
||||
* and this macro will expand into external declarations for the functions.
|
||||
*/
|
||||
#ifdef CILK_C_DEFINE_REDUCERS
|
||||
# define CILK_C_REDUCER_OPADD_INSTANCE(t,tn) \
|
||||
CILK_C_REDUCER_OPADD_DEFINITION(t,tn)
|
||||
#else
|
||||
# define CILK_C_REDUCER_OPADD_INSTANCE(t,tn) \
|
||||
CILK_C_REDUCER_OPADD_DECLARATION(t,tn)
|
||||
#endif
|
||||
//@}
|
||||
|
||||
/* Declare or define an instance of the reducer type and its functions for each
|
||||
* numeric type.
|
||||
*/
|
||||
CILK_C_REDUCER_OPADD_INSTANCE(char, char)
|
||||
CILK_C_REDUCER_OPADD_INSTANCE(unsigned char, uchar)
|
||||
CILK_C_REDUCER_OPADD_INSTANCE(signed char, schar)
|
||||
CILK_C_REDUCER_OPADD_INSTANCE(wchar_t, wchar_t)
|
||||
CILK_C_REDUCER_OPADD_INSTANCE(short, short)
|
||||
CILK_C_REDUCER_OPADD_INSTANCE(unsigned short, ushort)
|
||||
CILK_C_REDUCER_OPADD_INSTANCE(int, int)
|
||||
CILK_C_REDUCER_OPADD_INSTANCE(unsigned int, uint)
|
||||
CILK_C_REDUCER_OPADD_INSTANCE(unsigned int, unsigned) /* alternate name */
|
||||
CILK_C_REDUCER_OPADD_INSTANCE(long, long)
|
||||
CILK_C_REDUCER_OPADD_INSTANCE(unsigned long, ulong)
|
||||
CILK_C_REDUCER_OPADD_INSTANCE(long long, longlong)
|
||||
CILK_C_REDUCER_OPADD_INSTANCE(unsigned long long, ulonglong)
|
||||
CILK_C_REDUCER_OPADD_INSTANCE(float, float)
|
||||
CILK_C_REDUCER_OPADD_INSTANCE(double, double)
|
||||
CILK_C_REDUCER_OPADD_INSTANCE(long double, longdouble)
|
||||
|
||||
/// @endcond
|
||||
|
||||
__CILKRTS_END_EXTERN_C
|
||||
|
||||
//@}
|
||||
|
||||
//@}
|
||||
|
||||
#endif /* REDUCER_OPADD_H_INCLUDED */
|
604
libcilkrts/include/cilk/reducer_opand.h
Normal file
604
libcilkrts/include/cilk/reducer_opand.h
Normal file
|
@ -0,0 +1,604 @@
|
|||
/* reducer_opand.h -*- C++ -*-
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2009-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** @file reducer_opand.h
|
||||
*
|
||||
* @brief Defines classes for doing parallel bitwise and reductions.
|
||||
*
|
||||
* @ingroup ReducersAnd
|
||||
*
|
||||
* @see ReducersAnd
|
||||
*/
|
||||
|
||||
#ifndef REDUCER_OPAND_H_INCLUDED
|
||||
#define REDUCER_OPAND_H_INCLUDED
|
||||
|
||||
#include <cilk/reducer.h>
|
||||
|
||||
/** @defgroup ReducersAnd Bitwise And Reducers
|
||||
*
|
||||
* Bitwise and reducers allow the computation of the bitwise and of a set of
|
||||
* values in parallel.
|
||||
*
|
||||
* @ingroup Reducers
|
||||
*
|
||||
* You should be familiar with @ref pagereducers "Cilk reducers", described in
|
||||
* file `reducers.md`, and particularly with @ref reducers_using, before trying
|
||||
* to use the information in this file.
|
||||
*
|
||||
* @section redopand_usage Usage Example
|
||||
*
|
||||
* cilk::reducer< cilk::op_and<unsigned> > r;
|
||||
* cilk_for (int i = 0; i != N; ++i) {
|
||||
* *r &= a[i];
|
||||
* }
|
||||
* unsigned result;
|
||||
* r.move_out(result);
|
||||
*
|
||||
* @section redopand_monoid The Monoid
|
||||
*
|
||||
* @subsection redopand_monoid_values Value Set
|
||||
*
|
||||
* The value set of a bitwise and reducer is the set of values of `Type`,
|
||||
* which is expected to be a builtin integer type which has a representation
|
||||
* as a sequence of bits (or something like it, such as `bool` or
|
||||
* `std::bitset`).
|
||||
*
|
||||
* @subsection redopand_monoid_operator Operator
|
||||
*
|
||||
* The operator of a bitwise and reducer is the bitwise and operator, defined
|
||||
* by the “`&`” binary operator on `Type`.
|
||||
*
|
||||
* @subsection redopand_monoid_identity Identity
|
||||
*
|
||||
* The identity value of the reducer is the value whose representation
|
||||
* contains all 1-bits. This is expected to be the value of the expression
|
||||
* `~Type()` (i.e., the bitwise negation operator applied to the default value
|
||||
* of the value type).
|
||||
*
|
||||
* @section redopand_operations Operations
|
||||
*
|
||||
* @subsection redopand_constructors Constructors
|
||||
*
|
||||
* reducer() // identity
|
||||
* reducer(const Type& value)
|
||||
* reducer(move_in(Type& variable))
|
||||
*
|
||||
* @subsection redopand_get_set Set and Get
|
||||
*
|
||||
* r.set_value(const Type& value)
|
||||
* const Type& = r.get_value() const
|
||||
* r.move_in(Type& variable)
|
||||
* r.move_out(Type& variable)
|
||||
*
|
||||
* @subsection redopand_initial Initial Values
|
||||
*
|
||||
* If a bitwise and reducer is constructed without an explicit initial value,
|
||||
* then its initial value will be its identity value, as long as `Type`
|
||||
* satisfies the requirements of @ref redopand_types.
|
||||
*
|
||||
* @subsection redopand_view_ops View Operations
|
||||
*
|
||||
* *r &= a
|
||||
* *r = *r & a
|
||||
* *r = *r & a1 & a2 … & an
|
||||
*
|
||||
* @section redopand_types Type and Operator Requirements
|
||||
*
|
||||
* `Type` must be `Copy Constructible`, `Default Constructible`, and
|
||||
* `Assignable`.
|
||||
*
|
||||
* The operator “`&=`” must be defined on `Type`, with `x &= a` having the
|
||||
* same meaning as `x = x & a`.
|
||||
*
|
||||
* The expression `~ Type()` must be a valid expression which yields the
|
||||
* identity value (the value of `Type` whose representation consists of all
|
||||
* 1-bits).
|
||||
*
|
||||
* @section redopand_in_c Bitwise And Reducers in C
|
||||
*
|
||||
* The @ref CILK_C_REDUCER_OPAND and @ref CILK_C_REDUCER_OPAND_TYPE macros can
|
||||
* be used to do bitwise and reductions in C. For example:
|
||||
*
|
||||
* CILK_C_REDUCER_OPAND(r, uint, ~0);
|
||||
* CILK_C_REGISTER_REDUCER(r);
|
||||
* cilk_for(int i = 0; i != n; ++i) {
|
||||
* REDUCER_VIEW(r) &= a[i];
|
||||
* }
|
||||
* CILK_C_UNREGISTER_REDUCER(r);
|
||||
* printf("The bitwise AND of the elements of a is %x\n", REDUCER_VIEW(r));
|
||||
*
|
||||
* See @ref reducers_c_predefined.
|
||||
*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
namespace cilk {
|
||||
|
||||
/** The bitwise and reducer view class.
|
||||
*
|
||||
* This is the view class for reducers created with
|
||||
* `cilk::reducer< cilk::op_and<Type> >`. It holds the accumulator variable
|
||||
* for the reduction, and allows only `and` operations to be performed on it.
|
||||
*
|
||||
* @note The reducer “dereference” operation (`reducer::operator *()`)
|
||||
* yields a reference to the view. Thus, for example, the view class’s
|
||||
* `&=` operation would be used in an expression like `*r &= a`, where
|
||||
* `r` is an op_and reducer variable.
|
||||
*
|
||||
* @tparam Type The type of the contained accumulator variable. This will
|
||||
* be the value type of a monoid_with_view that is
|
||||
* instantiated with this view.
|
||||
*
|
||||
* @see ReducersAnd
|
||||
* @see op_and
|
||||
*
|
||||
* @ingroup ReducersAnd
|
||||
*/
|
||||
template <typename Type>
|
||||
class op_and_view : public scalar_view<Type>
|
||||
{
|
||||
typedef scalar_view<Type> base;
|
||||
|
||||
public:
|
||||
/** Class to represent the right-hand side of `*reducer = *reducer & value`.
|
||||
*
|
||||
* The only assignment operator for the op_and_view class takes an
|
||||
* rhs_proxy as its operand. This results in the syntactic restriction
|
||||
* that the only expressions that can be assigned to an op_and_view are
|
||||
* ones which generate an rhs_proxy — that is, expressions of the form
|
||||
* `op_and_view & value ... & value`.
|
||||
*
|
||||
* @warning
|
||||
* The lhs and rhs views in such an assignment must be the same;
|
||||
* otherwise, the behavior will be undefined. (I.e., `v1 = v1 & x` is
|
||||
* legal; `v1 = v2 & x` is illegal.) This condition will be checked with
|
||||
* a runtime assertion when compiled in debug mode.
|
||||
*
|
||||
* @see op_and_view
|
||||
*/
|
||||
class rhs_proxy {
|
||||
private:
|
||||
friend class op_and_view;
|
||||
|
||||
const op_and_view* m_view;
|
||||
Type m_value;
|
||||
|
||||
// Constructor is invoked only from op_and_view::operator&().
|
||||
//
|
||||
rhs_proxy(const op_and_view* view, const Type& value) : m_view(view), m_value(value) {}
|
||||
|
||||
rhs_proxy& operator=(const rhs_proxy&); // Disable assignment operator
|
||||
rhs_proxy(); // Disable default constructor
|
||||
|
||||
public:
|
||||
/** Bitwise and with an additional rhs value. If `v` is an op_and_view
|
||||
* and `a1` is a value, then the expression `v & a1` invokes the
|
||||
* view’s `operator&()` to create an rhs_proxy for `(v, a1)`; then
|
||||
* `v & a1 & a2` invokes the rhs_proxy’s `operator&()` to create a new
|
||||
* rhs_proxy for `(v, a1&a2)`. This allows the right-hand side of an
|
||||
* assignment to be not just `view & value`, but
|
||||
* `view & value & value ... & value`. The effect is that
|
||||
*
|
||||
* v = v & a1 & a2 ... & an;
|
||||
*
|
||||
* is evaluated as
|
||||
*
|
||||
* v = v & (a1 & a2 ... & an);
|
||||
*/
|
||||
rhs_proxy& operator&(const Type& x) { m_value &= x; return *this; }
|
||||
};
|
||||
|
||||
|
||||
/** Default/identity constructor. This constructor initializes the
|
||||
* contained value to `~ Type()`.
|
||||
*/
|
||||
op_and_view() : base(~Type()) {}
|
||||
|
||||
/** Construct with a specified initial value.
|
||||
*/
|
||||
explicit op_and_view(const Type& v) : base(v) {}
|
||||
|
||||
|
||||
/** Reduction operation.
|
||||
*
|
||||
* This function is invoked by the @ref op_and monoid to combine the views
|
||||
* of two strands when the right strand merges with the left one. It
|
||||
* “ands” the value contained in the left-strand view with the value
|
||||
* contained in the right-strand view, and leaves the value in the
|
||||
* right-strand view undefined.
|
||||
*
|
||||
* @param right A pointer to the right-strand view. (`this` points to
|
||||
* the left-strand view.)
|
||||
*
|
||||
* @note Used only by the @ref op_and monoid to implement the monoid
|
||||
* reduce operation.
|
||||
*/
|
||||
void reduce(op_and_view* right) { this->m_value &= right->m_value; }
|
||||
|
||||
/** @name Accumulator variable updates.
|
||||
*
|
||||
* These functions support the various syntaxes for “anding” the
|
||||
* accumulator variable contained in the view with some value.
|
||||
*/
|
||||
//@{
|
||||
|
||||
/** And the accumulator variable with @a x.
|
||||
*/
|
||||
op_and_view& operator&=(const Type& x) { this->m_value &= x; return *this; }
|
||||
|
||||
/** Create an object representing `*this & x`.
|
||||
*
|
||||
* @see rhs_proxy
|
||||
*/
|
||||
rhs_proxy operator&(const Type& x) const { return rhs_proxy(this, x); }
|
||||
|
||||
/** Assign the result of a `view & value` expression to the view. Note that
|
||||
* this is the only assignment operator for this class.
|
||||
*
|
||||
* @see rhs_proxy
|
||||
*/
|
||||
op_and_view& operator=(const rhs_proxy& rhs) {
|
||||
__CILKRTS_ASSERT(this == rhs.m_view);
|
||||
this->m_value &= rhs.m_value;
|
||||
return *this;
|
||||
}
|
||||
|
||||
//@}
|
||||
};
|
||||
|
||||
/** Monoid class for bitwise and reductions. Instantiate the cilk::reducer
|
||||
* template class with an op_and monoid to create a bitwise and reducer
|
||||
* class. For example, to compute the bitwise and of a set of `unsigned long`
|
||||
* values:
|
||||
*
|
||||
* cilk::reducer< cilk::op_and<unsigned long> > r;
|
||||
*
|
||||
* @tparam Type The reducer value type.
|
||||
* @tparam Align If `false` (the default), reducers instantiated on this
|
||||
* monoid will be naturally aligned (the Cilk library 1.0
|
||||
* behavior). If `true`, reducers instantiated on this monoid
|
||||
* will be cache-aligned for binary compatibility with
|
||||
* reducers in Cilk library version 0.9.
|
||||
*
|
||||
* @see ReducersAnd
|
||||
* @see op_and_view
|
||||
*
|
||||
* @ingroup ReducersAnd
|
||||
*/
|
||||
template <typename Type, bool Align = false>
|
||||
struct op_and : public monoid_with_view<op_and_view<Type>, Align> {};
|
||||
|
||||
/** Deprecated bitwise and reducer class.
|
||||
*
|
||||
* reducer_opand is the same as @ref reducer<@ref op_and>, except that
|
||||
* reducer_opand is a proxy for the contained view, so that accumulator
|
||||
* variable update operations can be applied directly to the reducer. For
|
||||
* example, a value is anded with a `reducer<%op_and>` with `*r &= a`, but a
|
||||
* value can be anded with a `%reducer_opand` with `r &= a`.
|
||||
*
|
||||
* @deprecated Users are strongly encouraged to use `reducer<monoid>`
|
||||
* reducers rather than the old wrappers like reducer_opand.
|
||||
* The `reducer<monoid>` reducers show the reducer/monoid/view
|
||||
* architecture more clearly, are more consistent in their
|
||||
* implementation, and present a simpler model for new
|
||||
* user-implemented reducers.
|
||||
*
|
||||
* @note Implicit conversions are provided between `%reducer_opand`
|
||||
* and `reducer<%op_and>`. This allows incremental code
|
||||
* conversion: old code that used `%reducer_opand` can pass a
|
||||
* `%reducer_opand` to a converted function that now expects a
|
||||
* pointer or reference to a `reducer<%op_and>`, and vice
|
||||
* versa.
|
||||
*
|
||||
* @tparam Type The value type of the reducer.
|
||||
*
|
||||
* @see op_and
|
||||
* @see reducer
|
||||
* @see ReducersAnd
|
||||
*
|
||||
* @ingroup ReducersAnd
|
||||
*/
|
||||
template <typename Type>
|
||||
class reducer_opand : public reducer< op_and<Type, true> >
|
||||
{
|
||||
typedef reducer< op_and<Type, true> > base;
|
||||
using base::view;
|
||||
|
||||
public:
|
||||
/// The view type for the reducer.
|
||||
typedef typename base::view_type view_type;
|
||||
|
||||
/// The view’s rhs proxy type.
|
||||
typedef typename view_type::rhs_proxy rhs_proxy;
|
||||
|
||||
/// The view type for the reducer.
|
||||
typedef view_type View;
|
||||
|
||||
/// The monoid type for the reducer.
|
||||
typedef typename base::monoid_type Monoid;
|
||||
|
||||
/** @name Constructors
|
||||
*/
|
||||
//@{
|
||||
|
||||
/** Default constructor.
|
||||
*
|
||||
* Constructs the wrapper with the default initial value of `Type()`
|
||||
* (not the identity value).
|
||||
*/
|
||||
reducer_opand() : base(Type()) {}
|
||||
|
||||
/** Value constructor.
|
||||
*
|
||||
* Constructs the wrapper with a specified initial value.
|
||||
*/
|
||||
explicit reducer_opand(const Type& initial_value) : base(initial_value) {}
|
||||
|
||||
//@}
|
||||
|
||||
/** @name Forwarded functions
|
||||
* @details Functions that update the contained accumulator variable are
|
||||
* simply forwarded to the contained @ref op_and_view. */
|
||||
//@{
|
||||
|
||||
/// @copydoc op_and_view::operator&=(const Type&)
|
||||
reducer_opand& operator&=(const Type& x)
|
||||
{
|
||||
view() &= x;
|
||||
return *this;
|
||||
}
|
||||
|
||||
// The legacy definition of reducer_opand::operator&() has different
|
||||
// behavior and a different return type than this definition. The legacy
|
||||
// version is defined as a member function, so this new version is defined
|
||||
// as a free function to give it a different signature, so that they won’t
|
||||
// end up sharing a single object file entry.
|
||||
|
||||
/// @copydoc op_and_view::operator&(const Type&) const
|
||||
friend rhs_proxy operator&(const reducer_opand& r, const Type& x)
|
||||
{
|
||||
return r.view() & x;
|
||||
}
|
||||
|
||||
/// @copydoc op_and_view::operator=(const rhs_proxy&)
|
||||
reducer_opand& operator=(const rhs_proxy& temp)
|
||||
{
|
||||
view() = temp;
|
||||
return *this;
|
||||
}
|
||||
//@}
|
||||
|
||||
/** @name Dereference
|
||||
* @details Dereferencing a wrapper is a no-op. It simply returns the
|
||||
* wrapper. Combined with the rule that the wrapper forwards view
|
||||
* operations to its contained view, this means that view operations can
|
||||
* be written the same way on reducers and wrappers, which is convenient
|
||||
* for incrementally converting old code using wrappers to use reducers
|
||||
* instead. That is:
|
||||
*
|
||||
* reducer< op_and<int> > r;
|
||||
* *r &= a; // *r returns the view
|
||||
* // operator &= is a view member function
|
||||
*
|
||||
* reducer_opand<int> w;
|
||||
* *w &= a; // *w returns the wrapper
|
||||
* // operator &= is a wrapper member function that
|
||||
* // calls the corresponding view function
|
||||
*/
|
||||
//@{
|
||||
reducer_opand& operator*() { return *this; }
|
||||
reducer_opand const& operator*() const { return *this; }
|
||||
|
||||
reducer_opand* operator->() { return this; }
|
||||
reducer_opand const* operator->() const { return this; }
|
||||
//@}
|
||||
|
||||
/** @name Upcast
|
||||
* @details In Cilk library 0.9, reducers were always cache-aligned. In
|
||||
* library 1.0, reducer cache alignment is optional. By default, reducers
|
||||
* are unaligned (i.e., just naturally aligned), but legacy wrappers
|
||||
* inherit from cache-aligned reducers for binary compatibility.
|
||||
*
|
||||
* This means that a wrapper will automatically be upcast to its aligned
|
||||
* reducer base class. The following conversion operators provide
|
||||
* pseudo-upcasts to the corresponding unaligned reducer class.
|
||||
*/
|
||||
//@{
|
||||
operator reducer< op_and<Type, false> >& ()
|
||||
{
|
||||
return *reinterpret_cast< reducer< op_and<Type, false> >* >(this);
|
||||
}
|
||||
operator const reducer< op_and<Type, false> >& () const
|
||||
{
|
||||
return *reinterpret_cast< const reducer< op_and<Type, false> >* >(this);
|
||||
}
|
||||
//@}
|
||||
};
|
||||
|
||||
/// @cond internal
|
||||
/** Metafunction specialization for reducer conversion.
|
||||
*
|
||||
* This specialization of the @ref legacy_reducer_downcast template class
|
||||
* defined in reducer.h causes the `reducer< op_and<Type> >` class to have an
|
||||
* `operator reducer_opand<Type>& ()` conversion operator that statically
|
||||
* downcasts the `reducer<op_and>` to the corresponding `reducer_opand` type.
|
||||
* (The reverse conversion, from `reducer_opand` to `reducer<op_and>`, is just
|
||||
* an upcast, which is provided for free by the language.)
|
||||
*
|
||||
* @ingroup ReducersAnd
|
||||
*/
|
||||
template <typename Type, bool Align>
|
||||
struct legacy_reducer_downcast<reducer<op_and<Type, Align> > >
|
||||
{
|
||||
typedef reducer_opand<Type> type;
|
||||
};
|
||||
/// @endcond
|
||||
|
||||
} // namespace cilk
|
||||
|
||||
#endif // __cplusplus
|
||||
|
||||
|
||||
/** @ingroup ReducersAnd
|
||||
*/
|
||||
//@{
|
||||
|
||||
/** @name C language reducer macros
|
||||
*
|
||||
* These macros are used to declare and work with op_and reducers in C code.
|
||||
*
|
||||
* @see @ref page_reducers_in_c
|
||||
*/
|
||||
//@{
|
||||
|
||||
__CILKRTS_BEGIN_EXTERN_C
|
||||
|
||||
/** Opand reducer type name.
|
||||
*
|
||||
* This macro expands into the identifier which is the name of the op_and
|
||||
* reducer type for a specified numeric type.
|
||||
*
|
||||
* @param tn The @ref reducers_c_type_names "numeric type name" specifying
|
||||
* the type of the reducer.
|
||||
*
|
||||
* @see @ref reducers_c_predefined
|
||||
* @see ReducersAnd
|
||||
*/
|
||||
/* Builds the type name from the `cilk_c_reducer_opand_` prefix and `tn` via
 * __CILKRTS_MKIDENT. The same macro names the typedef emitted by
 * CILK_C_REDUCER_OPAND_DECLARATION and CILK_C_REDUCER_OPAND_DEFINITION. */
#define CILK_C_REDUCER_OPAND_TYPE(tn) \
|
||||
__CILKRTS_MKIDENT(cilk_c_reducer_opand_,tn)
|
||||
|
||||
/** Declare an op_and reducer object.
|
||||
*
|
||||
* This macro expands into a declaration of an op_and reducer object for a
|
||||
* specified numeric type. For example:
|
||||
*
|
||||
* CILK_C_REDUCER_OPAND(my_reducer, ulong, ~0UL);
|
||||
*
|
||||
* @param obj The variable name to be used for the declared reducer object.
|
||||
* @param tn The @ref reducers_c_type_names "numeric type name" specifying
|
||||
* the type of the reducer.
|
||||
* @param v The initial value for the reducer. (A value which can be
|
||||
* assigned to the numeric type represented by @a tn.)
|
||||
*
|
||||
* @see @ref reducers_c_predefined
|
||||
* @see ReducersAnd
|
||||
*/
|
||||
/* Expands to `<reducer type for tn> obj = CILK_C_INIT_REDUCER(...)`.
 * Arguments handed to CILK_C_INIT_REDUCER, in order: the type of `obj.value`
 * (note that `obj` is referenced inside its own initializer), the per-type
 * reduce and identity function names built with __CILKRTS_MKIDENT,
 * `__cilkrts_hyperobject_noop_destroy`, and the initial value `v`.
 * No trailing semicolon is emitted; the caller supplies it. */
#define CILK_C_REDUCER_OPAND(obj,tn,v) \
|
||||
CILK_C_REDUCER_OPAND_TYPE(tn) obj = \
|
||||
CILK_C_INIT_REDUCER(_Typeof(obj.value), \
|
||||
__CILKRTS_MKIDENT(cilk_c_reducer_opand_reduce_,tn), \
|
||||
__CILKRTS_MKIDENT(cilk_c_reducer_opand_identity_,tn), \
|
||||
__cilkrts_hyperobject_noop_destroy, v)
|
||||
|
||||
/// @cond internal
|
||||
|
||||
/** Declare the op_and reducer functions for a numeric type.
|
||||
*
|
||||
* This macro expands into external function declarations for functions which
|
||||
* implement the reducer functionality for the op_and reducer type for a
|
||||
* specified numeric type.
|
||||
*
|
||||
* @param t The value type of the reducer.
|
||||
* @param tn The value “type name” identifier, used to construct the reducer
|
||||
* type name, function names, etc.
|
||||
*/
|
||||
/* Emits, for value type `t` and type name `tn`: the typedef that introduces
 * the reducer type name, then the reduce and identity entries. Each entry is
 * terminated by `;`, so no function body follows (compare
 * CILK_C_REDUCER_OPAND_DEFINITION, which attaches bodies instead). */
#define CILK_C_REDUCER_OPAND_DECLARATION(t,tn) \
|
||||
typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_OPAND_TYPE(tn); \
|
||||
__CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_opand,tn,l,r); \
|
||||
__CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_opand,tn);
|
||||
|
||||
/** Define the op_and reducer functions for a numeric type.
|
||||
*
|
||||
* This macro expands into function definitions for functions which implement
|
||||
* the reducer functionality for the op_and reducer type for a specified
|
||||
* numeric type.
|
||||
*
|
||||
* @param t The value type of the reducer.
|
||||
* @param tn The value “type name” identifier, used to construct the reducer
|
||||
* type name, function names, etc.
|
||||
*/
|
||||
/* Same typedef as the DECLARATION macro, but the reduce and identity entries
 * are followed by bodies:
 *   - reduce:   `*(t*)l &= *(t*)r` folds the right value into the left one;
 *   - identity: `*(t*)v = ~((t)0)` stores the complement of zero, the
 *     identity value for `&`.
 * `l` and `r` are passed to __CILKRTS_DECLARE_REDUCER_REDUCE as names; `v` is
 * not passed to __CILKRTS_DECLARE_REDUCER_IDENTITY, so it is presumably the
 * parameter name that macro declares -- TODO confirm in reducer.h. */
#define CILK_C_REDUCER_OPAND_DEFINITION(t,tn) \
|
||||
typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_OPAND_TYPE(tn); \
|
||||
__CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_opand,tn,l,r) \
|
||||
{ *(t*)l &= *(t*)r; } \
|
||||
__CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_opand,tn) \
|
||||
{ *(t*)v = ~((t)0); }
|
||||
|
||||
//@{
|
||||
/** @def CILK_C_REDUCER_OPAND_INSTANCE
|
||||
* @brief Declare or define implementation functions for a reducer type.
|
||||
*
|
||||
* In the runtime source file c_reducers.c, the macro `CILK_C_DEFINE_REDUCERS`
|
||||
* will be defined, and this macro will generate reducer implementation
|
||||
* functions. Everywhere else, `CILK_C_DEFINE_REDUCERS` will be undefined, and
|
||||
* this macro will expand into external declarations for the functions.
|
||||
*/
|
||||
/* Both branches define the same macro name with the same parameters; only the
 * expansion differs: DEFINITION when CILK_C_DEFINE_REDUCERS is defined,
 * DECLARATION otherwise. */
#ifdef CILK_C_DEFINE_REDUCERS
|
||||
# define CILK_C_REDUCER_OPAND_INSTANCE(t,tn) \
|
||||
CILK_C_REDUCER_OPAND_DEFINITION(t,tn)
|
||||
#else
|
||||
# define CILK_C_REDUCER_OPAND_INSTANCE(t,tn) \
|
||||
CILK_C_REDUCER_OPAND_DECLARATION(t,tn)
|
||||
#endif
|
||||
//@}
|
||||
|
||||
/* Declare or define an instance of the reducer type and its functions for
|
||||
* each numeric type.
|
||||
*/
|
||||
/* First argument: the value type. Second argument: the type name that callers
 * pass as `tn` to CILK_C_REDUCER_OPAND / CILK_C_REDUCER_OPAND_TYPE.
 * Only integer types are instantiated here; `uint` and `unsigned` both
 * instantiate `unsigned int`. */
CILK_C_REDUCER_OPAND_INSTANCE(char, char)
|
||||
CILK_C_REDUCER_OPAND_INSTANCE(unsigned char, uchar)
|
||||
CILK_C_REDUCER_OPAND_INSTANCE(signed char, schar)
|
||||
CILK_C_REDUCER_OPAND_INSTANCE(wchar_t, wchar_t)
|
||||
CILK_C_REDUCER_OPAND_INSTANCE(short, short)
|
||||
CILK_C_REDUCER_OPAND_INSTANCE(unsigned short, ushort)
|
||||
CILK_C_REDUCER_OPAND_INSTANCE(int, int)
|
||||
CILK_C_REDUCER_OPAND_INSTANCE(unsigned int, uint)
|
||||
CILK_C_REDUCER_OPAND_INSTANCE(unsigned int, unsigned) /* alternate name */
|
||||
CILK_C_REDUCER_OPAND_INSTANCE(long, long)
|
||||
CILK_C_REDUCER_OPAND_INSTANCE(unsigned long, ulong)
|
||||
CILK_C_REDUCER_OPAND_INSTANCE(long long, longlong)
|
||||
CILK_C_REDUCER_OPAND_INSTANCE(unsigned long long, ulonglong)
|
||||
|
||||
//@endcond
|
||||
|
||||
__CILKRTS_END_EXTERN_C
|
||||
|
||||
//@}
|
||||
|
||||
//@}
|
||||
|
||||
#endif /* REDUCER_OPAND_H_INCLUDED */
|
442
libcilkrts/include/cilk/reducer_opmul.h
Normal file
442
libcilkrts/include/cilk/reducer_opmul.h
Normal file
|
@ -0,0 +1,442 @@
|
|||
/* reducer_opmul.h -*- C++ -*-
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2012-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** @file reducer_opmul.h
|
||||
*
|
||||
* @brief Defines classes for doing parallel multiplication reductions.
|
||||
*
|
||||
* @ingroup ReducersMul
|
||||
*
|
||||
* @see ReducersMul
|
||||
*/
|
||||
|
||||
#ifndef REDUCER_OPMUL_H_INCLUDED
|
||||
#define REDUCER_OPMUL_H_INCLUDED
|
||||
|
||||
#include <cilk/reducer.h>
|
||||
|
||||
/** @defgroup ReducersMul Multiplication Reducers
|
||||
*
|
||||
* Multiplication reducers allow the computation of the product of a set of
|
||||
* values in parallel.
|
||||
*
|
||||
* @ingroup Reducers
|
||||
*
|
||||
* You should be familiar with @ref pagereducers "Cilk reducers", described in
|
||||
* file `reducers.md`, and particularly with @ref reducers_using, before trying
|
||||
* to use the information in this file.
|
||||
*
|
||||
* @section redopmul_usage Usage Example
|
||||
*
|
||||
* cilk::reducer< cilk::op_mul<double> > r;
|
||||
* cilk_for (int i = 0; i != N; ++i) {
|
||||
* *r *= a[i];
|
||||
* }
|
||||
* double product;
|
||||
* r.move_out(product);
|
||||
*
|
||||
* @section redopmul_monoid The Monoid
|
||||
*
|
||||
* @subsection redopmul_monoid_values Value Set
|
||||
*
|
||||
* The value set of a multiplication reducer is the set of values of `Type`,
|
||||
* which is expected to be a builtin numeric type (or something like it, such
|
||||
* as `std::complex`).
|
||||
*
|
||||
* @subsection redopmul_monoid_operator Operator
|
||||
*
|
||||
* The operator of a multiplication reducer is the multiplication operation,
|
||||
* defined by the “`*`” binary operator on `Type`.
|
||||
*
|
||||
* @subsection redopmul_monoid_identity Identity
|
||||
*
|
||||
* The identity value of the reducer is the numeric value “`1`”. This is
|
||||
* expected to be the value of the expression `Type(1)`.
|
||||
*
|
||||
* @section redopmul_operations Operations
|
||||
*
|
||||
* @subsection redopmul_constructors Constructors
|
||||
*
|
||||
* reducer() // identity
|
||||
* reducer(const Type& value)
|
||||
* reducer(move_in(Type& variable))
|
||||
*
|
||||
* @subsection redopmul_get_set Set and Get
|
||||
*
|
||||
* r.set_value(const Type& value)
|
||||
* const Type& = r.get_value() const
|
||||
* r.move_in(Type& variable)
|
||||
* r.move_out(Type& variable)
|
||||
*
|
||||
* @subsection redopmul_initial Initial Values
|
||||
*
|
||||
* If a multiplication reducer is constructed without an explicit initial
|
||||
* value, then its initial value will be its identity value, as long as `Type`
|
||||
* satisfies the requirements of @ref redopmul_types.
|
||||
*
|
||||
* @subsection redopmul_view_ops View Operations
|
||||
*
|
||||
* *r *= a
|
||||
* *r = *r * a
|
||||
* *r = *r * a1 * a2 … * an
|
||||
*
|
||||
* @section redopmul_floating_point Issues with Floating-Point Types
|
||||
*
|
||||
* Because of overflow and underflow issues, floating-point multiplication is
|
||||
* not really associative. For example, `(1e200 * 1e-200) * 1e-200 == 1e-200`,
|
||||
* but `1e200 * (1e-200 * 1e-200) == 0`.
|
||||
*
|
||||
* In many cases, this won’t matter, but computations which have been
|
||||
* carefully ordered to control overflow and underflow may not deal well with
|
||||
* being reassociated. In general, you should be sure to understand the
|
||||
* floating-point behavior of your program before doing any transformation
|
||||
* that will reassociate its computations.
|
||||
*
|
||||
* @section redopmul_types Type and Operator Requirements
|
||||
*
|
||||
* `Type` must be `Copy Constructible`, `Default Constructible`, and
|
||||
* `Assignable`.
|
||||
*
|
||||
* The operator “`*=`” must be defined on `Type`, with `x *= a` having the same
|
||||
* meaning as `x = x * a`.
|
||||
*
|
||||
* The expression `Type(1)` must be a valid expression which yields the
|
||||
* identity value (the value of `Type` whose numeric value is `1`).
|
||||
*
|
||||
* @section redopmul_in_c Multiplication Reducers in C
|
||||
*
|
||||
* The @ref CILK_C_REDUCER_OPMUL and @ref CILK_C_REDUCER_OPMUL_TYPE macros can
|
||||
* be used to do multiplication reductions in C. For example:
|
||||
*
|
||||
* CILK_C_REDUCER_OPMUL(r, double, 1);
|
||||
* CILK_C_REGISTER_REDUCER(r);
|
||||
* cilk_for(int i = 0; i != n; ++i) {
|
||||
* REDUCER_VIEW(r) *= a[i];
|
||||
* }
|
||||
* CILK_C_UNREGISTER_REDUCER(r);
|
||||
* printf("The product of the elements of a is %f\n", REDUCER_VIEW(r));
|
||||
*
|
||||
* See @ref reducers_c_predefined.
|
||||
*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
namespace cilk {
|
||||
|
||||
/** The multiplication reducer view class.
|
||||
*
|
||||
* This is the view class for reducers created with
|
||||
* `cilk::reducer< cilk::op_mul<Type> >`. It holds the accumulator variable
|
||||
* for the reduction, and allows only multiplication operations to be
|
||||
* performed on it.
|
||||
*
|
||||
* @note The reducer “dereference” operation (`reducer::operator *()`)
|
||||
* yields a reference to the view. Thus, for example, the view class’s
|
||||
* `*=` operation would be used in an expression like `*r *= a`, where
|
||||
* `r` is an op_mul reducer variable.
|
||||
*
|
||||
* @tparam Type The type of the contained accumulator variable. This will
|
||||
* be the value type of a monoid_with_view that is
|
||||
* instantiated with this view.
|
||||
*
|
||||
* @see ReducersMul
|
||||
* @see op_mul
|
||||
*
|
||||
* @ingroup ReducersMul
|
||||
*/
|
||||
template <typename Type>
|
||||
class op_mul_view : public scalar_view<Type>
|
||||
{
|
||||
typedef scalar_view<Type> base;
|
||||
|
||||
public:
|
||||
/** Class to represent the right-hand side of `*reducer = *reducer * value`.
|
||||
*
|
||||
* The only assignment operator for the op_mul_view class takes an
|
||||
* rhs_proxy as its operand. This results in the syntactic restriction
|
||||
* that the only expressions that can be assigned to an op_mul_view are
|
||||
* ones which generate an rhs_proxy — that is, expressions of the form
|
||||
* `op_mul_view * value ... * value`.
|
||||
*
|
||||
* @warning
|
||||
* The lhs and rhs views in such an assignment must be the same;
|
||||
* otherwise, the behavior will be undefined. (I.e., `v1 = v1 * x` is
|
||||
* legal; `v1 = v2 * x` is illegal.) This condition will be checked with a
|
||||
* runtime assertion when compiled in debug mode.
|
||||
*
|
||||
* @see op_mul_view
|
||||
*/
|
||||
class rhs_proxy {
|
||||
friend class op_mul_view;
|
||||
|
||||
const op_mul_view* m_view;
|
||||
Type m_value;
|
||||
|
||||
// Constructor is invoked only from op_mul_view::operator*().
|
||||
//
|
||||
rhs_proxy(const op_mul_view* view, const Type& value) : m_view(view), m_value(value) {}
|
||||
|
||||
rhs_proxy& operator=(const rhs_proxy&); // Disable assignment operator
|
||||
rhs_proxy(); // Disable default constructor
|
||||
|
||||
public:
|
||||
/** Multiply by an additional rhs value. If `v` is an op_mul_view and
|
||||
* `a1` is a value, then the expression `v * a1` invokes the view’s
|
||||
* `operator*()` to create an rhs_proxy for `(v, a1)`; then
|
||||
* `v * a1 * a2` invokes the rhs_proxy’s `operator*()` to create a
|
||||
* new rhs_proxy for `(v, a1*a2)`. This allows the right-hand side of
|
||||
* an assignment to be not just `view * value`, but
|
||||
* `view * value * value ... * value`. The effect is that
|
||||
*
|
||||
* v = v * a1 * a2 ... * an;
|
||||
*
|
||||
* is evaluated as
|
||||
*
|
||||
* v = v * (a1 * a2 ... * an);
|
||||
*/
|
||||
rhs_proxy& operator*(const Type& x) { m_value *= x; return *this; }
|
||||
};
|
||||
|
||||
|
||||
/** Default/identity constructor. This constructor initializes the
|
||||
* contained value to `Type(1)`, which is expected to be the identity
|
||||
* value for multiplication on `Type`.
|
||||
*/
|
||||
op_mul_view() : base(Type(1)) {}
|
||||
|
||||
/** Construct with a specified initial value.
|
||||
*/
|
||||
explicit op_mul_view(const Type& v) : base(v) {}
|
||||
|
||||
/** Reduction operation.
|
||||
*
|
||||
* This function is invoked by the @ref op_mul monoid to combine the views
|
||||
* of two strands when the right strand merges with the left one. It
|
||||
* multiplies the value contained in the left-strand view by the value
|
||||
* contained in the right-strand view, and leaves the value in the
|
||||
* right-strand view undefined.
|
||||
*
|
||||
* @param right A pointer to the right-strand view. (`this` points to
|
||||
* the left-strand view.)
|
||||
*
|
||||
* @note Used only by the @ref op_mul monoid to implement the monoid
|
||||
* reduce operation.
|
||||
*/
|
||||
void reduce(op_mul_view* right) { this->m_value *= right->m_value; }
|
||||
|
||||
/** @name Accumulator variable updates.
|
||||
*
|
||||
* These functions support the various syntaxes for multiplying the
|
||||
* accumulator variable contained in the view by some value.
|
||||
*/
|
||||
//@{
|
||||
|
||||
/** Multiply the accumulator variable by @a x.
|
||||
*/
|
||||
op_mul_view& operator*=(const Type& x) { this->m_value *= x; return *this; }
|
||||
|
||||
/** Create an object representing `*this * x`.
|
||||
*
|
||||
* @see rhs_proxy
|
||||
*/
|
||||
rhs_proxy operator*(const Type& x) const { return rhs_proxy(this, x); }
|
||||
|
||||
/** Assign the result of a `view * value` expression to the view. Note that
|
||||
* this is the only assignment operator for this class.
|
||||
*
|
||||
* @see rhs_proxy
|
||||
*/
|
||||
op_mul_view& operator=(const rhs_proxy& rhs) {
|
||||
__CILKRTS_ASSERT(this == rhs.m_view);
|
||||
this->m_value *= rhs.m_value;
|
||||
return *this;
|
||||
}
|
||||
|
||||
//@}
|
||||
};
|
||||
|
||||
/** Monoid class for multiplication reductions. Instantiate the cilk::reducer
|
||||
* template class with an op_mul monoid to create a multiplication reducer
|
||||
* class. For example, to compute the product of a set of `double` values:
|
||||
*
|
||||
* cilk::reducer< cilk::op_mul<double> > r;
|
||||
*
|
||||
* @see ReducersMul
|
||||
* @see op_mul_view
|
||||
*
|
||||
* @ingroup ReducersMul
|
||||
*/
|
||||
// No members of its own: the monoid is entirely monoid_with_view instantiated
// on op_mul_view<Type>. This template takes no `Align` parameter.
template <typename Type>
|
||||
struct op_mul : public monoid_with_view< op_mul_view<Type> > {};
|
||||
|
||||
} // namespace cilk
|
||||
|
||||
#endif // __cplusplus
|
||||
|
||||
|
||||
/** @ingroup ReducersMul
|
||||
*/
|
||||
//@{
|
||||
|
||||
/** @name C language reducer macros
|
||||
*
|
||||
* These macros are used to declare and work with numeric op_mul reducers in
|
||||
* C code.
|
||||
*
|
||||
* @see @ref page_reducers_in_c
|
||||
*/
|
||||
//@{
|
||||
|
||||
__CILKRTS_BEGIN_EXTERN_C
|
||||
|
||||
/** Opmul reducer type name.
|
||||
*
|
||||
* This macro expands into the identifier which is the name of the op_mul
|
||||
* reducer type for a specified numeric type.
|
||||
*
|
||||
* @param tn The @ref reducers_c_type_names "numeric type name" specifying
|
||||
* the type of the reducer.
|
||||
*
|
||||
* @see @ref reducers_c_predefined
|
||||
* @see ReducersMul
|
||||
*/
|
||||
/* Builds the type name from the `cilk_c_reducer_opmul_` prefix and `tn` via
 * __CILKRTS_MKIDENT. The same macro names the typedef emitted by
 * CILK_C_REDUCER_OPMUL_DECLARATION and CILK_C_REDUCER_OPMUL_DEFINITION. */
#define CILK_C_REDUCER_OPMUL_TYPE(tn) \
|
||||
__CILKRTS_MKIDENT(cilk_c_reducer_opmul_,tn)
|
||||
|
||||
/** Declare an op_mul reducer object.
|
||||
*
|
||||
* This macro expands into a declaration of an op_mul reducer object for a
|
||||
* specified numeric type. For example:
|
||||
*
|
||||
* CILK_C_REDUCER_OPMUL(my_reducer, double, 1.0);
|
||||
*
|
||||
* @param obj The variable name to be used for the declared reducer object.
|
||||
* @param tn The @ref reducers_c_type_names "numeric type name" specifying
|
||||
* the type of the reducer.
|
||||
* @param v The initial value for the reducer. (A value which can be
|
||||
* assigned to the numeric type represented by @a tn.)
|
||||
*
|
||||
* @see @ref reducers_c_predefined
|
||||
* @see ReducersMul
|
||||
*/
|
||||
/* Expands to `<reducer type for tn> obj = CILK_C_INIT_REDUCER(...)`.
 * Arguments handed to CILK_C_INIT_REDUCER, in order: the type of `obj.value`
 * (note that `obj` is referenced inside its own initializer), the per-type
 * reduce and identity function names built with __CILKRTS_MKIDENT,
 * `__cilkrts_hyperobject_noop_destroy`, and the initial value `v`.
 * No trailing semicolon is emitted; the caller supplies it. */
#define CILK_C_REDUCER_OPMUL(obj,tn,v) \
|
||||
CILK_C_REDUCER_OPMUL_TYPE(tn) obj = \
|
||||
CILK_C_INIT_REDUCER(_Typeof(obj.value), \
|
||||
__CILKRTS_MKIDENT(cilk_c_reducer_opmul_reduce_,tn), \
|
||||
__CILKRTS_MKIDENT(cilk_c_reducer_opmul_identity_,tn), \
|
||||
__cilkrts_hyperobject_noop_destroy, v)
|
||||
|
||||
/// @cond internal
|
||||
|
||||
/** Declare the op_mul reducer functions for a numeric type.
|
||||
*
|
||||
* This macro expands into external function declarations for functions which
|
||||
* implement the reducer functionality for the op_mul reducer type for a
|
||||
* specified numeric type.
|
||||
*
|
||||
* @param t The value type of the reducer.
|
||||
* @param tn The value “type name” identifier, used to construct the reducer
|
||||
* type name, function names, etc.
|
||||
*/
|
||||
/* Emits, for value type `t` and type name `tn`: the typedef that introduces
 * the reducer type name, then the reduce and identity entries. Each entry is
 * terminated by `;`, so no function body follows (compare
 * CILK_C_REDUCER_OPMUL_DEFINITION, which attaches bodies instead). */
#define CILK_C_REDUCER_OPMUL_DECLARATION(t,tn) \
|
||||
typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_OPMUL_TYPE(tn); \
|
||||
__CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_opmul,tn,l,r); \
|
||||
__CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_opmul,tn);
|
||||
|
||||
/** Define the op_mul reducer functions for a numeric type.
|
||||
*
|
||||
* This macro expands into function definitions for functions which implement
|
||||
* the reducer functionality for the op_mul reducer type for a specified
|
||||
* numeric type.
|
||||
*
|
||||
* @param t The value type of the reducer.
|
||||
* @param tn The value “type name” identifier, used to construct the reducer
|
||||
* type name, function names, etc.
|
||||
*/
|
||||
/* Same typedef as the DECLARATION macro, but the reduce and identity entries
 * are followed by bodies:
 *   - reduce:   `*(t*)l *= *(t*)r` multiplies the left value by the right one;
 *   - identity: `*(t*)v = 1` stores 1, the identity value for multiplication.
 * `l` and `r` are passed to __CILKRTS_DECLARE_REDUCER_REDUCE as names; `v` is
 * not passed to __CILKRTS_DECLARE_REDUCER_IDENTITY, so it is presumably the
 * parameter name that macro declares -- TODO confirm in reducer.h. */
#define CILK_C_REDUCER_OPMUL_DEFINITION(t,tn) \
|
||||
typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_OPMUL_TYPE(tn); \
|
||||
__CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_opmul,tn,l,r) \
|
||||
{ *(t*)l *= *(t*)r; } \
|
||||
__CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_opmul,tn) \
|
||||
{ *(t*)v = 1; }
|
||||
|
||||
//@{
|
||||
/** @def CILK_C_REDUCER_OPMUL_INSTANCE
|
||||
* @brief Declare or define implementation functions for a reducer type.
|
||||
*
|
||||
* In the runtime source file c_reducers.c, the macro `CILK_C_DEFINE_REDUCERS`
|
||||
* will be defined, and this macro will generate reducer implementation
|
||||
* functions. Everywhere else, `CILK_C_DEFINE_REDUCERS` will be undefined, and
|
||||
* this macro will expand into external declarations for the functions.
|
||||
*/
|
||||
/* Both branches define the same macro name with the same parameters; only the
 * expansion differs: DEFINITION when CILK_C_DEFINE_REDUCERS is defined,
 * DECLARATION otherwise. */
#ifdef CILK_C_DEFINE_REDUCERS
|
||||
# define CILK_C_REDUCER_OPMUL_INSTANCE(t,tn) \
|
||||
CILK_C_REDUCER_OPMUL_DEFINITION(t,tn)
|
||||
#else
|
||||
# define CILK_C_REDUCER_OPMUL_INSTANCE(t,tn) \
|
||||
CILK_C_REDUCER_OPMUL_DECLARATION(t,tn)
|
||||
#endif
|
||||
//@}
|
||||
|
||||
/* Declare or define an instance of the reducer type and its functions for each
|
||||
* numeric type.
|
||||
*/
|
||||
/* First argument: the value type. Second argument: the type name that callers
 * pass as `tn` to CILK_C_REDUCER_OPMUL / CILK_C_REDUCER_OPMUL_TYPE.
 * The list covers the integer types followed by float, double and long
 * double; `uint` and `unsigned` both instantiate `unsigned int`. */
CILK_C_REDUCER_OPMUL_INSTANCE(char, char)
|
||||
CILK_C_REDUCER_OPMUL_INSTANCE(unsigned char, uchar)
|
||||
CILK_C_REDUCER_OPMUL_INSTANCE(signed char, schar)
|
||||
CILK_C_REDUCER_OPMUL_INSTANCE(wchar_t, wchar_t)
|
||||
CILK_C_REDUCER_OPMUL_INSTANCE(short, short)
|
||||
CILK_C_REDUCER_OPMUL_INSTANCE(unsigned short, ushort)
|
||||
CILK_C_REDUCER_OPMUL_INSTANCE(int, int)
|
||||
CILK_C_REDUCER_OPMUL_INSTANCE(unsigned int, uint)
|
||||
CILK_C_REDUCER_OPMUL_INSTANCE(unsigned int, unsigned) /* alternate name */
|
||||
CILK_C_REDUCER_OPMUL_INSTANCE(long, long)
|
||||
CILK_C_REDUCER_OPMUL_INSTANCE(unsigned long, ulong)
|
||||
CILK_C_REDUCER_OPMUL_INSTANCE(long long, longlong)
|
||||
CILK_C_REDUCER_OPMUL_INSTANCE(unsigned long long, ulonglong)
|
||||
CILK_C_REDUCER_OPMUL_INSTANCE(float, float)
|
||||
CILK_C_REDUCER_OPMUL_INSTANCE(double, double)
|
||||
CILK_C_REDUCER_OPMUL_INSTANCE(long double, longdouble)
|
||||
|
||||
//@endcond
|
||||
|
||||
__CILKRTS_END_EXTERN_C
|
||||
|
||||
//@}
|
||||
|
||||
//@}
|
||||
|
||||
#endif /* REDUCER_OPMUL_H_INCLUDED */
|
598
libcilkrts/include/cilk/reducer_opor.h
Normal file
598
libcilkrts/include/cilk/reducer_opor.h
Normal file
|
@ -0,0 +1,598 @@
|
|||
/* reducer_opor.h -*- C++ -*-
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2009-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** @file reducer_opor.h
|
||||
*
|
||||
* @brief Defines classes for doing parallel bitwise or reductions.
|
||||
*
|
||||
* @ingroup ReducersOr
|
||||
*
|
||||
* @see ReducersOr
|
||||
*/
|
||||
|
||||
#ifndef REDUCER_OPOR_H_INCLUDED
|
||||
#define REDUCER_OPOR_H_INCLUDED
|
||||
|
||||
#include <cilk/reducer.h>
|
||||
|
||||
/** @defgroup ReducersOr Bitwise Or Reducers
|
||||
*
|
||||
* Bitwise or reducers allow the computation of the bitwise or of a set of
|
||||
* values in parallel.
|
||||
*
|
||||
* @ingroup Reducers
|
||||
*
|
||||
* You should be familiar with @ref pagereducers "Cilk reducers", described in
|
||||
* file `reducers.md`, and particularly with @ref reducers_using, before trying
|
||||
* to use the information in this file.
|
||||
*
|
||||
* @section redopor_usage Usage Example
|
||||
*
|
||||
* cilk::reducer< cilk::op_or<unsigned> > r;
|
||||
* cilk_for (int i = 0; i != N; ++i) {
|
||||
* *r |= a[i];
|
||||
* }
|
||||
* unsigned result;
|
||||
* r.move_out(result);
|
||||
*
|
||||
* @section redopor_monoid The Monoid
|
||||
*
|
||||
* @subsection redopor_monoid_values Value Set
|
||||
*
|
||||
* The value set of a bitwise or reducer is the set of values of `Type`, which
|
||||
* is expected to be a builtin integer type which has a representation as a
|
||||
* sequence of bits (or something like it, such as `bool` or `std::bitset`).
|
||||
*
|
||||
* @subsection redopor_monoid_operator Operator
|
||||
*
|
||||
* The operator of a bitwise or reducer is the bitwise or operator, defined by
|
||||
* the “`|`” binary operator on `Type`.
|
||||
*
|
||||
* @subsection redopor_monoid_identity Identity
|
||||
*
|
||||
* The identity value of the reducer is the value whose representation
|
||||
* contains all 0-bits. This is expected to be the value of the default
|
||||
* constructor `Type()`.
|
||||
*
|
||||
* @section redopor_operations Operations
|
||||
*
|
||||
* @subsection redopor_constructors Constructors
|
||||
*
|
||||
* reducer() // identity
|
||||
* reducer(const Type& value)
|
||||
* reducer(move_in(Type& variable))
|
||||
*
|
||||
* @subsection redopor_get_set Set and Get
|
||||
*
|
||||
* r.set_value(const Type& value)
|
||||
* const Type& = r.get_value() const
|
||||
* r.move_in(Type& variable)
|
||||
* r.move_out(Type& variable)
|
||||
*
|
||||
* @subsection redopor_initial Initial Values
|
||||
*
|
||||
* If a bitwise or reducer is constructed without an explicit initial value,
|
||||
* then its initial value will be its identity value, as long as `Type`
|
||||
* satisfies the requirements of @ref redopor_types.
|
||||
*
|
||||
* @subsection redopor_view_ops View Operations
|
||||
*
|
||||
* *r |= a
|
||||
* *r = *r | a
|
||||
* *r = *r | a1 | a2 … | an
|
||||
*
|
||||
* @section redopor_types Type and Operator Requirements
|
||||
*
|
||||
* `Type` must be `Copy Constructible`, `Default Constructible`, and
|
||||
* `Assignable`.
|
||||
*
|
||||
* The operator “`|=`” must be defined on `Type`, with `x |= a` having the
|
||||
* same meaning as `x = x | a`.
|
||||
*
|
||||
* The expression `Type()` must be a valid expression which yields the
|
||||
* identity value (the value of `Type` whose representation consists of all
|
||||
* 0-bits).
|
||||
*
|
||||
* @section redopor_in_c Bitwise Or Reducers in C
|
||||
*
|
||||
* The @ref CILK_C_REDUCER_OPOR and @ref CILK_C_REDUCER_OPOR_TYPE macros can
|
||||
* be used to do bitwise or reductions in C. For example:
|
||||
*
|
||||
* CILK_C_REDUCER_OPOR(r, uint, 0);
|
||||
* CILK_C_REGISTER_REDUCER(r);
|
||||
* cilk_for(int i = 0; i != n; ++i) {
|
||||
* REDUCER_VIEW(r) |= a[i];
|
||||
* }
|
||||
* CILK_C_UNREGISTER_REDUCER(r);
|
||||
* printf("The bitwise OR of the elements of a is %x\n", REDUCER_VIEW(r));
|
||||
*
|
||||
* See @ref reducers_c_predefined.
|
||||
*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
namespace cilk {
|
||||
|
||||
/** The bitwise or reducer view class.
|
||||
*
|
||||
* This is the view class for reducers created with
|
||||
* `cilk::reducer< cilk::op_or<Type> >`. It holds the accumulator variable for
|
||||
* the reduction, and allows only `or` operations to be performed on it.
|
||||
*
|
||||
* @note The reducer “dereference” operation (`reducer::operator *()`)
|
||||
* yields a reference to the view. Thus, for example, the view class’s
|
||||
* `|=` operation would be used in an expression like `*r |= a`, where
|
||||
* `r` is an op_or reducer variable.
|
||||
*
|
||||
* @tparam Type The type of the contained accumulator variable. This will
|
||||
* be the value type of a monoid_with_view that is
|
||||
* instantiated with this view.
|
||||
*
|
||||
* @see ReducersOr
|
||||
* @see op_or
|
||||
*
|
||||
* @ingroup ReducersOr
|
||||
*/
|
||||
template <typename Type>
|
||||
class op_or_view : public scalar_view<Type>
|
||||
{
|
||||
typedef scalar_view<Type> base;
|
||||
|
||||
public:
|
||||
/** Class to represent the right-hand side of `*reducer = *reducer | value`.
|
||||
*
|
||||
* The only assignment operator for the op_or_view class takes an
|
||||
* rhs_proxy as its operand. This results in the syntactic restriction
|
||||
* that the only expressions that can be assigned to an op_or_view are
|
||||
* ones which generate an rhs_proxy — that is, expressions of the form
|
||||
* `op_or_view | value ... | value`.
|
||||
*
|
||||
* @warning
|
||||
* The lhs and rhs views in such an assignment must be the same;
|
||||
* otherwise, the behavior will be undefined. (I.e., `v1 = v1 | x` is
|
||||
* legal; `v1 = v2 | x` is illegal.) This condition will be checked with
|
||||
* a runtime assertion when compiled in debug mode.
|
||||
*
|
||||
* @see op_or_view
|
||||
*/
|
||||
class rhs_proxy {
|
||||
friend class op_or_view;
|
||||
|
||||
const op_or_view* m_view;
|
||||
Type m_value;
|
||||
|
||||
// Constructor is invoked only from op_or_view::operator|().
|
||||
//
|
||||
rhs_proxy(const op_or_view* view, const Type& value) : m_view(view), m_value(value) {}
|
||||
|
||||
rhs_proxy& operator=(const rhs_proxy&); // Disable assignment operator
|
||||
rhs_proxy(); // Disable default constructor
|
||||
|
||||
public:
|
||||
/** Bitwise or with an additional rhs value. If `v` is an op_or_view
|
||||
* and `a1` is a value, then the expression `v | a1` invokes the
|
||||
* view’s `operator|()` to create an rhs_proxy for `(v, a1)`; then
|
||||
* `v | a1 | a2` invokes the rhs_proxy’s `operator|()` to create a new
|
||||
* rhs_proxy for `(v, a1|a2)`. This allows the right-hand side of an
|
||||
* assignment to be not just `view | value`, but
|
||||
( `view | value | value ... | value`. The effect is that
|
||||
*
|
||||
* v = v | a1 | a2 ... | an;
|
||||
*
|
||||
* is evaluated as
|
||||
*
|
||||
* v = v | (a1 | a2 ... | an);
|
||||
*/
|
||||
rhs_proxy& operator|(const Type& x) { m_value |= x; return *this; }
|
||||
};
|
||||
|
||||
|
||||
/** Default/identity constructor. This constructor initializes the
|
||||
* contained value to `Type()`.
|
||||
*/
|
||||
op_or_view() : base() {}
|
||||
|
||||
/** Construct with a specified initial value.
|
||||
*/
|
||||
explicit op_or_view(const Type& v) : base(v) {}
|
||||
|
||||
/** Reduction operation.
|
||||
*
|
||||
* This function is invoked by the @ref op_or monoid to combine the views
|
||||
* of two strands when the right strand merges with the left one. It
|
||||
* “ors” the value contained in the left-strand view by the value
|
||||
* contained in the right-strand view, and leaves the value in the
|
||||
* right-strand view undefined.
|
||||
*
|
||||
* @param right A pointer to the right-strand view. (`this` points to
|
||||
* the left-strand view.)
|
||||
*
|
||||
* @note Used only by the @ref op_or monoid to implement the monoid
|
||||
* reduce operation.
|
||||
*/
|
||||
void reduce(op_or_view* right) { this->m_value |= right->m_value; }
|
||||
|
||||
/** @name Accumulator variable updates.
|
||||
*
|
||||
* These functions support the various syntaxes for “oring” the
|
||||
* accumulator variable contained in the view with some value.
|
||||
*/
|
||||
//@{
|
||||
|
||||
/** Or the accumulator variable with @a x.
|
||||
*/
|
||||
op_or_view& operator|=(const Type& x) { this->m_value |= x; return *this; }
|
||||
|
||||
/** Create an object representing `*this | x`.
|
||||
*
|
||||
* @see rhs_proxy
|
||||
*/
|
||||
rhs_proxy operator|(const Type& x) const { return rhs_proxy(this, x); }
|
||||
|
||||
/** Assign the result of a `view | value` expression to the view. Note that
|
||||
* this is the only assignment operator for this class.
|
||||
*
|
||||
* @see rhs_proxy
|
||||
*/
|
||||
op_or_view& operator=(const rhs_proxy& rhs) {
|
||||
__CILKRTS_ASSERT(this == rhs.m_view);
|
||||
this->m_value |= rhs.m_value;
|
||||
return *this;
|
||||
}
|
||||
|
||||
//@}
|
||||
};
|
||||
|
||||
/** Monoid class for bitwise or reductions. Instantiate the cilk::reducer
|
||||
* template class with an op_or monoid to create a bitwise or reducer
|
||||
* class. For example, to compute the bitwise or of a set of `unsigned long`
|
||||
* values:
|
||||
*
|
||||
* cilk::reducer< cilk::op_or<unsigned long> > r;
|
||||
*
|
||||
* @tparam Type The reducer value type.
|
||||
* @tparam Align If `false` (the default), reducers instantiated on this
|
||||
* monoid will be naturally aligned (the Cilk library 1.0
|
||||
* behavior). If `true`, reducers instantiated on this monoid
|
||||
* will be cache-aligned for binary compatibility with
|
||||
* reducers in Cilk library version 0.9.
|
||||
*
|
||||
* @see ReducersOr
|
||||
* @see op_or_view
|
||||
*
|
||||
* @ingroup ReducersOr
|
||||
*/
|
||||
template <typename Type, bool Align = false>
|
||||
struct op_or : public monoid_with_view<op_or_view<Type>, Align> {};
|
||||
|
||||
/** Deprecated bitwise or reducer class.
|
||||
*
|
||||
* reducer_opor is the same as @ref reducer<@ref op_or>, except that
|
||||
* reducer_opor is a proxy for the contained view, so that accumulator
|
||||
* variable update operations can be applied directly to the reducer. For
|
||||
* example, a value is ored with a `reducer<%op_or>` with `*r |= a`, but a
|
||||
* value can be ored with a `%reducer_opor` with `r |= a`.
|
||||
*
|
||||
* @deprecated Users are strongly encouraged to use `reducer<monoid>`
|
||||
* reducers rather than the old wrappers like reducer_opor.
|
||||
* The `reducer<monoid>` reducers show the reducer/monoid/view
|
||||
* architecture more clearly, are more consistent in their
|
||||
* implementation, and present a simpler model for new
|
||||
* user-implemented reducers.
|
||||
*
|
||||
* @note Implicit conversions are provided between `%reducer_opor`
|
||||
* and `reducer<%op_or>`. This allows incremental code
|
||||
* conversion: old code that used `%reducer_opor` can pass a
|
||||
* `%reducer_opor` to a converted function that now expects a
|
||||
* pointer or reference to a `reducer<%op_or>`, and vice
|
||||
* versa.
|
||||
*
|
||||
* @tparam Type The value type of the reducer.
|
||||
*
|
||||
* @see op_or
|
||||
* @see reducer
|
||||
* @see ReducersOr
|
||||
*
|
||||
* @ingroup ReducersOr
|
||||
*/
|
||||
template <typename Type>
|
||||
class reducer_opor : public reducer< op_or<Type, true> >
|
||||
{
|
||||
typedef reducer< op_or<Type, true> > base;
|
||||
using base::view;
|
||||
|
||||
public:
|
||||
/// The view type for the reducer.
|
||||
typedef typename base::view_type view_type;
|
||||
|
||||
/// The view’s rhs proxy type.
|
||||
typedef typename view_type::rhs_proxy rhs_proxy;
|
||||
|
||||
/// The view type for the reducer.
|
||||
typedef view_type View;
|
||||
|
||||
/// The monoid type for the reducer.
|
||||
typedef typename base::monoid_type Monoid;
|
||||
|
||||
/** @name Constructors
|
||||
*/
|
||||
//@{
|
||||
|
||||
/** Default (identity) constructor.
|
||||
*
|
||||
* Constructs the wrapper with the default initial value of `Type()`.
|
||||
*/
|
||||
reducer_opor() {}
|
||||
|
||||
/** Value constructor.
|
||||
*
|
||||
* Constructs the wrapper with a specified initial value.
|
||||
*/
|
||||
explicit reducer_opor(const Type& initial_value) : base(initial_value) {}
|
||||
|
||||
//@}
|
||||
|
||||
/** @name Forwarded functions
|
||||
* @details Functions that update the contained accumulator variable are
|
||||
* simply forwarded to the contained @ref op_or_view. */
|
||||
//@{
|
||||
|
||||
/// @copydoc op_or_view::operator|=(const Type&)
|
||||
reducer_opor& operator|=(const Type& x)
|
||||
{
|
||||
view() |= x; return *this;
|
||||
}
|
||||
|
||||
// The legacy definition of reducer_opor::operator|() has different
|
||||
// behavior and a different return type than this definition. The legacy
|
||||
// version is defined as a member function, so this new version is defined
|
||||
// as a free function to give it a different signature, so that they won’t
|
||||
// end up sharing a single object file entry.
|
||||
|
||||
/// @copydoc op_or_view::operator|(const Type&) const
|
||||
friend rhs_proxy operator|(const reducer_opor& r, const Type& x)
|
||||
{
|
||||
return r.view() | x;
|
||||
}
|
||||
|
||||
/// @copydoc op_or_view::operator=(const rhs_proxy&)
|
||||
reducer_opor& operator=(const rhs_proxy& temp)
|
||||
{
|
||||
view() = temp; return *this;
|
||||
}
|
||||
//@}
|
||||
|
||||
/** @name Dereference
|
||||
* @details Dereferencing a wrapper is a no-op. It simply returns the
|
||||
* wrapper. Combined with the rule that the wrapper forwards view
|
||||
* operations to its contained view, this means that view operations can
|
||||
* be written the same way on reducers and wrappers, which is convenient
|
||||
* for incrementally converting old code using wrappers to use reducers
|
||||
* instead. That is:
|
||||
*
|
||||
* reducer< op_or<int> > r;
|
||||
* *r |= a; // *r returns the view
|
||||
* // operator |= is a view member function
|
||||
*
|
||||
* reducer_opor<int> w;
|
||||
* *w |= a; // *w returns the wrapper
|
||||
* // operator |= is a wrapper member function that
|
||||
* // calls the corresponding view function
|
||||
*/
|
||||
//@{
|
||||
reducer_opor& operator*() { return *this; }
|
||||
reducer_opor const& operator*() const { return *this; }
|
||||
|
||||
reducer_opor* operator->() { return this; }
|
||||
reducer_opor const* operator->() const { return this; }
|
||||
//@}
|
||||
|
||||
/** @name Upcast
|
||||
* @details In Cilk library 0.9, reducers were always cache-aligned. In
|
||||
* library 1.0, reducer cache alignment is optional. By default, reducers
|
||||
* are unaligned (i.e., just naturally aligned), but legacy wrappers
|
||||
* inherit from cache-aligned reducers for binary compatibility.
|
||||
*
|
||||
* This means that a wrapper will automatically be upcast to its aligned
|
||||
* reducer base class. The following conversion operators provide
|
||||
* pseudo-upcasts to the corresponding unaligned reducer class.
|
||||
*/
|
||||
//@{
|
||||
operator reducer< op_or<Type, false> >& ()
|
||||
{
|
||||
return *reinterpret_cast< reducer< op_or<Type, false> >* >(this);
|
||||
}
|
||||
operator const reducer< op_or<Type, false> >& () const
|
||||
{
|
||||
return *reinterpret_cast< const reducer< op_or<Type, false> >* >(this);
|
||||
}
|
||||
//@}
|
||||
|
||||
};
|
||||
|
||||
/// @cond internal
|
||||
/** Metafunction specialization for reducer conversion.
|
||||
*
|
||||
* This specialization of the @ref legacy_reducer_downcast template class
|
||||
* defined in reducer.h causes the `reducer< op_or<Type> >` class to have an
|
||||
* `operator reducer_opor<Type>& ()` conversion operator that statically
|
||||
* downcasts the `reducer<op_or>` to the corresponding `reducer_opor` type.
|
||||
* (The reverse conversion, from `reducer_opor` to `reducer<op_or>`, is just
|
||||
* an upcast, which is provided for free by the language.)
|
||||
*
|
||||
* @ingroup ReducersOr
|
||||
*/
|
||||
template <typename Type, bool Align>
|
||||
struct legacy_reducer_downcast<reducer<op_or<Type, Align> > >
|
||||
{
|
||||
typedef reducer_opor<Type> type;
|
||||
};
|
||||
/// @endcond
|
||||
|
||||
} // namespace cilk
|
||||
|
||||
#endif /* __cplusplus */
|
||||
|
||||
|
||||
/** @ingroup ReducersOr
|
||||
*/
|
||||
//@{
|
||||
|
||||
/** @name C language reducer macros
|
||||
*
|
||||
* These macros are used to declare and work with op_or reducers in C code.
|
||||
*
|
||||
* @see @ref page_reducers_in_c
|
||||
*/
|
||||
//@{
|
||||
|
||||
__CILKRTS_BEGIN_EXTERN_C
|
||||
|
||||
/** Opor reducer type name.
|
||||
*
|
||||
* This macro expands into the identifier which is the name of the op_or
|
||||
* reducer type for a specified numeric type.
|
||||
*
|
||||
* @param tn The @ref reducers_c_type_names "numeric type name" specifying
|
||||
* the type of the reducer.
|
||||
*
|
||||
* @see @ref reducers_c_predefined
|
||||
* @see ReducersOr
|
||||
*/
|
||||
#define CILK_C_REDUCER_OPOR_TYPE(tn) \
|
||||
__CILKRTS_MKIDENT(cilk_c_reducer_opor_,tn)
|
||||
|
||||
/** Declare an op_or reducer object.
|
||||
*
|
||||
* This macro expands into a declaration of an op_or reducer object for a
|
||||
* specified numeric type. For example:
|
||||
*
|
||||
* CILK_C_REDUCER_OPOR(my_reducer, ulong, 0);
|
||||
*
|
||||
* @param obj The variable name to be used for the declared reducer object.
|
||||
* @param tn The @ref reducers_c_type_names "numeric type name" specifying
|
||||
* the type of the reducer.
|
||||
* @param v The initial value for the reducer. (A value which can be
|
||||
* assigned to the numeric type represented by @a tn.)
|
||||
*
|
||||
* @see @ref reducers_c_predefined
|
||||
* @see ReducersOr
|
||||
*/
|
||||
#define CILK_C_REDUCER_OPOR(obj,tn,v) \
|
||||
CILK_C_REDUCER_OPOR_TYPE(tn) obj = \
|
||||
CILK_C_INIT_REDUCER(_Typeof(obj.value), \
|
||||
__CILKRTS_MKIDENT(cilk_c_reducer_opor_reduce_,tn), \
|
||||
__CILKRTS_MKIDENT(cilk_c_reducer_opor_identity_,tn), \
|
||||
__cilkrts_hyperobject_noop_destroy, v)
|
||||
|
||||
/// @cond internal
|
||||
|
||||
/** Declare the op_or reducer functions for a numeric type.
|
||||
*
|
||||
* This macro expands into external function declarations for functions which
|
||||
* implement the reducer functionality for the op_or reducer type for a
|
||||
* specified numeric type.
|
||||
*
|
||||
* @param t The value type of the reducer.
|
||||
* @param tn The value “type name” identifier, used to construct the reducer
|
||||
* type name, function names, etc.
|
||||
*/
|
||||
#define CILK_C_REDUCER_OPOR_DECLARATION(t,tn) \
|
||||
typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_OPOR_TYPE(tn); \
|
||||
__CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_opor,tn,l,r); \
|
||||
__CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_opor,tn);
|
||||
|
||||
/** Define the op_or reducer functions for a numeric type.
|
||||
*
|
||||
* This macro expands into function definitions for functions which implement
|
||||
* the reducer functionality for the op_or reducer type for a specified
|
||||
* numeric type.
|
||||
*
|
||||
* @param t The value type of the reducer.
|
||||
* @param tn The value “type name” identifier, used to construct the reducer
|
||||
* type name, function names, etc.
|
||||
*/
|
||||
#define CILK_C_REDUCER_OPOR_DEFINITION(t,tn) \
|
||||
typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_OPOR_TYPE(tn); \
|
||||
__CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_opor,tn,l,r) \
|
||||
{ *(t*)l |= *(t*)r; } \
|
||||
__CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_opor,tn) \
|
||||
{ *(t*)v = 0; }
|
||||
|
||||
//@{
|
||||
/** @def CILK_C_REDUCER_OPOR_INSTANCE
|
||||
* @brief Declare or define implementation functions for a reducer type.
|
||||
*
|
||||
* In the runtime source file c_reducers.c, the macro `CILK_C_DEFINE_REDUCERS`
|
||||
* will be defined, and this macro will generate reducer implementation
|
||||
* functions. Everywhere else, `CILK_C_DEFINE_REDUCERS` will be undefined, and
|
||||
* this macro will expand into external declarations for the functions.
|
||||
*/
|
||||
#ifdef CILK_C_DEFINE_REDUCERS
|
||||
# define CILK_C_REDUCER_OPOR_INSTANCE(t,tn) \
|
||||
CILK_C_REDUCER_OPOR_DEFINITION(t,tn)
|
||||
#else
|
||||
# define CILK_C_REDUCER_OPOR_INSTANCE(t,tn) \
|
||||
CILK_C_REDUCER_OPOR_DECLARATION(t,tn)
|
||||
#endif
|
||||
//@}
|
||||
|
||||
/* Declare or define an instance of the reducer type and its functions for each
|
||||
* numeric type.
|
||||
*/
|
||||
CILK_C_REDUCER_OPOR_INSTANCE(char, char)
|
||||
CILK_C_REDUCER_OPOR_INSTANCE(unsigned char, uchar)
|
||||
CILK_C_REDUCER_OPOR_INSTANCE(signed char, schar)
|
||||
CILK_C_REDUCER_OPOR_INSTANCE(wchar_t, wchar_t)
|
||||
CILK_C_REDUCER_OPOR_INSTANCE(short, short)
|
||||
CILK_C_REDUCER_OPOR_INSTANCE(unsigned short, ushort)
|
||||
CILK_C_REDUCER_OPOR_INSTANCE(int, int)
|
||||
CILK_C_REDUCER_OPOR_INSTANCE(unsigned int, uint)
|
||||
CILK_C_REDUCER_OPOR_INSTANCE(unsigned int, unsigned) /* alternate name */
|
||||
CILK_C_REDUCER_OPOR_INSTANCE(long, long)
|
||||
CILK_C_REDUCER_OPOR_INSTANCE(unsigned long, ulong)
|
||||
CILK_C_REDUCER_OPOR_INSTANCE(long long, longlong)
|
||||
CILK_C_REDUCER_OPOR_INSTANCE(unsigned long long, ulonglong)
|
||||
|
||||
/// @endcond
|
||||
|
||||
__CILKRTS_END_EXTERN_C
|
||||
|
||||
//@}
|
||||
|
||||
//@}
|
||||
|
||||
#endif /* REDUCER_OPOR_H_INCLUDED */
|
598
libcilkrts/include/cilk/reducer_opxor.h
Normal file
598
libcilkrts/include/cilk/reducer_opxor.h
Normal file
|
@ -0,0 +1,598 @@
|
|||
/* reducer_opxor.h -*- C++ -*-
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2009-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** @file reducer_opxor.h
|
||||
*
|
||||
* @brief Defines classes for doing parallel bitwise xor reductions.
|
||||
*
|
||||
* @ingroup ReducersXor
|
||||
*
|
||||
* @see ReducersXor
|
||||
*/
|
||||
|
||||
#ifndef REDUCER_OPXOR_H_INCLUDED
|
||||
#define REDUCER_OPXOR_H_INCLUDED
|
||||
|
||||
#include <cilk/reducer.h>
|
||||
|
||||
/** @defgroup ReducersXor Bitwise Xor Reducers
|
||||
*
|
||||
* Bitwise xor reducers allow the computation of the bitwise xor of a set of
|
||||
* values in parallel.
|
||||
*
|
||||
* @ingroup Reducers
|
||||
*
|
||||
* You should be familiar with @ref pagereducers "Cilk reducers", described in
|
||||
* file `reducers.md`, and particularly with @ref reducers_using, before trying
|
||||
* to use the information in this file.
|
||||
*
|
||||
* @section redopxor_usage Usage Example
|
||||
*
|
||||
* cilk::reducer< cilk::op_xor<unsigned> > r;
|
||||
* cilk_for (int i = 0; i != N; ++i) {
|
||||
* *r ^= a[i];
|
||||
* }
|
||||
* unsigned result;
|
||||
* r.move_out(result);
|
||||
*
|
||||
* @section redopxor_monoid The Monoid
|
||||
*
|
||||
* @subsection redopxor_monoid_values Value Set
|
||||
*
|
||||
* The value set of a bitwise xor reducer is the set of values of `Type`, which
|
||||
* is expected to be a builtin integer type which has a representation as a
|
||||
* sequence of bits (or something like it, such as `bool` or `std::bitset`).
|
||||
*
|
||||
* @subsection redopxor_monoid_operator Operator
|
||||
*
|
||||
* The operator of a bitwise xor reducer is the bitwise xor operator, defined
|
||||
* by the “`^`” binary operator on `Type`.
|
||||
*
|
||||
* @subsection redopxor_monoid_identity Identity
|
||||
*
|
||||
* The identity value of the reducer is the value whose representation
|
||||
* contains all 0-bits. This is expected to be the value of the default
|
||||
* constructor `Type()`.
|
||||
*
|
||||
* @section redopxor_operations Operations
|
||||
*
|
||||
* @subsection redopxor_constructors Constructors
|
||||
*
|
||||
* reducer() // identity
|
||||
* reducer(const Type& value)
|
||||
* reducer(move_in(Type& variable))
|
||||
*
|
||||
* @subsection redopxor_get_set Set and Get
|
||||
*
|
||||
* r.set_value(const Type& value)
|
||||
* const Type& = r.get_value() const
|
||||
* r.move_in(Type& variable)
|
||||
* r.move_out(Type& variable)
|
||||
*
|
||||
* @subsection redopxor_initial Initial Values
|
||||
*
|
||||
* If a bitwise xor reducer is constructed without an explicit initial value,
|
||||
* then its initial value will be its identity value, as long as `Type`
|
||||
* satisfies the requirements of @ref redopxor_types.
|
||||
*
|
||||
* @subsection redopxor_view_ops View Operations
|
||||
*
|
||||
* *r ^= a
|
||||
* *r = *r ^ a
|
||||
* *r = *r ^ a1 ^ a2 … ^ an
|
||||
*
|
||||
* @section redopxor_types Type and Operator Requirements
|
||||
*
|
||||
* `Type` must be `Copy Constructible`, `Default Constructible`, and
|
||||
* `Assignable`.
|
||||
*
|
||||
* The operator “`^=`” must be defined on `Type`, with `x ^= a` having the
|
||||
* same meaning as `x = x ^ a`.
|
||||
*
|
||||
* The expression `Type()` must be a valid expression which yields the
|
||||
* identity value (the value of `Type` whose representation consists of all
|
||||
* 0-bits).
|
||||
*
|
||||
* @section redopxor_in_c Bitwise Xor Reducers in C
|
||||
*
|
||||
* The @ref CILK_C_REDUCER_OPXOR and @ref CILK_C_REDUCER_OPXOR_TYPE macros can
|
||||
* be used to do bitwise xor reductions in C. For example:
|
||||
*
|
||||
* CILK_C_REDUCER_OPXOR(r, uint, 0);
|
||||
* CILK_C_REGISTER_REDUCER(r);
|
||||
* cilk_for(int i = 0; i != n; ++i) {
|
||||
* REDUCER_VIEW(r) ^= a[i];
|
||||
* }
|
||||
* CILK_C_UNREGISTER_REDUCER(r);
|
||||
* printf("The bitwise XOR of the elements of a is %x\n", REDUCER_VIEW(r));
|
||||
*
|
||||
* See @ref reducers_c_predefined.
|
||||
*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
namespace cilk {
|
||||
|
||||
/** The bitwise xor reducer view class.
|
||||
*
|
||||
* This is the view class for reducers created with
|
||||
* `cilk::reducer< cilk::op_xor<Type> >`. It holds the accumulator variable
|
||||
* for the reduction, and allows only `xor` operations to be performed on it.
|
||||
*
|
||||
* @note The reducer “dereference” operation (`reducer::operator *()`)
|
||||
* yields a reference to the view. Thus, for example, the view class’s
|
||||
* `^=` operation would be used in an expression like `*r ^= a`, where
|
||||
* `r` is an op_xor reducer variable.
|
||||
*
|
||||
* @tparam Type The type of the contained accumulator variable. This will
|
||||
* be the value type of a monoid_with_view that is
|
||||
* instantiated with this view.
|
||||
*
|
||||
* @see ReducersXor
|
||||
* @see op_xor
|
||||
*
|
||||
* @ingroup ReducersXor
|
||||
*/
|
||||
template <typename Type>
|
||||
class op_xor_view : public scalar_view<Type>
|
||||
{
|
||||
typedef scalar_view<Type> base;
|
||||
|
||||
public:
|
||||
/** Class to represent the right-hand side of `*reducer = *reducer ^ value`.
|
||||
*
|
||||
* The only assignment operator for the op_xor_view class takes an
|
||||
* rhs_proxy as its operand. This results in the syntactic restriction
|
||||
* that the only expressions that can be assigned to an op_xor_view are
|
||||
* ones which generate an rhs_proxy — that is, expressions of the form
|
||||
* `op_xor_view ^ value ... ^ value`.
|
||||
*
|
||||
* @warning
|
||||
* The lhs and rhs views in such an assignment must be the same;
|
||||
* otherwise, the behavior will be undefined. (I.e., `v1 = v1 ^ x` is
|
||||
* legal; `v1 = v2 ^ x` is illegal.) This condition will be checked with
|
||||
* a runtime assertion when compiled in debug mode.
|
||||
*
|
||||
* @see op_xor_view
|
||||
*/
|
||||
class rhs_proxy {
|
||||
friend class op_xor_view;
|
||||
|
||||
const op_xor_view* m_view;
|
||||
Type m_value;
|
||||
|
||||
// Constructor is invoked only from op_xor_view::operator^().
|
||||
//
|
||||
rhs_proxy(const op_xor_view* view, const Type& value) : m_view(view), m_value(value) {}
|
||||
|
||||
rhs_proxy& operator=(const rhs_proxy&); // Disable assignment operator
|
||||
rhs_proxy(); // Disable default constructor
|
||||
|
||||
public:
|
||||
/** Bitwise xor with an additional rhs value. If `v` is an op_xor_view
|
||||
* and `a1` is a value, then the expression `v ^ a1` invokes the
|
||||
* view’s `operator^()` to create an rhs_proxy for `(v, a1)`; then
|
||||
* `v ^ a1 ^ a2` invokes the rhs_proxy’s `operator^()` to create a new
|
||||
* rhs_proxy for `(v, a1^a2)`. This allows the right-hand side of an
|
||||
* assignment to be not just `view ^ value`, but
|
||||
* `view ^ value ^ value ... ^ value`. The effect is that
|
||||
*
|
||||
* v = v ^ a1 ^ a2 ... ^ an;
|
||||
*
|
||||
* is evaluated as
|
||||
*
|
||||
* v = v ^ (a1 ^ a2 ... ^ an);
|
||||
*/
|
||||
rhs_proxy& operator^(const Type& x) { m_value ^= x; return *this; }
|
||||
};
|
||||
|
||||
|
||||
/** Default/identity constructor. This constructor initializes the
|
||||
* contained value to `Type()`.
|
||||
*/
|
||||
op_xor_view() : base() {}
|
||||
|
||||
/** Construct with a specified initial value.
|
||||
*/
|
||||
explicit op_xor_view(const Type& v) : base(v) {}
|
||||
|
||||
/** Reduction operation.
|
||||
*
|
||||
* This function is invoked by the @ref op_xor monoid to combine the views
|
||||
* of two strands when the right strand merges with the left one. It
|
||||
* “xors” the value contained in the left-strand view with the value
|
||||
* contained in the right-strand view, and leaves the value in the
|
||||
* right-strand view undefined.
|
||||
*
|
||||
* @param right A pointer to the right-strand view. (`this` points to
|
||||
* the left-strand view.)
|
||||
*
|
||||
* @note Used only by the @ref op_xor monoid to implement the monoid
|
||||
* reduce operation.
|
||||
*/
|
||||
void reduce(op_xor_view* right) { this->m_value ^= right->m_value; }
|
||||
|
||||
/** @name Accumulator variable updates.
|
||||
*
|
||||
* These functions support the various syntaxes for “xoring” the
|
||||
* accumulator variable contained in the view with some value.
|
||||
*/
|
||||
//@{
|
||||
|
||||
/** Xor the accumulator variable with @a x.
|
||||
*/
|
||||
op_xor_view& operator^=(const Type& x) { this->m_value ^= x; return *this; }
|
||||
|
||||
/** Create an object representing `*this ^ x`.
|
||||
*
|
||||
* @see rhs_proxy
|
||||
*/
|
||||
rhs_proxy operator^(const Type& x) const { return rhs_proxy(this, x); }
|
||||
|
||||
/** Assign the result of a `view ^ value` expression to the view. Note that
|
||||
* this is the only assignment operator for this class.
|
||||
*
|
||||
* @see rhs_proxy
|
||||
*/
|
||||
op_xor_view& operator=(const rhs_proxy& rhs) {
|
||||
__CILKRTS_ASSERT(this == rhs.m_view);
|
||||
this->m_value ^= rhs.m_value;
|
||||
return *this;
|
||||
}
|
||||
|
||||
//@}
|
||||
};
|
||||
|
||||
/** Monoid class for bitwise xor reductions. Instantiate the cilk::reducer
|
||||
* template class with an op_xor monoid to create a bitwise xor reducer
|
||||
* class. For example, to compute the bitwise xor of a set of `unsigned long`
|
||||
* values:
|
||||
*
|
||||
* cilk::reducer< cilk::op_xor<unsigned long> > r;
|
||||
*
|
||||
* @tparam Type The reducer value type.
|
||||
* @tparam Align If `false` (the default), reducers instantiated on this
|
||||
* monoid will be naturally aligned (the Cilk library 1.0
|
||||
* behavior). If `true`, reducers instantiated on this monoid
|
||||
* will be cache-aligned for binary compatibility with
|
||||
* reducers in Cilk library version 0.9.
|
||||
*
|
||||
* @see ReducersXor
|
||||
* @see op_xor_view
|
||||
*
|
||||
* @ingroup ReducersXor
|
||||
*/
|
||||
template <typename Type, bool Align = false>
|
||||
struct op_xor : public monoid_with_view<op_xor_view<Type>, Align> {};
|
||||
|
||||
/** Deprecated bitwise xor reducer class.
|
||||
*
|
||||
* reducer_opxor is the same as @ref reducer<@ref op_xor>, except that
|
||||
* reducer_opxor is a proxy for the contained view, so that accumulator
|
||||
* variable update operations can be applied directly to the reducer. For
|
||||
* example, a value is xored with a `reducer<%op_xor>` with `*r ^= a`, but a
|
||||
* value can be xored with a `%reducer_opxor` with `r ^= a`.
|
||||
*
|
||||
* @deprecated Users are strongly encouraged to use `reducer<monoid>`
|
||||
* reducers rather than the old wrappers like reducer_opxor.
|
||||
* The `reducer<monoid>` reducers show the reducer/monoid/view
|
||||
* architecture more clearly, are more consistent in their
|
||||
* implementation, and present a simpler model for new
|
||||
* user-implemented reducers.
|
||||
*
|
||||
* @note Implicit conversions are provided between `%reducer_opxor`
|
||||
* and `reducer<%op_xor>`. This allows incremental code
|
||||
* conversion: old code that used `%reducer_opxor` can pass a
|
||||
* `%reducer_opxor` to a converted function that now expects a
|
||||
* pointer or reference to a `reducer<%op_xor>`, and vice
|
||||
* versa.
|
||||
*
|
||||
* @tparam Type The value type of the reducer.
|
||||
*
|
||||
* @see op_xor
|
||||
* @see reducer
|
||||
* @see ReducersXor
|
||||
*
|
||||
* @ingroup ReducersXor
|
||||
*/
|
||||
template <typename Type>
|
||||
class reducer_opxor : public reducer< op_xor<Type, true> >
|
||||
{
|
||||
typedef reducer< op_xor<Type, true> > base;
|
||||
using base::view;
|
||||
|
||||
public:
|
||||
/// The view type for the reducer.
|
||||
typedef typename base::view_type view_type;
|
||||
|
||||
/// The view’s rhs proxy type.
|
||||
typedef typename view_type::rhs_proxy rhs_proxy;
|
||||
|
||||
/// The view type for the reducer.
|
||||
typedef view_type View;
|
||||
|
||||
/// The monoid type for the reducer.
|
||||
typedef typename base::monoid_type Monoid;
|
||||
|
||||
/** @name Constructors
|
||||
*/
|
||||
//@{
|
||||
|
||||
/** Default (identity) constructor.
|
||||
*
|
||||
* Constructs the wrapper with the default initial value of `Type()`.
|
||||
*/
|
||||
reducer_opxor() {}
|
||||
|
||||
/** Value constructor.
|
||||
*
|
||||
* Constructs the wrapper with a specified initial value.
|
||||
*/
|
||||
explicit reducer_opxor(const Type& initial_value) : base(initial_value) {}
|
||||
|
||||
//@}
|
||||
|
||||
/** @name Forwarded functions
|
||||
* @details Functions that update the contained accumulator variable are
|
||||
* simply forwarded to the contained @ref op_xor_view. */
|
||||
//@{
|
||||
|
||||
/// @copydoc op_xor_view::operator^=(const Type&)
|
||||
reducer_opxor& operator^=(const Type& x)
|
||||
{
|
||||
view() ^= x; return *this;
|
||||
}
|
||||
|
||||
// The legacy definition of reducer_opxor::operator^() has different
|
||||
// behavior and a different return type than this definition. The legacy
|
||||
// version is defined as a member function, so this new version is defined
|
||||
// as a free function to give it a different signature, so that they won’t
|
||||
// end up sharing a single object file entry.
|
||||
|
||||
/// @copydoc op_xor_view::operator^(const Type&) const
|
||||
friend rhs_proxy operator^(const reducer_opxor& r, const Type& x)
|
||||
{
|
||||
return r.view() ^ x;
|
||||
}
|
||||
|
||||
/// @copydoc op_xor_view::operator=(const rhs_proxy&)
|
||||
reducer_opxor& operator=(const rhs_proxy& temp)
|
||||
{
|
||||
view() = temp; return *this;
|
||||
}
|
||||
//@}
|
||||
|
||||
/** @name Dereference
|
||||
* @details Dereferencing a wrapper is a no-op. It simply returns the
|
||||
* wrapper. Combined with the rule that the wrapper forwards view
|
||||
* operations to its contained view, this means that view operations can
|
||||
* be written the same way on reducers and wrappers, which is convenient
|
||||
* for incrementally converting old code using wrappers to use reducers
|
||||
* instead. That is:
|
||||
*
|
||||
* reducer< op_xor<int> > r;
|
||||
* *r ^= a; // *r returns the view
|
||||
* // operator ^= is a view member function
|
||||
*
|
||||
* reducer_opxor<int> w;
|
||||
* *w ^= a; // *w returns the wrapper
|
||||
* // operator ^= is a wrapper member function that
|
||||
* // calls the corresponding view function
|
||||
*/
|
||||
//@{
|
||||
reducer_opxor& operator*() { return *this; }
|
||||
reducer_opxor const& operator*() const { return *this; }
|
||||
|
||||
reducer_opxor* operator->() { return this; }
|
||||
reducer_opxor const* operator->() const { return this; }
|
||||
//@}
|
||||
|
||||
/** @name Upcast
|
||||
* @details In Cilk library 0.9, reducers were always cache-aligned. In
|
||||
* library 1.0, reducer cache alignment is optional. By default, reducers
|
||||
* are unaligned (i.e., just naturally aligned), but legacy wrappers
|
||||
* inherit from cache-aligned reducers for binary compatibility.
|
||||
*
|
||||
* This means that a wrapper will automatically be upcast to its aligned
|
||||
* reducer base class. The following conversion operators provide
|
||||
* pseudo-upcasts to the corresponding unaligned reducer class.
|
||||
*/
|
||||
//@{
|
||||
operator reducer< op_xor<Type, false> >& ()
|
||||
{
|
||||
return *reinterpret_cast< reducer< op_xor<Type, false> >* >(this);
|
||||
}
|
||||
operator const reducer< op_xor<Type, false> >& () const
|
||||
{
|
||||
return *reinterpret_cast< const reducer< op_xor<Type, false> >* >(this);
|
||||
}
|
||||
//@}
|
||||
|
||||
};
|
||||
|
||||
/// @cond internal
|
||||
/** Metafunction specialization for reducer conversion.
|
||||
*
|
||||
* This specialization of the @ref legacy_reducer_downcast template class
|
||||
* defined in reducer.h causes the `reducer< op_xor<Type> >` class to have an
|
||||
* `operator reducer_opxor<Type>& ()` conversion operator that statically
|
||||
* downcasts the `reducer<op_xor>` to the corresponding `reducer_opxor` type.
|
||||
* (The reverse conversion, from `reducer_opxor` to `reducer<op_xor>`, is just
|
||||
* an upcast, which is provided for free by the language.)
|
||||
*
|
||||
* @ingroup ReducersXor
|
||||
*/
|
||||
template <typename Type, bool Align>
|
||||
struct legacy_reducer_downcast<reducer<op_xor<Type, Align> > >
|
||||
{
|
||||
typedef reducer_opxor<Type> type;
|
||||
};
|
||||
/// @endcond
|
||||
|
||||
} // namespace cilk
|
||||
|
||||
#endif /* __cplusplus */
|
||||
|
||||
|
||||
/** @ingroup ReducersXor
|
||||
*/
|
||||
//@{
|
||||
|
||||
/** @name C language reducer macros
|
||||
*
|
||||
* These macros are used to declare and work with op_xor reducers in C code.
|
||||
*
|
||||
* @see @ref page_reducers_in_c
|
||||
*/
|
||||
//@{
|
||||
|
||||
__CILKRTS_BEGIN_EXTERN_C
|
||||
|
||||
/** Opxor reducer type name.
|
||||
*
|
||||
* This macro expands into the identifier which is the name of the op_xor
|
||||
* reducer type for a specified numeric type.
|
||||
*
|
||||
* @param tn The @ref reducers_c_type_names "numeric type name" specifying
|
||||
* the type of the reducer.
|
||||
*
|
||||
* @see @ref reducers_c_predefined
|
||||
* @see ReducersXor
|
||||
*/
|
||||
#define CILK_C_REDUCER_OPXOR_TYPE(tn) \
|
||||
__CILKRTS_MKIDENT(cilk_c_reducer_opxor_,tn)
|
||||
|
||||
/** Declare an op_xor reducer object.
|
||||
*
|
||||
* This macro expands into a declaration of an op_xor reducer object for a
|
||||
* specified numeric type. For example:
|
||||
*
|
||||
* CILK_C_REDUCER_OPXOR(my_reducer, ulong, 0);
|
||||
*
|
||||
* @param obj The variable name to be used for the declared reducer object.
|
||||
* @param tn The @ref reducers_c_type_names "numeric type name" specifying
|
||||
* the type of the reducer.
|
||||
* @param v The initial value for the reducer. (A value which can be
|
||||
* assigned to the numeric type represented by @a tn.)
|
||||
*
|
||||
* @see @ref reducers_c_predefined
|
||||
* @see ReducersXor
|
||||
*/
|
||||
#define CILK_C_REDUCER_OPXOR(obj,tn,v)                                        \
    CILK_C_REDUCER_OPXOR_TYPE(tn) obj =                                       \
        /* obj is named inside its own initializer only so that the type */   \
        /* of its value member can be obtained (presumably unevaluated;  */   \
        /* confirm against the definition of _Typeof).                   */   \
        CILK_C_INIT_REDUCER(_Typeof(obj.value),                               \
                        __CILKRTS_MKIDENT(cilk_c_reducer_opxor_reduce_,tn),   \
                        __CILKRTS_MKIDENT(cilk_c_reducer_opxor_identity_,tn), \
                        __cilkrts_hyperobject_noop_destroy, v)
|
||||
|
||||
/// @cond internal
|
||||
|
||||
/** Declare the op_xor reducer functions for a numeric type.
|
||||
*
|
||||
* This macro expands into external function declarations for functions which
|
||||
* implement the reducer functionality for the op_xor reducer type for a
|
||||
* specified numeric type.
|
||||
*
|
||||
* @param t The value type of the reducer.
|
||||
* @param tn The value “type name” identifier, used to construct the reducer
|
||||
* type name, function names, etc.
|
||||
*/
|
||||
#define CILK_C_REDUCER_OPXOR_DECLARATION(t,tn)                             \
    /* Reducer type for value type t, named from the tn identifier. */     \
    typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_OPXOR_TYPE(tn);       \
    /* Prototypes only: each function declarator is closed with ';'. */    \
    __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_opxor,tn,l,r);         \
    __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_opxor,tn);
|
||||
|
||||
/** Define the op_xor reducer functions for a numeric type.
|
||||
*
|
||||
* This macro expands into function definitions for functions which implement
|
||||
* the reducer functionality for the op_xor reducer type for a specified
|
||||
* numeric type.
|
||||
*
|
||||
* @param t The value type of the reducer.
|
||||
* @param tn The value “type name” identifier, used to construct the reducer
|
||||
* type name, function names, etc.
|
||||
*/
|
||||
#define CILK_C_REDUCER_OPXOR_DEFINITION(t,tn)                              \
    typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_OPXOR_TYPE(tn);       \
    __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_opxor,tn,l,r)          \
        /* Reduce: XOR the value at r into the value at l. */              \
        { *(t*)l ^= *(t*)r; }                                              \
    __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_opxor,tn)            \
        /* Identity: store 0.  v is not a macro argument here, so it    */ \
        /* is presumably the parameter name supplied by                 */ \
        /* __CILKRTS_DECLARE_REDUCER_IDENTITY -- confirm.               */ \
        { *(t*)v = 0; }
|
||||
|
||||
//@{
|
||||
/** @def CILK_C_REDUCER_OPXOR_INSTANCE
|
||||
* @brief Declare or define implementation functions for a reducer type.
|
||||
*
|
||||
* In the runtime source file c_reducers.c, the macro `CILK_C_DEFINE_REDUCERS`
|
||||
* will be defined, and this macro will generate reducer implementation
|
||||
* functions. Everywhere else, `CILK_C_DEFINE_REDUCERS` will be undefined, and
|
||||
* this macro will expand into external declarations for the functions.
|
||||
*/
|
||||
#ifdef CILK_C_DEFINE_REDUCERS
|
||||
# define CILK_C_REDUCER_OPXOR_INSTANCE(t,tn) \
|
||||
CILK_C_REDUCER_OPXOR_DEFINITION(t,tn)
|
||||
#else
|
||||
# define CILK_C_REDUCER_OPXOR_INSTANCE(t,tn) \
|
||||
CILK_C_REDUCER_OPXOR_DECLARATION(t,tn)
|
||||
#endif
|
||||
//@}
|
||||
|
||||
/* Declare or define an instance of the reducer type and its functions for each
|
||||
* numeric type.
|
||||
*/
|
||||
CILK_C_REDUCER_OPXOR_INSTANCE(char, char)
|
||||
CILK_C_REDUCER_OPXOR_INSTANCE(unsigned char, uchar)
|
||||
CILK_C_REDUCER_OPXOR_INSTANCE(signed char, schar)
|
||||
CILK_C_REDUCER_OPXOR_INSTANCE(wchar_t, wchar_t)
|
||||
CILK_C_REDUCER_OPXOR_INSTANCE(short, short)
|
||||
CILK_C_REDUCER_OPXOR_INSTANCE(unsigned short, ushort)
|
||||
CILK_C_REDUCER_OPXOR_INSTANCE(int, int)
|
||||
CILK_C_REDUCER_OPXOR_INSTANCE(unsigned int, uint)
|
||||
CILK_C_REDUCER_OPXOR_INSTANCE(unsigned int, unsigned) /* alternate name */
|
||||
CILK_C_REDUCER_OPXOR_INSTANCE(long, long)
|
||||
CILK_C_REDUCER_OPXOR_INSTANCE(unsigned long, ulong)
|
||||
CILK_C_REDUCER_OPXOR_INSTANCE(long long, longlong)
|
||||
CILK_C_REDUCER_OPXOR_INSTANCE(unsigned long long, ulonglong)
|
||||
|
||||
/// @endcond
|
||||
|
||||
__CILKRTS_END_EXTERN_C
|
||||
|
||||
//@}
|
||||
|
||||
//@}
|
||||
|
||||
#endif /* REDUCER_OPXOR_H_INCLUDED */
|
293
libcilkrts/include/cilk/reducer_ostream.h
Normal file
293
libcilkrts/include/cilk/reducer_ostream.h
Normal file
|
@ -0,0 +1,293 @@
|
|||
/*
|
||||
* @copyright
|
||||
* Copyright (C) 2009-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* reducer_ostream.h
|
||||
*
|
||||
* Purpose: Hyper-object to write to 'std::ostream's
|
||||
*
|
||||
* Classes: reducer_ostream
|
||||
*
|
||||
* Description:
|
||||
* ============
|
||||
* Output streams ('std::ostream's) are a convenient means of writing text to
|
||||
* files, the user console, or sockets. In a serial program, text is written
|
||||
* to an ostream in a specific, logical order. For example, computing values while
|
||||
* traversing a data structure and printing them to an 'ostream' will result
|
||||
* in the values being printed in the order of traversal. In a parallel
|
||||
* version of the same program, however, different parts of the data structure
|
||||
* may be traversed in a different order, resulting in a non-deterministic
|
||||
* ordering of the stream. Worse, multiple strands may write to the same
|
||||
* stream simultaneously, resulting in a data race. Replacing the
|
||||
* 'std::ostream' with a 'cilk::reducer_ostream' will solve both problems: Data
|
||||
* will appear in the stream in the same order as it would for the serial
|
||||
* program, and there will be no races (no locks) on the common stream.
|
||||
*
|
||||
* Usage Example:
|
||||
* ==============
|
||||
* Assume we wish to traverse an array of objects, performing an operation on
|
||||
* each object and writing the result to a file. Without a reducer_ostream,
|
||||
* we have a race on the 'output' file stream:
|
||||
*..
|
||||
* void compute(std::ostream& os, double x)
|
||||
* {
|
||||
* // Perform some significant computation and print the result:
|
||||
* os << std::asin(x);
|
||||
* }
|
||||
*
|
||||
* int test()
|
||||
* {
|
||||
* const std::size_t ARRAY_SIZE = 1000000;
|
||||
* extern double myArray[ARRAY_SIZE];
|
||||
*
|
||||
* std::ofstream output("output.txt");
|
||||
* cilk_for (std::size_t i = 0; i < ARRAY_SIZE; ++i)
|
||||
* {
|
||||
* compute(output, myArray[i]);
|
||||
* }
|
||||
*
|
||||
* return 0;
|
||||
* }
|
||||
*..
|
||||
* The race is solved by using a reducer_ostream to proxy the 'output' file:
|
||||
*..
|
||||
* void compute(cilk::reducer_ostream& os, double x)
|
||||
* {
|
||||
* // Perform some significant computation and print the result:
|
||||
* *os << std::asin(x);
|
||||
* }
|
||||
*
|
||||
* int test()
|
||||
* {
|
||||
* const std::size_t ARRAY_SIZE = 1000000;
|
||||
* extern double myArray[ARRAY_SIZE];
|
||||
*
|
||||
* std::ofstream output("output.txt");
|
||||
* cilk::reducer_ostream hyper_output(output);
|
||||
* cilk_for (std::size_t i = 0; i < ARRAY_SIZE; ++i)
|
||||
* {
|
||||
* compute(hyper_output, myArray[i]);
|
||||
* }
|
||||
*
|
||||
* return 0;
|
||||
* }
|
||||
*..
|
||||
*
|
||||
* Limitations:
|
||||
* ============
|
||||
* There are two possible values for the formatting flags immediately after a
|
||||
* 'cilk_spawn' statement: they may either have the value that was set by the
|
||||
* spawned function, or they may have default values. Because of
|
||||
* non-determinism in the processor scheduling, there is no way to determine
|
||||
* which it will be. Similarly, the formatting flags after a 'cilk_sync' may
|
||||
* or may not have the same value as before the sync. Therefore, one must use
|
||||
* a disciplined coding style to avoid formatting errors. There are two
|
||||
* approaches to mitigating the problem: The first is to eliminate the
|
||||
* difference between the two possible outcomes by ensuring that the spawned
|
||||
* function always returns the flags to their initial state:
|
||||
*..
|
||||
* void compute(cilk::reducer_ostream& os, double x)
|
||||
* {
|
||||
* // Perform some significant computation and print the result:
|
||||
* std::streamsize saveprec = os.get_reference().precision(5);
|
||||
* os << std::asin(x);
|
||||
* os.get_reference().precision(saveprec);
|
||||
* }
|
||||
*..
|
||||
* The second approach is to write your streaming operations such that they
|
||||
* don't depend on the previous state of the formatting flags by setting any
|
||||
* important flags before every block of output:
|
||||
*..
|
||||
* cilk_spawn compute(hyper_output, value);
|
||||
*
|
||||
* hyper_output.get_reference().precision(2); // Don't depend on previous precision
|
||||
* *hyper_output << f();
|
||||
* *hyper_output << g();
|
||||
*..
|
||||
* Another concern is memory usage. A reducer_ostream will buffer as much text
|
||||
* as necessary to ensure that the order of output matches that of the serial
|
||||
* version of the program. If all spawn branches perform an equal amount of
|
||||
* output, then one can expect that half of the output before a sync will be
|
||||
* buffered in memory. This hyperobject is therefore not well suited for
|
||||
* serializing very large quantities of text output.
|
||||
*/
|
||||
|
||||
#ifndef REDUCER_OSTREAM_H_INCLUDED
|
||||
#define REDUCER_OSTREAM_H_INCLUDED
|
||||
|
||||
#include <cilk/reducer.h>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
|
||||
namespace cilk {
|
||||
|
||||
/**
|
||||
* @brief Class 'reducer_ostream' is the representation of a hyperobject for
|
||||
* output text streaming.
|
||||
*/
|
||||
class reducer_ostream
|
||||
{
|
||||
public:
|
||||
/// Internal representation of the per-strand view of the data for reducer_ostream
|
||||
class View: public std::ostream
|
||||
{
|
||||
public:
|
||||
/// Type of the std::stream reducer_ostream is based on
|
||||
typedef std::ostream Base;
|
||||
|
||||
friend class reducer_ostream;
|
||||
|
||||
View():
|
||||
std::ostream(0)
|
||||
{
|
||||
Base::rdbuf(&strbuf_);
|
||||
};
|
||||
|
||||
private:
|
||||
void use_ostream (const std::ostream &os)
|
||||
{
|
||||
Base::rdbuf(os.rdbuf());
|
||||
Base::flags(os.flags()); // Copy formatting flags
|
||||
Base::setstate(os.rdstate()); // Copy error state
|
||||
}
|
||||
|
||||
private:
|
||||
std::stringbuf strbuf_;
|
||||
};
|
||||
|
||||
public:
|
||||
/// Definition of data view, operation, and identity for reducer_ostream
|
||||
struct Monoid: monoid_base< View >
|
||||
{
|
||||
static void reduce (View *left, View *right);
|
||||
};
|
||||
|
||||
private:
|
||||
// Hyperobject to serve up views
|
||||
reducer<Monoid> imp_;
|
||||
|
||||
// Methods that provide the API for the reducer
|
||||
public:
|
||||
|
||||
// Construct an initial 'reducer_ostream' from an 'std::ostream'. The
|
||||
// specified 'os' stream is used as the eventual destination for all
|
||||
// text streamed to this hyperobject.
|
||||
explicit reducer_ostream(const std::ostream &os);
|
||||
|
||||
// Return a modifiable reference to the underlying 'ostream' object.
|
||||
std::ostream& get_reference();
|
||||
|
||||
/**
|
||||
* Append data from some type to the reducer_ostream
|
||||
*
|
||||
* @param v Value to be appended to the reducer_ostream
|
||||
*/
|
||||
template<typename T>
|
||||
std::ostream &
|
||||
operator<< (const T &v)
|
||||
{
|
||||
return imp_.view() << v;
|
||||
}
|
||||
|
||||
/**
|
||||
* Append data from a std::ostream to the reducer_ostream
|
||||
*
|
||||
* @param _Pfn std::ostream to copy from
|
||||
*/
|
||||
std::ostream &
|
||||
operator<< (std::ostream &(*_Pfn)(std::ostream &))
|
||||
{
|
||||
View &v = imp_.view();
|
||||
|
||||
return ((*_Pfn)(v));
|
||||
}
|
||||
|
||||
reducer_ostream& operator*() { return *this; }
|
||||
reducer_ostream const& operator*() const { return *this; }
|
||||
|
||||
reducer_ostream* operator->() { return this; }
|
||||
reducer_ostream const* operator->() const { return this; }
|
||||
};
|
||||
|
||||
|
||||
// -------------------------------------------
|
||||
// class reducer_ostream::Monoid
|
||||
// -------------------------------------------
|
||||
|
||||
/**
|
||||
* Appends string from "right" reducer_basic_string onto the end of
|
||||
* the "left". When done, the "right" reducer_basic_string is empty.
|
||||
*/
|
||||
void
|
||||
reducer_ostream::Monoid::reduce(View *left, View *right)
|
||||
{
|
||||
left->operator<< (&right->strbuf_);
|
||||
}
|
||||
|
||||
// --------------------------
|
||||
// class reducer_ostream
|
||||
// --------------------------
|
||||
|
||||
/**
|
||||
* Construct a reducer_ostream which will write to the specified std::ostream
|
||||
*
|
||||
* @param os std::ostream to write to
|
||||
*/
|
||||
inline
|
||||
reducer_ostream::reducer_ostream(const std::ostream &os) :
|
||||
imp_()
|
||||
{
|
||||
View &v = imp_.view();
|
||||
|
||||
v.use_ostream(os);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a reference to the std::ostream
|
||||
*/
|
||||
inline
|
||||
std::ostream &
|
||||
reducer_ostream::get_reference()
|
||||
{
|
||||
View &v = imp_.view();
|
||||
|
||||
return v;
|
||||
}
|
||||
|
||||
} // namespace cilk
|
||||
|
||||
#endif // REDUCER_OSTREAM_H_INCLUDED
|
||||
|
729
libcilkrts/include/cilk/reducer_string.h
Normal file
729
libcilkrts/include/cilk/reducer_string.h
Normal file
|
@ -0,0 +1,729 @@
|
|||
/* reducer_string.h -*- C++ -*-
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2009-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** @file reducer_string.h
|
||||
*
|
||||
* @brief Defines classes for doing parallel string creation by appending.
|
||||
*
|
||||
* @ingroup ReducersString
|
||||
*
|
||||
* @see ReducersString
|
||||
*/
|
||||
|
||||
#ifndef REDUCER_STRING_H_INCLUDED
|
||||
#define REDUCER_STRING_H_INCLUDED
|
||||
|
||||
#include <cilk/reducer.h>
|
||||
#include <string>
|
||||
#include <list>
|
||||
|
||||
/** @defgroup ReducersString String Reducers
|
||||
*
|
||||
* String reducers allow the creation of a string by concatenating a set of
|
||||
* strings or characters in parallel.
|
||||
*
|
||||
* @ingroup Reducers
|
||||
*
|
||||
* You should be familiar with @ref pagereducers "Cilk reducers", described in
|
||||
* file reducers.md, and particularly with @ref reducers_using, before trying
|
||||
* to use the information in this file.
|
||||
*
|
||||
* @section redstring_usage Usage Example
|
||||
*
|
||||
* vector<Data> data;
|
||||
* void expensive_string_computation(const Data& x, string& s);
|
||||
* cilk::reducer<cilk::op_string> r;
|
||||
* cilk_for (int i = 0; i != data.size(); ++i) {
|
||||
* string temp;
|
||||
* expensive_string_computation(data[i], temp);
|
||||
* *r += temp;
|
||||
* }
|
||||
* string result;
|
||||
* r.move_out(result);
|
||||
*
|
||||
* @section redstring_monoid The Monoid
|
||||
*
|
||||
* @subsection redstring_monoid_values Value Set
|
||||
*
|
||||
* The value set of a string reducer is the set of values of the class
|
||||
* `std::basic_string<Char, Traits, Alloc>`, which we refer to as “the
|
||||
* reducer’s string type”.
|
||||
*
|
||||
* @subsection redstring_monoid_operator Operator
|
||||
*
|
||||
* The operator of a string reducer is the string concatenation operator,
|
||||
* defined by the “`+`” binary operator on the reducer’s string type.
|
||||
*
|
||||
* @subsection redstring_monoid_identity Identity
|
||||
*
|
||||
* The identity value of a string reducer is the empty string, which is the
|
||||
* value of the expression
|
||||
* `std::basic_string<Char, Traits, Alloc>([allocator])`.
|
||||
*
|
||||
* @section redstring_operations Operations
|
||||
*
|
||||
* In the operation descriptions below, the type name `String` refers to the
|
||||
* reducer’s string type, `std::basic_string<Char, Traits, Alloc>`.
|
||||
*
|
||||
* @subsection redstring_constructors Constructors
|
||||
*
|
||||
* Any argument list which is valid for a `std::basic_string` constructor is
|
||||
* valid for a string reducer constructor. The usual move-in constructor is
|
||||
* also provided:
|
||||
*
|
||||
* reducer(move_in(String& variable))
|
||||
*
|
||||
* @subsection redstring_get_set Set and Get
|
||||
*
|
||||
* r.set_value(const String& value)
|
||||
* const String& = r.get_value() const
|
||||
* r.move_in(String& variable)
|
||||
* r.move_out(String& variable)
|
||||
*
|
||||
* @subsection redstring_initial Initial Values
|
||||
*
|
||||
* A string reducer with no constructor arguments, or with only an allocator
|
||||
* argument, will initially contain the identity value, an empty string.
|
||||
*
|
||||
* @subsection redstring_view_ops View Operations
|
||||
*
|
||||
* *r += a
|
||||
* r->append(a)
|
||||
* r->append(a, b)
|
||||
* r->push_back(a)
|
||||
*
|
||||
* These operations on string reducer views are the same as the corresponding
|
||||
* operations on strings.
|
||||
*
|
||||
* @section redstring_performance Performance Considerations
|
||||
*
|
||||
* String reducers work by creating a string for each view, collecting those
|
||||
* strings in a list, and then concatenating them into a single result string
|
||||
* at the end of the computation. This last step takes place in serial code,
|
||||
* and necessarily takes time proportional to the length of the result string.
|
||||
* Thus, a parallel string reducer cannot actually speed up the time spent
|
||||
* directly creating the string. This trivial example would probably be slower
|
||||
* (because of reducer overhead) than the corresponding serial code:
|
||||
*
|
||||
* vector<string> a;
|
||||
* reducer<op_string> r;
|
||||
* cilk_for (int i = 0; i != a.size(); ++i) {
|
||||
* *r += a[i];
|
||||
* }
|
||||
* string result;
|
||||
* r.move_out(result);
|
||||
*
|
||||
* What a string reducer _can_ do is to allow the _remainder_ of the
|
||||
* computation to be done in parallel, without having to worry about managing
|
||||
* the string computation.
|
||||
*
|
||||
* The strings for new views are created (by the view identity constructor)
|
||||
* using the same allocator as the string that was created when the reducer
|
||||
* was constructed. Note that this allocator is determined when the reducer is
|
||||
* constructed. The following two examples may have very different behavior:
|
||||
*
|
||||
* basic_string<Char, Traits, Allocator> a_string;
|
||||
*
|
||||
* reducer< op_basic_string<Char, Traits, Allocator> > reducer1(move_in(a_string));
|
||||
* ... parallel computation ...
|
||||
* reducer1.move_out(a_string);
|
||||
*
|
||||
* reducer< op_basic_string<Char, Traits, Allocator> > reducer2;
|
||||
* reducer2.move_in(a_string);
|
||||
* ... parallel computation ...
|
||||
* reducer2.move_out(a_string);
|
||||
*
|
||||
* * `reducer1` will be constructed with the same allocator as `a_string`,
|
||||
* because the string was specified in the constructor. The `move_in`
|
||||
* and `move_out` can therefore be done with a `swap` in constant time.
|
||||
* * `reducer2` will be constructed with a _default_ allocator of type
|
||||
* `Allocator`, which may not be the same as the allocator of `a_string`.
|
||||
* Therefore, the `move_in` and `move_out` may have to be done with a copy
|
||||
* in _O(N)_ time.
|
||||
*
|
||||
* (All instances of an allocator type with no internal state (like
|
||||
* `std::allocator`) are “the same”. You only need to worry about the “same
|
||||
* allocator” issue when you create string reducers with custom allocator
|
||||
* types.)
|
||||
*
|
||||
* @section redstring_types Type and Operator Requirements
|
||||
*
|
||||
* `std::basic_string<Char, Traits, Alloc>` must be a valid type.
|
||||
*/
|
||||
|
||||
namespace cilk {
|
||||
|
||||
/** @ingroup ReducersString */
|
||||
//@{
|
||||
|
||||
/** The string append reducer view class.
|
||||
*
|
||||
* This is the view class for reducers created with
|
||||
* `cilk::reducer< cilk::op_basic_string<Type, Traits, Allocator> >`. It holds
|
||||
* the accumulator variable for the reduction, and allows only append
|
||||
* operations to be performed on it.
|
||||
*
|
||||
* @note The reducer “dereference” operation (`reducer::operator *()`)
|
||||
* yields a reference to the view. Thus, for example, the view class’s
|
||||
* `append` operation would be used in an expression like
|
||||
* `r->append(a)`, where `r` is a string append reducer variable.
|
||||
*
|
||||
* @tparam Char The string element type (not the string type).
|
||||
* @tparam Traits The character traits type.
|
||||
* @tparam Alloc The string allocator type.
|
||||
*
|
||||
* @see ReducersString
|
||||
* @see op_basic_string
|
||||
*/
|
||||
template<typename Char, typename Traits, typename Alloc>
|
||||
class op_basic_string_view
|
||||
{
|
||||
typedef std::basic_string<Char, Traits, Alloc> string_type;
|
||||
typedef std::list<string_type> list_type;
|
||||
typedef typename string_type::size_type size_type;
|
||||
|
||||
// The view's value is represented by a list of strings and a single
|
||||
// string. The value is the concatenation of the strings in the list with
|
||||
// the single string at the end. All string operations apply to the single
|
||||
// string; reduce operations cause lists of partial strings from multiple
|
||||
// strands to be combined.
|
||||
//
|
||||
mutable string_type m_string;
|
||||
mutable list_type m_list;
|
||||
|
||||
// Before returning the value of the reducer, concatenate all the strings
|
||||
// in the list with the single string.
|
||||
//
|
||||
void flatten() const
|
||||
{
|
||||
if (m_list.empty()) return;
|
||||
|
||||
typename list_type::iterator i;
|
||||
|
||||
size_type len = m_string.size();
|
||||
for (i = m_list.begin(); i != m_list.end(); ++i)
|
||||
len += i->size();
|
||||
|
||||
string_type result(get_allocator());
|
||||
result.reserve(len);
|
||||
|
||||
for (i = m_list.begin(); i != m_list.end(); ++i)
|
||||
result += *i;
|
||||
m_list.clear();
|
||||
|
||||
result += m_string;
|
||||
result.swap(m_string);
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
/** @name Monoid support.
|
||||
*/
|
||||
//@{
|
||||
|
||||
/// Required by @ref monoid_with_view
|
||||
typedef string_type value_type;
|
||||
|
||||
/// Required by @ref op_string
|
||||
Alloc get_allocator() const
|
||||
{
|
||||
return m_string.get_allocator();
|
||||
}
|
||||
|
||||
/** Reduction operation.
|
||||
*
|
||||
* This function is invoked by the @ref op_basic_string monoid to combine
|
||||
* the views of two strands when the right strand merges with the left
|
||||
* one. It appends the value contained in the right-strand view to the
|
||||
* value contained in the left-strand view, and leaves the value in the
|
||||
* right-strand view undefined.
|
||||
*
|
||||
* @param right A pointer to the right-strand view. (`this` points to
|
||||
* the left-strand view.)
|
||||
*
|
||||
* @note Used only by the @ref op_basic_string monoid to implement the
|
||||
* monoid reduce operation.
|
||||
*/
|
||||
void reduce(op_basic_string_view* right)
|
||||
{
|
||||
if (!right->m_string.empty() || !right->m_list.empty()) {
|
||||
// (list, string) + (right_list, right_string) =>
|
||||
// (list + {string} + right_list, right_string)
|
||||
if (!m_string.empty()) {
|
||||
// simulate m_list.push_back(std::move(m_string))
|
||||
m_list.push_back(string_type(get_allocator()));
|
||||
m_list.back().swap(m_string);
|
||||
}
|
||||
m_list.splice(m_list.end(), right->m_list);
|
||||
m_string.swap(right->m_string);
|
||||
}
|
||||
}
|
||||
|
||||
//@}
|
||||
|
||||
/** @name Pass constructor arguments through to the string constructor.
|
||||
*/
|
||||
//@{
|
||||
|
||||
op_basic_string_view() : m_string() {}
|
||||
|
||||
template <typename T1>
|
||||
op_basic_string_view(const T1& x1) : m_string(x1) {}
|
||||
|
||||
template <typename T1, typename T2>
|
||||
op_basic_string_view(const T1& x1, const T2& x2) : m_string(x1, x2) {}
|
||||
|
||||
template <typename T1, typename T2, typename T3>
|
||||
op_basic_string_view(const T1& x1, const T2& x2, const T3& x3) : m_string(x1, x2, x3) {}
|
||||
|
||||
template <typename T1, typename T2, typename T3, typename T4>
|
||||
op_basic_string_view(const T1& x1, const T2& x2, const T3& x3, const T4& x4) :
|
||||
m_string(x1, x2, x3, x4) {}
|
||||
|
||||
//@}
|
||||
|
||||
/** Move-in constructor.
|
||||
*/
|
||||
explicit op_basic_string_view(move_in_wrapper<value_type> w)
|
||||
: m_string(w.value().get_allocator())
|
||||
{
|
||||
m_string.swap(w.value());
|
||||
}
|
||||
|
||||
/** @name @ref reducer support.
|
||||
*/
|
||||
//@{
|
||||
|
||||
void view_move_in(string_type& s)
|
||||
{
|
||||
m_list.clear();
|
||||
if (m_string.get_allocator() == s.get_allocator())
|
||||
// Equal allocators. Do a (fast) swap.
|
||||
m_string.swap(s);
|
||||
else
|
||||
// Unequal allocators. Do a (slow) copy.
|
||||
m_string = s;
|
||||
s.clear();
|
||||
}
|
||||
|
||||
void view_move_out(string_type& s)
|
||||
{
|
||||
flatten();
|
||||
if (m_string.get_allocator() == s.get_allocator())
|
||||
// Equal allocators. Do a (fast) swap.
|
||||
m_string.swap(s);
|
||||
else
|
||||
// Unequal allocators. Do a (slow) copy.
|
||||
s = m_string;
|
||||
m_string.clear();
|
||||
}
|
||||
|
||||
void view_set_value(const string_type& s)
|
||||
{ m_list.clear(); m_string = s; }
|
||||
|
||||
string_type const& view_get_value() const
|
||||
{ flatten(); return m_string; }
|
||||
|
||||
string_type & view_get_reference()
|
||||
{ flatten(); return m_string; }
|
||||
|
||||
string_type const& view_get_reference() const
|
||||
{ flatten(); return m_string; }
|
||||
|
||||
//@}
|
||||
|
||||
/** @name View modifier operations.
|
||||
*
|
||||
* @details These simply wrap the corresponding operations on the underlying string.
|
||||
*/
|
||||
//@{
|
||||
|
||||
template <typename T>
|
||||
op_basic_string_view& operator +=(const T& x)
|
||||
{ m_string += x; return *this; }
|
||||
|
||||
template <typename T1>
|
||||
op_basic_string_view& append(const T1& x1)
|
||||
{ m_string.append(x1); return *this; }
|
||||
|
||||
template <typename T1, typename T2>
|
||||
op_basic_string_view& append(const T1& x1, const T2& x2)
|
||||
{ m_string.append(x1, x2); return *this; }
|
||||
|
||||
template <typename T1, typename T2, typename T3>
|
||||
op_basic_string_view& append(const T1& x1, const T2& x2, const T3& x3)
|
||||
{ m_string.append(x1, x2, x3); return *this; }
|
||||
|
||||
void push_back(const Char x) { m_string.push_back(x); }
|
||||
|
||||
//@}
|
||||
};
|
||||
|
||||
|
||||
/** String append monoid class. Instantiate the cilk::reducer template class
|
||||
* with an op_basic_string monoid to create a string append reducer class. For
|
||||
* example, to concatenate a collection of standard strings:
|
||||
*
|
||||
* cilk::reducer< cilk::op_basic_string<char> > r;
|
||||
*
|
||||
* @tparam Char The string element type (not the string type).
|
||||
* @tparam Traits The character traits type.
|
||||
* @tparam Alloc The string allocator type.
|
||||
* @tparam Align If `false` (the default), reducers instantiated on this
|
||||
* monoid will be naturally aligned (the Cilk library 1.0
|
||||
* behavior). If `true`, reducers instantiated on this monoid
|
||||
* will be cache-aligned for binary compatibility with
|
||||
* reducers in Cilk library version 0.9.
|
||||
*
|
||||
* @see ReducersString
|
||||
* @see op_basic_string_view
|
||||
* @see reducer_basic_string
|
||||
* @see op_string
|
||||
* @see op_wstring
|
||||
*/
|
||||
template<typename Char,
|
||||
typename Traits = std::char_traits<Char>,
|
||||
typename Alloc = std::allocator<Char>,
|
||||
bool Align = false>
|
||||
class op_basic_string :
|
||||
public monoid_with_view< op_basic_string_view<Char, Traits, Alloc>, Align >
|
||||
{
|
||||
typedef monoid_with_view< op_basic_string_view<Char, Traits, Alloc>, Align >
|
||||
base;
|
||||
Alloc m_allocator;
|
||||
|
||||
public:
|
||||
|
||||
/** View type of the monoid.
|
||||
*/
|
||||
typedef typename base::view_type view_type;
|
||||
|
||||
/** Constructor.
|
||||
*
|
||||
* The allocator argument defaults to `Alloc()`, so a string monoid can be
|
||||
* default-constructed; pass an allocator explicitly to choose another.
|
||||
*
|
||||
* @param allocator The string allocator to be used when
|
||||
* identity-constructing new views.
|
||||
*/
|
||||
op_basic_string(const Alloc& allocator = Alloc()) : m_allocator(allocator)
|
||||
{}
|
||||
|
||||
/** Create an identity view.
|
||||
*
|
||||
* String view identity constructors take the string allocator as an
|
||||
* argument.
|
||||
*
|
||||
* @param v The address of the uninitialized memory in which the view
|
||||
* will be constructed.
|
||||
*/
|
||||
void identity(view_type *v) const { ::new((void*) v) view_type(m_allocator); }
|
||||
|
||||
/** @name Construct functions
|
||||
*
|
||||
* A string append reduction monoid must have a copy of the allocator of
|
||||
* the leftmost view’s string, so that it can use it in the `identity`
|
||||
* operation. This, in turn, requires that string reduction monoids have a
|
||||
* specialized `construct()` function.
|
||||
*
|
||||
* All string reducer monoid `construct()` functions first construct the
|
||||
* leftmost view, using the arguments that were passed in from the reducer
|
||||
* constructor. They then call the view’s `get_allocator()` function to
|
||||
* get the string allocator from the string in the leftmost view, and pass
|
||||
* that to the monoid constructor.
|
||||
*/
|
||||
//@{
|
||||
|
||||
static void construct(op_basic_string* monoid, view_type* view)
|
||||
{ provisional( new ((void*)view) view_type() ).confirm_if(
|
||||
new ((void*)monoid) op_basic_string(view->get_allocator()) ); }
|
||||
|
||||
template <typename T1>
|
||||
static void construct(op_basic_string* monoid, view_type* view, const T1& x1)
|
||||
{ provisional( new ((void*)view) view_type(x1) ).confirm_if(
|
||||
new ((void*)monoid) op_basic_string(view->get_allocator()) ); }
|
||||
|
||||
template <typename T1, typename T2>
|
||||
static void construct(op_basic_string* monoid, view_type* view, const T1& x1, const T2& x2)
|
||||
{ provisional( new ((void*)view) view_type(x1, x2) ).confirm_if(
|
||||
new ((void*)monoid) op_basic_string(view->get_allocator()) ); }
|
||||
|
||||
template <typename T1, typename T2, typename T3>
|
||||
static void construct(op_basic_string* monoid, view_type* view, const T1& x1, const T2& x2,
|
||||
const T3& x3)
|
||||
{ provisional( new ((void*)view) view_type(x1, x2, x3) ).confirm_if(
|
||||
new ((void*)monoid) op_basic_string(view->get_allocator()) ); }
|
||||
|
||||
template <typename T1, typename T2, typename T3, typename T4>
|
||||
static void construct(op_basic_string* monoid, view_type* view, const T1& x1, const T2& x2,
|
||||
const T3& x3, const T4& x4)
|
||||
{ provisional( new ((void*)view) view_type(x1, x2, x3, x4) ).confirm_if(
|
||||
new ((void*)monoid) op_basic_string(view->get_allocator()) ); }
|
||||
|
||||
//@}
|
||||
};
|
||||
|
||||
|
||||
/** Convenience typedef for 8-bit strings
|
||||
*/
|
||||
typedef op_basic_string<char> op_string;
|
||||
|
||||
/** Convenience typedef for wide-character (`wchar_t`) strings
|
||||
*/
|
||||
typedef op_basic_string<wchar_t> op_wstring;
|
||||
|
||||
|
||||
/** Deprecated string append reducer class.
|
||||
*
|
||||
* reducer_basic_string is the same as @ref reducer<@ref op_basic_string>,
|
||||
* except that reducer_basic_string is a proxy for the contained view, so that
|
||||
* accumulator variable update operations can be applied directly to the
|
||||
* reducer. For example, a value is appended to a `reducer<%op_basic_string>`
|
||||
* with `r->push_back(a)`, but a value can be appended to a `%reducer_basic_string`
|
||||
* with `r.push_back(a)`.
|
||||
*
|
||||
* @deprecated Users are strongly encouraged to use `reducer<monoid>`
|
||||
* reducers rather than the old wrappers like reducer_basic_string.
|
||||
* The `reducer<monoid>` reducers show the reducer/monoid/view
|
||||
* architecture more clearly, are more consistent in their
|
||||
* implementation, and present a simpler model for new
|
||||
* user-implemented reducers.
|
||||
*
|
||||
* @note Implicit conversions are provided between `%reducer_basic_string`
|
||||
* and `reducer<%op_basic_string>`. This allows incremental code
|
||||
* conversion: old code that used `%reducer_basic_string` can pass a
|
||||
* `%reducer_basic_string` to a converted function that now expects a
|
||||
* pointer or reference to a `reducer<%op_basic_string>`, and vice
|
||||
* versa.
|
||||
*
|
||||
* @tparam Char The string element type (not the string type).
|
||||
* @tparam Traits The character traits type.
|
||||
* @tparam Alloc The string allocator type.
|
||||
*
|
||||
* @see op_basic_string
|
||||
* @see reducer
|
||||
* @see ReducersString
|
||||
*/
|
||||
template<typename Char,
|
||||
typename Traits = std::char_traits<Char>,
|
||||
typename Alloc = std::allocator<Char> >
|
||||
class reducer_basic_string :
|
||||
public reducer< op_basic_string<Char, Traits, Alloc, true> >
|
||||
{
|
||||
typedef reducer< op_basic_string<Char, Traits, Alloc, true> > base;
|
||||
using base::view;
|
||||
public:
|
||||
|
||||
/// The reducer’s string type.
|
||||
typedef typename base::value_type string_type;
|
||||
|
||||
/// The reducer’s primitive component type.
|
||||
typedef Char basic_value_type;
|
||||
|
||||
/// The string size type.
|
||||
typedef typename string_type::size_type size_type;
|
||||
|
||||
/// The view type for the reducer.
|
||||
typedef typename base::view_type View;
|
||||
|
||||
/// The monoid type for the reducer.
|
||||
typedef typename base::monoid_type Monoid;
|
||||
|
||||
|
||||
/** @name Constructors
|
||||
*/
|
||||
//@{
|
||||
|
||||
/** @name Forward constructor calls to the base class.
|
||||
*
|
||||
* All basic_string constructor forms are supported.
|
||||
*/
|
||||
//@{
|
||||
reducer_basic_string() {}
|
||||
|
||||
template <typename T1>
|
||||
reducer_basic_string(const T1& x1) :
|
||||
base(x1) {}
|
||||
|
||||
template <typename T1, typename T2>
|
||||
reducer_basic_string(const T1& x1, const T2& x2) :
|
||||
base(x1, x2) {}
|
||||
|
||||
template <typename T1, typename T2, typename T3>
|
||||
reducer_basic_string(const T1& x1, const T2& x2, const T3& x3) :
|
||||
base(x1, x2, x3) {}
|
||||
|
||||
template <typename T1, typename T2, typename T3, typename T4>
|
||||
reducer_basic_string(const T1& x1, const T2& x2, const T3& x3, const T4& x4) :
|
||||
base(x1, x2, x3, x4) {}
|
||||
//@}
|
||||
|
||||
/** Allow mutable access to the string within the current view.
|
||||
*
|
||||
* @warning If this method is called before the parallel calculation is
|
||||
* complete, the string returned by this method will be a
|
||||
* partial result.
|
||||
*
|
||||
* @returns A mutable reference to the string within the current view.
|
||||
*/
|
||||
string_type &get_reference()
|
||||
{ return view().view_get_reference(); }
|
||||
|
||||
/** Allow read-only access to the string within the current view.
|
||||
*
|
||||
* @warning If this method is called before the parallel calculation is
|
||||
* complete, the string returned by this method will be a
|
||||
* partial result.
|
||||
*
|
||||
* @returns A const reference to the string within the current view.
|
||||
*/
|
||||
string_type const &get_reference() const
|
||||
{ return view().view_get_reference(); }
|
||||
|
||||
/** @name Append to the string.
|
||||
*
|
||||
* These operations are simply forwarded to the view.
|
||||
*/
|
||||
//@{
|
||||
void append(const Char *ptr)
|
||||
{ view().append(ptr); }
|
||||
void append(const Char *ptr, size_type count)
|
||||
{ view().append(ptr, count); }
|
||||
void append(const string_type &str, size_type offset, size_type count)
|
||||
{ view().append(str, offset, count); }
|
||||
void append(const string_type &str)
|
||||
{ view().append(str); }
|
||||
void append(size_type count, Char ch)
|
||||
{ view().append(count, ch); }
|
||||
|
||||
// Append to the string
|
||||
reducer_basic_string<Char, Traits, Alloc> &operator+=(Char ch)
|
||||
{ view() += ch; return *this; }
|
||||
reducer_basic_string<Char, Traits, Alloc> &operator+=(const Char *ptr)
|
||||
{ view() += ptr; return *this; }
|
||||
reducer_basic_string<Char, Traits, Alloc> &operator+=(const string_type &right)
|
||||
{ view() += right; return *this; }
|
||||
//@}
|
||||
|
||||
/** @name Dereference
|
||||
* @details Dereferencing a wrapper is a no-op. It simply returns the
|
||||
* wrapper. Combined with the rule that the wrapper forwards view
|
||||
* operations to its contained view, this means that view operations can
|
||||
* be written the same way on reducers and wrappers, which is convenient
|
||||
* for incrementally converting old code using wrappers to use reducers
|
||||
* instead. That is:
|
||||
*
|
||||
* reducer<op_string> r;
|
||||
* r->push_back(a); // r-> returns the view
|
||||
* // push_back() is a view member function
|
||||
*
|
||||
* reducer_string w;
|
||||
* w->push_back(a); // w-> returns the wrapper
|
||||
* // push_back() is a wrapper member function
|
||||
* // that calls the corresponding view function
|
||||
*/
|
||||
//@{
|
||||
reducer_basic_string& operator*() { return *this; }
|
||||
reducer_basic_string const& operator*() const { return *this; }
|
||||
|
||||
reducer_basic_string* operator->() { return this; }
|
||||
reducer_basic_string const* operator->() const { return this; }
|
||||
//@}
|
||||
|
||||
/** @name Upcast
|
||||
* @details In Cilk library 0.9, reducers were always cache-aligned. In
|
||||
* library 1.0, reducer cache alignment is optional. By default, reducers
|
||||
* are unaligned (i.e., just naturally aligned), but legacy wrappers
|
||||
* inherit from cache-aligned reducers for binary compatibility.
|
||||
*
|
||||
* This means that a wrapper will automatically be upcast to its aligned
|
||||
* reducer base class. The following conversion operators provide
|
||||
* pseudo-upcasts to the corresponding unaligned reducer class.
|
||||
*/
|
||||
//@{
|
||||
operator reducer< op_basic_string<Char, Traits, Alloc, false> >& ()
|
||||
{
|
||||
return *reinterpret_cast< reducer<
|
||||
op_basic_string<Char, Traits, Alloc, false> >*
|
||||
>(this);
|
||||
}
|
||||
operator const reducer< op_basic_string<Char, Traits, Alloc, false> >& () const
|
||||
{
|
||||
return *reinterpret_cast< const reducer<
|
||||
op_basic_string<Char, Traits, Alloc, false> >*
|
||||
>(this);
|
||||
}
|
||||
//@}
|
||||
};
|
||||
|
||||
|
||||
/** Convenience typedef for 8-bit strings
|
||||
*/
|
||||
typedef reducer_basic_string<char> reducer_string;
|
||||
|
||||
/** Convenience typedef for wide-character (`wchar_t`) strings
|
||||
*/
|
||||
typedef reducer_basic_string<wchar_t> reducer_wstring;
|
||||
|
||||
/// @cond internal
|
||||
|
||||
/// @cond internal
|
||||
/** Metafunction specialization for reducer conversion.
|
||||
*
|
||||
* This specialization of the @ref legacy_reducer_downcast template class
|
||||
* defined in reducer.h causes the `reducer< op_basic_string<Char> >` class to
|
||||
* have an `operator reducer_basic_string<Char>& ()` conversion operator that
|
||||
* statically downcasts the `reducer<op_basic_string>` to the corresponding
|
||||
* `reducer_basic_string` type. (The reverse conversion, from
|
||||
* `reducer_basic_string` to `reducer<op_basic_string>`, is just an upcast,
|
||||
* which is provided for free by the language.)
|
||||
*
|
||||
* @ingroup ReducersString
|
||||
*/
|
||||
template<typename Char, typename Traits, typename Alloc, bool Align>
|
||||
struct legacy_reducer_downcast<
|
||||
reducer<op_basic_string<Char, Traits, Alloc, Align> > >
|
||||
{
|
||||
typedef reducer_basic_string<Char, Traits, Alloc> type;
|
||||
};
|
||||
|
||||
/// @endcond
|
||||
|
||||
//@}
|
||||
|
||||
} // namespace cilk
|
||||
|
||||
#endif // REDUCER_STRING_H_INCLUDED
|
108
libcilkrts/include/cilktools/cilkscreen.h
Normal file
108
libcilkrts/include/cilktools/cilkscreen.h
Normal file
|
@ -0,0 +1,108 @@
|
|||
/* cilkscreen.h -*-C++-*-
|
||||
*
|
||||
*************************************************************************
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2010-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
#ifndef INCLUDED_CILKSCREEN_H
|
||||
#define INCLUDED_CILKSCREEN_H
|
||||
|
||||
#include <cilk/cilk_api.h>
|
||||
|
||||
/*
|
||||
* Cilkscreen "functions". These macros generate metadata in your application
|
||||
* to notify Cilkscreen of program state changes
|
||||
*/
|
||||
|
||||
#if ! defined(CILK_STUB) && defined(__INTEL_COMPILER)
|
||||
# define __cilkscreen_metacall(annotation,expr) \
|
||||
__notify_zc_intrinsic((char *)annotation, expr)
|
||||
#else
|
||||
# define __cilkscreen_metacall(annotation,expr) ((void)annotation, (void)(expr))
|
||||
#endif
|
||||
|
||||
/* Call once when a user thread enters a spawning function */
|
||||
#define __cilkscreen_enable_instrumentation() \
|
||||
__cilkscreen_metacall("cilkscreen_enable_instrumentation", 0)
|
||||
|
||||
/* Call once when a user thread exits a spawning function */
|
||||
#define __cilkscreen_disable_instrumentation() \
|
||||
__cilkscreen_metacall("cilkscreen_disable_instrumentation", 0)
|
||||
|
||||
/* Call to re-enable temporarily-disabled cilkscreen checking */
|
||||
#define __cilkscreen_enable_checking() \
|
||||
__cilkscreen_metacall("cilkscreen_enable_checking", 0)
|
||||
|
||||
/* Call to temporarily disable cilkscreen checking */
|
||||
#define __cilkscreen_disable_checking() \
|
||||
__cilkscreen_metacall("cilkscreen_disable_checking", 0)
|
||||
|
||||
/* Inform cilkscreen that memory from begin to end can be reused without
|
||||
* causing races (e.g., for memory that comes from a memory allocator) */
|
||||
#define __cilkscreen_clean(begin, end) \
|
||||
do { \
|
||||
void *__data[2] = { (begin), (end) }; \
|
||||
__cilkscreen_metacall("cilkscreen_clean", &__data); \
|
||||
} while(0)
|
||||
|
||||
/* Inform cilkscreen that a lock is being acquired.
|
||||
* If the lock type is not a handle, then the caller should take its address
|
||||
* and pass the pointer to the lock. Otherwise, the caller should pass the
|
||||
* lock handle directly.
|
||||
*/
|
||||
#define __cilkscreen_acquire_lock(lock) \
|
||||
__cilkscreen_metacall("cilkscreen_acquire_lock", (lock))
|
||||
|
||||
#define __cilkscreen_release_lock(lock) \
|
||||
__cilkscreen_metacall("cilkscreen_release_lock", (lock))
|
||||
|
||||
/*
|
||||
* Metacall data
|
||||
*
|
||||
* A metacall is a way to pass data to a function implemented by a tool.
|
||||
* Metacalls are always instrumented when the tool is loaded.
|
||||
*/
|
||||
|
||||
// Tool code for Cilkscreen
|
||||
#define METACALL_TOOL_CILKSCREEN 1
|
||||
|
||||
// Metacall codes implemented by Cilkscreen
|
||||
#define CS_METACALL_PUTS 0 // Write string to the Cilkscreen log
|
||||
|
||||
#define __cilkscreen_puts(text) \
|
||||
__cilkrts_metacall(METACALL_TOOL_CILKSCREEN, CS_METACALL_PUTS, (void *)(const char *)text)
|
||||
|
||||
#endif /* ! defined(INCLUDED_CILKSCREEN_H) */
|
278
libcilkrts/include/cilktools/cilkview.h
Normal file
278
libcilkrts/include/cilktools/cilkview.h
Normal file
|
@ -0,0 +1,278 @@
|
|||
/* cilkview.h -*-C++-*-
|
||||
*
|
||||
*************************************************************************
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2010-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
#ifndef INCLUDED_CILKVIEW_H
|
||||
#define INCLUDED_CILKVIEW_H
|
||||
|
||||
#include <cilk/cilk_api.h>
|
||||
|
||||
#ifdef _WIN32
|
||||
# ifndef _WINBASE_
|
||||
__CILKRTS_BEGIN_EXTERN_C
|
||||
unsigned long __stdcall GetTickCount();
|
||||
__CILKRTS_END_EXTERN_C
|
||||
# endif
|
||||
#endif // _WIN32
|
||||
|
||||
#if defined __unix__ || defined __APPLE__ || defined __VXWORKS__
|
||||
# include <sys/time.h>
|
||||
#endif // defined __unix__ || defined __APPLE__ || defined __VXWORKS__
|
||||
|
||||
/// @brief Return the system clock with millisecond resolution
///
/// This function returns an integer representing the number of
/// milliseconds since an arbitrary starting point, e.g., since the system was
/// started or since the Unix Epoch.  The result is meaningless by itself, but
/// the difference between two sequential calls to __cilkview_getticks()
/// represents the time interval that elapsed between them (in ms).
///
/// @return Milliseconds from GetTickCount() on Windows, or milliseconds
///         derived from gettimeofday() on Unix-like systems.
static inline unsigned long long __cilkview_getticks()
{
    unsigned long long ticks;

#if __INTEL_COMPILER > 1200
    // When inlined, prevent code motion around this call
    __notify_zc_intrinsic((void*) "test_getticks_start", 0);
#endif

#ifdef _WIN32
    // Milliseconds elapsed since the system started
    ticks = GetTickCount();
#elif defined(__unix__) || defined(__APPLE__) || defined __VXWORKS__
    // Milliseconds elapsed since the Unix Epoch
    // (1-Jan-1970 00:00:00.000 UTC)
    {
        struct timeval t;
        gettimeofday(&t, 0);
        ticks = t.tv_sec * 1000ULL + t.tv_usec / 1000;
    }
#else
#   error test_getticks() not implemented for this OS
#endif

#if __INTEL_COMPILER > 1200
    // When inlined, prevent code motion around this call.  This marker has
    // to be emitted before the return statement or it is never executed.
    __notify_zc_intrinsic((void*) "test_getticks_end", 0);
#endif

    return ticks;
}
|
||||
|
||||
typedef struct
|
||||
{
|
||||
unsigned int size; // Size of structure in bytes
|
||||
unsigned int status; // 1 = success, 0 = failure
|
||||
unsigned long long time; // Time in milliseconds
|
||||
unsigned long long work;
|
||||
unsigned long long span;
|
||||
unsigned long long burdened_span;
|
||||
unsigned long long spawns;
|
||||
unsigned long long syncs;
|
||||
unsigned long long strands;
|
||||
unsigned long long atomic_ins;
|
||||
unsigned long long frames;
|
||||
} cilkview_data_t;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
cilkview_data_t *start; // Values at start of interval
|
||||
cilkview_data_t *end; // Values at end of interval
|
||||
const char *label; // Name for this interval
|
||||
unsigned int flags; // What to do - see flags below
|
||||
} cilkview_report_t;
|
||||
|
||||
// What __cilkview_report should do. The flags can be ORed together
|
||||
enum
|
||||
{
|
||||
CV_REPORT_WRITE_TO_LOG = 1, // Write parallelism report to the log (xml or text)
|
||||
CV_REPORT_WRITE_TO_RESULTS = 2 // Write parallelism data to results file
|
||||
};
|
||||
|
||||
#ifndef CILKVIEW_NO_REPORT
|
||||
static void __cilkview_do_report(cilkview_data_t *start,
|
||||
cilkview_data_t *end,
|
||||
const char *label,
|
||||
unsigned int flags);
|
||||
#endif /* CILKVIEW_NO_REPORT */
|
||||
|
||||
/*
|
||||
* Metacall data
|
||||
*
|
||||
* A metacall is a way to pass data to a function implemented by a tool.
|
||||
* Metacalls are always instrumented when the tool is loaded.
|
||||
*/
|
||||
|
||||
// Tool code for Cilkview
|
||||
#define METACALL_TOOL_CILKVIEW 2
|
||||
|
||||
// Metacall codes implemented by Cilkview
|
||||
enum
|
||||
{
|
||||
CV_METACALL_PUTS,
|
||||
CV_METACALL_QUERY,
|
||||
CV_METACALL_START,
|
||||
CV_METACALL_STOP,
|
||||
CV_METACALL_RESET,
|
||||
CV_METACALL_USE_DEFAULT_GRAIN,
|
||||
CV_METACALL_CONNECTED,
|
||||
CV_METACALL_SUSPEND,
|
||||
CV_METACALL_RESUME,
|
||||
CV_METACALL_REPORT
|
||||
};
|
||||
|
||||
/*
 * Issue a Cilkview metacall.
 *
 * When built with the Intel compiler and CILK_STUB is not defined, this
 * forwards to __cilkrts_metacall() with the Cilkview tool code.  Otherwise
 * both arguments are evaluated once and their values discarded, so that
 * side effects in the arguments still happen and no "unused value"
 * diagnostics are produced.
 */
#if ! defined(CILK_STUB) && defined(__INTEL_COMPILER)
#   define __cilkview_metacall(code,data) \
        __cilkrts_metacall(METACALL_TOOL_CILKVIEW, code, data)
#else
#   define __cilkview_metacall(annotation,expr) \
        ((void)(annotation), (void)(expr))
#endif
|
||||
|
||||
// Write arbitrary string to the log
|
||||
#define __cilkview_puts(arg) \
|
||||
__cilkview_metacall(CV_METACALL_PUTS, arg)
|
||||
|
||||
// Retrieve the Cilkview performance counters.  The parameter must be an
// lvalue of type cilkview_data_t (the object itself, not a pointer to it).
// The macro records the structure size, clears status, and issues the
// CV_METACALL_QUERY metacall.  If status is still 0 afterwards, only the
// time field is filled in, from __cilkview_getticks().
// The argument is parenthesized at every use so that expressions such as
// *ptr or arr[i] can be passed.
#define __cilkview_query(d)                             \
    do {                                                \
        (d).size = sizeof(d);                           \
        (d).status = 0;                                 \
        __cilkview_metacall(CV_METACALL_QUERY, &(d));   \
        if (0 == (d).status)                            \
            (d).time = __cilkview_getticks();           \
    } while (0)
|
||||
|
||||
// Write report to log or results file. If end is NULL, Cilkview will
|
||||
// use the current values.
|
||||
#define __cilkview_report(start, end, label, flags) \
|
||||
__cilkview_do_report(start, end, label, flags)
|
||||
|
||||
// Control the workspan performance counters for the final report
|
||||
#define __cilkview_workspan_start() \
|
||||
__cilkview_metacall(CV_METACALL_START, 0)
|
||||
#define __cilkview_workspan_stop() \
|
||||
__cilkview_metacall(CV_METACALL_STOP, 0)
|
||||
#define __cilkview_workspan_reset() \
|
||||
__cilkview_metacall(CV_METACALL_RESET, 0)
|
||||
#define __cilkview_workspan_suspend() \
|
||||
__cilkview_metacall(CV_METACALL_SUSPEND, 0)
|
||||
#define __cilkview_workspan_resume() \
|
||||
__cilkview_metacall(CV_METACALL_RESUME, 0)
|
||||
|
||||
// Issue the CV_METACALL_USE_DEFAULT_GRAIN metacall.  The code named here
// must match the enumerator declared with the other Cilkview metacall codes.
#define __cilkview_use_default_grain_size() \
    __cilkview_metacall(CV_METACALL_USE_DEFAULT_GRAIN, 0)
|
||||
|
||||
// Sets the int is_connected to 1 if Cilkview is active
|
||||
#define __cilkview_connected(is_connected) \
|
||||
__cilkview_metacall(CV_METACALL_CONNECTED, &is_connected)
|
||||
|
||||
|
||||
#ifndef CILKVIEW_NO_REPORT
|
||||
|
||||
// Stop Microsoft include files from complaining about getenv and fopen
|
||||
#define _CRT_SECURE_NO_WARNINGS
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef _WIN32
|
||||
#pragma warning(push)
|
||||
#pragma warning(disable: 1786) // Suppress warnings that getenv, fopen are deprecated
|
||||
#endif
|
||||
|
||||
/*
 * Produce a Cilkview report for one measured interval.
 *
 * start  Counter values at the start of the interval; start->time is read
 *        when the fallback path below is taken.
 * end    Counter values at the end of the interval, or NULL to use the
 *        current tick count as the end time on the fallback path.
 * label  Name for this interval; written as the first field of the trial
 *        line.
 * flags  Bitwise OR of the CV_REPORT_* values.
 *
 * If the CV_METACALL_CONNECTED metacall sets under_cilkview, the request is
 * forwarded as a CV_METACALL_REPORT metacall and nothing else is done.
 * Otherwise, and only when CV_REPORT_WRITE_TO_RESULTS is set, one line of
 * the form "<label> trial <workers> <seconds>" is appended to the file named
 * by the CILKVIEW_OUTFILE environment variable ("cilkview.out" when unset).
 */
static void __cilkview_do_report(cilkview_data_t *start,
                                 cilkview_data_t *end,
                                 const char *label,
                                 unsigned int flags)
{
    int under_cilkview = 0;
    unsigned long long elapsed_ms;
    int worker_count = 0;
    const char *nworkers;
    const char *outfile;
    FILE *f;

    // Check whether we're running under Cilkview
    __cilkview_connected(under_cilkview);

    // If we're running under Cilkview, let it do those things that need
    // to be done
    if (under_cilkview)
    {
        cilkview_report_t d = {start, end, label, flags};
        __cilkview_metacall(CV_METACALL_REPORT, &d);
        return;
    }

    // We're not running under Cilkview.
    //
    // If we weren't asked to write to the results file, we're done.
    if (0 == (flags & CV_REPORT_WRITE_TO_RESULTS))
        return;

    // Calculate the elapsed milliseconds
    if (NULL == end)
        elapsed_ms = __cilkview_getticks() - start->time;
    else
        elapsed_ms = end->time - start->time;

    // Determine how many workers we're using for this trial run.  An unset,
    // unparsable or zero CILK_NWORKERS falls back to 16.
    nworkers = getenv("CILK_NWORKERS");
    if (NULL != nworkers)
        worker_count = atoi(nworkers);
    if (0 == worker_count)
        worker_count = 16;

    // Open the output file and write the trial data to it
    outfile = getenv("CILKVIEW_OUTFILE");
    if (NULL == outfile)
        outfile = "cilkview.out";

    f = fopen(outfile, "a");
    if (NULL == f)
    {
        fprintf(stderr, "__cilkview_do_report: unable to append to file %s\n", outfile);
        return;
    }

    // Convert in double precision: a float cannot hold every millisecond
    // count exactly and adds spurious digits to the printed seconds.
    fprintf(f, "%s trial %d %f\n", label,
            worker_count,
            (double)elapsed_ms / 1000.0);
    fclose(f);
}
|
||||
#ifdef _WIN32
|
||||
#pragma warning(pop)
|
||||
#endif
|
||||
|
||||
#endif // CILKVIEW_NO_REPORT
|
||||
|
||||
|
||||
#endif /* ! defined(INCLUDED_CILKVIEW_H) */
|
92
libcilkrts/include/cilktools/fake_mutex.h
Normal file
92
libcilkrts/include/cilktools/fake_mutex.h
Normal file
|
@ -0,0 +1,92 @@
|
|||
/* fake_mutex.h -*-C++-*-
|
||||
*
|
||||
*************************************************************************
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
**************************************************************************
|
||||
*
|
||||
* Cilkscreen fake mutexes are provided to indicate to the Cilkscreen race
|
||||
* detector that a race should be ignored.
|
||||
*
|
||||
* NOTE: This class does not provide mutual exclusion. You should use the
|
||||
* mutual exclusion constructs provided by TBB or your operating system to
|
||||
* protect against real data races.
|
||||
*/
|
||||
|
||||
#ifndef FAKE_MUTEX_H_INCLUDED
|
||||
#define FAKE_MUTEX_H_INCLUDED
|
||||
|
||||
#include <cilktools/cilkscreen.h>
|
||||
|
||||
namespace cilkscreen
|
||||
{
|
||||
class fake_mutex
|
||||
{
|
||||
public:
|
||||
fake_mutex() : locked(false)
|
||||
{
|
||||
}
|
||||
|
||||
~fake_mutex()
|
||||
{
|
||||
__CILKRTS_ASSERT(! locked);
|
||||
}
|
||||
|
||||
// Wait until mutex is available, then enter
|
||||
void lock()
|
||||
{
|
||||
__cilkscreen_acquire_lock(&locked);
|
||||
__CILKRTS_ASSERT(! locked);
|
||||
locked = true;
|
||||
}
|
||||
|
||||
// A fake mutex is always available
|
||||
bool try_lock() { lock(); return true; }
|
||||
|
||||
// Releases the mutex
|
||||
void unlock()
|
||||
{
|
||||
__CILKRTS_ASSERT(locked);
|
||||
locked = false;
|
||||
__cilkscreen_release_lock(&locked);
|
||||
}
|
||||
|
||||
private:
|
||||
bool locked;
|
||||
};
|
||||
|
||||
} // namespace cilkscreen
|
||||
|
||||
#endif // FAKE_MUTEX_H_INCLUDED
|
86
libcilkrts/include/cilktools/lock_guard.h
Normal file
86
libcilkrts/include/cilktools/lock_guard.h
Normal file
|
@ -0,0 +1,86 @@
|
|||
/* lock_guard.h -*-C++-*-
|
||||
*
|
||||
*************************************************************************
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2011-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
**************************************************************************
|
||||
*
|
||||
* Lock guard patterned after the std::lock_guard class template proposed in
|
||||
* the C++ 0x draft standard.
|
||||
*
|
||||
* An object of type lock_guard controls the ownership of a mutex object
|
||||
* within a scope. A lock_guard object maintains ownership of a mutex object
|
||||
* throughout the lock_guard object's lifetime. The behavior of a program is
|
||||
* undefined if the mutex referenced by pm does not exist for the entire
|
||||
* lifetime of the lock_guard object.
|
||||
*/
|
||||
|
||||
#ifndef LOCK_GUARD_H_INCLUDED
|
||||
#define LOCK_GUARD_H_INCLUDED
|
||||
|
||||
#include <cilk/cilk.h>
|
||||
|
||||
namespace cilkscreen
{
    /**
     * Scoped lock holder: locks the mutex on construction and unlocks it on
     * destruction.
     *
     * @tparam Mutex Any type providing lock() and unlock() member functions.
     *
     * The guard stores a reference to the mutex, so the mutex must outlive
     * the guard.  Guards are neither copyable nor assignable.
     */
    template <class Mutex>
    class lock_guard
    {
    public:
        typedef Mutex mutex_type;

        /** Acquire @a m and keep it locked for the lifetime of this object. */
        explicit lock_guard(mutex_type &m) : pm(m)
        {
            pm.lock();
            locked = true;
        }

        /** Release the mutex acquired by the constructor. */
        ~lock_guard()
        {
            locked = false;
            pm.unlock();
        }

    private:
        // Declared but not defined: copying a guard is not allowed.
        lock_guard(lock_guard const&);
        lock_guard& operator=(lock_guard const&);

    private:
        // exposition only:
        mutex_type &pm;     // the guarded mutex (reference, never reseated)
        bool        locked; // true between constructor and destructor bodies
    };
}
|
||||
|
||||
#endif // LOCK_GUARD_H_INCLUDED
|
639
libcilkrts/include/internal/abi.h
Normal file
639
libcilkrts/include/internal/abi.h
Normal file
|
@ -0,0 +1,639 @@
|
|||
/*
|
||||
* abi.h
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2009-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
******************************************************************************/
|
||||
|
||||
/**
|
||||
* @file abi.h
|
||||
*
|
||||
* @brief Defines the application binary interface between the compiler and
|
||||
* the Intel Cilk Plus runtime.
|
||||
*/
|
||||
|
||||
#ifndef CILK_INTERNAL_ABI_H
|
||||
#define CILK_INTERNAL_ABI_H
|
||||
|
||||
|
||||
#include <cilk/common.h>
|
||||
#include <stddef.h> // Needed to define size_t
|
||||
|
||||
/**
|
||||
* Jump buffers are OS and architecture dependent
|
||||
*/
|
||||
#if ! defined(_MSC_VER)
|
||||
/* Non-Windows - only need 5 registers for the jump buffer for both IA32 and Intel64 */
|
||||
typedef void *__CILK_JUMP_BUFFER[5];
|
||||
|
||||
/** OS-specific implementation of setjmp */
|
||||
# define CILK_SETJMP(X) __builtin_setjmp(X)
|
||||
/** OS-specific implementation of longjmp */
|
||||
# define CILK_LONGJMP(X) __builtin_longjmp(X,1)
|
||||
#else
|
||||
/* Windows - things are a little more complicated */
|
||||
# if defined(_M_X64)
|
||||
/* Intel64 - Use an OS-defined jump buffer */
|
||||
# include <setjmp.h>
|
||||
typedef jmp_buf __CILK_JUMP_BUFFER;
|
||||
|
||||
# define CILK_SETJMP(X) setjmp(X)
|
||||
# define CILK_LONGJMP(X) longjmp(X, 1)
|
||||
# elif defined(_M_IX86)
|
||||
/**
|
||||
* Windows x86 - Use a simplified version of the Windows jump buffer for x86
|
||||
* setjmp is provided by __cilkrts_setjmp which passes jump buffer in EAX and
|
||||
* destination in EDX. longjmp is provided by an internal routine which uses
|
||||
* this structure
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
unsigned long Ebp;
|
||||
unsigned long Ebx;
|
||||
unsigned long Edi;
|
||||
unsigned long Esi;
|
||||
unsigned long Esp;
|
||||
unsigned long Eip;
|
||||
unsigned long Registration;
|
||||
unsigned long TryLevel;
|
||||
} __CILK_JUMP_BUFFER;
|
||||
|
||||
# else
|
||||
# error Unexpected architecture - Need to define __CILK_JUMP_BUFFER
|
||||
# endif /* _M_X64 */
|
||||
|
||||
#endif /* defined(_MSC_VER) */
|
||||
|
||||
/* struct tags */
|
||||
typedef struct __cilkrts_stack_frame __cilkrts_stack_frame; ///< struct tag for stack frame
|
||||
|
||||
// Forwarded declarations
|
||||
typedef struct global_state_t global_state_t; ///< Forwarded declaration for global state
|
||||
typedef struct local_state local_state; ///< Forwarded declaration for local state
|
||||
typedef struct cilkred_map cilkred_map; ///< Forward declaration for reducer map
|
||||
|
||||
/// Forwarded declaration for system-dependent worker state
|
||||
typedef struct __cilkrts_worker_sysdep_state
|
||||
__cilkrts_worker_sysdep_state;
|
||||
|
||||
/**
|
||||
* The worker struct contains per-worker information that needs to be
|
||||
* visible to the compiler, or rooted here.
|
||||
*
|
||||
* For 32-bit Windows we need to be aligning the structures on 4-byte
|
||||
* boundaries to match where ICL is allocating the birthrank and rank
|
||||
* in the __cilkrts_stack_frame. It's 4-byte aligned instead of 8-byte
|
||||
* aligned. This is OK because the compiler is dealing with the 64-bit
|
||||
* quantities as two 32-bit values. So change the packing to be on
|
||||
* 4-byte boundaries.
|
||||
*
|
||||
* The fields of the worker struct can be classified as either local
|
||||
* or shared.
|
||||
*
|
||||
* Local: This field is only accessed by the thread bound to this
|
||||
* worker struct. Local fields can be freely accessed without
|
||||
* acquiring locks.
|
||||
*
|
||||
* Shared: This field may be accessed by multiple worker threads.
|
||||
* Accesses to shared fields usually requires locks, except in
|
||||
* special situations where one can prove that locks are
|
||||
* unnecessary.
|
||||
*
|
||||
* The fields of the worker struct can also be classified as
|
||||
* "read-only" if the field does not change after it is initialized.
|
||||
* Otherwise, the field is "read/write". Read-only fields do not
|
||||
* require locks to access (ignoring the synchronization that might be
|
||||
* needed for initialization if this can occur in parallel).
|
||||
*
|
||||
* Finally, we explicitly classify some fields as "synchronization"
|
||||
* fields if they are used as part of a synchronization protocol in
|
||||
* the runtime. These variables are generally shared and read/write.
|
||||
* Mostly, this category includes lock variables and other variables
|
||||
* that are involved in synchronization protocols (i.e., the THE
|
||||
* protocol).
|
||||
*/
|
||||
#if defined(_MSC_VER) && defined(_M_IX86)
|
||||
#pragma pack(push, 4)
|
||||
#endif
|
||||
|
||||
struct __cilkrts_worker {
|
||||
/**
|
||||
* T, H, and E pointers in the THE protocol. See "The implementation of
|
||||
* the Cilk-5 multithreaded language", PLDI 1998:
|
||||
* http://portal.acm.org/citation.cfm?doid=277652.277725
|
||||
*
|
||||
* Synchronization fields. [shared read/write]
|
||||
*/
|
||||
__cilkrts_stack_frame *volatile *volatile tail;
|
||||
__cilkrts_stack_frame *volatile *volatile head; /**< @copydoc tail */
|
||||
__cilkrts_stack_frame *volatile *volatile exc; /**< @copydoc tail */
|
||||
|
||||
/**
|
||||
* Addition to the THE protocol to allow us to protect some set of
|
||||
* entries in the tail queue from stealing. Normally, this is set
|
||||
* beyond the end of the task queue, indicating that all entries are
|
||||
* available for stealing. During exception handling, protected_tail
|
||||
* may be set to the first entry in the task queue, indicating that
|
||||
* stealing is not allowed.
|
||||
*
|
||||
* Synchronization field.
|
||||
*/
|
||||
__cilkrts_stack_frame *volatile *volatile protected_tail;
|
||||
|
||||
/**
|
||||
* Limit of the Lazy Task Queue, to detect queue overflow
|
||||
* [local read-only]
|
||||
*/
|
||||
__cilkrts_stack_frame *volatile *ltq_limit;
|
||||
|
||||
/**
|
||||
* Worker id.
|
||||
* [local read-only]
|
||||
*/
|
||||
int32_t self;
|
||||
|
||||
/**
|
||||
* Global state of the runtime system, opaque to the client.
|
||||
* [local read-only]
|
||||
*/
|
||||
global_state_t *g;
|
||||
|
||||
/**
|
||||
* Additional per-worker state of the runtime system that we want
|
||||
* to maintain hidden from the client.
|
||||
* [shared read-only]
|
||||
*/
|
||||
local_state *l;
|
||||
|
||||
/**
|
||||
* Map from reducer names to reducer values.
|
||||
* [local read/write]
|
||||
*/
|
||||
cilkred_map *reducer_map;
|
||||
|
||||
/**
|
||||
* A slot that points to the currently executing Cilk frame.
|
||||
* [local read/write]
|
||||
*/
|
||||
__cilkrts_stack_frame *current_stack_frame;
|
||||
|
||||
/**
|
||||
* Reserved space for a pointer.
|
||||
* Used to be __cilkrts_stack_frame *volatile *volatile saved_protected_tail;
|
||||
*/
|
||||
void* reserved;
|
||||
|
||||
/**
|
||||
* System-dependent part of the worker state
|
||||
* [local read-only]
|
||||
*/
|
||||
__cilkrts_worker_sysdep_state *sysdep;
|
||||
|
||||
#if __CILKRTS_ABI_VERSION >= 1
|
||||
/**
|
||||
* Per-worker pedigree information used to support scheduling-independent
|
||||
* pseudo-random numbers.
|
||||
* [local read/write]
|
||||
*/
|
||||
__cilkrts_pedigree pedigree;
|
||||
#endif /* __CILKRTS_ABI_VERSION >= 1 */
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Every spawning function has a frame descriptor. A spawning function
|
||||
* is a function that spawns or detaches. Only spawning functions
|
||||
* are visible to the Cilk runtime.
|
||||
*/
|
||||
struct __cilkrts_stack_frame
|
||||
{
|
||||
/**
|
||||
* flags is an integer with values defined below. Client code
|
||||
* initializes flags to CILK_FRAME_VERSION before the first Cilk
|
||||
* operation.
|
||||
*
|
||||
* The low 24-bits of the 'flags' field are the flags, proper. The high
|
||||
* 8-bits are the version number.
|
||||
*
|
||||
* IMPORTANT: bits in this word are set and read by the PARENT ONLY,
|
||||
* not by a spawned child. In particular, the STOLEN and UNSYNCHED
|
||||
* bits are set on a steal and are read before a sync. Since there
|
||||
* is no synchronization (locking) on this word, any attempt to set
|
||||
* or read these bits asynchronously in a child would result in a race.
|
||||
*/
|
||||
uint32_t flags;
|
||||
|
||||
/** Not currently used. Not initialized by Intel compiler. */
|
||||
int32_t size;
|
||||
|
||||
/**
|
||||
* call_parent points to the __cilkrts_stack_frame of the closest
|
||||
* ancestor spawning function, including spawn helpers, of this frame.
|
||||
* It forms a linked list ending at the first stolen frame.
|
||||
*/
|
||||
__cilkrts_stack_frame *call_parent;
|
||||
|
||||
/**
|
||||
* The client copies the worker from TLS here when initializing
|
||||
* the structure. The runtime ensures that the field always points
|
||||
* to the __cilkrts_worker which currently "owns" the frame.
|
||||
*/
|
||||
__cilkrts_worker *worker;
|
||||
|
||||
/**
|
||||
* Unix: Pending exception after sync. The sync continuation
|
||||
* must call __cilkrts_rethrow to handle the pending exception.
|
||||
*
|
||||
* Windows: the handler that _would_ have been registered if our
|
||||
* handler were not there. We maintain this for unwinding purposes.
|
||||
* Win32: the value of this field is only defined in spawn helper
|
||||
* functions
|
||||
*
|
||||
* Win64: except_data must be filled in for all functions with a
|
||||
* __cilkrts_stack_frame
|
||||
*/
|
||||
void *except_data;
|
||||
|
||||
/**
|
||||
* Before every spawn and nontrivial sync the client function
|
||||
* saves its continuation here.
|
||||
*/
|
||||
__CILK_JUMP_BUFFER ctx;
|
||||
|
||||
#if __CILKRTS_ABI_VERSION >= 1
|
||||
/**
|
||||
* Architecture-specific floating point state. mxcsr and fpcsr should be
|
||||
* set when CILK_SETJMP is called in client code. Note that the Win64
|
||||
* jmpbuf for the Intel64 architecture already contains this information
|
||||
* so there is no need to use these fields on that OS/architecture.
|
||||
*/
|
||||
uint32_t mxcsr;
|
||||
uint16_t fpcsr; /**< @copydoc mxcsr */
|
||||
|
||||
|
||||
/**
|
||||
* reserved is not used at this time. Client code should initialize it
|
||||
* to 0 before the first Cilk operation
|
||||
*/
|
||||
uint16_t reserved;
|
||||
|
||||
/**
|
||||
* Pedigree information to support scheduling-independent pseudo-random
|
||||
* numbers. There are two views of this information. The copy in a
|
||||
* spawning function is used to stack the rank and communicate to the
|
||||
* runtime on a steal or continuation. The copy in a spawn helper is
|
||||
* immutable once the function is detached and is a node in the pedigree.
|
||||
* The union is used to make clear which view we're using.
|
||||
*
|
||||
* In the detach sequence Client code should:
|
||||
* - copy the worker pedigree into the spawn helper's pedigree
|
||||
* - copy the worker pedigree into the call parent's pedigree
|
||||
* - set the worker's rank to 0
|
||||
* - set the worker's pedigree.parent to the spawn helper's pedigree
|
||||
*/
|
||||
union
|
||||
{
|
||||
__cilkrts_pedigree spawn_helper_pedigree; /* Used in spawn helpers */
|
||||
__cilkrts_pedigree parent_pedigree; /* Used in spawning funcs */
|
||||
};
|
||||
#endif /* __CILKRTS_ABI_VERSION >= 1 */
|
||||
};
|
||||
|
||||
/*
|
||||
* Restore previous structure packing for 32-bit Windows
|
||||
*/
|
||||
#if defined(_MSC_VER) && defined(_M_IX86)
|
||||
#pragma pack(pop)
|
||||
#endif
|
||||
|
||||
/* Values of the flags bitfield */
|
||||
/** CILK_FRAME_STOLEN is set if the frame has ever been stolen. */
|
||||
#define CILK_FRAME_STOLEN 0x01
|
||||
|
||||
/**
|
||||
* CILK_FRAME_UNSYNCHED is set if the frame has been stolen and
|
||||
* has not yet executed _Cilk_sync. It is technically a misnomer in that a
|
||||
* frame can have this flag set even if all children have returned.
|
||||
*/
|
||||
#define CILK_FRAME_UNSYNCHED 0x02
|
||||
|
||||
/**
|
||||
* Is this frame detached (spawned)? If so the runtime needs
|
||||
* to undo-detach in the slow path epilogue.
|
||||
*/
|
||||
#define CILK_FRAME_DETACHED 0x04
|
||||
|
||||
/**
|
||||
* CILK_FRAME_EXCEPTION_PROBED is set if the frame has been probed in the
|
||||
* exception handler first pass
|
||||
*/
|
||||
#define CILK_FRAME_EXCEPTION_PROBED 0x08
|
||||
|
||||
/** Is this frame receiving an exception after sync? */
|
||||
#define CILK_FRAME_EXCEPTING 0x10
|
||||
|
||||
/**
|
||||
* Is the pedigree unsynched? That is, has a synch occurred that is not
|
||||
* yet represented in the pedigree?
|
||||
*/
|
||||
#define CILK_FRAME_SF_PEDIGREE_UNSYNCHED 0x20
|
||||
|
||||
/** Is this the last (oldest) Cilk frame? */
|
||||
#define CILK_FRAME_LAST 0x80
|
||||
|
||||
/**
|
||||
* Is this frame in the epilogue, or more generally after the last
|
||||
* sync when it can no longer do any Cilk operations?
|
||||
*/
|
||||
#define CILK_FRAME_EXITING 0x0100
|
||||
|
||||
/** Is this frame suspended? (used for debugging) */
|
||||
#define CILK_FRAME_SUSPENDED 0x8000
|
||||
|
||||
/** Used by Windows exception handling to indicate that __cilkrts_leave_frame should do nothing */
|
||||
#define CILK_FRAME_UNWINDING 0x10000
|
||||
|
||||
/*
|
||||
* The low 24-bits of the 'flags' field are the flags, proper. The high 8-bits
|
||||
* are the version number.
|
||||
*/
|
||||
|
||||
/** ABI version left shifted to the high byte */
|
||||
#define CILK_FRAME_VERSION (__CILKRTS_ABI_VERSION << 24)
|
||||
|
||||
/** Mask for the flags field to isolate the version bits */
|
||||
#define CILK_FRAME_VERSION_MASK 0xFF000000
|
||||
|
||||
/** Mask for the flags field to isolate the flag bits */
|
||||
#define CILK_FRAME_FLAGS_MASK 0x00FFFFFF
|
||||
|
||||
/** Convenience macro to provide access the version portion of the flags field */
|
||||
#define CILK_FRAME_VERSION_VALUE(_flags) (((_flags) & CILK_FRAME_VERSION_MASK) >> 24)
|
||||
|
||||
/** Any undefined bits are reserved and must be zero ("MBZ" = "Must Be Zero") */
|
||||
#define CILK_FRAME_MBZ (~ (CILK_FRAME_STOLEN | \
|
||||
CILK_FRAME_UNSYNCHED | \
|
||||
CILK_FRAME_DETACHED | \
|
||||
CILK_FRAME_EXCEPTION_PROBED | \
|
||||
CILK_FRAME_EXCEPTING | \
|
||||
CILK_FRAME_SF_PEDIGREE_UNSYNCHED | \
|
||||
CILK_FRAME_LAST | \
|
||||
CILK_FRAME_EXITING | \
|
||||
CILK_FRAME_SUSPENDED | \
|
||||
CILK_FRAME_UNWINDING | \
|
||||
CILK_FRAME_VERSION_MASK))
|
||||
|
||||
__CILKRTS_BEGIN_EXTERN_C
|
||||
|
||||
/**
|
||||
* Call __cilkrts_enter_frame to initialize an ABI 0 frame descriptor.
|
||||
* Initialize the frame descriptor before spawn or detach. A function that
|
||||
* conditionally does Cilk operations need not initialize the frame descriptor
|
||||
* in a code path that never uses it.
|
||||
*
|
||||
* @param sf The __cilkrts_stack_frame that is to be initialized.
|
||||
*/
|
||||
CILK_ABI(void) __cilkrts_enter_frame(__cilkrts_stack_frame* sf);
|
||||
|
||||
/**
|
||||
* Call __cilkrts_enter_frame_1 to initialize an ABI 1 frame descriptor.
|
||||
* Initialize the frame descriptor before spawn or detach. A function that
|
||||
* conditionally does Cilk operations need not initialize the frame descriptor
|
||||
* in a code path that never uses it.
|
||||
*
|
||||
* @param sf The __cilkrts_stack_frame that is to be initialized.
|
||||
*/
|
||||
CILK_ABI(void) __cilkrts_enter_frame_1(__cilkrts_stack_frame* sf);
|
||||
|
||||
/**
|
||||
* __cilkrts_enter_frame_fast is the same as __cilkrts_enter_frame, except it
|
||||
* assumes that the thread has already been bound to a worker.
|
||||
*
|
||||
* @param sf The __cilkrts_stack_frame that is to be initialized.
|
||||
*/
|
||||
CILK_ABI(void) __cilkrts_enter_frame_fast(__cilkrts_stack_frame *sf);
|
||||
|
||||
/**
|
||||
* __cilkrts_enter_frame_fast_1 is the same as __cilkrts_enter_frame_1,
|
||||
* except it assumes that the thread has already been bound to a worker.
|
||||
*
|
||||
* @param sf The __cilkrts_stack_frame that is to be initialized.
|
||||
*/
|
||||
CILK_ABI(void) __cilkrts_enter_frame_fast_1(__cilkrts_stack_frame *sf);
|
||||
|
||||
/**
|
||||
* Call leave_frame before leaving a frame, after sync. This function
|
||||
* returns except in a spawn wrapper where the parent has been stolen.
|
||||
*
|
||||
* @param sf The __cilkrts_stack_frame that is to be left.
|
||||
*/
|
||||
CILK_ABI(void) __cilkrts_leave_frame(__cilkrts_stack_frame *sf);
|
||||
|
||||
/**
|
||||
* Wait for any spawned children of this function to complete before
|
||||
* continuing. This function will only return when the join counter
|
||||
* has gone to 0. Other workers will re-enter the scheduling loop to
|
||||
* attempt to steal additional work.
|
||||
*
|
||||
* @param sf The __cilkrts_stack_frame that is to be synched.
|
||||
*/
|
||||
CILK_ABI(void) __cilkrts_sync(__cilkrts_stack_frame *sf);
|
||||
|
||||
/**
|
||||
* Called when an exception is escaping a spawn wrapper.
|
||||
* The stack frame's except_data field is the C++ runtime
|
||||
* exception object. If NULL (temporary workaround) the
|
||||
* currently caught exception should be rethrown. If this
|
||||
* function returns normal exit functions must be called;
|
||||
* undo-detach will have been done.
|
||||
*
|
||||
* @param sf The __cilkrts_stack_frame for the function that
|
||||
* is raising an exception.
|
||||
*/
|
||||
CILK_ABI_THROWS(void)
|
||||
__cilkrts_return_exception(__cilkrts_stack_frame *sf);
|
||||
|
||||
/**
|
||||
* Called to re-raise an exception.
|
||||
*
|
||||
* @param sf The __cilkrts_stack_frame for the function that
|
||||
* is raising an exception.
|
||||
*/
|
||||
CILK_ABI_THROWS(void) __cilkrts_rethrow(__cilkrts_stack_frame *sf);
|
||||
|
||||
/**
|
||||
* Called at the beginning of a spawning function to get the worker
|
||||
* that this function is running on. This worker will be used to
|
||||
* initialize the __cilkrts_stack_frame.
|
||||
*
|
||||
* @return The __cilkrts_worker that the function is running on.
|
||||
* @return NULL if this thread is not yet bound to a worker.
|
||||
*/
|
||||
CILK_ABI(__cilkrts_worker_ptr) __cilkrts_get_tls_worker(void);
|
||||
|
||||
/**
|
||||
* Similar to __cilkrts_get_tls_worker, but assumes that TLS has been
|
||||
* initialized.
|
||||
*
|
||||
* @return The __cilkrts_worker that the function is running on.
|
||||
* @return NULL if this thread is not yet bound to a worker.
|
||||
*/
|
||||
CILK_ABI(__cilkrts_worker_ptr) __cilkrts_get_tls_worker_fast(void);
|
||||
|
||||
/**
|
||||
* Binds a thread to the runtime by associating a __cilkrts_worker with
|
||||
* it. Called if __cilkrts_get_tls_worker returns NULL. This function will
|
||||
* initialize the runtime the first time it is called.
|
||||
*
|
||||
* This function is versioned by the ABI version number. The runtime
|
||||
* will export all previous versions. This prevents using an application
|
||||
* built with a newer compiler against an old runtime.
|
||||
*
|
||||
* @return The __cilkrts_worker bound to the thread the function is running
|
||||
* on.
|
||||
*/
|
||||
CILK_ABI(__cilkrts_worker_ptr) __cilkrts_bind_thread_1(void);
|
||||
|
||||
typedef uint32_t cilk32_t; /**< 32-bit unsigned type for cilk_for loop indices */
|
||||
|
||||
typedef uint64_t cilk64_t; /**< 64-bit unsigned type for cilk_for loop indices */
|
||||
|
||||
/**
|
||||
* Signature for the lambda function generated for the body of a cilk_for loop
|
||||
* which uses 32-bit indices
|
||||
*/
|
||||
typedef void (*__cilk_abi_f32_t)(void *data, cilk32_t low, cilk32_t high);
|
||||
|
||||
/**
|
||||
* Signature for the lambda function generated for the body of a cilk_for loop
|
||||
* which uses 64-bit indices
|
||||
*/
|
||||
typedef void (*__cilk_abi_f64_t)(void *data, cilk64_t low, cilk64_t high);
|
||||
|
||||
/**
|
||||
* @brief cilk_for implementation for 32-bit indexes.
|
||||
*
|
||||
* @param body The lambda function for the body of the cilk_for. The lambda
|
||||
* function will be called to execute each grain of work.
|
||||
* @param data Data passed by the compiler into the lambda function. Provides
|
||||
* access to data outside the cilk_for body.
|
||||
* @param count Number of steps in the loop.
|
||||
* @param grain This parameter allows the compiler to pass a value from a
|
||||
* \#pragma(grainsize) statement to allow the user to control the grainsize. If
|
||||
* there isn't a \#pragma(grainsize) immediately preceding the cilk_for loop, pass
|
||||
* 0 to specify that the runtime should calculate the grainsize using its own
|
||||
* heuristics.
|
||||
*/
|
||||
CILK_ABI_THROWS(void) __cilkrts_cilk_for_32(__cilk_abi_f32_t body,
|
||||
void *data,
|
||||
cilk32_t count,
|
||||
int grain);
|
||||
|
||||
/**
|
||||
* @brief cilk_for implementation for 64-bit indexes.
|
||||
*
|
||||
* @copydetails __cilkrts_cilk_for_32
|
||||
*/
|
||||
CILK_ABI_THROWS(void) __cilkrts_cilk_for_64(__cilk_abi_f64_t body,
|
||||
void *data,
|
||||
cilk64_t count,
|
||||
int grain);
|
||||
|
||||
/**
|
||||
* @brief Allocate memory for variable length arrays. If the frame is
|
||||
* sync'd, the memory will be allocated on the stack, otherwise it will
|
||||
* be allocated from the heap.
|
||||
*
|
||||
* @param sf The __cilkrts_stack_frame for the function allocating the
|
||||
* memory.
|
||||
* @param size The number of bytes requested.
|
||||
* @param distance_from_sp_to_alloca_area ?.
|
||||
* @param align Alignment required. Always >= minimum stack alignment,
|
||||
* >= ptr_size, and always a power of 2.
|
||||
* @param needs_tag Non-zero if the pointer being returned needs to be
|
||||
* tagged
|
||||
*
|
||||
* @return The address of the memory block allocated.
|
||||
*/
|
||||
|
||||
CILK_ABI(__cilkrts_void_ptr)
|
||||
__cilkrts_stack_alloc(__cilkrts_stack_frame *sf,
|
||||
size_t size,
|
||||
size_t distance_from_sp_to_alloca_area,
|
||||
uint32_t align,
|
||||
uint32_t needs_tag);
|
||||
|
||||
/**
|
||||
* @brief Free memory allocated by __cilkrts_stack_alloc() for variable length
|
||||
* arrays.
|
||||
*
|
||||
* @param sf The __cilkrts_stack_frame for the function allocating the
|
||||
* memory.
|
||||
* @param p Pointer to the memory block to be freed.
|
||||
* @param size The number of bytes requested.
|
||||
* @param distance_from_sp_to_alloca_area ?.
|
||||
* @param align Alignment required. Always >= minimum stack alignment,
|
||||
* >= ptr_size, and always a power of 2.
|
||||
* @param known_from_stack Non-zero if the pointer is known to have been
|
||||
* allocated on the stack and has no tag.
|
||||
*/
|
||||
CILK_ABI(void)
|
||||
__cilkrts_stack_free(__cilkrts_stack_frame *sf,
|
||||
void *p,
|
||||
size_t size,
|
||||
size_t distance_from_sp_to_alloca_area,
|
||||
uint32_t align,
|
||||
uint32_t known_from_stack);
|
||||
|
||||
/**
|
||||
* @brief System-dependent code to save floating point control information
|
||||
* to an ABI 1 or higher @c __cilkrts_stack_frame. If possible (and necessary)
|
||||
* the code to save the floating point control information should be inlined.
|
||||
*
|
||||
* Note that this function does *not* save the current floating point
|
||||
* registers. It saves the floating point control words that control
|
||||
* precision and rounding and stuff like that.
|
||||
*
|
||||
* This function will be a noop for architectures that don't have warts
|
||||
* like the floating point control words, or where the information is
|
||||
* already being saved by the setjmp.
|
||||
*
|
||||
* @param sf @c __cilkrts_stack_frame for the frame we're saving the
|
||||
* floating point control information in.
|
||||
*/
|
||||
CILK_ABI(void)
|
||||
__cilkrts_save_fp_ctrl_state(__cilkrts_stack_frame *sf);
|
||||
|
||||
__CILKRTS_END_EXTERN_C
|
||||
#endif /* include guard */
|
477
libcilkrts/include/internal/cilk_fake.h
Normal file
477
libcilkrts/include/internal/cilk_fake.h
Normal file
|
@ -0,0 +1,477 @@
|
|||
/* cilk_fake.h -*-C++-*-
|
||||
*
|
||||
*************************************************************************
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2011-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
**************************************************************************/
|
||||
|
||||
/**
|
||||
* @file cilk_fake.h
|
||||
*
|
||||
* @brief Macros to simulate a compiled Cilk program.
|
||||
*
|
||||
* Used carefully, these macros can be used to create a Cilk program with a
|
||||
* non-Cilk compiler by manually inserting the code necessary for interacting
|
||||
* with the Cilk runtime library. They are not intended to be pretty (you
|
||||
* wouldn't want to write a whole program using these macros), but they are
|
||||
* useful for experiments. They also work well as an illustration of what the
|
||||
* compiler generates.
|
||||
*
|
||||
* Details of the mechanisms used in these macros are described in
|
||||
* design-notes/CilkPlusABI.docx
|
||||
*
|
||||
* Example 1: fib in C++
|
||||
* ---------------------
|
||||
*
|
||||
* #include <internal/cilk_fake.h>
|
||||
*
|
||||
* int fib(int n)
|
||||
* {
|
||||
* CILK_FAKE_PROLOG();
|
||||
*
|
||||
* if (n < 2)
|
||||
* return n;
|
||||
*
|
||||
* int a, b;
|
||||
* CILK_FAKE_SPAWN_R(a, fib(n - 1));
|
||||
* b = fib(n - 2);
|
||||
* CILK_FAKE_SYNC();
|
||||
*
|
||||
* return a + b;
|
||||
* }
|
||||
*
|
||||
*
|
||||
* Example 2: fib in C
|
||||
* -------------------
|
||||
*
|
||||
* #include <internal/cilk_fake.h>
|
||||
*
|
||||
* int fib(int n);
|
||||
*
|
||||
* void fib_spawn_helper(__cilkrts_stack_frame* parent_sf, int* a, int n)
|
||||
* {
|
||||
* CILK_FAKE_SPAWN_HELPER_PROLOG(*parent_sf);
|
||||
* *a = fib(n - 1);
|
||||
* CILK_FAKE_SPAWN_HELPER_EPILOG();
|
||||
* }
|
||||
*
|
||||
* int fib(int n)
|
||||
* {
|
||||
* CILK_FAKE_PROLOG();
|
||||
*
|
||||
* if (n < 2)
|
||||
* return n;
|
||||
*
|
||||
* int a, b;
|
||||
* CILK_FAKE_CALL_SPAWN_HELPER(fib_spawn_helper(&__cilk_sf, &a, n));
|
||||
* b = fib(n - 2);
|
||||
* CILK_FAKE_SYNC();
|
||||
*
|
||||
* CILK_FAKE_EPILOG();
|
||||
* return a + b;
|
||||
* }
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_CILK_FAKE_DOT_H
|
||||
#define INCLUDED_CILK_FAKE_DOT_H
|
||||
|
||||
// This header implements ABI version 1. If __CILKRTS_ABI_VERSION is already
|
||||
// defined but is less than 1, then the data structures in <internal/abi.h>
|
||||
// will not match the expectations of facilities in this header. Therefore,
|
||||
// for successful compilation, __CILKRTS_ABI_VERSION must either be not
|
||||
// defined, or defined to be 1 or greater.
|
||||
#ifndef __CILKRTS_ABI_VERSION
    // ABI version was not specified.  Default to 1, the version this header
    // implements.
#   define __CILKRTS_ABI_VERSION 1
#elif __CILKRTS_ABI_VERSION < 1
    // ABI version was specified but is too old for the structures used by
    // this header.  Fail compilation.
#   error cilk_fake.h requires an ABI version of 1 or greater
#endif
|
||||
|
||||
#include <internal/abi.h>
|
||||
|
||||
// alloca is defined in malloc.h on Windows, alloca.h on Linux
|
||||
#ifndef _MSC_VER
|
||||
#include <alloca.h>
|
||||
#else
|
||||
#include <malloc.h>
|
||||
// Define offsetof
|
||||
#include <stddef.h>
|
||||
#endif
|
||||
|
||||
// Allows use of a different version than the one defined in abi.h
|
||||
#define CILK_FAKE_VERSION_FLAG (__CILKRTS_ABI_VERSION << 24)
|
||||
|
||||
/* Initialize frame. To be called when worker is known */
|
||||
__CILKRTS_INLINE void __cilk_fake_enter_frame_fast(__cilkrts_stack_frame *sf,
|
||||
__cilkrts_worker *w)
|
||||
{
|
||||
sf->call_parent = w->current_stack_frame;
|
||||
sf->worker = w;
|
||||
sf->flags = CILK_FAKE_VERSION_FLAG;
|
||||
w->current_stack_frame = sf;
|
||||
}
|
||||
|
||||
/* Initialize frame. To be called when worker is not known */
|
||||
__CILKRTS_INLINE void __cilk_fake_enter_frame(__cilkrts_stack_frame *sf)
|
||||
{
|
||||
__cilkrts_worker* w = __cilkrts_get_tls_worker();
|
||||
uint32_t last_flag = 0;
|
||||
if (! w) {
|
||||
w = __cilkrts_bind_thread_1();
|
||||
last_flag = CILK_FRAME_LAST;
|
||||
}
|
||||
__cilk_fake_enter_frame_fast(sf, w);
|
||||
sf->flags |= last_flag;
|
||||
}
|
||||
|
||||
/* Initialize frame. To be called within the spawn helper */
|
||||
__CILKRTS_INLINE void __cilk_fake_helper_enter_frame(
|
||||
__cilkrts_stack_frame *sf,
|
||||
__cilkrts_stack_frame *parent_sf)
|
||||
{
|
||||
sf->worker = 0;
|
||||
sf->call_parent = parent_sf;
|
||||
}
|
||||
|
||||
/* Called from the spawn helper to push the parent continuation on the task
|
||||
* deque so that it can be stolen.
|
||||
*/
|
||||
__CILKRTS_INLINE void __cilk_fake_detach(__cilkrts_stack_frame *sf)
|
||||
{
|
||||
/* Initialize spawn helper frame.
|
||||
* call_parent was saved in __cilk_fake_helper_enter_frame */
|
||||
__cilkrts_stack_frame *parent = sf->call_parent;
|
||||
__cilkrts_worker *w = parent->worker;
|
||||
__cilk_fake_enter_frame_fast(sf, w);
|
||||
|
||||
/* Append a node to the pedigree */
|
||||
sf->spawn_helper_pedigree = w->pedigree;
|
||||
parent->parent_pedigree = w->pedigree;
|
||||
w->pedigree.rank = 0;
|
||||
w->pedigree.parent = &sf->spawn_helper_pedigree;
|
||||
|
||||
/* Push parent onto the task deque */
|
||||
__cilkrts_stack_frame *volatile *tail = w->tail;
|
||||
*tail++ = sf->call_parent;
|
||||
/* The stores must be separated by a store fence (noop on x86)
|
||||
* or the second store is a release (st8.rel on Itanium) */
|
||||
w->tail = tail;
|
||||
sf->flags |= CILK_FRAME_DETACHED;
|
||||
}
|
||||
|
||||
/* This variable is used in CILK_FAKE_FORCE_FRAME_PTR(), below */
|
||||
static int __cilk_fake_dummy = 8;
|
||||
|
||||
/* The following macro is used to force the compiler into generating a frame
|
||||
* pointer. We never change the value of __cilk_fake_dummy, so the alloca()
|
||||
* is never called, but we need the 'if' statement and the __cilk_fake_dummy
|
||||
* variable so that the compiler does not attempt to optimize it away.
|
||||
*/
|
||||
#define CILK_FAKE_FORCE_FRAME_PTR(sf) do { \
|
||||
if (__builtin_expect(1 & __cilk_fake_dummy, 0)) \
|
||||
(sf).worker = (__cilkrts_worker*) alloca(__cilk_fake_dummy); \
|
||||
} while (0)
|
||||
|
||||
#ifndef CILK_FAKE_NO_SHRINKWRAP
|
||||
/* "shrink-wrap" optimization enabled. Do not initialize frame on entry,
|
||||
* except to clear worker pointer. Instead, defer initialization until
|
||||
* the first spawn.
|
||||
*/
|
||||
# define CILK_FAKE_INITIAL_ENTER_FRAME(sf) ((void) ((sf).worker = 0))
|
||||
# define CILK_FAKE_DEFERRED_ENTER_FRAME(sf) do { \
|
||||
if (! (sf).worker) __cilk_fake_enter_frame(&(sf)); \
|
||||
} while (0)
|
||||
#else
|
||||
/* "shrink-wrap" optimization disabled. Initialize frame immediately on
|
||||
* entry. Do not initialize frame on spawn.
|
||||
*/
|
||||
# define CILK_FAKE_INITIAL_ENTER_FRAME(sf) \
|
||||
__cilk_fake_enter_frame(&(sf))
|
||||
# define CILK_FAKE_DEFERRED_ENTER_FRAME(sf) ((void) &(sf))
|
||||
#endif
|
||||
|
||||
/* Prologue of a spawning function. Declares and initializes the stack
|
||||
* frame.
|
||||
*/
|
||||
#define CILK_FAKE_PROLOG() \
|
||||
__cilk_fake_stack_frame __cilk_sf; \
|
||||
CILK_FAKE_FORCE_FRAME_PTR(__cilk_sf); \
|
||||
CILK_FAKE_INITIAL_ENTER_FRAME(__cilk_sf)
|
||||
|
||||
/* Prologue of a spawning function where the current worker is already known.
|
||||
* Declares and initializes the stack frame without looking up the worker from
|
||||
* TLS.
|
||||
*/
|
||||
#define CILK_FAKE_PROLOG_FAST(w) \
|
||||
__cilk_fake_stack_frame __cilk_sf; \
|
||||
CILK_FAKE_FORCE_FRAME_PTR(__cilk_sf); \
|
||||
__cilk_fake_enter_frame_fast(&__cilk_sf, (w))
|
||||
|
||||
/* Simulate a cilk_sync */
|
||||
#define CILK_FAKE_SYNC() CILK_FAKE_SYNC_IMP(__cilk_sf)
|
||||
|
||||
/* Epilog at the end of a spawning function. Does a sync and calls the
|
||||
* runtime for leaving the frame.
|
||||
*/
|
||||
#ifdef __cplusplus
|
||||
// Epilogue is run automatically by __cilk_fake_stack_frame destructor.
|
||||
# define CILK_FAKE_EPILOG() ((void) __cilk_sf)
|
||||
#else
|
||||
# define CILK_FAKE_EPILOG() CILK_FAKE_CLEANUP_FRAME(__cilk_sf)
|
||||
#endif // C
|
||||
|
||||
/* Implementation of spawning function epilog. See CILK_FAKE_EPILOG macro and
|
||||
* __cilk_fake_stack_frame destructor body.
|
||||
*/
|
||||
#define CILK_FAKE_CLEANUP_FRAME(sf) do { \
|
||||
if (! (sf).worker) break; \
|
||||
CILK_FAKE_SYNC_IMP(sf); \
|
||||
CILK_FAKE_POP_FRAME(sf); \
|
||||
if ((sf).flags != CILK_FAKE_VERSION_FLAG) \
|
||||
__cilkrts_leave_frame(&(sf)); \
|
||||
} while (0)
|
||||
|
||||
/* Implementation of CILK_FAKE_SYNC with sf argument */
|
||||
#define CILK_FAKE_SYNC_IMP(sf) do { \
|
||||
if (__builtin_expect((sf).flags & CILK_FRAME_UNSYNCHED, 0)) { \
|
||||
(sf).parent_pedigree = (sf).worker->pedigree; \
|
||||
CILK_FAKE_SAVE_FP(sf); \
|
||||
if (! CILK_SETJMP((sf).ctx)) \
|
||||
__cilkrts_sync(&(sf)); \
|
||||
} \
|
||||
++(sf).worker->pedigree.rank; \
|
||||
} while (0)
|
||||
|
||||
/* Save the floating-point control registers into the frame.
 *
 * CILK_FAKE_SAVE_FP(sf) stores the SSE control/status register (stmxcsr) in
 * (sf).mxcsr and the x87 control word (fnstcw) in (sf).fpcsr.  The definition
 * is compiler specific (and architecture specific on Windows).
 */
#ifdef _MSC_VER
#   define MXCSR_OFFSET offsetof(struct __cilkrts_stack_frame, mxcsr)
#   define FPCSR_OFFSET offsetof(struct __cilkrts_stack_frame, fpcsr)
#   if defined(_M_IX86)
        /* Windows x86
         *
         * NOTE(review): the macros in this header pass a frame object (not a
         * pointer) as 'sf'; confirm that "mov eax, sf" yields the frame's
         * address, as the two stores below assume. */
#       define CILK_FAKE_SAVE_FP(sf) do {                                  \
            __asm                                                          \
            {                                                              \
                mov eax, sf                                                \
                stmxcsr [eax+MXCSR_OFFSET]                                 \
                fnstcw [eax+FPCSR_OFFSET]                                  \
            }                                                              \
        } while (0)
#   elif defined(_M_X64)
        /* Windows Intel64 - Not needed - saved by setjmp call */
#       define CILK_FAKE_SAVE_FP(sf) ((void) sf)
#   else
#       error "Unknown architecture"
#   endif /* Microsoft architecture specifics */
#else
    /* Non-Windows (GCC-style extended asm).
     *
     * Both instructions WRITE their memory operand, so the two fields are
     * declared as outputs ("=m"); declaring them as inputs would let the
     * compiler assume the fields are unchanged by the asm.  The statement is
     * volatile because the values read are processor control state that is
     * not visible as an asm input, so two executions must not be merged or
     * hoisted. */
#   define CILK_FAKE_SAVE_FP(sf) do {                                      \
        __asm__ __volatile__ ( "stmxcsr %0\n\t"                            \
                               "fnstcw %1"                                 \
                               : "=m" ((sf).mxcsr), "=m" ((sf).fpcsr));    \
    } while (0)
#endif
|
||||
|
||||
/* Call the spawn helper as part of a fake spawn */
|
||||
#define CILK_FAKE_CALL_SPAWN_HELPER(helper) do { \
|
||||
CILK_FAKE_DEFERRED_ENTER_FRAME(__cilk_sf); \
|
||||
CILK_FAKE_SAVE_FP(__cilk_sf); \
|
||||
if (__builtin_expect(! CILK_SETJMP(__cilk_sf.ctx), 1)) { \
|
||||
helper; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
/* Body of a spawn helper function. In addition to the parent frame and the
|
||||
* expression to spawn, pass it any number of statements to be executed before
|
||||
* detaching.
|
||||
*/
|
||||
#define CILK_FAKE_SPAWN_HELPER_BODY(parent_sf, expr, ...) \
|
||||
CILK_FAKE_SPAWN_HELPER_PROLOG(parent_sf); \
|
||||
__VA_ARGS__; \
|
||||
__cilk_fake_detach(&__cilk_sf); \
|
||||
expr; \
|
||||
CILK_FAKE_SPAWN_HELPER_EPILOG()
|
||||
|
||||
/* Prolog for a spawn helper function */
|
||||
#define CILK_FAKE_SPAWN_HELPER_PROLOG(parent_sf) \
|
||||
__cilk_fake_spawn_helper_stack_frame __cilk_sf; \
|
||||
__cilk_fake_helper_enter_frame(&__cilk_sf, &(parent_sf))
|
||||
|
||||
/* Implementation of spawn helper epilog. See CILK_FAKE_SPAWN_HELPER_EPILOG
|
||||
* and the __cilk_fake_spawn_helper_stack_frame destructor.
|
||||
*/
|
||||
#define CILK_FAKE_SPAWN_HELPER_CLEANUP_FRAME(sf) do { \
|
||||
if (! (sf).worker) break; \
|
||||
CILK_FAKE_POP_FRAME(sf); \
|
||||
__cilkrts_leave_frame(&(sf)); \
|
||||
} while (0)
|
||||
|
||||
/* Epilog to execute at the end of a spawn helper */
|
||||
#ifdef __cplusplus
|
||||
// Epilog handled by __cilk_fake_spawn_helper_stack_frame destructor
|
||||
# define CILK_FAKE_SPAWN_HELPER_EPILOG() ((void) __cilk_sf)
|
||||
#else
|
||||
# define CILK_FAKE_SPAWN_HELPER_EPILOG() \
|
||||
CILK_FAKE_SPAWN_HELPER_CLEANUP_FRAME(__cilk_sf)
|
||||
#endif
|
||||
|
||||
/* Pop the current frame off of the call chain */
|
||||
#define CILK_FAKE_POP_FRAME(sf) do { \
|
||||
(sf).worker->current_stack_frame = (sf).call_parent; \
|
||||
(sf).call_parent = 0; \
|
||||
} while (0)
|
||||
|
||||
/* Bracket a region of a spawning function with CILK_FAKE_EXCEPT_BEGIN and
 * CILK_FAKE_EXCEPT_END.
 *
 * On Windows the region is guarded by a setjmp on __cilk_sf.except_ctx; when
 * control returns through that context, the frame is asserted to be excepting
 * and synched, and the exception is rethrown via __cilkrts_rethrow.
 * Elsewhere the pair is just a brace-delimited block.
 */
#ifdef _WIN32
    /* define macros for synching functions before allowing them to propagate.
     *
     * NOTE(review): the expansion uses assert() and exit(); no include of
     * <assert.h> or <stdlib.h> is visible in this header, so the including
     * file presumably has to provide them - confirm. */
#   define CILK_FAKE_EXCEPT_BEGIN                                          \
    if (0 == CILK_SETJMP(__cilk_sf.except_ctx)) {

#   define CILK_FAKE_EXCEPT_END                                            \
    } else {                                                               \
        assert((__cilk_sf.flags & (CILK_FRAME_UNSYNCHED|CILK_FRAME_EXCEPTING))\
               == CILK_FRAME_EXCEPTING);                                   \
        __cilkrts_rethrow(&__cilk_sf);                                     \
        exit(0);                                                           \
    }
#else
    /* Same names as the Windows branch, so code written against
     * CILK_FAKE_EXCEPT_BEGIN/END compiles on every platform. */
#   define CILK_FAKE_EXCEPT_BEGIN {
#   define CILK_FAKE_EXCEPT_END }

    /* Historical spellings without the FAKE_ prefix, kept for existing
     * users. */
#   define CILK_EXCEPT_BEGIN {
#   define CILK_EXCEPT_END }
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
// The following definitions depend on C++ features.
|
||||
|
||||
// Wrap a functor (probably a lambda), so that a call to it cannot be
|
||||
// inlined.
|
||||
template <typename F>
|
||||
class __cilk_fake_noinline_wrapper
|
||||
{
|
||||
F&& m_fn;
|
||||
public:
|
||||
__cilk_fake_noinline_wrapper(F&& fn) : m_fn(static_cast<F&&>(fn)) { }
|
||||
|
||||
#ifdef _WIN32
|
||||
__declspec(noinline) void operator()(__cilkrts_stack_frame *sf);
|
||||
#else
|
||||
void operator()(__cilkrts_stack_frame *sf) __attribute__((noinline));
|
||||
#endif
|
||||
|
||||
};
|
||||
|
||||
template <typename F>
|
||||
void __cilk_fake_noinline_wrapper<F>::operator()(__cilkrts_stack_frame *sf)
|
||||
{
|
||||
m_fn(sf);
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
inline
|
||||
__cilk_fake_noinline_wrapper<F> __cilk_fake_make_noinline_wrapper(F&& fn)
|
||||
{
|
||||
return __cilk_fake_noinline_wrapper<F>(static_cast<F&&>(fn));
|
||||
}
|
||||
|
||||
// Simulate "_Cilk_spawn expr", where expr must be a function call.
|
||||
//
|
||||
// Note: this macro does not correctly construct function arguments.
|
||||
// According to the ABI specification, function arguments should be evaluated
|
||||
// before the detach and destroyed after the detach. This macro both
|
||||
// evaluates and destroys them after the detach. This means that if any part
|
||||
// of the function argument expression depends on a value that is modified in
|
||||
// the continuation of the spawn, a race will occur between the continuation and
|
||||
// the argument evaluation.
|
||||
//
|
||||
// To work around this problem, this macro accepts an arbitrary list of
|
||||
// declarations and statements (separated by semicolons) that are evaluated
|
||||
// before the detach. Thus, to simulate:
|
||||
//
|
||||
// _Cilk_spawn f(expr);
|
||||
//
|
||||
// one would write:
|
||||
//
|
||||
// CILK_FAKE_SPAWN(f(arg), auto arg = expr);
|
||||
//
|
||||
// Despite appearing in the reverse order, the 'arg' variable is created and
|
||||
// initialized before the detach and the call to f(arg) occurs after the
|
||||
// detach.
|
||||
#define CILK_FAKE_SPAWN(expr, ...) \
|
||||
CILK_FAKE_CALL_SPAWN_HELPER( \
|
||||
CILK_FAKE_SPAWN_HELPER(expr, __VA_ARGS__)(&__cilk_sf))
|
||||
|
||||
// Simulate "ret = cilk_spawn expr". See CILK_FAKE_SPAWN for constraints.
|
||||
#define CILK_FAKE_SPAWN_R(ret, expr, ...) \
|
||||
CILK_FAKE_SPAWN(((ret) = (expr)), __VA_ARGS__)
|
||||
|
||||
// Create a spawn helper as a C++11 lambda function. In addition to the
|
||||
// expression to spawn, this macro takes any number of statements to be
|
||||
// executed before detaching.
|
||||
#define CILK_FAKE_SPAWN_HELPER(expr, ...) \
|
||||
__cilk_fake_make_noinline_wrapper([&](__cilkrts_stack_frame *parent_sf) { \
|
||||
CILK_FAKE_SPAWN_HELPER_BODY(*parent_sf, expr, __VA_ARGS__); \
|
||||
})
|
||||
|
||||
// C++ version of a __cilkrts_stack_frame for a spawning function.
|
||||
// This struct is identical to __cilkrts_stack_frame except that the
|
||||
// destructor automatically does frame cleanup.
|
||||
struct __cilk_fake_stack_frame : __cilkrts_stack_frame
|
||||
{
|
||||
// Extension of __cilkrts_stack_frame with constructor and destructor
|
||||
__cilk_fake_stack_frame() { }
|
||||
__forceinline ~__cilk_fake_stack_frame() {
|
||||
CILK_FAKE_CLEANUP_FRAME(*this);
|
||||
}
|
||||
};
|
||||
|
||||
// C++ version of a __cilkrts_stack_frame for a spawn helper.
|
||||
// This struct is identical to __cilkrts_stack_frame except that the
|
||||
// destructor automatically does frame cleanup.
|
||||
struct __cilk_fake_spawn_helper_stack_frame : __cilkrts_stack_frame
|
||||
{
|
||||
// Extension of __cilkrts_stack_frame with constructor and destructor
|
||||
__cilk_fake_spawn_helper_stack_frame() { worker = 0; }
|
||||
__forceinline ~__cilk_fake_spawn_helper_stack_frame() {
|
||||
CILK_FAKE_SPAWN_HELPER_CLEANUP_FRAME(*this);
|
||||
}
|
||||
};
|
||||
#else
|
||||
// For C, __cilk_fake_stack_frame and __cilk_fake_spawn_helper_stack_frame are
|
||||
// identical to __cilkrts_stack_frame. Frame cleanup must be performed
|
||||
// explicitly (in CILK_FAKE_EPILOG and CILK_FAKE_SPAWN_HELPER_EPILOG)
|
||||
typedef __cilkrts_stack_frame __cilk_fake_stack_frame;
|
||||
typedef __cilkrts_stack_frame __cilk_fake_spawn_helper_stack_frame;
|
||||
#endif
|
||||
|
||||
#endif // ! defined(INCLUDED_CILK_FAKE_DOT_H)
|
47
libcilkrts/include/internal/cilk_version.h
Normal file
47
libcilkrts/include/internal/cilk_version.h
Normal file
|
@ -0,0 +1,47 @@
|
|||
// cilk_version.h
|
||||
//
|
||||
// @copyright
|
||||
// Copyright (C) 2009-2013, Intel Corporation
|
||||
// All rights reserved.
|
||||
//
|
||||
// @copyright
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in
|
||||
// the documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
// * Neither the name of Intel Corporation nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// @copyright
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
// DO NOT EDIT THIS FILE!
|
||||
//
|
||||
// It was automatically generated by cilkrts/include/internal/Makefile
|
||||
|
||||
#define VERSION_MAJOR 2
|
||||
#define VERSION_MINOR 0
|
||||
#define VERSION_BUILD 3902
|
||||
#define VERSION_REV 0
|
||||
#define VERSION_STRING "2,0,3902,0"
|
||||
#define VERSION_HASH "b4e38f4f7e3e"
|
||||
#define VERSION_BRANCH "v14.0"
|
||||
#define TBB_REV_NUMBER ""
|
||||
#define VERSION_YEAR "2013"
|
99
libcilkrts/include/internal/metacall.h
Normal file
99
libcilkrts/include/internal/metacall.h
Normal file
|
@ -0,0 +1,99 @@
|
|||
// -*- C++ -*-
|
||||
|
||||
/*
|
||||
* @copyright
|
||||
* Copyright (C) 2009-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
******************************************************************************
|
||||
*
|
||||
* metacall.h
|
||||
*
|
||||
* This is an internal header file defining part of the metacall
|
||||
* interface used by Cilkscreen. It is not a stable API and is
|
||||
* subject to change without notice.
|
||||
*/
|
||||
|
||||
// Provides the enum of metacall kinds. This is used by Cilkscreen and the
|
||||
// runtime, and will probably be used by any future ptools.
|
||||
|
||||
#pragma once
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
enum
|
||||
{
|
||||
// Notify Cilkscreen to stop/start instrumenting code
|
||||
HYPER_DISABLE_INSTRUMENTATION = 0,
|
||||
HYPER_ENABLE_INSTRUMENTATION = 1,
|
||||
|
||||
// Write 0 in *(char *)arg if the p-tool is sequential. The Cilk runtime
|
||||
// system invokes this metacall to know whether to spawn worker threads.
|
||||
HYPER_ZERO_IF_SEQUENTIAL_PTOOL = 2,
|
||||
|
||||
// Write 0 in *(char *)arg if the runtime must force reducers to
|
||||
// call the reduce() method even if no actual stealing occurs.
|
||||
HYPER_ZERO_IF_FORCE_REDUCE = 3,
|
||||
|
||||
// Inform Cilkscreen about the current stack pointer.
|
||||
HYPER_ESTABLISH_C_STACK = 4,
|
||||
|
||||
// Inform Cilkscreen about the current worker
|
||||
HYPER_ESTABLISH_WORKER = 5,
|
||||
|
||||
// Tell tools to ignore a block of memory. Parameter is a 2 element
|
||||
// array: void *block[2] = {_begin, _end}; _end is 1 beyond the end
|
||||
// of the block to be ignored. Essentially, if p is a pointer to an
|
||||
// array, _begin = &p[0], _end = &p[max]
|
||||
HYPER_IGNORE_MEMORY_BLOCK = 6
|
||||
|
||||
// If you add metacalls here, remember to update BOTH workspan.cpp AND
|
||||
// cilkscreen-common.cpp!
|
||||
};
|
||||
|
||||
typedef struct
|
||||
{
|
||||
unsigned int tool; // Specifies tool metacall is for
|
||||
// (eg. system=0, cilkscreen=1, cilkview=2).
|
||||
// All tools should understand system codes.
|
||||
// Tools should ignore all other codes, except
|
||||
// their own.
|
||||
|
||||
unsigned int code; // Tool-specific code specifies what to do and how to
|
||||
// interpret data
|
||||
|
||||
void *data;
|
||||
} metacall_data_t;
|
||||
|
||||
#define METACALL_TOOL_SYSTEM 0
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
41
libcilkrts/include/internal/rev.mk
Normal file
41
libcilkrts/include/internal/rev.mk
Normal file
|
@ -0,0 +1,41 @@
|
|||
#########################################################################
|
||||
#
|
||||
# @copyright
|
||||
# Copyright (C) 2011-2013, Intel Corporation
|
||||
# All rights reserved.
|
||||
#
|
||||
# @copyright
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in
|
||||
# the documentation and/or other materials provided with the
|
||||
# distribution.
|
||||
# * Neither the name of Intel Corporation nor the names of its
|
||||
# contributors may be used to endorse or promote products derived
|
||||
# from this software without specific prior written permission.
|
||||
#
|
||||
# @copyright
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
###########################################################################
|
||||
|
||||
# DO NOT EDIT THIS FILE!
|
||||
#
|
||||
# It was automatically generated by cilkrts/include/internal/Makefile
|
||||
|
||||
CILK_REVISION = 3902
|
61
libcilkrts/mk/cilk-version.mk
Normal file
61
libcilkrts/mk/cilk-version.mk
Normal file
|
@ -0,0 +1,61 @@
|
|||
#########################################################################
|
||||
#
|
||||
# @copyright
|
||||
# Copyright (C) 2009-2013, Intel Corporation
|
||||
# All rights reserved.
|
||||
#
|
||||
# @copyright
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in
|
||||
# the documentation and/or other materials provided with the
|
||||
# distribution.
|
||||
# * Neither the name of Intel Corporation nor the names of its
|
||||
# contributors may be used to endorse or promote products derived
|
||||
# from this software without specific prior written permission.
|
||||
#
|
||||
# @copyright
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
###########################################################################
|
||||
# cilk-version.mk
|
||||
#
|
||||
# The one place we look up information from the code management system
|
||||
#
|
||||
# Note that the build number is *only* valid on the build machines
|
||||
|
||||
ifeq ($(wildcard $(TOP)/../.hg),)
|
||||
# If this is the open source release, there is no Mercurial repository,
|
||||
# so set some reasonable defaults.
|
||||
CILK_VERSION_MAJOR := 2
|
||||
CILK_VERSION_MINOR := 0
|
||||
CILK_VERSION_BUILD := 1
|
||||
CILK_VERSION_REV := 0
|
||||
|
||||
CILK_VERSION_HASH := 000000000000
|
||||
CILK_VERSION_BRANCH := oss
|
||||
else
|
||||
CILK_VERSION_MAJOR := 2
|
||||
CILK_VERSION_MINOR := 0
|
||||
CILK_VERSION_BUILD := $(firstword $(subst +, ,$(shell hg id --num)))
|
||||
CILK_VERSION_REV := 0
|
||||
|
||||
CILK_VERSION_HASH := $(firstword $(subst +, ,$(shell hg id --id)))
|
||||
CILK_VERSION_BRANCH := $(shell hg id --branch)
|
||||
endif
|
||||
|
51
libcilkrts/runtime/acknowledgements.dox
Normal file
51
libcilkrts/runtime/acknowledgements.dox
Normal file
|
@ -0,0 +1,51 @@
|
|||
/* acknowledgements.dox
|
||||
*
|
||||
*************************************************************************
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2009-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
**************************************************************************/
|
||||
|
||||
/*
|
||||
* This file contains acknowledgements of community contributions to the
|
||||
* Cilk Plus runtime.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @mainpage
|
||||
*
|
||||
* @section Acknowledgements Acknowledgements
|
||||
*
|
||||
* Modifications to build the Cilk Plus runtime for VxWorks provided by
|
||||
* Brian Kuhl of Wind River.
|
||||
*/
|
139
libcilkrts/runtime/bug.cpp
Normal file
139
libcilkrts/runtime/bug.cpp
Normal file
|
@ -0,0 +1,139 @@
|
|||
/* bug.cpp -*-C++-*-
|
||||
*
|
||||
*************************************************************************
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2009-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
**************************************************************************/
|
||||
|
||||
#include "bug.h"
|
||||
|
||||
#include <exception>
|
||||
#include <stdio.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef _WIN32
|
||||
# include "windows-clean.h"
|
||||
# include "internal/abi.h"
|
||||
# include "cilktools/cilkscreen.h"
|
||||
# include <crtdbg.h>
|
||||
#endif
|
||||
|
||||
__CILKRTS_BEGIN_EXTERN_C
|
||||
|
||||
COMMON_PORTABLE const char *const __cilkrts_assertion_failed =
|
||||
"%s:%d: cilk assertion failed: %s\n";
|
||||
|
||||
/* Report a fatal runtime error and terminate the process.
 *
 * fmt and the variadic arguments follow printf conventions.
 *
 * - Windows debug builds: the message is formatted (truncated to the buffer
 *   size), widened and handed to the CRT assertion machinery, the debugger is
 *   given a chance to break, and the process aborts.
 * - All other builds: pending output is flushed, the message is written to
 *   stderr, and the process aborts (non-Windows) or exits with status 1.
 */
COMMON_PORTABLE void __cilkrts_bug(const char *fmt,...) cilk_nothrow
{
#if defined (_WIN32) && defined(_DEBUG)
    _CRTIMP void __cdecl _wassert(__in_z const wchar_t * _Message,
                                  __in_z const wchar_t *_File,
                                  __in unsigned _Line);
    char narrow_text[256];
    wchar_t wide_text[256];
    va_list args;

    /* Format into the narrow buffer, truncating if necessary ... */
    va_start(args, fmt);
    _vsnprintf_s(narrow_text, sizeof(narrow_text), _TRUNCATE, fmt, args);
    va_end(args);

    /* ... then widen it for _wassert. */
    _snwprintf_s(wide_text, sizeof(wide_text) / sizeof(wide_text[0]),
                 _TRUNCATE, _CRT_WIDE("%S"), narrow_text);

    // Force asserts to go to stderr and the debugger.  This isn't polite, but
    // we're about to kill the app anyway and it will prevent our tests from
    // hanging
    _CrtSetReportMode(_CRT_ASSERT, _CRTDBG_MODE_FILE| _CRTDBG_MODE_DEBUG);
    _CrtSetReportFile(_CRT_ASSERT, _CRTDBG_FILE_STDERR);

    _wassert(wide_text, _CRT_WIDE(__FILE__), __LINE__);

    // If there's a debugger attached, give it a chance to look at the failure
    if (IsDebuggerPresent())
        DebugBreak();

    abort();
    /* __asm int 3 */
#else
    va_list args;

    /* To reduce user confusion, write all user-generated output
       before the system-generated error message. */
    fflush(NULL);

    va_start(args, fmt);
    vfprintf(stderr, fmt, args);
    va_end(args);
    fflush(stderr);

#ifndef _WIN32
    abort();
#endif

#endif

    /* Reached only where neither branch above called abort(). */
    exit(1);
}
|
||||
|
||||
/*
 * cilkbug_assert_no_uncaught_exception
 *
 * Samples std::uncaught_exception() and asserts, via CILK_ASSERT, that the
 * result is false.
 */
COMMON_PORTABLE void cilkbug_assert_no_uncaught_exception(void)
{
    // The result is kept in a local named 'uncaught' on purpose: CILK_ASSERT
    // stringifies its argument into the failure message.
    const bool uncaught = std::uncaught_exception();

    CILK_ASSERT(!uncaught);
}
|
||||
|
||||
/*
 * abort_because_rts_is_corrupted
 *
 * Passes the runtime's fixed "data structures are corrupted" message to
 * __cilkrts_bug().
 */
COMMON_SYSDEP void abort_because_rts_is_corrupted(void)
{
    __cilkrts_bug("The Cilk Plus runtime system detected a corruption in its "
                  "data structures. This is most likely caused by an "
                  "application bug. Aborting execution.\n");
}
|
||||
|
||||
#ifdef _WIN32
/*
 * __cilkrts_dbgprintf
 *
 * Format a message and pass it to OutputDebugStringA().
 *
 * fmt - printf-style format string; the remaining arguments are consumed
 *       according to it.  Output that does not fit in the local buffer is
 *       truncated (_TRUNCATE).
 *
 * The guard is _WIN32 (not WIN32) so that it matches both the guard on the
 * Windows-only includes at the top of this file and the guard on the
 * declaration in bug.h.  The cilk_nothrow qualifier mirrors that declaration.
 */
COMMON_SYSDEP void __cilkrts_dbgprintf(const char *fmt,...) cilk_nothrow
{
    char message[2048];
    va_list l;

    // Cilkscreen shouldn't watch this
    __cilkscreen_disable_checking();

    va_start(l, fmt);
    // Size the write from the buffer itself so the two cannot drift apart.
    _vsnprintf_s(message, sizeof(message), _TRUNCATE, fmt, l);
    va_end(l);
    OutputDebugStringA (message);

    // Re-enable Cilkscreen
    __cilkscreen_enable_checking();
}
#endif
|
||||
|
||||
__CILKRTS_END_EXTERN_C
|
||||
|
||||
/* End bug.cpp */
|
141
libcilkrts/runtime/bug.h
Normal file
141
libcilkrts/runtime/bug.h
Normal file
|
@ -0,0 +1,141 @@
|
|||
/* bug.h -*-C++-*-
|
||||
*
|
||||
*************************************************************************
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2009-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
**************************************************************************/
|
||||
|
||||
/**
|
||||
* @file bug.h
|
||||
*
|
||||
* @brief Support for reporting bugs and debugging.
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_BUG_DOT_H
|
||||
#define INCLUDED_BUG_DOT_H
|
||||
|
||||
#include "rts-common.h"
|
||||
#include <cilk/common.h>
|
||||
|
||||
__CILKRTS_BEGIN_EXTERN_C
|
||||
|
||||
/**
|
||||
* Flush all output, write error message to stderr and abort the execution.
|
||||
* On Windows the error is also written to the debugger.
|
||||
*
|
||||
* @param fmt printf-style format string. Any remaining parameters will be
|
||||
* interpreted based on the format string text.
|
||||
*/
|
||||
COMMON_PORTABLE NORETURN __cilkrts_bug(const char *fmt,...) cilk_nothrow;
|
||||
|
||||
#ifndef CILK_ASSERT
|
||||
|
||||
/** Standard text for failed assertion */
|
||||
COMMON_PORTABLE extern const char *const __cilkrts_assertion_failed;
|
||||
|
||||
/**
|
||||
* Macro to assert an invariant that must be true. If the statement evaluates
|
||||
* to false, __cilkrts_bug will be called to report the failure and terminate
|
||||
* the application.
|
||||
*/
|
||||
#define CILK_ASSERT(ex)                                                     \
    (__builtin_expect((ex) != 0, 1)                                         \
         ? (void)0                                                          \
         : __cilkrts_bug(__cilkrts_assertion_failed,                        \
                         __FILE__, __LINE__, #ex))

/**
 * Variant of CILK_ASSERT that takes an extra string literal.  On failure the
 * report contains the stringified expression followed, on a new line, by
 * msg.  Because msg is pasted next to other literals it must itself be a
 * string literal.
 */
#define CILK_ASSERT_MSG(ex, msg)                                            \
    (__builtin_expect((ex) != 0, 1)                                         \
         ? (void)0                                                          \
         : __cilkrts_bug(__cilkrts_assertion_failed,                        \
                         __FILE__, __LINE__,                                \
                         #ex "\n " msg))
|
||||
#endif // CILK_ASSERT
|
||||
|
||||
/**
|
||||
* Assert that there is no uncaught exception.
|
||||
*
|
||||
* Not valid on Windows or Android.
|
||||
*
|
||||
* On Android, calling std::uncaught_exception with the stlport library causes
|
||||
* a seg fault. Since we're not supporting exceptions there at this point,
|
||||
* just don't do the check. It works with the GNU STL library, but that's
|
||||
* GPL V3 licensed.
|
||||
*/
|
||||
COMMON_PORTABLE void cilkbug_assert_no_uncaught_exception(void);
|
||||
#if defined(_WIN32) || defined(ANDROID)
|
||||
# define CILKBUG_ASSERT_NO_UNCAUGHT_EXCEPTION()
|
||||
#else
|
||||
# define CILKBUG_ASSERT_NO_UNCAUGHT_EXCEPTION() \
|
||||
cilkbug_assert_no_uncaught_exception()
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
* Call __cilkrts_bug with a standard message that the runtime state is
|
||||
* corrupted and the application is being terminated.
|
||||
*/
|
||||
COMMON_SYSDEP void abort_because_rts_is_corrupted(void);
|
||||
|
||||
// Debugging aids
|
||||
#ifndef _DEBUG
|
||||
# define DBGPRINTF(_fmt, ...)
|
||||
#elif defined(_WIN32)
|
||||
|
||||
/**
|
||||
* Write debugging output. On windows this is written to the debugger.
|
||||
*
|
||||
* @param fmt printf-style format string. Any remaining parameters will be
|
||||
* interpreted based on the format string text.
|
||||
*/
|
||||
COMMON_SYSDEP void __cilkrts_dbgprintf(const char *fmt,...) cilk_nothrow;
|
||||
|
||||
/**
|
||||
* Macro to write debugging output which will be elided if this is not a
|
||||
* debug build. Non-Windows debug builds define it as fprintf to stderr.
|
||||
*
|
||||
* @param _fmt printf-style format string. Any remaining parameters will be
|
||||
* interpreted based on the format string text.
|
||||
*/
|
||||
# define DBGPRINTF(_fmt, ...) __cilkrts_dbgprintf(_fmt, __VA_ARGS__)
|
||||
|
||||
#else /* if _DEBUG && !_WIN32 */
|
||||
/* Non-Windows debug logging. Someday we should make GetCurrentFiber()
|
||||
* and GetWorkerFiber() do something.
|
||||
*/
|
||||
# include <stdio.h>
|
||||
/* Placeholder for non-Windows debug builds: always returns a null pointer. */
__CILKRTS_INLINE void* GetCurrentFiber()
{
    return 0;
}

/* Placeholder for non-Windows debug builds: ignores w and always returns a
 * null pointer. */
__CILKRTS_INLINE void* GetWorkerFiber(__cilkrts_worker* w)
{
    return 0;
}
|
||||
# define DBGPRINTF(_fmt, ...) fprintf(stderr, _fmt, __VA_ARGS__)
|
||||
#endif // _DEBUG
|
||||
|
||||
__CILKRTS_END_EXTERN_C
|
||||
|
||||
#endif // ! defined(INCLUDED_BUG_DOT_H)
|
57
libcilkrts/runtime/c_reducers.c
Normal file
57
libcilkrts/runtime/c_reducers.c
Normal file
|
@ -0,0 +1,57 @@
|
|||
/* c_reducers.c -*-C-*-
|
||||
*
|
||||
*************************************************************************
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2010-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
/* Implementation of C reducers */
|
||||
|
||||
// Disable warning about integer conversions losing significant bits.
|
||||
// The code is correct as is.
|
||||
#ifdef __INTEL_COMPILER
|
||||
#pragma warning(disable:2259)
|
||||
#endif
|
||||
|
||||
#define CILK_C_DEFINE_REDUCERS
|
||||
|
||||
#include <cilk/reducer_opadd.h>
|
||||
#include <cilk/reducer_opand.h>
|
||||
#include <cilk/reducer_opmul.h>
|
||||
#include <cilk/reducer_opor.h>
|
||||
#include <cilk/reducer_opxor.h>
|
||||
#include <cilk/reducer_min_max.h>
|
||||
|
||||
/* End c_reducers.c */
|
406
libcilkrts/runtime/cilk-abi-cilk-for.cpp
Normal file
406
libcilkrts/runtime/cilk-abi-cilk-for.cpp
Normal file
|
@ -0,0 +1,406 @@
|
|||
/* cilk-abi-cilk-for.cpp -*-C++-*-
|
||||
*
|
||||
*************************************************************************
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2011, 2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
/* Implementation of cilk_for ABI.
|
||||
*
|
||||
* This file must be C++, not C, in order to handle C++ exceptions correctly
|
||||
* from within the body of the cilk_for loop
|
||||
*/
|
||||
|
||||
#include "internal/abi.h"
|
||||
#include "metacall_impl.h"
|
||||
#include "global_state.h"
|
||||
|
||||
// Icky macros to determine if we're compiled with optimization. Based on
|
||||
// the declaration of __CILKRTS_ASSERT in common.h
|
||||
#if defined(_WIN32)
|
||||
# if defined (_DEBUG)
|
||||
# define CILKRTS_OPTIMIZED 0 // Assumes /MDd is always used with /Od
|
||||
# else
|
||||
# define CILKRTS_OPTIMIZED 1
|
||||
# endif // defined(_DEBUG)
|
||||
#else
|
||||
# if defined(__OPTIMIZE__)
|
||||
# define CILKRTS_OPTIMIZED 1
|
||||
# else
|
||||
# define CILKRTS_OPTIMIZED 0
|
||||
# endif
|
||||
#endif
|
||||
|
||||
template <typename count_t>
|
||||
static inline int grainsize(int req, count_t count)
|
||||
{
|
||||
// A positive requested grain size comes from the user. A very high grain
|
||||
// size risks losing parallelism, but the user told us what they want for
|
||||
// grainsize. Who are we to argue?
|
||||
if (req > 0)
|
||||
return req;
|
||||
|
||||
// At present, a negative requested grain size is treated the same way as
|
||||
// a zero grain size, i.e., the runtime computes the actual grainsize
|
||||
// using a hueristic. In the future, the compiler may give us additional
|
||||
// information about the size of the cilk_for body by passing a negative
|
||||
// grain size.
|
||||
|
||||
// Avoid generating a zero grainsize, even for empty loops.
|
||||
if (count < 1)
|
||||
return 1;
|
||||
|
||||
global_state_t* g = cilkg_get_global_state();
|
||||
if (g->under_ptool)
|
||||
{
|
||||
// Grainsize = 1, when running under PIN, and when the grainsize has
|
||||
// not explicitly been set by the user.
|
||||
return 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Divide loop count by 8 times the worker count and round up.
|
||||
const int Px8 = g->P * 8;
|
||||
count_t n = (count + Px8 - 1) / Px8;
|
||||
|
||||
// 2K should be enough to amortize the cost of the cilk_for. Any
|
||||
// larger grainsize risks losing parallelism.
|
||||
if (n > 2048)
|
||||
return 2048;
|
||||
return (int) n; // n <= 2048, so no loss of precision on cast to int
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* call_cilk_for_loop_body
|
||||
*
|
||||
* Centralizes the code to call the loop body. The compiler should be
|
||||
* inlining this code
|
||||
*
|
||||
* low - Low loop index we're considering in this portion of the algorithm
|
||||
* high - High loop index we're considering in this portion of the algorithm
|
||||
* body - lambda function for the cilk_for loop body
|
||||
* data - data used by the lambda function
|
||||
* w - __cilkrts_worker we're currently executing on
|
||||
* loop_root_pedigree - __cilkrts_pedigree node we generated for the root of
|
||||
* the cilk_for loop to flatten out the internal nodes
|
||||
*/
|
||||
template <typename count_t, typename F>
|
||||
inline static
|
||||
void call_cilk_for_loop_body(count_t low, count_t high,
|
||||
F body, void *data,
|
||||
__cilkrts_worker *w,
|
||||
__cilkrts_pedigree *loop_root_pedigree)
|
||||
{
|
||||
// Cilkscreen should not report this call in a stack trace
|
||||
NOTIFY_ZC_INTRINSIC((char *)"cilkscreen_hide_call", 0);
|
||||
|
||||
// The worker is only valid until the first spawn. Fetch the
|
||||
// __cilkrts_stack_frame out of the worker, since it will be stable across
|
||||
// steals. The sf pointer actually points to the *parent's*
|
||||
// __cilkrts_stack_frame, since this function is a non-spawning function
|
||||
// and therefore has no cilk stack frame of its own.
|
||||
__cilkrts_stack_frame *sf = w->current_stack_frame;
|
||||
|
||||
// Save the pedigree node pointed to by the worker. We'll need to restore
|
||||
// that when we exit since the spawn helpers in the cilk_for call tree
|
||||
// will assume that it's valid
|
||||
const __cilkrts_pedigree *saved_next_pedigree_node = w->pedigree.parent;
|
||||
|
||||
// Add the leaf pedigree node to the chain. The parent is the root node
|
||||
// to flatten the tree regardless of the DAG branches in the cilk_for
|
||||
// divide-and-conquer recursion.
|
||||
//
|
||||
// The rank is initialized to the low index. The user is
|
||||
// expected to call __cilkrts_bump_loop_rank at the end of the cilk_for
|
||||
// loop body.
|
||||
__cilkrts_pedigree loop_leaf_pedigree;
|
||||
|
||||
loop_leaf_pedigree.rank = (uint64_t)low;
|
||||
loop_leaf_pedigree.parent = loop_root_pedigree;
|
||||
|
||||
// The worker's pedigree always starts with a rank of 0
|
||||
w->pedigree.rank = 0;
|
||||
w->pedigree.parent = &loop_leaf_pedigree;
|
||||
|
||||
// Call the compiler generated cilk_for loop body lambda function
|
||||
body(data, low, high);
|
||||
|
||||
// The loop body may have included spawns, so we must refetch the worker
|
||||
// from the __cilkrts_stack_frame, which is stable regardless of which
|
||||
// worker we're executing on.
|
||||
w = sf->worker;
|
||||
|
||||
// Restore the pedigree chain. It must be valid because the spawn helpers
|
||||
// generated by the cilk_for implementation will access it.
|
||||
w->pedigree.parent = saved_next_pedigree_node;
|
||||
}
|
||||
|
||||
/* capture_spawn_arg_stack_frame
|
||||
*
|
||||
* Efficiently get the address of the caller's __cilkrts_stack_frame. The
|
||||
* preconditons are that 'w' is the worker at the time of the call and
|
||||
* 'w->current_stack_frame' points to the __cilkrts_stack_frame within the
|
||||
* spawn helper. This function should be called only within the argument list
|
||||
* of a function that is being spawned because that is the only situation in
|
||||
* which these preconditions hold. This function returns the worker
|
||||
* (unchanged) after storing the captured stack frame pointer is stored in the
|
||||
* sf argument.
|
||||
*
|
||||
* The purpose of this function is to get the caller's stack frame in a
|
||||
* context where the caller's worker is known but its stack frame is not
|
||||
* necessarily initialized. The "shrink wrap" optimization delays
|
||||
* initializing the contents of a spawning function's '__cilkrts_stack_frame'
|
||||
* as well as the 'current_stack_frame' pointer within the worker. By calling
|
||||
* this function within a spawning function's argument list, we can ensure
|
||||
* that these initializations have occured but that a detach (which would
|
||||
* invalidate the worker pointer in the caller) has not yet occured. Once the
|
||||
* '__cilkrts_stack_frame' has been retrieved in this way, it is stable for the
|
||||
* remainder of the caller's execution, and becomes an efficient way to get
|
||||
* the worker (much more efficient than calling '__cilkrts_get_tls_worker()'),
|
||||
* even after a spawn or sync.
|
||||
*/
|
||||
inline __cilkrts_worker*
|
||||
capture_spawn_arg_stack_frame(__cilkrts_stack_frame* &sf, __cilkrts_worker* w)
|
||||
{
|
||||
// Get current stack frame
|
||||
sf = w->current_stack_frame;
|
||||
#ifdef __INTEL_COMPILER
|
||||
# if __INTEL_COMPILER <= 1300 && __INTEL_COMPILER_BUILD_DATE < 20130101
|
||||
// In older compilers 'w->current_stack_frame' points to the
|
||||
// spawn-helper's stack frame. In newer compiler's however, it points
|
||||
// directly to the pointer's stack frame. (This change was made to avoid
|
||||
// having the spawn helper in the frame list when evaluating function
|
||||
// arguments, thus avoiding corruption when those arguments themselves
|
||||
// contain cilk_spawns.)
|
||||
|
||||
// w->current_stack_frame is the spawn helper's stack frame.
|
||||
// w->current_stack_frame->call_parent is the caller's stack frame.
|
||||
sf = sf->call_parent;
|
||||
# endif
|
||||
#endif
|
||||
return w;
|
||||
}
|
||||
|
||||
/*
|
||||
* cilk_for_recursive
|
||||
*
|
||||
* Templatized function to implement the recursive divide-and-conquer
|
||||
* algorithm that's how we implement a cilk_for.
|
||||
*
|
||||
* low - Low loop index we're considering in this portion of the algorithm
|
||||
* high - High loop index we're considering in this portion of the algorithm
|
||||
* body - lambda function for the cilk_for loop body
|
||||
* data - data used by the lambda function
|
||||
* grain - grain size (0 if it should be computed)
|
||||
* w - __cilkrts_worker we're currently executing on
|
||||
* loop_root_pedigree - __cilkrts_pedigree node we generated for the root of
|
||||
* the cilk_for loop to flatten out the internal nodes
|
||||
*/
|
||||
template <typename count_t, typename F>
|
||||
static
|
||||
void cilk_for_recursive(count_t low, count_t high,
|
||||
F body, void *data, int grain,
|
||||
__cilkrts_worker *w,
|
||||
__cilkrts_pedigree *loop_root_pedigree)
|
||||
{
|
||||
tail_recurse:
|
||||
// Cilkscreen should not report this call in a stack trace
|
||||
// This needs to be done everytime the worker resumes
|
||||
NOTIFY_ZC_INTRINSIC((char *)"cilkscreen_hide_call", 0);
|
||||
|
||||
count_t count = high - low;
|
||||
// Invariant: count > 0, grain >= 1
|
||||
if (count > grain)
|
||||
{
|
||||
// Invariant: count >= 2
|
||||
count_t mid = low + count / 2;
|
||||
// The worker is valid only until the first spawn and is expensive to
|
||||
// retrieve (using '__cilkrts_get_tls_worker') after the spawn. The
|
||||
// '__cilkrts_stack_frame' is more stable, but isn't initialized until
|
||||
// the first spawn. Thus, we want to grab the address of the
|
||||
// '__cilkrts_stack_frame' after it is initialized but before the
|
||||
// spawn detaches. The only place we can do that is within the
|
||||
// argument list of the spawned function, hence the call to
|
||||
// capture_spawn_arg_stack_frame().
|
||||
__cilkrts_stack_frame *sf;
|
||||
_Cilk_spawn cilk_for_recursive(low, mid, body, data, grain,
|
||||
capture_spawn_arg_stack_frame(sf, w),
|
||||
loop_root_pedigree);
|
||||
w = sf->worker;
|
||||
low = mid;
|
||||
|
||||
goto tail_recurse;
|
||||
}
|
||||
|
||||
// Call the cilk_for loop body lambda function passed in by the compiler to
|
||||
// execute one grain
|
||||
call_cilk_for_loop_body(low, high, body, data, w, loop_root_pedigree);
|
||||
}
|
||||
|
||||
// Deliberately empty.  cilk_for_root() spawns this function.
static void noop()
{
}
|
||||
|
||||
/*
|
||||
* cilk_for_root
|
||||
*
|
||||
* Templatized function to implement the top level of a cilk_for loop.
|
||||
*
|
||||
* body - lambda function for the cilk_for loop body
|
||||
* data - data used by the lambda function
|
||||
* count - trip count for loop
|
||||
* grain - grain size (0 if it should be computed)
|
||||
*/
|
||||
template <typename count_t, typename F>
|
||||
static void cilk_for_root(F body, void *data, count_t count, int grain)
|
||||
{
|
||||
// Cilkscreen should not report this call in a stack trace
|
||||
NOTIFY_ZC_INTRINSIC((char *)"cilkscreen_hide_call", 0);
|
||||
|
||||
// Pedigree computation:
|
||||
//
|
||||
// If the last pedigree node on entry to the _Cilk_for has value X,
|
||||
// then at the start of each iteration of the loop body, the value of
|
||||
// the last pedigree node should be 0, the value of the second-to-last
|
||||
// node should equal the loop counter, and the value of the
|
||||
// third-to-last node should be X. On return from the _Cilk_for, the
|
||||
// value of the last pedigree should be incremented to X+2. The
|
||||
// pedigree within the loop is thus flattened, such that the depth of
|
||||
// recursion does not affect the results either inside or outside of
|
||||
// the loop. Note that the pedigree after the loop exists is the same
|
||||
// as if a single spawn and sync were executed within this function.
|
||||
|
||||
// TBD: Since the shrink-wrap optimization was turned on in the compiler,
|
||||
// it is not possible to get the current stack frame without actually
|
||||
// forcing a call to bind-thread. This spurious spawn is a temporary
|
||||
// stopgap until the correct intrinsics are added to give us total control
|
||||
// over frame initialization.
|
||||
_Cilk_spawn noop();
|
||||
|
||||
// Fetch the current worker. From that we can get the current stack frame
|
||||
// which will be constant even if we're stolen
|
||||
__cilkrts_worker *w = __cilkrts_get_tls_worker();
|
||||
__cilkrts_stack_frame *sf = w->current_stack_frame;
|
||||
|
||||
// Decrement the rank by one to undo the pedigree change from the
|
||||
// _Cilk_spawn
|
||||
--w->pedigree.rank;
|
||||
|
||||
// Save the current worker pedigree into loop_root_pedigree, which will be
|
||||
// the root node for our flattened pedigree.
|
||||
__cilkrts_pedigree loop_root_pedigree = w->pedigree;
|
||||
|
||||
// Don't splice the loop_root node in yet. It will be done when we
|
||||
// call the loop body lambda function
|
||||
// w->pedigree.rank = 0;
|
||||
// w->pedigree.next = &loop_root_pedigree;
|
||||
|
||||
/* Spawn is necessary at top-level to force runtime to start up.
|
||||
* Runtime must be started in order to call the grainsize() function.
|
||||
*/
|
||||
int gs = grainsize(grain, count);
|
||||
cilk_for_recursive((count_t) 0, count, body, data, gs, w,
|
||||
&loop_root_pedigree);
|
||||
|
||||
// Need to refetch the worker after calling a spawning function.
|
||||
w = sf->worker;
|
||||
|
||||
// Restore the pedigree in the worker.
|
||||
w->pedigree = loop_root_pedigree;
|
||||
|
||||
// Bump the worker pedigree.
|
||||
++w->pedigree.rank;
|
||||
|
||||
// Implicit sync will increment the pedigree leaf rank again, for a total
|
||||
// of two increments. If the noop spawn above is removed, then we'll need
|
||||
// to re-enable the following code:
|
||||
// // If this is an optimized build, then the compiler will have optimized
|
||||
// // out the increment of the worker's pedigree in the implied sync. We
|
||||
// // need to add one to make the pedigree_loop test work correctly.
|
||||
// #if CILKRTS_OPTIMIZED
|
||||
// ++sf->worker->pedigree.rank;
|
||||
// #endif
|
||||
}
|
||||
|
||||
// Use extern "C" to suppress name mangling of __cilkrts_cilk_for_32 and
|
||||
// __cilkrts_cilk_for_64.
|
||||
extern "C" {
|
||||
|
||||
/*
|
||||
* __cilkrts_cilk_for_32
|
||||
*
|
||||
* Implementation of cilk_for for 32-bit trip counts (regardless of processor
|
||||
* word size). Assumes that the range is 0 - count.
|
||||
*
|
||||
* body - lambda function for the cilk_for loop body
|
||||
* data - data used by the lambda function
|
||||
* count - trip count for loop
|
||||
* grain - grain size (0 if it should be computed)
|
||||
*/
|
||||
|
||||
CILK_ABI_THROWS_VOID __cilkrts_cilk_for_32(__cilk_abi_f32_t body, void *data,
|
||||
cilk32_t count, int grain)
|
||||
{
|
||||
// Cilkscreen should not report this call in a stack trace
|
||||
NOTIFY_ZC_INTRINSIC((char *)"cilkscreen_hide_call", 0);
|
||||
|
||||
// Check for an empty range here as an optimization - don't need to do any
|
||||
// __cilkrts_stack_frame initialization
|
||||
if (count > 0)
|
||||
cilk_for_root(body, data, count, grain);
|
||||
}
|
||||
|
||||
/*
|
||||
* __cilkrts_cilk_for_64
|
||||
*
|
||||
* Implementation of cilk_for for 64-bit trip counts (regardless of processor
|
||||
* word size). Assumes that the range is 0 - count.
|
||||
*
|
||||
* body - lambda function for the cilk_for loop body
|
||||
* data - data used by the lambda function
|
||||
* count - trip count for loop
|
||||
* grain - grain size (0 if it should be computed)
|
||||
*/
|
||||
CILK_ABI_THROWS_VOID __cilkrts_cilk_for_64(__cilk_abi_f64_t body, void *data,
|
||||
cilk64_t count, int grain)
|
||||
{
|
||||
// Check for an empty range here as an optimization - don't need to do any
|
||||
// __cilkrts_stack_frame initialization
|
||||
if (count > 0)
|
||||
cilk_for_root(body, data, count, grain);
|
||||
}
|
||||
|
||||
} // end extern "C"
|
||||
|
||||
/* End cilk-abi-cilk-for.cpp */
|
83
libcilkrts/runtime/cilk-abi-vla-internal.c
Normal file
83
libcilkrts/runtime/cilk-abi-vla-internal.c
Normal file
|
@ -0,0 +1,83 @@
|
|||
/* cilk-abi-vla-internal.c -*-C++-*-
|
||||
*
|
||||
*************************************************************************
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
**************************************************************************/
|
||||
|
||||
/*
|
||||
* These functions are provided in their own compilation unit so I can debug
|
||||
* them. cilk-abi-vla.c must always be compiled with optimization on so that
|
||||
* inlining occurs.
|
||||
*/
|
||||
|
||||
#include "internal/abi.h"
|
||||
#include "cilk-abi-vla-internal.h"
|
||||
#include "bug.h"
|
||||
#include "full_frame.h"
|
||||
#include "local_state.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "bug.h"
|
||||
|
||||
/*
 * vla_internal_heap_alloc
 *
 * Allocate full_size bytes from the C heap with malloc().
 *
 * sf        - not used by this implementation
 * full_size - number of bytes to allocate
 * align     - not used by this implementation
 *
 * Returns whatever malloc() returns.
 */
void *vla_internal_heap_alloc(__cilkrts_stack_frame *sf,
                              size_t full_size,
                              uint32_t align)
{
    void *block;

    (void)sf;
    (void)align;

    block = malloc(full_size);
    return block;
}
|
||||
|
||||
/*
 * vla_internal_heap_free
 *
 * Release a block with free().
 *
 * t    - pointer to release
 * size - not used by this implementation
 */
void vla_internal_heap_free(void *t, size_t size)
{
    (void)size;

    free(t);
}
|
||||
|
||||
/*
 * vla_free_from_original_stack
 *
 * Account for a VLA of full_size bytes being released from the original
 * stack by passing the worker's current full frame and full_size to
 * __cilkrts_adjust_stack().
 *
 * sf        - stack frame of the function that owned the VLA; it must be
 *             initialized (sf->worker non-null)
 * full_size - number of bytes to add
 */
void vla_free_from_original_stack(__cilkrts_stack_frame *sf,
                                  size_t full_size)
{
    // The __cilkrts_stack_frame must be initialized
    CILK_ASSERT(sf->worker);

    // Add full_size to the full frame's sync_sp so that, once we return,
    // the VLA no longer counts as allocated on the stack.  The original
    // source carried a disabled inline alternative that did this directly:
    //     ff->sync_sp = ff->sync_sp + full_size;
    __cilkrts_adjust_stack(sf->worker->l->frame_ff, full_size);
}
|
90
libcilkrts/runtime/cilk-abi-vla-internal.h
Normal file
90
libcilkrts/runtime/cilk-abi-vla-internal.h
Normal file
|
@ -0,0 +1,90 @@
|
|||
/* cilk-abi-vla-internal.h -*-C++-*-
|
||||
*
|
||||
*************************************************************************
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
**************************************************************************/
|
||||
|
||||
/**
|
||||
* @file cilk-abi-vla-internal.h
|
||||
*
|
||||
* @brief Allocation/deallocation function for use with Variable Length
|
||||
* Arrays in spawning functions.
|
||||
*
|
||||
* These should be the only functions in the Cilk runtime allocating memory
|
||||
* from the standard C runtime heap. This memory will be provided to user
|
||||
* code for use in VLAs, when the memory cannot be allocated from the stack.
|
||||
*
|
||||
* While these functions are simply passthroughs to malloc and free at the
|
||||
* moment, once we've got the basics of VLA allocations working we'll make
|
||||
* them do fancier tricks.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Allocate memory from the heap for use by a Variable Length Array in
|
||||
* a spawning function.
|
||||
*
|
||||
* @param sf The __cilkrts_stack_frame for the spawning function containing
|
||||
* the VLA.
|
||||
* @param full_size The number of bytes to be allocated, including any tags
|
||||
* needed to identify this as allocated from the heap.
|
||||
* @param align Any alignment necessary for the allocation.
|
||||
*/
|
||||
|
||||
void *vla_internal_heap_alloc(__cilkrts_stack_frame *sf,
|
||||
size_t full_size,
|
||||
uint32_t align);
|
||||
|
||||
/**
|
||||
* @brief Deallocate memory from the heap used by a Variable Length Array in
|
||||
* a spawning function.
|
||||
*
|
||||
* @param t The address of the memory block to be freed.
|
||||
* @param size The size of the memory block to be freed.
|
||||
*/
|
||||
|
||||
void vla_internal_heap_free(void *t,
|
||||
size_t size);
|
||||
|
||||
/**
|
||||
* @brief Deallocate memory from the original stack. We'll do this by adding
|
||||
* full_size to ff->sync_sp. So after the sync, the Variable Length Array
|
||||
* will no longer be allocated on the stack.
|
||||
*
|
||||
* @param sf The __cilkrts_stack_frame for the spawning function that is
|
||||
* deallocating a VLA.
|
||||
* @param full_size The size of the VLA, including any alignment and tags.
|
||||
*/
|
||||
void vla_free_from_original_stack(__cilkrts_stack_frame *sf,
|
||||
size_t full_size);
|
733
libcilkrts/runtime/cilk-abi.c
Normal file
733
libcilkrts/runtime/cilk-abi.c
Normal file
|
@ -0,0 +1,733 @@
|
|||
/* Cilk_abi.c -*-C++-*-
|
||||
*
|
||||
*************************************************************************
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2010-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
/**
|
||||
* @file cilk-abi.c
|
||||
*
|
||||
* @brief cilk-abi.c implements all of the entrypoints to the Intel Cilk
|
||||
* Plus runtime.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Define this macro so that compilation of this file generates the
|
||||
* non-inlined versions of certain functions in cilk_api.h.
|
||||
*/
|
||||
#include "internal/abi.h"
|
||||
#include "cilk/cilk_api.h"
|
||||
#include "cilk/cilk_undocumented.h"
|
||||
#include "cilktools/cilkscreen.h"
|
||||
|
||||
#include "global_state.h"
|
||||
#include "os.h"
|
||||
#include "os_mutex.h"
|
||||
#include "bug.h"
|
||||
#include "local_state.h"
|
||||
#include "full_frame.h"
|
||||
#include "pedigrees.h"
|
||||
#include "scheduler.h"
|
||||
#include "sysdep.h"
|
||||
#include "except.h"
|
||||
#include "cilk_malloc.h"
|
||||
#include "record-replay.h"
|
||||
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#ifdef _MSC_VER
|
||||
/* Some versions of icc don't support limits.h on Linux if
|
||||
gcc 4.3 or newer is installed. */
|
||||
#include <limits.h>
|
||||
|
||||
/* Declare _ReturnAddress compiler intrinsic */
|
||||
void * _ReturnAddress(void);
|
||||
#pragma intrinsic(_ReturnAddress)
|
||||
|
||||
#include "sysdep-win.h" // Needed for sysdep_init_module()
|
||||
#endif /* _MSC_VER */
|
||||
|
||||
#include "metacall_impl.h"
|
||||
#include "reducer_impl.h"
|
||||
#include "cilk-ittnotify.h"
|
||||
#include "cilk-tbb-interop.h"
|
||||
|
||||
#define TBB_INTEROP_DATA_DELAYED_UNTIL_BIND (void *)-1
|
||||
|
||||
/**
|
||||
* __cilkrts_bind_thread is a versioned entrypoint. The runtime should be
|
||||
* exporting copies of __cilkrts_bind_thread for the current and all previous
|
||||
* versions of the ABI.
|
||||
*
|
||||
* This macro should always be set to generate a version to match the current
|
||||
* version; __CILKRTS_ABI_VERSION.
|
||||
*/
|
||||
#define BIND_THREAD_RTN __cilkrts_bind_thread_1
|
||||
|
||||
static inline
|
||||
void enter_frame_internal(__cilkrts_stack_frame *sf, uint32_t version)
|
||||
{
|
||||
__cilkrts_worker *w = __cilkrts_get_tls_worker();
|
||||
if (w == 0) { /* slow path */
|
||||
w = BIND_THREAD_RTN();
|
||||
|
||||
sf->flags = CILK_FRAME_LAST | (version << 24);
|
||||
CILK_ASSERT((sf->flags & CILK_FRAME_FLAGS_MASK) == CILK_FRAME_LAST);
|
||||
} else {
|
||||
sf->flags = (version << 24);
|
||||
CILK_ASSERT((sf->flags & CILK_FRAME_FLAGS_MASK) == 0);
|
||||
}
|
||||
sf->call_parent = w->current_stack_frame;
|
||||
sf->worker = w;
|
||||
w->current_stack_frame = sf;
|
||||
}
|
||||
|
||||
/* Version 0 frame entry: forwards to enter_frame_internal(). */
CILK_ABI_VOID __cilkrts_enter_frame(__cilkrts_stack_frame *sf)
{
    const uint32_t abi_version = 0;

    enter_frame_internal(sf, abi_version);
}
|
||||
|
||||
/*
 * Version 1 frame entry: same as version 0, and additionally clears the
 * frame's reserved field.
 */
CILK_ABI_VOID __cilkrts_enter_frame_1(__cilkrts_stack_frame *sf)
{
    const uint32_t abi_version = 1;

    enter_frame_internal(sf, abi_version);
    sf->reserved = 0;
}
|
||||
|
||||
static inline
|
||||
void enter_frame_fast_internal(__cilkrts_stack_frame *sf, uint32_t version)
|
||||
{
|
||||
__cilkrts_worker *w = __cilkrts_get_tls_worker_fast();
|
||||
sf->flags = version << 24;
|
||||
sf->call_parent = w->current_stack_frame;
|
||||
sf->worker = w;
|
||||
w->current_stack_frame = sf;
|
||||
}
|
||||
|
||||
/* Version 0 fast frame entry: forwards to enter_frame_fast_internal(). */
CILK_ABI_VOID __cilkrts_enter_frame_fast(__cilkrts_stack_frame *sf)
{
    const uint32_t abi_version = 0;

    enter_frame_fast_internal(sf, abi_version);
}
|
||||
|
||||
/*
 * Version 1 fast frame entry: same as version 0, and additionally clears
 * the frame's reserved field.
 */
CILK_ABI_VOID __cilkrts_enter_frame_fast_1(__cilkrts_stack_frame *sf)
{
    const uint32_t abi_version = 1;

    enter_frame_fast_internal(sf, abi_version);
    sf->reserved = 0;
}
|
||||
|
||||
/**
|
||||
* A component of the THE protocol. __cilkrts_undo_detach checks whether
|
||||
* this frame's parent has been stolen. If it hasn't, the frame can return
|
||||
* normally.  If the parent has been stolen, or if we suspect it might be,
|
||||
* then __cilkrts_leave_frame() needs to call into the runtime.
|
||||
*
|
||||
* @note __cilkrts_undo_detach() is comparing the exception pointer against
|
||||
* the tail pointer. The exception pointer is modified when another worker
|
||||
* is considering whether it can steal a frame. The head pointer is updated
|
||||
* to match when the worker lock is taken out and the thief is sure that
|
||||
* it can complete the steal. If the steal cannot be completed, the thief
|
||||
* will restore the exception pointer.
|
||||
*
|
||||
* @return true if undo-detach failed.
|
||||
*/
|
||||
static int __cilkrts_undo_detach(__cilkrts_stack_frame *sf)
{
    __cilkrts_worker *w = sf->worker;
    __cilkrts_stack_frame *volatile *new_tail = w->tail;

    /* Retract the tail by one entry and publish the new value. */
    --new_tail;
    w->tail = new_tail;

    /* Clear CILK_FRAME_DETACHED.  On x86 the __sync_fetch_and_<op> family
       includes a full memory barrier.  The explicit fence in the other
       branch should in theory be faster, but on most x86 it is not. */
#if defined __i386__ || defined __x86_64__
    __sync_fetch_and_and(&sf->flags, ~CILK_FRAME_DETACHED);
#else
    __cilkrts_fence(); /* membar #StoreLoad */
    sf->flags &= ~CILK_FRAME_DETACHED;
#endif

    /* Non-zero (undo-detach failed) when the retracted tail is now below
       the exception pointer; this is expected to be the rare case. */
    return __builtin_expect(new_tail < w->exc, 0);
}
|
||||
|
||||
/*
 * Runtime half of leaving a Cilk function.  The frame's flags decide which
 * of the following happens: nothing more than pedigree upkeep (Windows
 * unwinding), exception return, an undo-detach attempt for a detached
 * frame, or a return through the runtime for the last or a stolen frame.
 */
CILK_ABI_VOID __cilkrts_leave_frame(__cilkrts_stack_frame *sf)
{
    __cilkrts_worker *w = sf->worker;

#ifdef _WIN32
    /* Called from our unwind handler: proceed no further than this. */
    if (sf->flags & CILK_FRAME_UNWINDING)
    {
        /* CILK_FRAME_DETACHED marks a spawn helper frame.  Its pedigree
           points to nodes allocated on the stack, so it must be updated for
           every spawn helper frame, even while an exception is being
           processed; otherwise walking the pedigree later would fault. */
        if ((sf->flags & CILK_FRAME_DETACHED))
        {
            update_pedigree_on_leave_frame(w, sf);
        }
        return;
    }
#endif

#if CILK_LIB_DEBUG
    /* The caller is expected to have popped itself already. */
    CILK_ASSERT(w->current_stack_frame != sf);

    /* The exiting function should have checked for zero flags, so only
       flag combinations that are invalid on exit are rejected here. */
    if (__builtin_expect(sf->flags & (CILK_FRAME_EXITING|CILK_FRAME_UNSYNCHED), 0))
        __cilkrts_bug("W%u: function exiting with invalid flags %02x\n",
                      w->self, sf->flags);
#endif

    /* Must return normally if (1) the active function was called and not
       spawned, or (2) the parent has never been stolen. */
    if ((sf->flags & CILK_FRAME_DETACHED)) {

#ifndef _WIN32
        if (__builtin_expect(sf->flags & CILK_FRAME_EXCEPTING, 0)) {
            /* The pedigree is deliberately not updated before this call;
               the original note says record/replay needs the pedigree as it
               was before the update. */
            __cilkrts_return_exception(sf);

            /* If return_exception returns, the caller is attached.
               leave_frame is called from a cleanup (destructor) for the
               frame object, and the caller will reraise the exception. */
            return;
        }
#endif

        /* During replay, check whether w was the last worker to continue. */
        replay_wait_for_steal_if_parent_was_stolen(w);

        /* Try to take the detach back. */
        if (__builtin_expect(__cilkrts_undo_detach(sf), 0)) {
            /* If this call does not return, the pedigree update for
               leaving the frame happens inside it. */
            __cilkrts_c_THE_exception_check(w, sf);
        }

        update_pedigree_on_leave_frame(w, sf);

        /* Reached when undo-detach wins the race with stealing.  Otherwise
           this strand terminates and the caller is resumed via setjmp at
           sync.  No flag bits may remain set at this point. */
        if (__builtin_expect(sf->flags & CILK_FRAME_FLAGS_MASK, 0))
            __cilkrts_bug("W%u: frame won undo-detach race with flags %02x\n",
                          w->self, sf->flags);

        return;
    }

#if CILK_LIB_DEBUG
    sf->flags |= CILK_FRAME_EXITING;
#endif

    if (__builtin_expect(sf->flags & CILK_FRAME_LAST, 0))
        __cilkrts_c_return_from_initial(w); /* does return */
    else if (sf->flags & CILK_FRAME_STOLEN)
        __cilkrts_return(w); /* does return */
}
|
||||
|
||||
/* Caller must have called setjmp. */
|
||||
CILK_ABI_VOID __cilkrts_sync(__cilkrts_stack_frame *sf)
{
    __cilkrts_worker *w = sf->worker;

    /* Syncing a frame that is not marked unsynched is reported as a
       runtime bug. */
    if (__builtin_expect(!(sf->flags & CILK_FRAME_UNSYNCHED), 0))
    {
        __cilkrts_bug("W%u: double sync %p\n", w->self, sf);
    }

#ifndef _WIN32
    /* A frame carrying an exception goes through the exception-aware sync
       first. */
    if (__builtin_expect(sf->flags & CILK_FRAME_EXCEPTING, 0))
    {
        __cilkrts_c_sync_except(w, sf);
    }
#endif

    __cilkrts_c_sync(w, sf);
}
|
||||
|
||||
/*
|
||||
* __cilkrts_get_sf
|
||||
*
|
||||
* Debugging aid to provide access to the current __cilkrts_stack_frame.
|
||||
*
|
||||
* Not documented!
|
||||
*/
|
||||
|
||||
CILK_API_VOID_PTR
|
||||
__cilkrts_get_sf(void)
|
||||
{
|
||||
__cilkrts_worker *w = __cilkrts_get_tls_worker();
|
||||
if (0 == w)
|
||||
return NULL;
|
||||
|
||||
return w->current_stack_frame;
|
||||
}
|
||||
|
||||
/* Call with global lock held */
|
||||
static __cilkrts_worker *find_free_worker(global_state_t *g)
{
    __cilkrts_worker *w = 0;
    int slot;

    /* Walk the non-system workers, starting at index P - 1, and claim the
       first one that is free. */
    for (slot = g->P - 1; slot < g->total_workers; ++slot)
    {
        w = g->workers[slot];
        CILK_ASSERT(WORKER_SYSTEM != w->l->type);

        if (WORKER_FREE != w->l->type)
            continue;

        /* Claim it as a user worker that is its own team. */
        w->l->type = WORKER_USER;
        w->l->team = w;
        return w;
    }

    /* Every slot is taken: create a new worker on demand.  It doesn't
       actually belong to the Cilk global state, so nobody will ever try to
       steal from it. */
    w = (__cilkrts_worker *)__cilkrts_malloc(sizeof(*w));
    __cilkrts_cilkscreen_ignore_block(w, w+1);
    make_worker(g, -1, w);

    w->l->type = WORKER_USER;
    w->l->team = w;
    return w;
}
|
||||
|
||||
/*
|
||||
* __cilkrts_bind_thread
|
||||
*
|
||||
* Exported function to bind a thread to the runtime.
|
||||
*
|
||||
* This function name should always have a trailing suffix for the latest ABI
|
||||
* version. This means that code built with a new compiler will not load
|
||||
* against an old copy of the runtime.
|
||||
*
|
||||
* Symbols for the function called by code compiled with old versions of the
|
||||
* compiler are created in an OS-specific manner:
|
||||
* - On Windows the old symbols are defined in the cilk-exports.def linker
|
||||
* definitions file as aliases of BIND_THREAD_RTN
|
||||
* - On Linux aliased symbols are created for BIND_THREAD_RTN in this file
|
||||
* - On MacOS the alternate entrypoints are implemented and simply call
|
||||
* BIND_THREAD_RTN.
|
||||
*/
|
||||
CILK_ABI_WORKER_PTR BIND_THREAD_RTN(void)
|
||||
{
|
||||
__cilkrts_worker *w;
|
||||
int start_cilkscreen = 0;
|
||||
#ifdef USE_ITTNOTIFY
|
||||
static int unique_obj;
|
||||
#endif
|
||||
|
||||
// Cannot set this pointer until after __cilkrts_init_internal() call:
|
||||
global_state_t* g;
|
||||
|
||||
ITT_SYNC_CREATE (&unique_obj, "Initialization");
|
||||
ITT_SYNC_PREPARE(&unique_obj);
|
||||
ITT_SYNC_ACQUIRED(&unique_obj);
|
||||
|
||||
|
||||
/* 1: Initialize and start the Cilk runtime */
|
||||
__cilkrts_init_internal(1);
|
||||
|
||||
/*
|
||||
* 2: Choose a worker for this thread (fail if none left). The table of
|
||||
* user workers is protected by the global OS mutex lock.
|
||||
*/
|
||||
g = cilkg_get_global_state();
|
||||
global_os_mutex_lock();
|
||||
if (__builtin_expect(g->work_done, 0))
|
||||
__cilkrts_bug("Attempt to enter Cilk while Cilk is shutting down");
|
||||
w = find_free_worker(g);
|
||||
CILK_ASSERT(w);
|
||||
|
||||
__cilkrts_set_tls_worker(w);
|
||||
__cilkrts_cilkscreen_establish_worker(w);
|
||||
{
|
||||
full_frame *ff = __cilkrts_make_full_frame(w, 0);
|
||||
|
||||
ff->fiber_self = cilk_fiber_allocate_from_thread();
|
||||
CILK_ASSERT(ff->fiber_self);
|
||||
|
||||
cilk_fiber_set_owner(ff->fiber_self, w);
|
||||
cilk_fiber_tbb_interop_use_saved_stack_op_info(ff->fiber_self);
|
||||
|
||||
CILK_ASSERT(ff->join_counter == 0);
|
||||
ff->join_counter = 1;
|
||||
w->l->frame_ff = ff;
|
||||
w->reducer_map = __cilkrts_make_reducer_map(w);
|
||||
__cilkrts_set_leftmost_reducer_map(w->reducer_map, 1);
|
||||
load_pedigree_leaf_into_user_worker(w);
|
||||
}
|
||||
|
||||
// Make sure that the head and tail are reset, and saved_protected_tail
|
||||
// allows all frames to be stolen.
|
||||
//
|
||||
// Note that we must NOT check w->exc, since workers that are trying to
|
||||
// steal from it will be updating w->exc and we don't own the worker lock.
|
||||
// It's not worth taking out the lock just for an assertion.
|
||||
CILK_ASSERT(w->head == w->l->ltq);
|
||||
CILK_ASSERT(w->tail == w->l->ltq);
|
||||
CILK_ASSERT(w->protected_tail == w->ltq_limit);
|
||||
|
||||
// There may have been an old pending exception which was freed when the
|
||||
// exception was caught outside of Cilk
|
||||
w->l->pending_exception = NULL;
|
||||
|
||||
w->reserved = NULL;
|
||||
|
||||
// If we've already created a scheduling fiber for this worker, we'll just
|
||||
// reuse it. If w->self < 0, it means that this is an ad-hoc user worker
|
||||
// not known to the global state. Thus, we need to create a scheduling
|
||||
// stack only if we don't already have one and w->self >= 0.
|
||||
if (NULL == w->l->scheduling_fiber && w->self >= 0)
|
||||
{
|
||||
START_INTERVAL(w, INTERVAL_FIBER_ALLOCATE) {
|
||||
// Create a scheduling fiber for this worker.
|
||||
w->l->scheduling_fiber =
|
||||
cilk_fiber_allocate_from_heap(CILK_SCHEDULING_STACK_SIZE);
|
||||
cilk_fiber_reset_state(w->l->scheduling_fiber,
|
||||
scheduler_fiber_proc_for_user_worker);
|
||||
cilk_fiber_set_owner(w->l->scheduling_fiber, w);
|
||||
} STOP_INTERVAL(w, INTERVAL_FIBER_ALLOCATE);
|
||||
}
|
||||
|
||||
// If the scheduling fiber is NULL, we've either exceeded our quota for
|
||||
// fibers or workers or we're out of memory, so we should lose parallelism
|
||||
// by disallowing stealing.
|
||||
if (NULL == w->l->scheduling_fiber)
|
||||
__cilkrts_disallow_stealing(w, NULL);
|
||||
|
||||
start_cilkscreen = (0 == w->g->Q);
|
||||
|
||||
if (w->self != -1) {
|
||||
// w->self != -1, means that w is a normal user worker and must be
|
||||
// accounted for by the global state since other workers can steal from
|
||||
// it.
|
||||
|
||||
// w->self == -1, means that w is an overflow worker and was created on
|
||||
// demand. I.e., it does not need to be accounted for by the global
|
||||
// state.
|
||||
|
||||
__cilkrts_enter_cilk(w->g);
|
||||
}
|
||||
|
||||
global_os_mutex_unlock();
|
||||
|
||||
/* If there's only 1 worker, the counts will be started in
|
||||
* __cilkrts_scheduler */
|
||||
if (g->P > 1)
|
||||
{
|
||||
START_INTERVAL(w, INTERVAL_IN_SCHEDULER);
|
||||
START_INTERVAL(w, INTERVAL_WORKING);
|
||||
}
|
||||
|
||||
ITT_SYNC_RELEASING(&unique_obj);
|
||||
|
||||
/* Turn on Cilkscreen if this is the first worker. This needs to be done
|
||||
* when we are NOT holding the os mutex. */
|
||||
if (start_cilkscreen)
|
||||
__cilkrts_cilkscreen_enable_instrumentation();
|
||||
|
||||
return w;
|
||||
}
|
||||
|
||||
#ifndef _MSC_VER
|
||||
/*
|
||||
* Define old version-specific symbols for binding threads (since they exist in
|
||||
* all Cilk code). These aliases prohibit newly compiled code from loading an
|
||||
* old version of the runtime. We can handle old code with a new runtime, but
|
||||
* new code with an old runtime is verboten!
|
||||
*
|
||||
* For Windows, the aliased symbol is exported in cilk-exports.def.
|
||||
*/
|
||||
#if defined(_DARWIN_C_SOURCE) || defined(__APPLE__)
|
||||
/**
|
||||
* Mac OS X: Unfortunately, Darwin doesn't allow aliasing, so we just make a
|
||||
* call and hope the optimizer does the right thing.
|
||||
*/
|
||||
CILK_ABI_WORKER_PTR __cilkrts_bind_thread (void) {
|
||||
return BIND_THREAD_RTN();
|
||||
}
|
||||
#else
|
||||
|
||||
/**
|
||||
* Macro to convert a parameter to a string. Used on Linux or BSD.
|
||||
*/
|
||||
#define STRINGIFY(x) #x
|
||||
|
||||
/**
|
||||
* Macro to generate an __attribute__ for an aliased name
|
||||
*/
|
||||
#define ALIASED_NAME(x) __attribute__ ((alias (STRINGIFY(x))))
|
||||
|
||||
/**
|
||||
* Linux or BSD: Use the alias attribute to make the labels for the versioned
|
||||
* functions point to the same place in the code as the original. Using
|
||||
* the two macros is annoying but required.
|
||||
*/
|
||||
|
||||
CILK_ABI_WORKER_PTR __cilkrts_bind_thread(void)
|
||||
ALIASED_NAME(BIND_THREAD_RTN);
|
||||
|
||||
#endif // defined _DARWIN_C_SOURCE || defined __APPLE__
|
||||
#endif // !defined _MSC_VER
|
||||
|
||||
CILK_API_SIZET
|
||||
__cilkrts_get_stack_size(void) {
|
||||
return cilkg_get_stack_size();
|
||||
}
|
||||
|
||||
// Method for debugging.
|
||||
CILK_API_VOID __cilkrts_dump_stats(void)
|
||||
{
|
||||
// While the stats aren't protected by the global OS mutex, the table
|
||||
// of workers is, so take out the global OS mutex while we're doing this
|
||||
global_os_mutex_lock();
|
||||
if (cilkg_is_published()) {
|
||||
global_state_t *g = cilkg_get_global_state();
|
||||
__cilkrts_dump_stats_to_stderr(g);
|
||||
}
|
||||
else {
|
||||
__cilkrts_bug("Attempting to report Cilk stats before the runtime has started\n");
|
||||
}
|
||||
global_os_mutex_unlock();
|
||||
}
|
||||
|
||||
#ifndef _WIN32
|
||||
/* ABI entrypoint that hands the frame straight to __cilkrts_gcc_rethrow(). */
CILK_ABI_THROWS_VOID __cilkrts_rethrow(__cilkrts_stack_frame *sf)
{
    __cilkrts_gcc_rethrow(sf);
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* __cilkrts_unwatch_stack
|
||||
*
|
||||
* Callback for TBB to tell us they don't want to watch the stack anymore
|
||||
*/
|
||||
|
||||
static __cilk_tbb_retcode __cilkrts_unwatch_stack(void *data)
|
||||
{
|
||||
__cilk_tbb_stack_op_thunk o;
|
||||
|
||||
// If the cilk_fiber wasn't available fetch it now
|
||||
if (TBB_INTEROP_DATA_DELAYED_UNTIL_BIND == data)
|
||||
{
|
||||
full_frame *ff;
|
||||
__cilkrts_worker *w = __cilkrts_get_tls_worker();
|
||||
if (NULL == w)
|
||||
{
|
||||
// Free any saved stack op information
|
||||
cilk_fiber_tbb_interop_free_stack_op_info();
|
||||
|
||||
return 0; /* Success! */
|
||||
}
|
||||
|
||||
__cilkrts_worker_lock(w);
|
||||
ff = w->l->frame_ff;
|
||||
__cilkrts_frame_lock(w,ff);
|
||||
data = ff->fiber_self;
|
||||
__cilkrts_frame_unlock(w,ff);
|
||||
__cilkrts_worker_unlock(w);
|
||||
}
|
||||
|
||||
#if CILK_LIB_DEBUG /* Debug code */
|
||||
/* Get current stack */
|
||||
full_frame *ff;
|
||||
__cilkrts_worker *w = __cilkrts_get_tls_worker();
|
||||
__cilkrts_worker_lock(w);
|
||||
ff = w->l->frame_ff;
|
||||
__cilkrts_frame_lock(w,ff);
|
||||
CILK_ASSERT (data == ff->fiber_self);
|
||||
__cilkrts_frame_unlock(w,ff);
|
||||
__cilkrts_worker_unlock(w);
|
||||
#endif
|
||||
|
||||
/* Clear the callback information */
|
||||
o.data = NULL;
|
||||
o.routine = NULL;
|
||||
cilk_fiber_set_stack_op((cilk_fiber*)data, o);
|
||||
|
||||
// Note. Do *NOT* free any saved stack information here. If they want to
|
||||
// free the saved stack op information, they'll do it when the thread is
|
||||
// unbound
|
||||
|
||||
return 0; /* Success! */
|
||||
}
|
||||
|
||||
/*
|
||||
* __cilkrts_watch_stack
|
||||
*
|
||||
* Called by TBB, defined by Cilk.
|
||||
*
|
||||
* Requests that Cilk invoke the stack op routine when it orphans a stack.
|
||||
* Cilk sets *u to a thunk that TBB should call when it is no longer interested
|
||||
* in watching the stack.
|
||||
*/
|
||||
|
||||
CILK_API_TBB_RETCODE
|
||||
__cilkrts_watch_stack(__cilk_tbb_unwatch_thunk *u,
|
||||
__cilk_tbb_stack_op_thunk o)
|
||||
{
|
||||
cilk_fiber* current_fiber;
|
||||
__cilkrts_worker *w;
|
||||
|
||||
#ifdef _MSC_VER
|
||||
// This may be called by TBB *before* the OS has given us our
|
||||
// initialization call. Make sure the module is initialized.
|
||||
sysdep_init_module();
|
||||
#endif
|
||||
|
||||
// Fetch the __cilkrts_worker bound to this thread
|
||||
w = __cilkrts_get_tls_worker();
|
||||
if (NULL == w)
|
||||
{
|
||||
// Save data for later. We'll deal with it when/if this thread binds
|
||||
// to the runtime
|
||||
cilk_fiber_tbb_interop_save_stack_op_info(o);
|
||||
|
||||
u->routine = __cilkrts_unwatch_stack;
|
||||
u->data = TBB_INTEROP_DATA_DELAYED_UNTIL_BIND;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Get current stack */
|
||||
__cilkrts_worker_lock(w);
|
||||
current_fiber = w->l->frame_ff->fiber_self;
|
||||
__cilkrts_worker_unlock(w);
|
||||
|
||||
/* CILK_ASSERT( !sd->stack_op_data ); */
|
||||
/* CILK_ASSERT( !sd->stack_op_routine ); */
|
||||
|
||||
/* Give TBB our callback */
|
||||
u->routine = __cilkrts_unwatch_stack;
|
||||
u->data = current_fiber;
|
||||
/* Save the callback information */
|
||||
cilk_fiber_set_stack_op(current_fiber, o);
|
||||
|
||||
return 0; /* Success! */
|
||||
}
|
||||
|
||||
|
||||
// This function must be called only within a continuation, within the stack
|
||||
// frame of the continuation itself.
|
||||
CILK_API_INT __cilkrts_synched(void)
|
||||
{
|
||||
__cilkrts_worker *w = __cilkrts_get_tls_worker();
|
||||
|
||||
// If we don't have a worker, then we're synched by definition :o)
|
||||
if (NULL == w)
|
||||
return 1;
|
||||
|
||||
// Check to see if we are in a stolen continuation. If not, then
|
||||
// we are synched.
|
||||
uint32_t flags = w->current_stack_frame->flags;
|
||||
if (0 == (flags & CILK_FRAME_UNSYNCHED))
|
||||
return 1;
|
||||
|
||||
// We are in a stolen continutation, but the join counter might have been
|
||||
// decremented to one, making us synched again. Get the full frame so
|
||||
// that we can check the join counter. ASSUME: frame_ff is stable (can be
|
||||
// read without a lock) in a stolen continuation -- it can't be stolen
|
||||
// while it's currently executing.
|
||||
full_frame *ff = w->l->frame_ff;
|
||||
|
||||
// Make sure we have a full frame
|
||||
// TBD: Don't think that we should ever not have a full frame here.
|
||||
// CILK_ASSERT(NULL != ff); ?
|
||||
if (NULL == ff)
|
||||
return 1;
|
||||
|
||||
// We're synched if there are no outstanding children at this instant in
|
||||
// time. Note that this is a known race, but it's ok since we're only
|
||||
// reading. We can get false negatives, but not false positives. (I.e.,
|
||||
// we can read a non-one join_counter just before it goes to one, but the
|
||||
// join_counter cannot go from one to greater than one while we're
|
||||
// reading.)
|
||||
return 1 == ff->join_counter;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
CILK_API_INT
|
||||
__cilkrts_bump_loop_rank_internal(__cilkrts_worker* w)
|
||||
{
|
||||
// If we don't have a worker, then the runtime is not bound to this
|
||||
// thread and there is no rank to increment
|
||||
if (NULL == w)
|
||||
return -1;
|
||||
|
||||
// We're at the start of the loop body. Advance the cilk_for loop
|
||||
// body pedigree by following the parent link and updating its
|
||||
// rank.
|
||||
|
||||
// Normally, we'd just write "w->pedigree.parent->rank++"
|
||||
// But we need to cast away the "const".
|
||||
((__cilkrts_pedigree*) w->pedigree.parent)->rank++;
|
||||
|
||||
// Zero the worker's pedigree rank since this is the start of a new
|
||||
// pedigree domain.
|
||||
w->pedigree.rank = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
CILK_ABI_VOID
|
||||
__cilkrts_save_fp_ctrl_state(__cilkrts_stack_frame *sf)
|
||||
{
|
||||
// Pass call onto OS/architecture dependent function
|
||||
sysdep_save_fp_ctrl_state(sf);
|
||||
}
|
||||
|
||||
/* end cilk-abi.c */
|
100
libcilkrts/runtime/cilk-ittnotify.h
Normal file
100
libcilkrts/runtime/cilk-ittnotify.h
Normal file
|
@ -0,0 +1,100 @@
|
|||
/* cilk-ittnotify.h -*-C++-*-
|
||||
*
|
||||
*************************************************************************
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2009-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
**************************************************************************/
|
||||
|
||||
#ifndef INCLUDED_CILK_ITTNOTIFY_DOT_H
|
||||
#define INCLUDED_CILK_ITTNOTIFY_DOT_H
|
||||
|
||||
#ifdef __INTEL_COMPILER
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
|
||||
// ITTNOTIFY does not support ARM at this time
|
||||
#ifdef __arm__
|
||||
#undef USE_ITTNOTIFY
|
||||
#endif
|
||||
|
||||
#ifdef USE_ITTNOTIFY
#include <ittnotify.h>

/*
 * ITT_SYNC_* wrappers.  With USE_ITTNOTIFY defined they forward to the
 * matching __itt_sync_* entry points; otherwise (see the #else branch below)
 * every wrapper expands to nothing, so call sites need no conditionals.
 */

// Register a sync object.  _description must be a string literal: it is
// concatenated onto the "Intel Cilk Plus " prefix at compile time.
#ifdef _WIN32
# define ITT_SYNC_CREATE(_address, _description)            \
    __itt_sync_createA(_address,                            \
                       "Intel Cilk Plus " _description,     \
                       "",                                  \
                       __itt_attr_barrier)
#else
# define ITT_SYNC_CREATE(_address, _description)            \
    __itt_sync_create(_address,                             \
                      "Intel Cilk Plus " _description,      \
                      "",                                   \
                      __itt_attr_barrier)
#endif

#define ITT_SYNC_PREPARE(_address) __itt_sync_prepare(_address)
#define ITT_SYNC_ACQUIRED(_address) __itt_sync_acquired(_address)
#define ITT_SYNC_RELEASING(_address) __itt_sync_releasing(_address)
#define ITT_SYNC_DESTROY(_address) __itt_sync_destroy(_address)

// Note that we subtract 5 from the return address to find the CALL instruction
// to __cilkrts_sync
#if 1 // Disable renaming for now. Piersol isn't ready yet
#define ITT_SYNC_SET_NAME_AND_PREPARE(_address, _sync_ret_address) __itt_sync_prepare(_address)
#else
// Compiled out by the "#if 1" above.  Wrapped in do { } while (0) so that, if
// it is ever re-enabled, the expansion behaves as a single statement and
// cannot capture a following "else" at the call site.
#define ITT_SYNC_SET_NAME_AND_PREPARE(_address, _sync_ret_address)              \
    do {                                                                        \
        if (NULL != __itt_sync_prepare_ptr) {                                   \
            if (0 == _sync_ret_address)                                         \
                __itt_sync_renameA(_address, "");                               \
            else                                                                \
            {                                                                   \
                char buf[128];                                                  \
                sprintf_s(buf, 128, "IP:0x%p", (DWORD_PTR)_sync_ret_address - 5); \
                __itt_sync_renameA(_address, buf);                              \
                _sync_ret_address = 0;                                          \
            }                                                                   \
            __itt_sync_prepare(_address);                                       \
        }                                                                       \
    } while (0)
#endif

#else // USE_ITTNOTIFY not defined, compile out all calls

// Each wrapper expands to nothing; its arguments are not evaluated.
#define ITT_SYNC_CREATE(_address, _description)
#define ITT_SYNC_PREPARE(_address)
#define ITT_SYNC_ACQUIRED(_address)
#define ITT_SYNC_RELEASING(_address)
#define ITT_SYNC_DESTROY(_address)
#define ITT_SYNC_SET_NAME_AND_PREPARE(_sync_address, _wait_address)

#endif
|
||||
|
||||
#endif // ! defined(INCLUDED_CILK_ITTNOTIFY_DOT_H)
|
192
libcilkrts/runtime/cilk-tbb-interop.h
Normal file
192
libcilkrts/runtime/cilk-tbb-interop.h
Normal file
|
@ -0,0 +1,192 @@
|
|||
/* cilk-tbb-interop.h -*-C++-*-
|
||||
*
|
||||
*************************************************************************
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2009-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
**************************************************************************/
|
||||
|
||||
/**
|
||||
* @file cilk-tbb-interop.h
|
||||
*
|
||||
* @brief Interface between TBB and Cilk to allow TBB to associate its
|
||||
* per-thread data with Cilk workers, and maintain the association as work
|
||||
* moves between worker threads. This handles the case where TBB calls
|
||||
* into a Cilk function which may later call back to a function making
|
||||
* TBB calls.
|
||||
*
|
||||
* Each thunk structure has two pointers: \"routine\" and \"data\".
|
||||
* The caller of the thunk invokes *routine, passing \"data\" as the void*
|
||||
* parameter.
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_CILK_TBB_INTEROP_DOT_H
|
||||
#define INCLUDED_CILK_TBB_INTEROP_DOT_H
|
||||
|
||||
#include <cilk/common.h> // for CILK_EXPORT
|
||||
|
||||
__CILKRTS_BEGIN_EXTERN_C
|
||||
|
||||
/** A return code. 0 indicates success. */
|
||||
typedef int __cilk_tbb_retcode;
|
||||
|
||||
/**
|
||||
* Enumeration of reasons that Cilk will call the TBB stack operation
|
||||
* function.
|
||||
*
|
||||
* When a non-empty stack is transferred between threads, the first thread must
|
||||
* orphan it and the second thread must adopt it.
|
||||
*
|
||||
* An empty stack can be transferred similarly, or simply released by the first
|
||||
* thread.
|
||||
*
|
||||
* Here is a summary of the actions as transitions on a state machine.
|
||||
@verbatim
|
||||
watch ORPHAN
|
||||
-->--> -->--
|
||||
/ \ / \
|
||||
(freed empty stack) (TBB sees stack running on thread) (stack in limbo)
|
||||
\ / \ /
|
||||
--<-- --<--
|
||||
RELEASE or ADOPT
|
||||
unwatch
|
||||
@endverbatim
|
||||
*/
|
||||
typedef enum __cilk_tbb_stack_op {
|
||||
/**
|
||||
* Disconnecting stack from a thread.
|
||||
*
|
||||
* The thunk must be invoked on the thread disconnecting itself from the
|
||||
* stack. Must \"happen before\" the stack is adopted elsewhere.
|
||||
*/
|
||||
CILK_TBB_STACK_ORPHAN,
|
||||
|
||||
/**
|
||||
* Reconnecting orphaned stack to a thread.
|
||||
*
|
||||
* The thunk must be invoked on the thread adopting the stack.
|
||||
*/
|
||||
CILK_TBB_STACK_ADOPT,
|
||||
|
||||
/**
|
||||
* Releasing stack.
|
||||
*
|
||||
* The thunk must be invoked on the thread doing the releasing. Must
|
||||
* \"happen before\" the stack is used elsewhere.
|
||||
*/
|
||||
CILK_TBB_STACK_RELEASE
|
||||
} __cilk_tbb_stack_op;
|
||||
|
||||
/**
|
||||
* Function that will be called by the Cilk runtime to inform TBB of a change
|
||||
* in the stack associated with the current thread.
|
||||
*
|
||||
* It does not matter what stack the thunk runs on.
|
||||
* The thread (not fiber) on which the thunk runs is important.
|
||||
*
|
||||
* @param op Enumerated value indicating what type of change is occurring.
|
||||
* @param data Context value provided by TBB in the __cilkrts_watch_stack
|
||||
* call. This data is opaque to Cilk.
|
||||
*
|
||||
* @return 0 indicates success.
|
||||
*/
|
||||
typedef __cilk_tbb_retcode (*__cilk_tbb_pfn_stack_op)(enum __cilk_tbb_stack_op op,
|
||||
void* data);
|
||||
|
||||
/**
|
||||
* Function that will be called by TBB to inform the Cilk runtime that TBB
|
||||
* is no longer interested in watching the stack bound to the current thread.
|
||||
*
|
||||
* @param data Context value provided to TBB by the __cilkrts_watch_stack
|
||||
* call. This data is opaque to TBB.
|
||||
*
|
||||
* @return 0 indicates success.
|
||||
*/
|
||||
typedef __cilk_tbb_retcode (*__cilk_tbb_pfn_unwatch_stacks)(void *data);
|
||||
|
||||
/**
|
||||
* Thunk invoked by Cilk to call back to TBB to tell it about a change in
|
||||
* the stack bound to the current thread.
|
||||
*/
|
||||
typedef struct __cilk_tbb_stack_op_thunk {
|
||||
/// Function in TBB the Cilk runtime should call when something
|
||||
/// "interesting" happens involving a stack
|
||||
__cilk_tbb_pfn_stack_op routine;
|
||||
|
||||
/// TBB context data to pass with the call to the stack_op routine
|
||||
void* data;
|
||||
} __cilk_tbb_stack_op_thunk;
|
||||
|
||||
/**
|
||||
* Thunk invoked by TBB when it is no longer interested in watching the stack
|
||||
* bound to the current thread.
|
||||
*/
|
||||
typedef struct __cilk_tbb_unwatch_thunk {
|
||||
/// Function in Cilk runtime to call when TBB no longer wants to watch
|
||||
/// stacks
|
||||
__cilk_tbb_pfn_unwatch_stacks routine;
|
||||
|
||||
/// Cilk runtime context data to pass with the call to the unwatch_stacks
|
||||
/// routine
|
||||
void* data;
|
||||
} __cilk_tbb_unwatch_thunk;
|
||||
|
||||
/**
|
||||
* Requests that Cilk invoke __cilk_tbb_orphan_thunk when it orphans a stack.
|
||||
* Cilk sets *u to a thunk that TBB should call when it is no longer
|
||||
* interested in watching the stack.
|
||||
*
|
||||
* If the thread is not yet bound to the Cilk runtime, the Cilk runtime should
|
||||
* save this data in thread-local storage until __cilkrts_bind_thread is called.
|
||||
*
|
||||
* Called by TBB, defined by Cilk. This function is exported from the Cilk
|
||||
* runtime DLL/shared object. This declaration also appears in
|
||||
* cilk/cilk_undocumented.h -- don't change one declaration without also
|
||||
* changing the other.
|
||||
*
|
||||
* @param u __cilk_tbb_unwatch_thunk. This structure will be filled in by
|
||||
* the Cilk runtime to allow TBB to register that it is no longer interested
|
||||
* in watching the stack bound to the current thread.
|
||||
* @param o __cilk_tbb_stack_op_thunk. This structure specifies the routine
|
||||
* that the Cilk runtime should call when an "interesting" change in the stack
|
||||
* associated with the current worker occurs.
|
||||
*
|
||||
* @return 0 indicates success.
|
||||
*/
|
||||
CILK_EXPORT
|
||||
__cilk_tbb_retcode __cilkrts_watch_stack(__cilk_tbb_unwatch_thunk* u,
|
||||
__cilk_tbb_stack_op_thunk o);
|
||||
|
||||
__CILKRTS_END_EXTERN_C
|
||||
|
||||
#endif // ! defined(INCLUDED_CILK_TBB_INTEROP_DOT_H)
|
255
libcilkrts/runtime/cilk_api.c
Normal file
255
libcilkrts/runtime/cilk_api.c
Normal file
|
@ -0,0 +1,255 @@
|
|||
/* cilk_api.c -*-C-*-
|
||||
*
|
||||
*************************************************************************
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2009-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
**************************************************************************/
|
||||
|
||||
/*
|
||||
* Implementation of functions declared in cilk_api.h
|
||||
*/
|
||||
|
||||
/*
|
||||
* Define the COMPILING_CILK_API_FUNCTIONS macro, so that
|
||||
* compilation of this file generates non-inlined definitions for the
|
||||
* functions marked as CILK_EXPORT_AND_INLINE in cilk_api.h.
|
||||
*
|
||||
* We must deal with these functions differently because we need to
|
||||
* continue to ship non-inlined versions of these functions.
|
||||
*
|
||||
* CILK_EXPORT_AND_INLINE int __cilkrts_get_worker_rank(uint64_t *rank);
|
||||
* CILK_EXPORT_AND_INLINE int __cilkrts_bump_worker_rank();
|
||||
* CILK_EXPORT_AND_INLINE int __cilkrts_bump_loop_rank();
|
||||
*/
|
||||
#define COMPILING_CILK_API_FUNCTIONS
|
||||
|
||||
#include <internal/abi.h>
|
||||
#include <cilk/cilk_api.h>
|
||||
|
||||
#include "os.h"
|
||||
#include "os_mutex.h"
|
||||
#include "bug.h"
|
||||
#include "global_state.h"
|
||||
#include "local_state.h"
|
||||
#include "scheduler.h"
|
||||
#include "sysdep.h"
|
||||
|
||||
/*
 * Public entry point that initializes the Cilk runtime without starting it.
 * All of the work is delegated to __cilkrts_init_internal().
 */
CILK_API_VOID __cilkrts_init(void)
{
    /* Argument 0: initialize only, do not start the runtime. */
    __cilkrts_init_internal(0);
}
|
||||
|
||||
CILK_API_VOID __cilkrts_end_cilk(void)
|
||||
{
|
||||
// Take out the global OS mutex while we do this to protect against
|
||||
// another thread attempting to bind while we do this
|
||||
global_os_mutex_lock();
|
||||
|
||||
if (cilkg_is_published()) {
|
||||
global_state_t *g = cilkg_get_global_state();
|
||||
if (g->Q || __cilkrts_get_tls_worker())
|
||||
__cilkrts_bug("Attempt to shut down Cilk while Cilk is still "
|
||||
"running");
|
||||
__cilkrts_stop_workers(g);
|
||||
__cilkrts_deinit_internal(g);
|
||||
}
|
||||
|
||||
global_os_mutex_unlock();
|
||||
}
|
||||
|
||||
CILK_API_INT
|
||||
__cilkrts_get_nworkers()
|
||||
{
|
||||
return cilkg_get_nworkers();
|
||||
}
|
||||
|
||||
CILK_API_INT
|
||||
__cilkrts_get_total_workers()
|
||||
{
|
||||
return cilkg_get_total_workers();
|
||||
}
|
||||
|
||||
/* Return the force-reduce setting reported by cilkg_get_force_reduce(). */
CILK_API_INT __cilkrts_get_force_reduce(void)
{
    const int force_reduce = cilkg_get_force_reduce();

    return force_reduce;
}
|
||||
|
||||
/*
 * Set the runtime parameter named by `param` to `value`.
 * Both strings are passed through unchanged to cilkg_set_param(), whose
 * result is returned as-is.
 */
CILK_API_INT __cilkrts_set_param(const char* param, const char* value)
{
    const int status = cilkg_set_param(param, value);

    return status;
}
|
||||
|
||||
#ifdef _WIN32
/*
 * Wide-character counterpart of __cilkrts_set_param(), built only for
 * _WIN32.  Forwards both strings to cilkg_set_param_w() and returns its
 * result as-is.
 */
CILK_API_INT __cilkrts_set_param_w(const wchar_t* param, const wchar_t* value)
{
    const int status = cilkg_set_param_w(param, value);

    return status;
}
#endif // _WIN32
|
||||
|
||||
/* Return a small integer indicating which Cilk worker the function is
|
||||
* currently running on. Each thread started by the Cilk runtime library
|
||||
* (system worker) has a unique worker number in the range 1..P-1, where P is
|
||||
* the value returned by __cilkrts_get_nworkers(). All threads started by
|
||||
* the user or by other libraries (user workers) share the worker number 0.
|
||||
* Therefore, the worker number is not unique across multiple user threads.
|
||||
*
|
||||
* Implementor's note: The value returned from this function is different from
|
||||
* the value, w->self, used in most debug messages.
|
||||
*/
|
||||
CILK_API_INT
|
||||
__cilkrts_get_worker_number(void)
|
||||
{
|
||||
__cilkrts_worker *w = __cilkrts_get_tls_worker();
|
||||
|
||||
if (0 == w)
|
||||
/* A non-worker always has a worker number of zero. */
|
||||
return 0;
|
||||
else if (WORKER_USER == w->l->type)
|
||||
/* User worker was once a non-worker, so its number should still be
|
||||
* zero. */
|
||||
return 0;
|
||||
else
|
||||
/* w->self for a system worker is in range 0..(P-1); adjust to 1..P
|
||||
* to avoid conflicting with the user thread's worker number. */
|
||||
return w->self + 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Internal definition of the pedigree context. The size of the
|
||||
* structure must match __cilkrts_pedigree_context_t defined in abi.i
|
||||
*/
|
||||
typedef struct pedigree_context_t
|
||||
{
|
||||
/** Size of the structure, in bytes */
|
||||
size_t size;
|
||||
|
||||
/** Next __cilkrts_pedigree to return */
|
||||
const __cilkrts_pedigree *pedigree;
|
||||
|
||||
/** Unused. Left over from previous implementation */
|
||||
void *unused1;
|
||||
|
||||
/** Unused. Left over from previous implementation */
|
||||
void *unused2;
|
||||
|
||||
// // Debugging aid for pedigree-test:
|
||||
// __cilkrts_stack_frame *expected_sf;
|
||||
} pedigree_context_t;
|
||||
|
||||
/*
|
||||
* __cilkrts_get_pedigree_info
|
||||
*
|
||||
* Fetch the next birthrank in a pedigree walk. To initialize the walk, the
* pedigree pointer stored in the context must be NULL. Each call records its
* position in the context, so the same context must be passed back, unmodified,
* on the next call.
|
||||
*
|
||||
* Returns:
* 0 - Success - *sf_birthrank is valid
* 1 - Pedigree walk completed
* <0 - Failure - -3: context size field does not match the expected size
|
||||
*/
|
||||
|
||||
/*
 * Sentinel stored in pedigree_context_t::pedigree once a walk has finished.
 * The replacement list is fully parenthesized so the cast cannot bind to
 * neighbouring operators at the expansion site (e.g. under sizeof or next to
 * a postfix operator).
 */
#define PEDIGREE_WALK_COMPLETE ((__cilkrts_pedigree *)-1)
|
||||
|
||||
CILK_API_INT
|
||||
__cilkrts_get_pedigree_info(__cilkrts_pedigree_context_t *external_context,
|
||||
uint64_t *sf_birthrank)
|
||||
{
|
||||
pedigree_context_t *context = (pedigree_context_t *)external_context;
|
||||
|
||||
CILK_ASSERT(sizeof(__cilkrts_pedigree_context_t) ==
|
||||
sizeof(pedigree_context_t));
|
||||
if (context->size != sizeof(pedigree_context_t))
|
||||
return -3; // Invalid size
|
||||
|
||||
// If the pointer to the last __cilkrts_pedigree is -1, we've
|
||||
// finished the walk. We're still done.
|
||||
if (PEDIGREE_WALK_COMPLETE == context->pedigree)
|
||||
return 1;
|
||||
|
||||
// The passed in context value contains a pointer to the last
|
||||
// __cilkrts_pedigree returned, or NULL if we're starting a
|
||||
// new walk
|
||||
if (NULL == context->pedigree)
|
||||
{
|
||||
__cilkrts_worker *w = __cilkrts_get_tls_worker();
|
||||
__cilkrts_pedigree* pedigree_node;
|
||||
if (NULL != w) {
|
||||
pedigree_node = &w->pedigree;
|
||||
}
|
||||
else {
|
||||
pedigree_node = __cilkrts_get_tls_pedigree_leaf(1);
|
||||
}
|
||||
context->pedigree = pedigree_node->parent;
|
||||
}
|
||||
else
|
||||
context->pedigree = context->pedigree->parent;
|
||||
|
||||
// Note: If we want to omit the user root node,
|
||||
// stop at context->pedigree->parent instead.
|
||||
if (NULL == context->pedigree)
|
||||
{
|
||||
context->pedigree = PEDIGREE_WALK_COMPLETE;
|
||||
return 1;
|
||||
}
|
||||
|
||||
*sf_birthrank = context->pedigree->rank;
|
||||
return 0;
|
||||
}
|
||||
|
||||
CILK_API_PEDIGREE
|
||||
__cilkrts_get_pedigree_internal(__cilkrts_worker *w)
|
||||
{
|
||||
if (NULL != w) {
|
||||
return w->pedigree;
|
||||
}
|
||||
else {
|
||||
const __cilkrts_pedigree *pedigree =
|
||||
__cilkrts_get_tls_pedigree_leaf(1);
|
||||
return *pedigree;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
 * Increment the rank of the pedigree node belonging to worker `w`, or of the
 * node obtained from __cilkrts_get_tls_pedigree_leaf(1) when `w` is NULL.
 * Always returns 0.
 */
CILK_API_INT __cilkrts_bump_worker_rank_internal(__cilkrts_worker *w)
{
    __cilkrts_pedigree *node;

    if (w)
        node = &w->pedigree;
    else
        node = __cilkrts_get_tls_pedigree_leaf(1);

    node->rank += 1;
    return 0;
}
|
||||
|
||||
/* End cilk_api.c */
|
273
libcilkrts/runtime/cilk_fiber-unix.cpp
Normal file
273
libcilkrts/runtime/cilk_fiber-unix.cpp
Normal file
|
@ -0,0 +1,273 @@
|
|||
/* cilk_fiber-unix.cpp -*-C++-*-
|
||||
*
|
||||
*************************************************************************
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2012-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
**************************************************************************/
|
||||
|
||||
#include "cilk_fiber-unix.h"
|
||||
#include "cilk_malloc.h"
|
||||
#include "bug.h"
|
||||
#include "os.h"
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
|
||||
#include <alloca.h>
|
||||
#include <errno.h>
|
||||
#include <sys/mman.h>
|
||||
#include <unistd.h>
|
||||
|
||||
// MAP_ANON is deprecated on Linux, but seems to be required on Mac...
|
||||
#ifndef MAP_ANONYMOUS
|
||||
#define MAP_ANONYMOUS MAP_ANON
|
||||
#endif
|
||||
|
||||
// Magic number for sanity checking fiber structure
|
||||
const unsigned magic_number = 0x5afef00d;
|
||||
|
||||
int cilk_fiber_sysdep::s_page_size = getpagesize();
|
||||
|
||||
// Construct a fiber that owns its own stack of (at least) stack_size bytes.
cilk_fiber_sysdep::cilk_fiber_sysdep(std::size_t stack_size)
    : cilk_fiber(stack_size)
    , m_magic(magic_number)
{
    // make_stack() fills in both m_stack and m_stack_base.
    make_stack(stack_size);

    // Move the stack base down to the nearest 32-byte boundary by clearing
    // the low five bits of its address.
    const uintptr_t low_bits = 32 - 1;
    const std::size_t misalignment = ((std::size_t) m_stack_base) & low_bits;
    m_stack_base -= misalignment;
}
|
||||
|
||||
// Construct a fiber that represents an existing thread.  Such a fiber is
// flagged as allocated-from-thread and carries no stack of its own.
cilk_fiber_sysdep::cilk_fiber_sysdep(from_thread_t)
    : cilk_fiber()
    , m_magic(magic_number)
{
    // No stack is mapped for a thread-main fiber; both pointers stay NULL.
    m_stack_base = NULL;
    m_stack = NULL;

    this->set_allocated_from_thread(true);
}
|
||||
|
||||
// Intentionally empty: there is nothing to undo on Linux when a fiber is
// converted back to a thread.
void cilk_fiber_sysdep::convert_fiber_back_to_thread()
{
}
|
||||
|
||||
// Destroy the fiber, releasing its stack unless the fiber was created from
// a thread (in which case it never owned one).
cilk_fiber_sysdep::~cilk_fiber_sysdep()
{
    // Sanity check: the magic value written by the constructors is intact.
    CILK_ASSERT(magic_number == m_magic);

    const bool owns_stack = !this->is_allocated_from_thread();
    if (owns_stack) {
        free_stack();
    }
}
|
||||
|
||||
#if SUPPORT_GET_CURRENT_FIBER
// Return the fiber recorded in thread-local storage for the calling thread.
cilk_fiber_sysdep* cilk_fiber_sysdep::get_current_fiber_sysdep()
{
    cilk_fiber_sysdep* current = cilkos_get_tls_cilk_fiber();
    return current;
}
#endif
|
||||
|
||||
// Jump to resume other fiber. We may or may not come back.
|
||||
// Transfer control to `other`.  We may or may not come back.
inline void cilk_fiber_sysdep::resume_other_sysdep(cilk_fiber_sysdep* other)
{
    if (!other->is_resumable()) {
        // This fiber has never been run before: enter its start procedure.
        other->run();
    }
    else {
        // The fiber was suspended earlier.  Clear its resumable flag and
        // longjmp back to the point where it suspended itself.
        other->set_resumable(false);
        CILK_LONGJMP(other->m_resume_jmpbuf);
    }
}
|
||||
|
||||
// Suspend this fiber and switch to `other`.  This fiber must already be
// marked resumable; execution continues after the setjmp below when some
// other fiber later resumes it.
void cilk_fiber_sysdep::suspend_self_and_resume_other_sysdep(cilk_fiber_sysdep* other)
{
#if SUPPORT_GET_CURRENT_FIBER
    // Record `other` as the thread's current fiber before switching.
    cilkos_set_tls_cilk_fiber(other);
#endif
    CILK_ASSERT(this->is_resumable());

    // Save our own context, then jump away.  The direct call to setjmp
    // yields zero; we expect to arrive here a second time, with a nonzero
    // value, once we are resumed.
    if (0 == CILK_SETJMP(m_resume_jmpbuf)) {
        resume_other_sysdep(other);
    }

    // Back on this fiber.  If the fiber that switched to us asked to be
    // deallocated, take care of that now.
    do_post_switch_actions();
}
|
||||
|
||||
// Switch to `other` without saving this fiber's context.  This fiber must
// not be resumable: it is being reset, so control never returns here.
NORETURN cilk_fiber_sysdep::jump_to_resume_other_sysdep(cilk_fiber_sysdep* other)
{
#if SUPPORT_GET_CURRENT_FIBER
    // Record `other` as the thread's current fiber before switching.
    cilkos_set_tls_cilk_fiber(other);
#endif
    CILK_ASSERT(!this->is_resumable());

    // One-way jump to the other fiber.
    resume_other_sysdep(other);

    // Reaching this point means the jump above came back, which is a bug.
    __cilkrts_bug("Should not get here");
}
|
||||
|
||||
|
||||
// Begin executing this fiber's start procedure on the fiber's own stack.
//
// The current context is captured with CILK_SETJMP, the saved stack pointer
// is rewritten to point just below m_stack_base (leaving room for this
// function's frame), and CILK_LONGJMP re-enters the function on the new
// stack.  After the switch, pending post-switch actions are performed and
// m_start_proc(this) is called; that call is not expected to return.
NORETURN cilk_fiber_sysdep::run()
|
||||
{
|
||||
// Only fibers created from a pool have a proc method to run and execute.
|
||||
CILK_ASSERT(m_start_proc);
|
||||
CILK_ASSERT(!this->is_allocated_from_thread());
|
||||
CILK_ASSERT(!this->is_resumable());
|
||||
|
||||
// TBD: This setjmp/longjmp pair simply changes the stack pointer.
|
||||
// We could probably replace this code with some assembly.
|
||||
if (! CILK_SETJMP(m_resume_jmpbuf))
|
||||
{
|
||||
// Calculate the size of the current stack frame (i.e., the frame of
|
||||
// this run() function) as saved frame pointer minus saved stack pointer.
|
||||
size_t frame_size = (size_t)JMPBUF_FP(m_resume_jmpbuf) - (size_t)JMPBUF_SP(m_resume_jmpbuf);
|
||||
|
||||
// Macs require 16-byte alignment. Round the frame size up to a multiple
|
||||
// of 16 unconditionally, since doing so is harmless elsewhere.
|
||||
if (frame_size & (16-1))
|
||||
frame_size += 16 - (frame_size & (16-1));
|
||||
|
||||
// Assert that we are getting a reasonable frame size out of
|
||||
// it. If this run() function is using more than 4096 bytes
|
||||
// of space for its local variables / any register state that spills
|
||||
// to the stack, something is probably *very* wrong here...
|
||||
//
|
||||
// 4096 bytes just happens to be a number that seems "large
|
||||
// enough" --- for an example GCC 32-bit compilation, the
|
||||
// frame size was 48 bytes.
|
||||
CILK_ASSERT(frame_size < 4096);
|
||||
|
||||
// Change stack pointer to fiber stack. Offset the
|
||||
// calculation by the frame size, so that we've allocated
|
||||
// enough extra space from the top of the stack we are
|
||||
// switching to for any temporaries required for this run()
|
||||
// function.
|
||||
JMPBUF_SP(m_resume_jmpbuf) = m_stack_base - frame_size;
|
||||
CILK_LONGJMP(m_resume_jmpbuf);
|
||||
}
|
||||
|
||||
// Note: our resetting of the stack pointer is valid only if the
|
||||
// compiler has not saved any temporaries onto the stack for this
|
||||
// function before the longjmp that we still care about at this
|
||||
// point.
|
||||
|
||||
// Verify that 1) 'this' is still valid and 2) '*this' has not been
|
||||
// corrupted.
|
||||
CILK_ASSERT(magic_number == m_magic);
|
||||
|
||||
// If the fiber that switched to me wants to be deallocated, do it now.
|
||||
do_post_switch_actions();
|
||||
|
||||
// Now call the user proc on the new stack
|
||||
m_start_proc(this);
|
||||
|
||||
// alloca() to force generation of frame pointer. The argument to alloca
|
||||
// is contrived to prevent the compiler from optimizing it away. This
|
||||
// code should never actually be executed.
|
||||
int* dummy = (int*) alloca((sizeof(int) + (std::size_t) m_start_proc) & 0x1);
|
||||
*dummy = 0xface;
|
||||
|
||||
// User proc should never return.
|
||||
__cilkrts_bug("Should not get here");
|
||||
}
|
||||
|
||||
void cilk_fiber_sysdep::make_stack(size_t stack_size)
|
||||
{
|
||||
char* p;
|
||||
// We've already validated that the stack size is page-aligned and
|
||||
// is a reasonable value. No need to do any extra rounding here.
|
||||
size_t rounded_stack_size = stack_size;
|
||||
|
||||
// Normally, we have already validated that the stack size is
|
||||
// aligned to 4K. In the rare case that pages are huge though, we
|
||||
// need to do some extra checks.
|
||||
if (rounded_stack_size < 3 * (size_t)s_page_size) {
|
||||
// If the specified stack size is too small, round up to 3
|
||||
// pages. We need at least 2 extra for the guard pages.
|
||||
rounded_stack_size = 3 * (size_t)s_page_size;
|
||||
}
|
||||
else {
|
||||
// Otherwise, the stack size is large enough, but might not be
|
||||
// a multiple of page size. Round up to nearest multiple of
|
||||
// s_page_size, just to be safe.
|
||||
size_t remainder = rounded_stack_size % s_page_size;
|
||||
if (remainder) {
|
||||
rounded_stack_size += s_page_size - remainder;
|
||||
}
|
||||
}
|
||||
|
||||
p = (char*)mmap(0, rounded_stack_size,
|
||||
PROT_READ|PROT_WRITE,
|
||||
MAP_PRIVATE|MAP_ANONYMOUS,
|
||||
-1, 0);
|
||||
if (MAP_FAILED == p) {
|
||||
// For whatever reason (probably ran out of memory), mmap() failed.
|
||||
// There is no stack to return, so the program loses parallelism.
|
||||
m_stack = NULL;
|
||||
m_stack_base = NULL;
|
||||
return;
|
||||
}
|
||||
|
||||
// mprotect guard pages.
|
||||
mprotect(p + rounded_stack_size - s_page_size, s_page_size, PROT_NONE);
|
||||
mprotect(p, s_page_size, PROT_NONE);
|
||||
|
||||
m_stack = p;
|
||||
m_stack_base = p + rounded_stack_size - s_page_size;
|
||||
}
|
||||
|
||||
|
||||
void cilk_fiber_sysdep::free_stack()
|
||||
{
|
||||
if (m_stack) {
|
||||
size_t rounded_stack_size = m_stack_base - m_stack + s_page_size;
|
||||
if (munmap(m_stack, rounded_stack_size) < 0)
|
||||
__cilkrts_bug("Cilk: stack munmap failed error %d\n", errno);
|
||||
}
|
||||
}
|
||||
|
||||
/* End cilk_fiber-unix.cpp */
|
149
libcilkrts/runtime/cilk_fiber-unix.h
Normal file
149
libcilkrts/runtime/cilk_fiber-unix.h
Normal file
|
@ -0,0 +1,149 @@
|
|||
/* cilk_fiber-unix.h -*-C++-*-
|
||||
*
|
||||
*************************************************************************
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2012-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
**************************************************************************/
|
||||
|
||||
#ifndef INCLUDED_CILK_FIBER_UNIX_DOT_H
|
||||
#define INCLUDED_CILK_FIBER_UNIX_DOT_H
|
||||
|
||||
#ifndef __cplusplus
|
||||
# error cilk_fiber-unix.h is a C++-only header
|
||||
#endif
|
||||
|
||||
#include "cilk_fiber.h"
|
||||
#include "jmpbuf.h"
|
||||
|
||||
/**
|
||||
* @file cilk_fiber-unix.h
|
||||
*
|
||||
* @brief Unix-specific implementation for cilk_fiber.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Unix-specific fiber class derived from portable fiber class
|
||||
*/
|
||||
struct cilk_fiber_sysdep : public cilk_fiber
|
||||
{
|
||||
public:
|
||||
|
||||
#if SUPPORT_GET_CURRENT_FIBER
|
||||
/**
|
||||
* @brief Gets the current fiber from TLS.
|
||||
*/
|
||||
static cilk_fiber_sysdep* get_current_fiber_sysdep();
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @brief Construct the system-dependent portion of a fiber.
|
||||
*
|
||||
* @param stack_size The size of the stack for this fiber.
|
||||
*/
|
||||
cilk_fiber_sysdep(std::size_t stack_size);
|
||||
|
||||
/**
|
||||
* @brief Construct the system-dependent of a fiber created from a
|
||||
* thread.
|
||||
*/
|
||||
cilk_fiber_sysdep(from_thread_t);
|
||||
|
||||
/**
|
||||
* @brief Destructor
|
||||
*/
|
||||
~cilk_fiber_sysdep();
|
||||
|
||||
/**
|
||||
* @brief OS-specific calls to convert this fiber back to thread.
|
||||
*
|
||||
* Nothing to do for Linux.
|
||||
*/
|
||||
void convert_fiber_back_to_thread();
|
||||
|
||||
/**
|
||||
* @brief System-dependent function to suspend self and resume execution of "other".
|
||||
*
|
||||
* This fiber is suspended.
|
||||
*
|
||||
* @pre @c is_resumable() should be true.
|
||||
*
|
||||
* @param other Fiber to resume.
|
||||
*/
|
||||
void suspend_self_and_resume_other_sysdep(cilk_fiber_sysdep* other);
|
||||
|
||||
/**
|
||||
* @brief System-dependent function called to jump to @p other
|
||||
* fiber.
|
||||
*
|
||||
* @pre @c is_resumable() should be false.
|
||||
*
|
||||
* @param other Fiber to resume.
|
||||
*/
|
||||
NORETURN jump_to_resume_other_sysdep(cilk_fiber_sysdep* other);
|
||||
|
||||
/**
|
||||
* @brief Runs the start_proc.
|
||||
* @pre is_resumable() should be false.
|
||||
* @pre is_allocated_from_thread() should be false.
|
||||
* @pre m_start_proc must be valid.
|
||||
*/
|
||||
NORETURN run();
|
||||
|
||||
/**
|
||||
* @brief Returns the base of this fiber's stack.
|
||||
*/
|
||||
inline char* get_stack_base_sysdep() { return m_stack_base; }
|
||||
|
||||
private:
|
||||
char* m_stack_base; ///< The base of this fiber's stack.
|
||||
char* m_stack; // Stack memory (low address)
|
||||
__CILK_JUMP_BUFFER m_resume_jmpbuf; // Place to resume fiber
|
||||
unsigned m_magic; // Magic number for checking
|
||||
|
||||
static int s_page_size; // Page size for
|
||||
// stacks.
|
||||
|
||||
// Allocate memory for a stack. This method
|
||||
// initializes m_stack and m_stack_base.
|
||||
void make_stack(size_t stack_size);
|
||||
|
||||
// Deallocates memory for the stack.
|
||||
void free_stack();
|
||||
|
||||
// Common helper method for implementation of resume_other_sysdep
|
||||
// variants.
|
||||
inline void resume_other_sysdep(cilk_fiber_sysdep* other);
|
||||
};
|
||||
|
||||
#endif // ! defined(INCLUDED_CILK_FIBER_UNIX_DOT_H)
|
1078
libcilkrts/runtime/cilk_fiber.cpp
Normal file
1078
libcilkrts/runtime/cilk_fiber.cpp
Normal file
File diff suppressed because it is too large
Load diff
882
libcilkrts/runtime/cilk_fiber.h
Normal file
882
libcilkrts/runtime/cilk_fiber.h
Normal file
|
@ -0,0 +1,882 @@
|
|||
/* cilk_fiber.h -*-C++-*-
|
||||
*
|
||||
*************************************************************************
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2012-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
**************************************************************************/
|
||||
|
||||
/**
|
||||
* @file cilk_fiber.h
|
||||
*
|
||||
* @brief Abstraction of a "fiber": A coprocess-like stack and auxiliary data
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_CILK_FIBER_DOT_H
|
||||
#define INCLUDED_CILK_FIBER_DOT_H
|
||||
|
||||
#include <cilk/common.h>
|
||||
#ifdef __cplusplus
|
||||
# include <cstddef>
|
||||
#else
|
||||
# include <stddef.h>
|
||||
#endif
|
||||
|
||||
#include "bug.h"
|
||||
#include "cilk-tbb-interop.h"
|
||||
#include "spin_mutex.h"
|
||||
#include "internal/abi.h" // Define __cilkrts_stack_frame
|
||||
|
||||
/**
|
||||
* @brief Debugging level for Cilk fiber code.
|
||||
*
|
||||
* A value of 0 means no debugging.
|
||||
* Higher values generate more debugging output.
|
||||
*/
|
||||
#define FIBER_DEBUG 0
|
||||
|
||||
/**
|
||||
* @brief Flag for validating reference counts.
|
||||
*
|
||||
* Set to 1 to assert that fiber reference counts are reasonable.
|
||||
*/
|
||||
#define FIBER_CHECK_REF_COUNTS 1
|
||||
|
||||
/**
|
||||
* @brief Flag to determine whether fibers support reference counting.
|
||||
* We require reference counting only on Windows, for exception
|
||||
* processing. Unix does not need reference counting.
|
||||
*/
|
||||
#if defined(_WIN32)
|
||||
# define NEED_FIBER_REF_COUNTS 1
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @brief Flag to enable support for the
|
||||
* cilk_fiber_get_current_fiber() method.
|
||||
*
|
||||
* I'd like this flag to be 0. However, the cilk_fiber test depends
|
||||
* on being able to call this method.
|
||||
*/
|
||||
#if !defined(SUPPORT_GET_CURRENT_FIBER)
|
||||
# define SUPPORT_GET_CURRENT_FIBER 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @brief Switch for enabling "fast path" check for fibers, which
|
||||
* doesn't go to the heap or OS until checking the ancestors first.
|
||||
*
|
||||
* Doing this check seems to make the stress test in
|
||||
* cilk_fiber_pool.t.cpp run faster. But it doesn't seem to make much
|
||||
* difference in other benchmarks, so it is disabled by default.
|
||||
*/
|
||||
#define USE_FIBER_TRY_ALLOCATE_FROM_POOL 0
|
||||
|
||||
|
||||
__CILKRTS_BEGIN_EXTERN_C
|
||||
|
||||
/// @brief Forward reference to fiber pool.
|
||||
typedef struct cilk_fiber_pool cilk_fiber_pool;
|
||||
|
||||
/** @brief Opaque data structure representing a fiber */
|
||||
typedef struct cilk_fiber cilk_fiber;
|
||||
|
||||
/** @brief Function pointer type for use as a fiber's "main" procedure */
|
||||
typedef void (*cilk_fiber_proc)(cilk_fiber*);
|
||||
|
||||
/** @brief Data structure associated with each fiber. */
|
||||
typedef struct cilk_fiber_data
|
||||
{
|
||||
__STDNS size_t stack_size; /**< Size of stack for fiber */
|
||||
__cilkrts_worker* owner; /**< Worker using this fiber */
|
||||
__cilkrts_stack_frame* resume_sf; /**< Stack frame to resume */
|
||||
__cilk_tbb_pfn_stack_op stack_op_routine; /**< Cilk/TBB interop callback */
|
||||
void* stack_op_data; /**< Data for Cilk/TBB callback */
|
||||
void* client_data; /**< Data managed by client */
|
||||
|
||||
#ifdef _WIN32
|
||||
char *initial_sp; /**< Initalized in fiber_stub */
|
||||
# ifdef _WIN64
|
||||
char *steal_frame_sp; /**< RSP for frame stealing work */
|
||||
// Needed for exception handling so we can
|
||||
// identify when about to unwind off stack
|
||||
# endif
|
||||
#endif
|
||||
|
||||
} cilk_fiber_data;
|
||||
|
||||
/** @brief Pool of cilk_fiber for fiber reuse
|
||||
*
|
||||
* Pools form a hierarchy, with each pool pointing to its parent. When the
|
||||
* pool underflows, it gets a fiber from its parent. When a pool overflows,
|
||||
* it returns some fibers to its parent. If the root pool underflows, it
|
||||
* allocates and initializes a new fiber from the heap but only if the total
|
||||
* is less than max_size; otherwise, fiber creation fails.
|
||||
*/
|
||||
struct cilk_fiber_pool
|
||||
{
|
||||
spin_mutex* lock; ///< Mutual exclusion for pool operations
|
||||
__STDNS size_t stack_size; ///< Size of stacks for fibers in this pool.
|
||||
cilk_fiber_pool* parent; ///< @brief Parent pool.
|
||||
///< If this pool is empty, get from parent
|
||||
|
||||
// Describes inactive fibers stored in the pool.
|
||||
cilk_fiber** fibers; ///< Array of max_size fiber pointers
|
||||
unsigned max_size; ///< Limit on number of fibers in pool
|
||||
unsigned size; ///< Number of fibers currently in the pool
|
||||
|
||||
// Statistics on active fibers that were allocated from this pool,
|
||||
// but no longer in the pool.
|
||||
int total; ///< @brief Fibers allocated - fiber deallocated from pool
|
||||
///< total may be negative for non-root pools.
|
||||
int high_water; ///< High water mark of total fibers
|
||||
int alloc_max; ///< Limit on number of fibers allocated from the heap/OS
|
||||
};
|
||||
|
||||
/** @brief Initializes a cilk_fiber_pool structure
|
||||
*
|
||||
* @param pool - The address of the pool that is to be initialized
|
||||
* @param parent - The address of this pool's parent, or NULL for root pool
|
||||
* @param stack_size - Size of stacks for fibers allocated from this pool.
|
||||
* @param buffer_size - The maximum number of fibers that may be pooled.
|
||||
* @param alloc_max - Limit on # of fibers this pool can allocate from the heap.
|
||||
* @param is_shared - True if accessing this pool needs a lock, false otherwise.
|
||||
*/
|
||||
void cilk_fiber_pool_init(cilk_fiber_pool* pool,
|
||||
cilk_fiber_pool* parent,
|
||||
size_t stack_size,
|
||||
unsigned buffer_size,
|
||||
int alloc_max,
|
||||
int is_shared);
|
||||
|
||||
/** @brief Sets the maximum number of fibers to allocate from a root pool.
|
||||
*
|
||||
* @param root_pool - A root fiber pool
|
||||
* @param max_fibers_to_allocate - The limit on # of fibers to allocate.
|
||||
*
|
||||
* Sets the maximum number of fibers that can be allocated from this
|
||||
* pool and all its descendants. This pool must be a root pool.
|
||||
*/
|
||||
void cilk_fiber_pool_set_fiber_limit(cilk_fiber_pool* root_pool,
|
||||
unsigned max_fibers_to_allocate);
|
||||
|
||||
/** @brief De-initializes a cilk_fiber_pool
|
||||
*
|
||||
* @param pool - The address of the pool that is to be destroyed
|
||||
*/
|
||||
void cilk_fiber_pool_destroy(cilk_fiber_pool* pool);
|
||||
|
||||
/** @brief Allocates a new cilk_fiber.
|
||||
*
|
||||
* If the specified pool is empty, this method may choose to either
|
||||
* allocate a fiber from the heap (if pool->total < pool->alloc_max),
|
||||
* or retrieve a fiber from the parent pool.
|
||||
*
|
||||
* @note If a non-null fiber is returned, @c cilk_fiber_reset_state
|
||||
* should be called on this fiber before using it.
|
||||
*
|
||||
* An allocated fiber begins with a reference count of 1.
|
||||
* This method may lock @c pool or one of its ancestors.
|
||||
*
|
||||
* @pre pool should not be NULL.
|
||||
*
|
||||
* @param pool The fiber pool from which to retrieve a fiber.
|
||||
* @return An allocated fiber, or NULL if failed to allocate.
|
||||
*/
|
||||
cilk_fiber* cilk_fiber_allocate(cilk_fiber_pool* pool);
|
||||
|
||||
/** @brief Allocate and initialize a new cilk_fiber using memory from
|
||||
* the heap and/or OS.
|
||||
*
|
||||
* The allocated fiber begins with a reference count of 1.
|
||||
*
|
||||
* @param stack_size The size (in bytes) to be allocated for the fiber's
|
||||
* stack.
|
||||
* @return An initialized fiber. This method should not return NULL
|
||||
* unless some exceptional condition has occurred.
|
||||
*/
|
||||
cilk_fiber* cilk_fiber_allocate_from_heap(size_t stack_size);
|
||||
|
||||
|
||||
/** @brief Resets a fiber object just allocated from a pool with the
|
||||
* specified proc.
|
||||
*
|
||||
* After this call, cilk_fiber_data object associated with this fiber
|
||||
* is filled with zeros.
|
||||
*
|
||||
* This function can be called only on a fiber that has been allocated
|
||||
* from a pool, but never used.
|
||||
*
|
||||
* @param fiber The fiber to reset and initialize.
|
||||
* @param start_proc The function to run when switching to the fiber. If
|
||||
* null, the fiber can be used with cilk_fiber_run_proc()
|
||||
* but not with cilk_fiber_resume().
|
||||
*/
|
||||
void cilk_fiber_reset_state(cilk_fiber* fiber,
|
||||
cilk_fiber_proc start_proc);
|
||||
|
||||
/** @brief Remove a reference from this fiber, possibly deallocating it.
|
||||
*
|
||||
* This fiber is deallocated only when there are no other references
|
||||
* to it. Deallocation happens either by returning the fiber to the
|
||||
* specified pool, or returning it to the heap.
|
||||
*
|
||||
* A fiber that is currently executing should not remove the last
|
||||
* reference to itself.
|
||||
*
|
||||
* When a fiber is deallocated, destructors are not called for the
|
||||
* objects (if any) still on its stack. The fiber's stack and fiber
|
||||
* data is returned to the stack pool but the client fiber data is not
|
||||
* deallocated.
|
||||
*
|
||||
* If the pool overflows because of a deallocation, then some fibers
|
||||
* will be returned to the parent pool. If the root pool overflows,
|
||||
* then the fiber is returned to the heap.
|
||||
*
|
||||
* @param fiber The Cilk fiber to remove a reference to.
|
||||
* @param pool The fiber pool to which the fiber should be returned. The
|
||||
* caller is assumed to have exclusive access to the pool
|
||||
* either because there is no contention for it or because
|
||||
* its lock has been acquired. If pool is NULL, any
|
||||
* deallocated fiber is destroyed and returned to the
|
||||
* heap.
|
||||
*
|
||||
* @return Final reference count. If the count is 0, the fiber was
|
||||
* returned to a pool or the heap.
|
||||
*/
|
||||
int cilk_fiber_remove_reference(cilk_fiber *fiber, cilk_fiber_pool *pool);
|
||||
|
||||
/** @brief Allocates and initializes this thread's main fiber
|
||||
*
|
||||
* Each thread has an "implicit" main fiber that controls the
|
||||
* thread's initial stack. This function makes this fiber visible to
|
||||
* the client and allocates the Cilk-specific aspects of the implicit
|
||||
* fiber. A call to this function must be paired with a call to
|
||||
* cilk_fiber_deallocate_from_thread()
|
||||
* or a memory leak (or worse) will result.
|
||||
*
|
||||
* A fiber allocated from a thread begins with a reference count of 2.
|
||||
* One is for being allocated, and one is for being active.
|
||||
* (A fiber created from a thread is automatically currently executing.)
|
||||
* The matching calls above each decrement the reference count by 1.
|
||||
*
|
||||
* @return A fiber for the currently executing thread.
|
||||
*/
|
||||
cilk_fiber* cilk_fiber_allocate_from_thread(void);
|
||||
|
||||
/** @brief Remove a fiber created from a thread,
|
||||
* possibly deallocating it.
|
||||
*
|
||||
* Same as cilk_fiber_remove_reference, except that it works on fibers
|
||||
* created via cilk_fiber_allocate_from_thread().
|
||||
*
|
||||
* Fibers created from a thread are never returned to a pool.
|
||||
*
|
||||
* @param fiber The Cilk fiber to remove a reference from.
|
||||
* @return Final reference count. If the count is 0, the fiber was
|
||||
* returned to the heap.
|
||||
*/
|
||||
int cilk_fiber_remove_reference_from_thread(cilk_fiber *fiber);
|
||||
|
||||
/** @brief Deallocate a fiber created from a thread,
|
||||
* possibly destroying it.
|
||||
*
|
||||
* This method decrements the reference count of the fiber by 2, and
|
||||
* destroys the fiber struct if the reference count is 0.
|
||||
*
|
||||
* OS-specific cleanup for the fiber executes unconditionally with
|
||||
* this method. The destruction of the actual object, however, does
|
||||
* not occur unless the reference count is 0.
|
||||
*
|
||||
* @param fiber The cilk_fiber to deallocate from a thread.
|
||||
* @return Final reference count. If the count is 0, the fiber was
|
||||
* returned to the heap.
|
||||
*/
|
||||
int cilk_fiber_deallocate_from_thread(cilk_fiber *fiber);
|
||||
|
||||
/** @brief Returns true if this fiber is allocated from a thread.
|
||||
*/
|
||||
int cilk_fiber_is_allocated_from_thread(cilk_fiber *fiber);
|
||||
|
||||
|
||||
/** @brief Suspends execution of the current fiber and resumes the other fiber.
|
||||
*
|
||||
* Suspends the current fiber and transfers control to a new fiber. Execution
|
||||
* on the new fiber resumes from the point at which fiber suspended itself to
|
||||
* run a different fiber. If fiber was freshly allocated, then runs the
|
||||
* start_proc function specified at allocation. This function returns when
|
||||
* another fiber resumes the self fiber. Note that the state of the
|
||||
* floating-point control register (i.e., the register that controls rounding
|
||||
* mode, etc.) is valid but indeterminate on return -- different
|
||||
* implementations will have different results.
|
||||
*
|
||||
* When the @c self fiber is resumed, execution proceeds as though
|
||||
* this function call returns.
|
||||
*
|
||||
* This operation increments the reference count of @p other.
|
||||
* This operation decrements the reference count of @p self.
|
||||
*
|
||||
* @param self Fiber to switch from. Must equal current fiber.
|
||||
* @param other Fiber to switch to.
|
||||
*/
|
||||
void cilk_fiber_suspend_self_and_resume_other(cilk_fiber* self,
|
||||
cilk_fiber* other);
|
||||
|
||||
/** @brief Removes a reference from the currently executing fiber and
|
||||
* resumes other fiber.
|
||||
*
|
||||
* Removes a reference from @p self and transfers control to @p other
|
||||
* fiber. Execution on @p other resumes from the point at which @p
|
||||
* other suspended itself to run a different fiber. If @p other fiber
|
||||
* was freshly allocated, then runs the function specified at
|
||||
* creation.
|
||||
*
|
||||
*
|
||||
* This operation increments the reference count of @p other.
|
||||
*
|
||||
* This operation conceptually decrements the reference count of
|
||||
* @p self twice, once to suspend it, and once to remove a reference to
|
||||
* it. Then, if the count is 0, it is returned to the specified pool
|
||||
* or destroyed.
|
||||
*
|
||||
* @pre @p self is the currently executing fiber.
|
||||
*
|
||||
* @param self Fiber to remove a reference from and switch away from.
|
||||
* @param self_pool Pool to which the current fiber should be returned
|
||||
* @param other Fiber to switch to.
|
||||
*/
|
||||
NORETURN
|
||||
cilk_fiber_remove_reference_from_self_and_resume_other(cilk_fiber* self,
|
||||
cilk_fiber_pool* self_pool,
|
||||
cilk_fiber* other);
|
||||
|
||||
/** @brief Set the proc method to execute immediately after a switch
|
||||
* to this fiber.
|
||||
*
|
||||
* The @c post_switch_proc method executes immediately after switching
|
||||
* away from @p self fiber to some other fiber, but before @c self
|
||||
* gets cleaned up.
|
||||
*
|
||||
* @note A fiber can have only one post_switch_proc method at a time.
|
||||
* If this method is called multiple times before switching to the
|
||||
* fiber, only the last proc method will execute.
|
||||
*
|
||||
* @param self Fiber.
|
||||
* @param post_switch_proc Proc method to execute immediately after switching to this fiber.
|
||||
*/
|
||||
void cilk_fiber_set_post_switch_proc(cilk_fiber* self, cilk_fiber_proc post_switch_proc);
|
||||
|
||||
/** @brief Invoke TBB stack op for this fiber.
|
||||
*
|
||||
* @param fiber Fiber to invoke stack op for.
|
||||
* @param op The stack op to invoke
|
||||
*/
|
||||
void cilk_fiber_invoke_tbb_stack_op(cilk_fiber* fiber, __cilk_tbb_stack_op op);
|
||||
|
||||
/** @brief Returns the fiber data associated with the specified fiber.
|
||||
*
|
||||
* The returned struct is owned by the fiber and is deallocated automatically
|
||||
* when the fiber is destroyed. However, the client_data field is owned by
|
||||
* the client and must be deallocated separately. When called for a
|
||||
* newly-allocated fiber, the returned data is zero-filled.
|
||||
*
|
||||
* @param fiber The fiber for which data is being requested.
|
||||
* @return The fiber data for the specified fiber
|
||||
*/
|
||||
cilk_fiber_data* cilk_fiber_get_data(cilk_fiber* fiber);
|
||||
|
||||
/** @brief Retrieve the owner field from the fiber.
|
||||
*
|
||||
* This method is provided for convenience. One can also get the
|
||||
* fiber data, and then get the owner field.
|
||||
*/
|
||||
__CILKRTS_INLINE
|
||||
__cilkrts_worker* cilk_fiber_get_owner(cilk_fiber* fiber)
|
||||
{
|
||||
// TBD: We really want a static assert here, that this cast is
|
||||
// doing the right thing.
|
||||
cilk_fiber_data* fdata = (cilk_fiber_data*)fiber;
|
||||
return fdata->owner;
|
||||
}
|
||||
|
||||
/** @brief Sets the owner field of a fiber.
|
||||
*
|
||||
* This method is provided for convenience. One can also get the
|
||||
* fiber data, and then set the owner field.
|
||||
*/
|
||||
__CILKRTS_INLINE
|
||||
void cilk_fiber_set_owner(cilk_fiber* fiber, __cilkrts_worker* owner)
|
||||
{
|
||||
// TBD: We really want a static assert here, that this cast is
|
||||
// doing the right thing.
|
||||
cilk_fiber_data* fdata = (cilk_fiber_data*)fiber;
|
||||
fdata->owner = owner;
|
||||
}
|
||||
|
||||
/** @brief Returns true if this fiber is resumable.
|
||||
*
|
||||
* A fiber is considered resumable when it is not currently being
|
||||
* executed.
|
||||
*
|
||||
* This function is used by Windows exception code.
|
||||
* @param fiber The fiber to check.
|
||||
* @return Nonzero value if fiber is resumable.
|
||||
*/
|
||||
int cilk_fiber_is_resumable(cilk_fiber* fiber);
|
||||
|
||||
/**
|
||||
* @brief Returns the base of this fiber's stack.
|
||||
*
|
||||
* On some platforms (e.g., Windows), the fiber must have started
|
||||
* running before we can get this information.
|
||||
*
|
||||
* @param fiber The fiber to get the stack pointer from.
|
||||
* @return The base of the stack, or NULL if this
|
||||
* information is not available yet.
|
||||
*/
|
||||
char* cilk_fiber_get_stack_base(cilk_fiber* fiber);
|
||||
|
||||
|
||||
/****************************************************************************
|
||||
* TBB interop functions
|
||||
* **************************************************************************/
|
||||
/**
|
||||
* @brief Set the TBB callback information for a stack
|
||||
*
|
||||
* @param fiber The fiber to set the TBB callback information for
|
||||
* @param o The TBB callback thunk. Specifies the callback address and
|
||||
* context value.
|
||||
*/
|
||||
void cilk_fiber_set_stack_op(cilk_fiber *fiber,
|
||||
__cilk_tbb_stack_op_thunk o);
|
||||
|
||||
/**
|
||||
* @brief Save the TBB callback address and context value in
|
||||
* thread-local storage.
|
||||
*
|
||||
* We'll use it later when the thread binds to a worker.
|
||||
*
|
||||
* @param o The TBB callback thunk which is to be saved.
|
||||
*/
|
||||
void cilk_fiber_tbb_interop_save_stack_op_info(__cilk_tbb_stack_op_thunk o);
|
||||
|
||||
/**
|
||||
* @brief Move TBB stack-op info from thread-local storage and store
|
||||
* it into the fiber.
|
||||
*
|
||||
* Called when we bind a thread to the runtime. If there is any TBB
|
||||
* interop information in thread-local storage, bind it to the stack
|
||||
* now.
|
||||
*
|
||||
* @pre \c fiber should not be NULL.
|
||||
* @param fiber The fiber that should take over the TBB interop information.
|
||||
*/
|
||||
void cilk_fiber_tbb_interop_use_saved_stack_op_info(cilk_fiber *fiber);
|
||||
|
||||
/**
|
||||
* @brief Free any TBB interop information saved in thread-local storage
|
||||
*/
|
||||
void cilk_fiber_tbb_interop_free_stack_op_info(void);
|
||||
|
||||
/**
|
||||
* @brief Migrate any TBB interop information from a cilk_fiber to
|
||||
* thread-local storage.
|
||||
*
|
||||
* Returns immediately if no TBB interop information has been
|
||||
* associated with the stack.
|
||||
*
|
||||
* @param fiber The cilk_fiber whose TBB interop information should be
|
||||
* saved in thread-local storage.
|
||||
*/
|
||||
void cilk_fiber_tbb_interop_save_info_from_stack(cilk_fiber* fiber);
|
||||
|
||||
|
||||
#if SUPPORT_GET_CURRENT_FIBER
|
||||
/** @brief Returns the fiber associated with the currently executing thread
|
||||
*
|
||||
* @note This function is currently used only for testing the Cilk
|
||||
* runtime.
|
||||
*
|
||||
* @return Fiber associated with the currently executing thread or NULL if no
|
||||
* fiber was associated with this thread.
|
||||
*/
|
||||
cilk_fiber* cilk_fiber_get_current_fiber(void);
|
||||
#endif
|
||||
|
||||
|
||||
#if NEED_FIBER_REF_COUNTS
|
||||
/** @brief Returns true if this fiber has reference count > 0.
|
||||
*
|
||||
* @param fiber The fiber to check for references.
|
||||
* @return Nonzero value if the fiber has references.
|
||||
*/
|
||||
int cilk_fiber_has_references(cilk_fiber *fiber);
|
||||
|
||||
/** @brief Returns the value of the reference count.
|
||||
*
|
||||
* @param fiber The fiber to check for references.
|
||||
* @return The value of the reference count of fiber.
|
||||
*/
|
||||
int cilk_fiber_get_ref_count(cilk_fiber *fiber);
|
||||
|
||||
/** @brief Adds a reference to this fiber.
|
||||
*
|
||||
* Increments the reference count of a current fiber. Fibers with
|
||||
* nonzero reference count will not be freed or returned to a fiber
|
||||
* pool.
|
||||
*
|
||||
* @param fiber The fiber to add a reference to.
|
||||
*/
|
||||
void cilk_fiber_add_reference(cilk_fiber *fiber);
|
||||
|
||||
#endif // NEED_FIBER_REF_COUNTS
|
||||
|
||||
__CILKRTS_END_EXTERN_C
|
||||
|
||||
#ifdef __cplusplus
|
||||
// Some C++ implementation details
|
||||
|
||||
/// Opaque declaration of a cilk_fiber_sysdep object.
|
||||
struct cilk_fiber_sysdep;
|
||||
|
||||
/**
|
||||
* cilk_fiber is a base-class for system-dependent fiber implementations.
|
||||
*/
|
||||
struct cilk_fiber : protected cilk_fiber_data
|
||||
{
|
||||
protected:
|
||||
// This is a rare acceptable use of protected inheritance and protected
|
||||
// variable access: when the base class and derived class collaborate
|
||||
// tightly to comprise a single component.
|
||||
|
||||
/// For overloading constructor of cilk_fiber.
|
||||
enum from_thread_t { from_thread = 1 };
|
||||
|
||||
// Boolean flags capturing the status of the fiber.
|
||||
// Each one can be set independently.
|
||||
// A default fiber is constructed with a flag value of 0.
|
||||
static const int RESUMABLE = 0x01; ///< True if the fiber is in a suspended state and can be resumed.
|
||||
static const int ALLOCATED_FROM_THREAD = 0x02; ///< True if fiber was allocated from a thread.
|
||||
|
||||
cilk_fiber_proc m_start_proc; ///< Function to run on start up/reset
|
||||
cilk_fiber_proc m_post_switch_proc; ///< Function that executes when we first switch to a new fiber from a different one.
|
||||
|
||||
cilk_fiber* m_pending_remove_ref;///< Fiber to possibly delete on start up or resume
|
||||
cilk_fiber_pool* m_pending_pool; ///< Pool where m_pending_remove_ref should go if it is deleted.
|
||||
unsigned m_flags; ///< Captures the status of this fiber.
|
||||
|
||||
#if NEED_FIBER_REF_COUNTS
|
||||
volatile long m_outstanding_references; ///< Counts references to this fiber.
|
||||
#endif
|
||||
|
||||
/// Creates a fiber with NULL data.
|
||||
cilk_fiber();
|
||||
|
||||
/**
|
||||
* @brief Creates a fiber with user-specified arguments.
|
||||
*
|
||||
* @param stack_size Size of stack to use for this fiber.
|
||||
*/
|
||||
cilk_fiber(std::size_t stack_size);
|
||||
|
||||
/// Empty destructor.
|
||||
~cilk_fiber();
|
||||
|
||||
/**
|
||||
* @brief Performs any actions that happen after switching from
|
||||
* one fiber to another.
|
||||
*
|
||||
* These actions are:
|
||||
* 1. Execute m_post_switch_proc on a fiber.
|
||||
* 2. Do any pending deallocations from the previous fiber.
|
||||
*/
|
||||
void do_post_switch_actions();
|
||||
|
||||
/**
|
||||
*@brief Helper method that converts a @c cilk_fiber object into a
|
||||
* @c cilk_fiber_sysdep object.
|
||||
*
|
||||
* The @c cilk_fiber_sysdep object contains the system-dependent parts
|
||||
* of the implementation of a @c cilk_fiber.
|
||||
*
|
||||
* We could have @c cilk_fiber_sysdep inherit from @c cilk_fiber and
|
||||
* then use virtual functions. But since a given platform only uses
|
||||
* one definition of @c cilk_fiber_sysdep at a time, we statically
|
||||
* cast between them.
|
||||
*/
|
||||
inline cilk_fiber_sysdep* sysdep();
|
||||
|
||||
/**
|
||||
* @brief Set resumable flag to specified state.
|
||||
*/
|
||||
inline void set_resumable(bool state) {
|
||||
m_flags = state ? (m_flags | RESUMABLE) : (m_flags & (~RESUMABLE));
|
||||
}
|
||||
|
||||
/**
|
||||
*@brief Set the allocated_from_thread flag.
|
||||
*/
|
||||
inline void set_allocated_from_thread(bool state) {
|
||||
m_flags = state ? (m_flags | ALLOCATED_FROM_THREAD) : (m_flags & (~ALLOCATED_FROM_THREAD));
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
/**
|
||||
* @brief Allocates and initializes a new cilk_fiber, either from
|
||||
* the specified pool or from the heap.
|
||||
*
|
||||
* @pre pool should not be NULL.
|
||||
*/
|
||||
static cilk_fiber* allocate(cilk_fiber_pool* pool);
|
||||
|
||||
/**
|
||||
* @brief Allocates a fiber from the heap.
|
||||
*/
|
||||
static cilk_fiber* allocate_from_heap(size_t stack_size);
|
||||
|
||||
/**
|
||||
* @brief Return a fiber to the heap.
|
||||
*/
|
||||
void deallocate_to_heap();
|
||||
|
||||
/**
|
||||
* @brief Reset the state of a fiber just allocated from a pool.
|
||||
*/
|
||||
void reset_state(cilk_fiber_proc start_proc);
|
||||
|
||||
/**
|
||||
* @brief Remove a reference from this fiber, possibly
|
||||
* deallocating it if the reference count becomes 0.
|
||||
*
|
||||
* @param pool The fiber pool to which this fiber should be returned.
|
||||
* @return The final reference count.
|
||||
*/
|
||||
int remove_reference(cilk_fiber_pool* pool);
|
||||
|
||||
/**
|
||||
* @brief Deallocate the fiber by returning it to the pool.
|
||||
* @pre This method should only be called if the reference count
|
||||
* is 0.
|
||||
*
|
||||
* @param pool The fiber pool to return this fiber to. If NULL,
|
||||
* fiber is returned to the heap.
|
||||
*/
|
||||
void deallocate_self(cilk_fiber_pool *pool);
|
||||
|
||||
/** @brief Allocates and initializes this thread's main fiber. */
|
||||
static cilk_fiber* allocate_from_thread();
|
||||
|
||||
/** @brief Deallocate a fiber created from a thread,
|
||||
* possibly destroying it.
|
||||
*
|
||||
* This method decrements the reference count of this fiber by 2,
|
||||
* and destroys the fiber if the reference count is 0.
|
||||
*
|
||||
* OS-specific cleanup for the fiber executes unconditionally in
|
||||
* this method. The destruction of the actual object, however, does
|
||||
* not occur unless the reference count is 0.
|
||||
*
|
||||
* @return Final reference count. If the count is 0, the fiber was
|
||||
* returned to the heap.
|
||||
*/
|
||||
int deallocate_from_thread();
|
||||
|
||||
/** @brief Removes a reference from this fiber.
|
||||
*
|
||||
* This method deallocates this fiber if the reference count
|
||||
* becomes 0.
|
||||
*
|
||||
* @pre This fiber must be allocated from a thread.
|
||||
* @return The final reference count of this fiber.
|
||||
*/
|
||||
int remove_reference_from_thread();
|
||||
|
||||
#if SUPPORT_GET_CURRENT_FIBER
|
||||
/** @brief Get the current fiber from TLS.
|
||||
*
|
||||
* @note This function is only used for testing the runtime.
|
||||
*/
|
||||
static cilk_fiber* get_current_fiber();
|
||||
#endif
|
||||
|
||||
/** @brief Suspends execution of the current fiber and resumes the other fiber.
|
||||
*
|
||||
* Control returns after resuming execution of the self fiber.
|
||||
*/
|
||||
void suspend_self_and_resume_other(cilk_fiber* other);
|
||||
|
||||
|
||||
/** @brief Removes a reference from the currently executing fiber
|
||||
* and resumes other fiber.
|
||||
*
|
||||
* This fiber may be returned to a pool or deallocated.
|
||||
*/
|
||||
NORETURN remove_reference_from_self_and_resume_other(cilk_fiber_pool* self_pool,
|
||||
cilk_fiber* other);
|
||||
|
||||
/** @brief Set the proc method to execute immediately after a switch
|
||||
* to this fiber.
|
||||
*
|
||||
* @param post_switch_proc Proc method to execute immediately
|
||||
* after switching to this fiber.
|
||||
*/
|
||||
inline void set_post_switch_proc(cilk_fiber_proc post_switch_proc) {
|
||||
m_post_switch_proc = post_switch_proc;
|
||||
}
|
||||
|
||||
/** @brief Returns true if this fiber is resumable.
|
||||
*
|
||||
* A fiber is considered resumable when it is not currently being
|
||||
* executed.
|
||||
*/
|
||||
inline bool is_resumable(void) {
|
||||
return (m_flags & RESUMABLE);
|
||||
}
|
||||
|
||||
/** @brief Returns true if fiber was allocated from a thread. */
|
||||
inline bool is_allocated_from_thread(void) {
|
||||
return (m_flags & ALLOCATED_FROM_THREAD);
|
||||
}
|
||||
|
||||
/**
|
||||
*@brief Get the address at the base of the stack for this fiber.
|
||||
*/
|
||||
inline char* get_stack_base();
|
||||
|
||||
/** @brief Return the data for this fiber: the object itself, viewed as a cilk_fiber_data pointer. */
|
||||
cilk_fiber_data* get_data() { return this; }
|
||||
|
||||
/** @brief Return the data for this fiber (const overload): the object itself, viewed as a pointer to const cilk_fiber_data. */
|
||||
cilk_fiber_data const* get_data() const { return this; }
|
||||
|
||||
|
||||
#if NEED_FIBER_REF_COUNTS
|
||||
/**
 * @brief Verifies that this fiber's reference count equals v.
 *
 * The check is compiled in only when FIBER_CHECK_REF_COUNTS is nonzero;
 * otherwise this method does nothing.
 *
 * @param v  Expected value of m_outstanding_references.
 */
inline void assert_ref_count_equals(long v) {
#if FIBER_CHECK_REF_COUNTS
    // Exact match required: ">=" is the weaker condition checked by
    // assert_ref_count_at_least().
    CILK_ASSERT(m_outstanding_references == v);
#endif
}
|
||||
|
||||
/**
 * @brief Verifies that this fiber's reference count is at least v.
 *
 * The check is compiled in only when FIBER_CHECK_REF_COUNTS is nonzero;
 * otherwise the body is empty.
 *
 * @param v  Lower bound that m_outstanding_references is compared against.
 */
|
||||
inline void assert_ref_count_at_least(long v) {
|
||||
#if FIBER_CHECK_REF_COUNTS
|
||||
CILK_ASSERT(m_outstanding_references >= v);
|
||||
#endif
|
||||
}
|
||||
|
||||
/** @brief Get reference count (returns the current value of m_outstanding_references). */
|
||||
inline long get_ref_count() { return m_outstanding_references; }
|
||||
|
||||
/** @brief Initialize reference count.
 *
 * Operation is not atomic: v is written to m_outstanding_references with
 * a plain assignment.
 *
 * @param v  Initial reference count.
 */
|
||||
inline void init_ref_count(long v) { m_outstanding_references = v; }
|
||||
|
||||
// For Windows, updates to the fiber reference count need to be
|
||||
// atomic, because exceptions can live on a stack that we are not
|
||||
// currently executing on. Thus, we can update the reference
|
||||
// count of a fiber we are not currently executing on.
// Each wrapper below forwards to the matching atomic_* method
// declared in the private section of this class.
|
||||
|
||||
/** @brief Increment reference count for this fiber [Windows]. Forwards to atomic_inc_ref_count(). */
|
||||
inline void inc_ref_count() { atomic_inc_ref_count(); }
|
||||
|
||||
/** @brief Decrement reference count for this fiber [Windows]. Returns whatever atomic_dec_ref_count() returns. */
|
||||
inline long dec_ref_count() { return atomic_dec_ref_count(); }
|
||||
|
||||
/** @brief Subtract v from the reference count for this fiber [Windows]. Returns whatever atomic_sub_from_ref_count(v) returns. */
|
||||
inline long sub_from_ref_count(long v) { return atomic_sub_from_ref_count(v); }
|
||||
#else // NEED_FIBER_REF_COUNTS
|
||||
|
||||
// Without reference counting (NEED_FIBER_REF_COUNTS is zero), we have
// placeholder methods with the same signatures so callers compile unchanged.
|
||||
inline void init_ref_count(long v) { }
|
||||
|
||||
inline void inc_ref_count() { }
|
||||
|
||||
// With no reference counting, dec_ref_count always returns 0.
|
||||
// Thus, anyone checking is always the "last" one.
|
||||
inline long dec_ref_count() { return 0; }
|
||||
// sub_from_ref_count likewise always reports 0, whatever v is.
inline long sub_from_ref_count(long v) { return 0; }
|
||||
|
||||
// The assert methods do nothing.
|
||||
inline void assert_ref_count_equals(long v) { }
|
||||
inline void assert_ref_count_at_least(long v) { }
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @brief Call TBB to tell it about an "interesting" event.
|
||||
*
|
||||
* @param op Value specifying the event to track.
|
||||
*/
|
||||
void invoke_tbb_stack_op(__cilk_tbb_stack_op op);
|
||||
|
||||
private:
|
||||
|
||||
/**
|
||||
* @brief Helper method: try to allocate a fiber from this pool or
|
||||
* its ancestors without going to the OS / heap.
|
||||
*
|
||||
* Returns the allocated fiber, or NULL if no fiber is found.
|
||||
*
|
||||
* If pool contains a suitable fiber, return it. Otherwise, try to
|
||||
* recursively grab a fiber from the parent pool, if there is one.
|
||||
*
|
||||
* This method will not allocate a fiber from the heap.
|
||||
*/
|
||||
static cilk_fiber* try_allocate_from_pool_recursive(cilk_fiber_pool* pool);
|
||||
|
||||
|
||||
#if NEED_FIBER_REF_COUNTS
|
||||
/**
|
||||
* @brief Atomic increment of reference count.
|
||||
*/
|
||||
void atomic_inc_ref_count();
|
||||
|
||||
/**
|
||||
* @brief Atomic decrement of reference count.
|
||||
*/
|
||||
long atomic_dec_ref_count();
|
||||
|
||||
/**
|
||||
* @brief Atomic subtract of v from reference count.
|
||||
* @param v Value to subtract.
|
||||
*/
|
||||
long atomic_sub_from_ref_count(long v);
|
||||
#endif // NEED_FIBER_REF_COUNTS
|
||||
|
||||
};
|
||||
|
||||
#endif // __cplusplus
|
||||
|
||||
#endif // ! defined(INCLUDED_CILK_FIBER_DOT_H)
|
84
libcilkrts/runtime/cilk_malloc.c
Normal file
84
libcilkrts/runtime/cilk_malloc.c
Normal file
|
@ -0,0 +1,84 @@
|
|||
/* cilk_malloc.c -*-C-*-
|
||||
*
|
||||
*************************************************************************
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2009-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
**************************************************************************/
|
||||
|
||||
#include "cilk_malloc.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#if defined _WIN32 || defined _WIN64 || defined __linux__
|
||||
#include <malloc.h>
|
||||
#define HAS_MEMALIGN 1
|
||||
#endif
|
||||
#ifdef __VXWORKS__
|
||||
#define HAS_MEMALIGN 1
|
||||
#include <memLib.h>
|
||||
#endif
|
||||
|
||||
#define PREFERRED_ALIGNMENT 64 /* try to keep runtime system data
|
||||
structures within one cache line */
|
||||
|
||||
/*
 * Allocate size bytes for the runtime.
 *
 * The allocator is chosen at compile time: _aligned_malloc on Windows,
 * memalign when HAS_MEMALIGN is defined (both asked for
 * PREFERRED_ALIGNMENT-byte alignment), otherwise plain malloc.  The result
 * is passed back unchecked, so it may be NULL.
 */
void *__cilkrts_malloc(size_t size)
{
    void *block;

    /* TODO: check for out of memory */
#if defined _WIN32
    block = _aligned_malloc(size, PREFERRED_ALIGNMENT);
#elif defined HAS_MEMALIGN
    block = memalign(PREFERRED_ALIGNMENT, size);
#else
    block = malloc(size);
#endif

    return block;
}
|
||||
|
||||
/*
 * Resize the block at ptr to size bytes.
 *
 * Windows builds use _aligned_realloc with PREFERRED_ALIGNMENT; every other
 * build forwards to the standard realloc.  The result is returned
 * unchecked.
 */
void *__cilkrts_realloc(void *ptr, size_t size)
{
    void *resized;

#ifndef _WIN32
    resized = realloc(ptr, size);
#else
    resized = _aligned_realloc(ptr, size, PREFERRED_ALIGNMENT);
#endif

    return resized;
}
|
||||
|
||||
/*
 * Release a block obtained from __cilkrts_malloc / __cilkrts_realloc.
 *
 * Windows builds must pair the aligned allocator with _aligned_free; every
 * other build uses the standard free.
 */
void __cilkrts_free(void *ptr)
{
#ifndef _WIN32
    free(ptr);
#else
    _aligned_free(ptr);
#endif
}
|
||||
|
||||
/* End cilk_malloc.c */
|
90
libcilkrts/runtime/cilk_malloc.h
Normal file
90
libcilkrts/runtime/cilk_malloc.h
Normal file
|
@ -0,0 +1,90 @@
|
|||
/* cilk_malloc.h -*-C++-*-
|
||||
*
|
||||
*************************************************************************
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2009-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
**************************************************************************/
|
||||
|
||||
/**
|
||||
* @file cilk_malloc.h
|
||||
*
|
||||
* @brief Provides replacement memory allocation functions to allocate
|
||||
* (and free) memory on cache line boundaries, if supported by the OS.
|
||||
*
|
||||
* If aligned memory functions are not provided by the OS, the calls just
|
||||
* pass through to the standard memory allocation functions.
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_CILK_MALLOC_DOT_H
|
||||
#define INCLUDED_CILK_MALLOC_DOT_H
|
||||
|
||||
#include <cilk/common.h>
|
||||
#include <stddef.h>
|
||||
|
||||
#include "rts-common.h"
|
||||
|
||||
__CILKRTS_BEGIN_EXTERN_C
|
||||
|
||||
/**
|
||||
* malloc replacement function to allocate memory aligned on a cache line
|
||||
* boundary if aligned memory allocations are supported by the OS.
|
||||
*
|
||||
* @param size Number of bytes to allocate.
|
||||
*
|
||||
* @return pointer to memory block allocated, or NULL if unsuccessful.
|
||||
*/
|
||||
COMMON_PORTABLE void *__cilkrts_malloc(size_t size);
|
||||
|
||||
/**
|
||||
* realloc replacement function to allocate memory aligned on a cache line
|
||||
* boundary if aligned memory allocations are supported by the OS.
|
||||
*
|
||||
* @param ptr Block to be reallocated.
|
||||
* @param size Number of bytes to allocate.
|
||||
*
|
||||
* @return pointer to memory block allocated, or NULL if unsuccessful.
|
||||
*/
|
||||
COMMON_PORTABLE void *__cilkrts_realloc(void *ptr, size_t size);
|
||||
|
||||
/**
|
||||
* free replacement function to deallocate memory aligned on a cache line
|
||||
* boundary if aligned memory allocations are supported by the OS.
|
||||
*
|
||||
* @param ptr Block to be freed.
|
||||
*/
|
||||
COMMON_PORTABLE void __cilkrts_free(void *ptr);
|
||||
|
||||
__CILKRTS_END_EXTERN_C
|
||||
|
||||
#endif // ! defined(INCLUDED_CILK_MALLOC_DOT_H)
|
52
libcilkrts/runtime/component.h
Normal file
52
libcilkrts/runtime/component.h
Normal file
|
@ -0,0 +1,52 @@
|
|||
/* component.h -*-C++-*-
|
||||
*
|
||||
*************************************************************************
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2009-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
**************************************************************************/
|
||||
|
||||
#ifndef INCLUDED_COMPONENT_DOT_H
|
||||
#define INCLUDED_COMPONENT_DOT_H
|
||||
|
||||
#define COMPONENT_NAME "Intel® Cilk™ Plus Runtime"
|
||||
|
||||
#define COMPONENT_INTERNAL_NAME COMPONENT_NAME
|
||||
|
||||
#define COMPONENT_FILENAME "CILKRTS20"
|
||||
|
||||
/*
 * Stringizing helper.  The '#' operator does not macro-expand its operand,
 * so BuildVersionString() forwards through this second macro; that gives
 * arguments such as VERSION_MAJOR the chance to expand to their values
 * before they are turned into string literals.
 */
#define BuildVersionStringImpl(_major, _minor, _build, _rev) #_major "," #_minor "," #_build "," #_rev

/* Builds the string "<major>,<minor>,<build>,<rev>" from its arguments. */
#define BuildVersionString(_major, _minor, _build, _rev) BuildVersionStringImpl(_major, _minor, _build, _rev)

/* Version string assembled from the VERSION_* macros. */
#define COMPONENT_VERSION_STRING BuildVersionString (VERSION_MAJOR, VERSION_MINOR, VERSION_BUILD, VERSION_REVISION)
|
||||
|
||||
#endif // ! defined(INCLUDED_COMPONENT_DOT_H)
|
107
libcilkrts/runtime/config/generic/cilk-abi-vla.c
Normal file
107
libcilkrts/runtime/config/generic/cilk-abi-vla.c
Normal file
|
@ -0,0 +1,107 @@
|
|||
/* cilk-abi-vla.c -*-C++-*-
|
||||
*
|
||||
*************************************************************************
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
/*
|
||||
* Implementation of Variable Length Array (VLA) ABI.
|
||||
*
|
||||
* The compiler calls these functions to allocate Variable Length Arrays
|
||||
* at runtime. The compiler must guarantee that __cilkrts_stack_free() is
|
||||
* called to cleanup any memory allocated by __cilkrts_stack_alloc().
|
||||
*
|
||||
* This generic implementation always allocates the memory from the heap.
|
||||
* Optimally, the implementation should expand the frame of the calling
|
||||
* function if possible, since that will be faster. See the x86 version
|
||||
* for one possible implementation.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "internal/abi.h"
|
||||
#include "cilk-abi-vla-internal.h"
|
||||
|
||||
#define c_cilk_ptr_from_heap 0xc2f2f00d
|
||||
#define c_cilk_ptr_from_stack 0xc3f30d0f
|
||||
|
||||
// Allocate space for a variable length array
|
||||
CILK_ABI(__cilkrts_void_ptr)
|
||||
__cilkrts_stack_alloc(
|
||||
__cilkrts_stack_frame *sf,
|
||||
size_t size,
|
||||
size_t distance_from_sp_to_alloca_area,
|
||||
uint32_t align, // align is always >= minimum stack alignment and
|
||||
// >= ptr_size as well, and must be a power of 2.
|
||||
uint32_t needs_tag // non-zero if the pointer being returned needs to
|
||||
// be tagged
|
||||
)
|
||||
{
|
||||
// full_size will be a multiple of align, and contains
|
||||
// enough extra space to allocate a marker.
|
||||
size_t full_size = (size + align - 1) & ~(align - 1);
|
||||
|
||||
// Allocate memory from the heap. The compiler is responsible
|
||||
// for guaranteeing us a chance to free it before the function
|
||||
// exits
|
||||
|
||||
return (void *)vla_internal_heap_alloc(sf, full_size, align);
|
||||
}
|
||||
|
||||
// Free the space allocated for a variable length array.
|
||||
CILK_ABI(void)
|
||||
__cilkrts_stack_free(
|
||||
__cilkrts_stack_frame *sf,
|
||||
void *p,
|
||||
size_t size,
|
||||
size_t distance_from_sp_to_alloca_area,
|
||||
uint32_t align, // same requirements as for align in allocation,
|
||||
// and must match alignment that was passed when
|
||||
// doing the allocation
|
||||
uint32_t known_from_stack // non-zero if this is known to be allocated
|
||||
// on the stack, and therefore has no tag
|
||||
)
|
||||
{
|
||||
// full_size will be a multiple of align, and contains
|
||||
// enough extra space to allocate a marker if one was needed.
|
||||
size_t full_size = (size + align - 1) & ~(align - 1);
|
||||
|
||||
// Just free the allocated memory to the heap since we don't know
|
||||
// how to expand/contract the calling frame
|
||||
vla_internal_heap_free(t, full_size);
|
||||
}
|
53
libcilkrts/runtime/config/generic/os-fence.h
Normal file
53
libcilkrts/runtime/config/generic/os-fence.h
Normal file
|
@ -0,0 +1,53 @@
|
|||
/* os-fence.h -*-C++-*-
|
||||
*
|
||||
*************************************************************************
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2009-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
**************************************************************************/
|
||||
|
||||
/*
|
||||
* void __cilkrts_fence(void)
|
||||
*
|
||||
* Executes an MFENCE instruction to serialize all load and store instructions
|
||||
* that were issued prior the MFENCE instruction. This serializing operation
|
||||
* guarantees that every load and store instruction that precedes the MFENCE
|
||||
* instruction is globally visible before any load or store instruction that
|
||||
* follows the MFENCE instruction. The MFENCE instruction is ordered with
|
||||
* respect to all load and store instructions, other MFENCE instructions, any
|
||||
* SFENCE and LFENCE instructions, and any serializing instructions (such as
|
||||
* the CPUID instruction).
|
||||
*/
|
||||
|
||||
COMMON_SYSDEP void __cilkrts_fence(void); ///< MFENCE instruction
|
||||
|
94
libcilkrts/runtime/config/generic/os-unix-sysdep.c
Normal file
94
libcilkrts/runtime/config/generic/os-unix-sysdep.c
Normal file
|
@ -0,0 +1,94 @@
|
|||
/* os-unix-sysdep.c -*-C-*-
|
||||
*
|
||||
*************************************************************************
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2009-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*************************************************************************
|
||||
*
|
||||
* This file contains generic implementations of system-specific code for
|
||||
* Unix-based systems
|
||||
*/
|
||||
|
||||
#include "os.h"
|
||||
#include "sysdep.h"
|
||||
|
||||
/*
 * The cycle counter is used for debugging. This function is only called if
 * CILK_PROFILE is defined when the runtime is built.
 *
 * This version is a stub: it triggers a compile-time warning and always
 * returns 0.
 */
|
||||
COMMON_SYSDEP unsigned long long __cilkrts_getticks(void)
|
||||
{
|
||||
# warning "unimplemented cycle counter"
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * A "short pause" - called from the Cilk runtime's spinloops.
 *
 * This version has an empty body (it returns immediately) and triggers a
 * compile-time warning to say so.
 */
|
||||
COMMON_SYSDEP void __cilkrts_short_pause(void)
|
||||
{
|
||||
# warning __cilkrts_short_pause empty
|
||||
}
|
||||
|
||||
/*
|
||||
* Interlocked exchange - used to implement the Cilk runtime's spinloops
|
||||
*/
|
||||
COMMON_SYSDEP int __cilkrts_xchg(volatile int *ptr, int x)
|
||||
{
|
||||
x = __sync_lock_test_and_set(ptr, x);
|
||||
return x;
|
||||
}
|
||||
|
||||
|
||||
/*
 * Restore the floating point state that is stored in a stack frame at each
 * spawn. This should be called each time a frame is resumed.
 *
 * Only valid for IA32 and Intel64 processors.
 *
 * In this file the function is a no-op: sf is ignored.
 */
|
||||
void restore_x86_fp_state (__cilkrts_stack_frame *sf)
|
||||
{
/* Intentionally empty. */
|
||||
}
|
||||
|
||||
|
||||
/*
 * Save the floating point state to the __cilkrts_stack_frame at each spawn.
 *
 * Architecture-specific - Should only be needed on IA32 and Intel64
 * processors.
 *
 * In this file the function is a no-op: sf is ignored.
 */
|
||||
void sysdep_save_fp_ctrl_state(__cilkrts_stack_frame *sf)
|
||||
{
/* Intentionally empty. */
|
||||
}
|
||||
|
422
libcilkrts/runtime/config/x86/cilk-abi-vla.c
Normal file
422
libcilkrts/runtime/config/x86/cilk-abi-vla.c
Normal file
|
@ -0,0 +1,422 @@
|
|||
/* cilk-abi-vla.c -*-C++-*-
|
||||
*
|
||||
*************************************************************************
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
/*
|
||||
* Implementation of Variable Length Array (VLA) ABI.
|
||||
*
|
||||
* __cilkrts_stack_alloc() and __cilkrts_stack_free must be compiled
|
||||
* such that ebp/rbp is used for the stack frames. This is done by having
|
||||
* each of them use alloca, which forces the special frame types needed on
|
||||
* each of the ABIs. Additionally, for some forms of stack frame, special
|
||||
* care must be taken because the alloca space may not be at the bottom of the
|
||||
* stack frame of the caller. For Intel64 windows, and for some options
|
||||
* with other ABIs, a preallocated parameter block may exist on the stack
|
||||
* at a lower address than the alloca. If this is the case, the parameter
|
||||
* distance_from_sp_to_alloca_area will be non-zero, and will indicate how
|
||||
* much pre-allocated parameter space resides in the caller's stack frame
|
||||
* between the alloca area, and the bottom of the stack when the call to
|
||||
* the cilkrts is made. As such, when non-zero it also includes any space
|
||||
* used for passing the cilkrts_stack_alloc or cilkrts_stack_free parameters.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#ifdef _WIN32
|
||||
# define alloca _alloca
|
||||
# define INLINE static __inline
|
||||
# pragma warning(disable:1025) // Don't whine about zero extending result of unary operation
|
||||
#else
|
||||
# include <alloca.h>
|
||||
# define INLINE static inline
|
||||
#endif
|
||||
|
||||
#include "internal/abi.h"
|
||||
#include "cilk-abi-vla-internal.h"
|
||||
|
||||
#if defined(__x86_64) || defined(_M_X64)
|
||||
// x86-64: load val into the stack pointer register (%rsp).
INLINE void setsp(void *val)
|
||||
{
|
||||
// "rsp" is listed as clobbered because the instruction overwrites it.
__asm__("movq %0, %%rsp" : : "r"(val): "rsp");
|
||||
}
|
||||
// x86-64: return the current value of the stack pointer register (%rsp).
INLINE char* getsp(void)
|
||||
{
|
||||
void *res;
|
||||
|
||||
// Copy %rsp into res.
__asm__("movq %%rsp, %0" : "=r"(res): : "rsp");
|
||||
return res;
|
||||
}
|
||||
// Intel64 variant: return the current value of the frame pointer (rbp).
INLINE char* getbp(void)
|
||||
{
|
||||
void *res;
|
||||
|
||||
// Copies rbp into res. NOTE(review): rbp is only read here, yet it is
// listed as clobbered - confirm whether that is intentional.
__asm__("movq %%rbp, %0" : "=r"(res): : "rbp");
|
||||
return res;
|
||||
}
|
||||
// Intel64 variant: copy cpy_bytes bytes from src to dst in ascending address
// order, 8 bytes at a time, and then load new_ebp into rbp.
INLINE void copy_frame_down_and_move_bp(
|
||||
char *dst,
|
||||
char *src,
|
||||
size_t cpy_bytes,
|
||||
char *new_ebp
|
||||
)
|
||||
{
|
||||
// In this version, dst is guaranteed to be lower address than src,
|
||||
// therefore copying upwards from src into dst is safe in case
|
||||
// there is overlap. The number of bytes is also guaranteed to be
|
||||
// a multiple of 8, and the copy is done in 64 bit word chunks for
|
||||
// best efficiency.
|
||||
__asm__(
|
||||
// rdi = destination, rsi = source, rcx = byte count
"movq %0, %%rdi;"
|
||||
"movq %1, %%rsi;"
|
||||
"movq %2, %%rcx;"
|
||||
// turn the byte count into a count of 8-byte words
"shrq $3, %%rcx;"
|
||||
"rep movsq;"
|
||||
// the frame pointer is switched only after the copy has finished
"movq %3, %%rbp" :
|
||||
:
|
||||
"rm"(dst), "rm"(src), "rm"(cpy_bytes), "rm"(new_ebp) :
|
||||
"rsi", "rdi", "rcx", "rbp", "memory");
|
||||
}
|
||||
/*
 * Intel64 variant: copy cpy_bytes bytes from src to dst in descending
 * address order, 8 bytes at a time, and then load new_ebp into rbp.
 *
 * dst        - start of the destination region (higher address than src)
 * src        - start of the source region
 * cpy_bytes  - number of bytes to copy; a multiple of 8
 * new_ebp    - value installed in rbp once the copy has finished
 */
INLINE void copy_frame_up_and_move_bp(
    char *dst,
    char *src,
    size_t cpy_bytes,
    char *new_ebp
)
{
    // In this version, dst is guaranteed to be higher address than src,
    // therefore copying downwards from src into dst is safe in case
    // there is overlap. The number of bytes is also guaranteed to be
    // a multiple of 8, and the copy is done in 64 bit word chunks for
    // best efficiency.

    // The copy runs backwards, so start both pointers on the last 8-byte
    // word of their regions.
    dst += cpy_bytes - 8;
    src += cpy_bytes - 8;
    __asm__(
        "movq %0, %%rdi;"
        "movq %1, %%rsi;"
        "movq %2, %%rcx;"
        // turn the byte count into a count of 8-byte words
        "shrq $3, %%rcx;"
        // std makes movsq decrement rsi/rdi; cld restores the direction flag
        "std; rep movsq; cld;"
        // rbp and new_ebp are 64 bits wide, so this has to be movq (the
        // 32-bit movl mnemonic does not match a 64-bit register operand)
        "movq %3, %%rbp;" :
        :
        "rm"(dst), "rm"(src), "rm"(cpy_bytes), "rm"(new_ebp) :
        "rsi", "rdi", "rcx", "rbp", "memory");
}
|
||||
#else
|
||||
// IA-32 variant: set the stack pointer (esp) to val.
INLINE void setsp(void *val)
|
||||
{
|
||||
// One move into esp; esp is named in the clobber list because it is overwritten.
__asm__("movl %0, %%esp" : : "r"(val): "esp");
|
||||
}
|
||||
// IA-32 variant: return the current value of the stack pointer (esp).
INLINE char* getsp(void)
|
||||
{
|
||||
void *res;
|
||||
|
||||
// Copies esp into res. NOTE(review): esp is only read here, yet it is
// listed as clobbered - confirm whether that is intentional.
__asm__("movl %%esp, %0" : "=r"(res): : "esp");
|
||||
return res;
|
||||
}
|
||||
// IA-32 variant: return the current value of the frame pointer (ebp).
INLINE char* getbp(void)
|
||||
{
|
||||
void *res;
|
||||
|
||||
// Copies ebp into res. NOTE(review): ebp is only read here, yet it is
// listed as clobbered - confirm whether that is intentional.
__asm__("movl %%ebp, %0" : "=r"(res): : "ebp");
|
||||
return res;
|
||||
}
|
||||
// IA-32 variant: copy cpy_bytes bytes from src to dst in ascending address
// order, 4 bytes at a time, and then load new_ebp into ebp.
INLINE void copy_frame_down_and_move_bp(
|
||||
char *dst,
|
||||
char *src,
|
||||
size_t cpy_bytes,
|
||||
char *new_ebp
|
||||
)
|
||||
{
|
||||
// In this version, dst is guaranteed to be lower address than src,
|
||||
// therefore copying upwards from src into dst is safe in case
|
||||
// there is overlap. The number of bytes is also guaranteed to be
|
||||
// a multiple of 4, and the copy is done in 32 bit word chunks for
|
||||
// best efficiency.
|
||||
__asm__(
|
||||
// edi = destination, esi = source, ecx = byte count
"movl %0, %%edi;"
|
||||
"movl %1, %%esi;"
|
||||
"movl %2, %%ecx;"
|
||||
// turn the byte count into a count of 4-byte words
"shrl $2, %%ecx;"
|
||||
"rep movsd;"
|
||||
// the frame pointer is switched only after the copy has finished
"movl %3, %%ebp" :
|
||||
:
|
||||
"rm"(dst), "rm"(src), "rm"(cpy_bytes), "rm"(new_ebp) :
|
||||
"esi", "edi", "ecx", "ebp", "memory");
|
||||
}
|
||||
// IA-32 variant: copy cpy_bytes bytes from src to dst in descending address
// order, 4 bytes at a time, and then load new_ebp into ebp.
INLINE void copy_frame_up_and_move_bp(
|
||||
char *dst,
|
||||
char *src,
|
||||
size_t cpy_bytes,
|
||||
char *new_ebp
|
||||
)
|
||||
{
|
||||
// In this version, dst is guaranteed to be higher address than src,
|
||||
// therefore copying downwards from src into dst is safe in case
|
||||
// there is overlap. The number of bytes is also guaranteed to be
|
||||
// a multiple of 4, and the copy is done in 32 bit word chunks for
|
||||
// best efficiency.
|
||||
// The copy runs backwards, so start both pointers on the last 4-byte word.
dst += cpy_bytes - 4;
|
||||
src += cpy_bytes - 4;
|
||||
__asm__(
|
||||
"movl %0, %%edi;"
|
||||
"movl %1, %%esi;"
|
||||
"movl %2, %%ecx;"
|
||||
// turn the byte count into a count of 4-byte words
"shrl $2, %%ecx;"
|
||||
// std makes the string move decrement esi/edi; cld restores the direction flag
"std; rep movsd; cld;"
|
||||
"movl %3, %%ebp" :
|
||||
// "=D"(dst), "=S"(src), "=C"(cpy_bytes) :
|
||||
:
|
||||
"rm"(dst), "rm"(src), "rm"(cpy_bytes), "rm"(new_ebp) :
|
||||
"esi", "edi", "ecx", "ebp", "memory");
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
// Marker values written into the 32-bit word immediately below a tagged VLA
// pointer by __cilkrts_stack_alloc(), and read back by __cilkrts_stack_free()
// to tell whether the block came from the heap or from the stack.
#define c_cilk_ptr_from_heap 0xc2f2f00d
|
||||
#define c_cilk_ptr_from_stack 0xc3f30d0f
|
||||
|
||||
// Allocate space for a variable length array on behalf of the calling frame.
//
// Only implemented when built with the Intel compiler; any other compiler
// gets a stub that returns NULL.
//
// If sf has a worker and CILK_FRAME_UNSYNCHED is set in sf->flags, the space
// comes from vla_internal_heap_alloc(). Otherwise this function's own frame
// is copied to a lower address and the space is taken from the gap that
// opens up above it.
//
// When needs_tag is non-zero an extra `align` bytes are reserved and the
// 32-bit word just below the returned pointer is set to
// c_cilk_ptr_from_heap or c_cilk_ptr_from_stack.
CILK_ABI(__cilkrts_void_ptr)
|
||||
__cilkrts_stack_alloc(
|
||||
__cilkrts_stack_frame *sf,
|
||||
size_t size,
|
||||
size_t distance_from_sp_to_alloca_area,
|
||||
uint32_t align, // align is always >= minimum stack alignment and
|
||||
// >= ptr_size as well, and must be a power of 2.
|
||||
uint32_t needs_tag // non-zero if the pointer being returned needs to
|
||||
// be tagged
|
||||
)
|
||||
{
|
||||
#ifdef __INTEL_COMPILER
|
||||
// full_size will be a multiple of align, and contains
|
||||
// enough extra space to allocate a marker.
|
||||
size_t full_size = (size + align - 1) & ~(align - 1);
|
||||
|
||||
if (needs_tag) {
|
||||
full_size += align;
|
||||
}
|
||||
|
||||
char *t;
|
||||
// Heap path: taken when the frame has a worker and is marked unsynched.
if (sf->worker != 0 &&
|
||||
((sf->flags & CILK_FRAME_UNSYNCHED) != 0)) {
|
||||
t = vla_internal_heap_alloc(sf, full_size, align);
|
||||
if (needs_tag) {
|
||||
// skip the reserved `align` bytes and tag the word just below the result
t += align;
|
||||
((uint32_t*)t)[-1] = c_cilk_ptr_from_heap;
|
||||
}
|
||||
return (void *)t;
|
||||
}
|
||||
|
||||
// stack is still synced, allocate full_size from esp,
|
||||
// and, when a tag is requested, record in the 32 bits
|
||||
// immediately below the allocated space that it was
|
||||
// allocated in the stack.
|
||||
char *old_ebp = getbp();
|
||||
char *old_esp = getsp();
|
||||
|
||||
// make top_ptr point to base of first parameter.
|
||||
char *top_ptr = ((char *)(_AddressOfReturnAddress()) +
|
||||
sizeof(char *));
|
||||
size_t param_size = 0;
|
||||
|
||||
#if defined(__x86_64)
|
||||
// For Intel64 linux & MACH ABI, all the parameters were passed in
|
||||
// register, so top of the stack frame above the return address
|
||||
// is just the size of the return address plus
|
||||
// distance_from_sp_to_alloca_area on the chance that the alloca
|
||||
// area isn't at the very bottom of the calling functions stack.
|
||||
#elif defined(__MACH__)
|
||||
// For ia32 MACH, parameter size is always a multiple of 16
|
||||
// bytes to keep the stack 16 byte aligned. So we need to round
|
||||
// number of parameters up to multiple of 4.
|
||||
param_size = 8 * sizeof(char *);
|
||||
#else
|
||||
// For both windows Intel64 ABI, and the IA32 windows and
|
||||
// linux ABIs, space is reserved on the stack for all these
|
||||
// parameters. param_size is 5 * size of a stack slot.
|
||||
param_size = 5 * sizeof(char *);
|
||||
#endif
|
||||
|
||||
// now make top_ptr point above the params, or if
|
||||
// distance_from_sp_to_alloca_area is not zero, make
|
||||
// it point above that area. When non-zero,
|
||||
// distance_from_sp_to_alloca area is expected to contain
|
||||
// the parameter space, so we only add one or the other,
|
||||
// not both.
|
||||
top_ptr += (distance_from_sp_to_alloca_area != 0) ?
|
||||
distance_from_sp_to_alloca_area : param_size;
|
||||
|
||||
// t needs to end up at current value of top_ptr less full_size and less
|
||||
// distance_from_sp_to_alloca_area and
|
||||
// then rounded down to the alignment needed. Then we have to bump
|
||||
// esp down by current frame_size, so that when all is done with respect
|
||||
// to executing the return sequence, the final value of esp will be the
|
||||
// same value as t.
|
||||
t = (top_ptr - full_size) - distance_from_sp_to_alloca_area;
|
||||
intptr_t temp = (intptr_t)t;
|
||||
// round down to a multiple of align (align is a power of 2)
temp &= ~((intptr_t)(align - 1));
|
||||
t = (char *)temp;
|
||||
|
||||
// ok, the value of t is set where we need it. Now set esp
|
||||
// to the value of t less the current frame size.
|
||||
// So now when we do regular return esp should be left such
|
||||
// that it has moved down by full_size.
|
||||
size_t cur_fm_size = (top_ptr - old_esp);
|
||||
char *new_esp = t - cur_fm_size;
|
||||
// ebp is moved by the same distance as esp
char *new_ebp = old_ebp - (old_esp - new_esp);
|
||||
|
||||
// extend the stack down by at least the difference between where
|
||||
// I want it to be and where it currently is. This should take care
|
||||
// of touching any pages necessary.
|
||||
char *foo = alloca(old_esp - new_esp);
|
||||
// use whichever of the two addresses is lower as the new stack pointer
setsp(foo < new_esp ? foo : new_esp);
|
||||
|
||||
// Now set esp exactly where I want it.
|
||||
// setsp(new_esp);
|
||||
|
||||
copy_frame_down_and_move_bp(new_esp, old_esp, cur_fm_size, new_ebp);
|
||||
|
||||
if (needs_tag) {
|
||||
// skip the reserved `align` bytes and tag the word just below the result
t += align;
|
||||
((uint32_t*)t)[-1] = c_cilk_ptr_from_stack;
|
||||
}
|
||||
|
||||
return t;
|
||||
#else // Not __INTEL_COMPILER
|
||||
// Not supported unless we can figure out how to get the size of the frame
|
||||
return NULL;
|
||||
#endif
|
||||
}
|
||||
|
||||
// This frees the space allocated for a variable length array.
|
||||
// Release variable length array space at p.
//
// Only implemented when built with the Intel compiler; any other compiler
// gets an empty stub.
//
// When known_from_stack is zero, the 32-bit tag just below p decides whether
// the block is treated as stack or heap space. Stack space is released by
// copying this function's frame up by full_size when sf has no worker or is
// not marked CILK_FRAME_UNSYNCHED; otherwise the release is passed to
// vla_free_from_original_stack(). Heap space goes to vla_internal_heap_free().
CILK_ABI(void)
|
||||
__cilkrts_stack_free(
|
||||
__cilkrts_stack_frame *sf,
|
||||
void *p,
|
||||
size_t size,
|
||||
size_t distance_from_sp_to_alloca_area,
|
||||
uint32_t align, // same requirements as for align in allocation,
|
||||
// and must match alignment that was passed when
|
||||
// doing the allocation
|
||||
uint32_t known_from_stack // non-zero if this is known to be allocated
|
||||
// on the stack, and therefore has no tag
|
||||
)
|
||||
{
|
||||
#ifdef __INTEL_COMPILER
|
||||
uint32_t *t = (uint32_t*)p;
|
||||
|
||||
// full_size will be a multiple of align, and contains
|
||||
// enough extra space to allocate a marker if one was needed.
|
||||
size_t full_size = (size + align - 1) & ~(align - 1);
|
||||
if (known_from_stack == 0) {
|
||||
// if the compiler hasn't told the run-time that this is
|
||||
// known to be on the stack, then this pointer must have been
|
||||
// tagged such that the run-time can tell.
|
||||
assert(t[-1] == c_cilk_ptr_from_stack ||
|
||||
t[-1] == c_cilk_ptr_from_heap);
|
||||
|
||||
known_from_stack = t[-1] == c_cilk_ptr_from_stack;
|
||||
full_size += align; // accounts for extra space for marker
|
||||
// step back over the marker area to the start of the block
t = (uint32_t *)(((char *)t) - align);
|
||||
}
|
||||
|
||||
if (known_from_stack) {
|
||||
// alloca usage forces an ebp/rbp based stack frame even though
|
||||
// the size is 0 and the result is unused.
|
||||
char *foo = alloca(0);
|
||||
if (sf->worker == 0 || (sf->flags & CILK_FRAME_UNSYNCHED) == 0) {
|
||||
// p was allocated from current stack frame and we
|
||||
// are synced on current stack frame. Return the
|
||||
// amount of the stack that needs to be freed.
|
||||
char *old_ebp = getbp();
|
||||
char *old_esp = getsp();
|
||||
|
||||
// make top_ptr point to base of first parameter.
|
||||
char *top_ptr = ((char *)(_AddressOfReturnAddress()) +
|
||||
sizeof(char *));
|
||||
size_t param_size = 0;
|
||||
|
||||
#if defined(__x86_64)
|
||||
// For Intel64 linux & MACH ABI, all the parameters were passed in
|
||||
// register, so top of the stack frame above the return address
|
||||
// is just the size of the return address plus
|
||||
// distance_from_sp_to_alloca_area on the chance that the alloca
|
||||
// area isn't at the very bottom of the calling functions stack.
|
||||
#elif defined(__MACH__)
|
||||
// For ia32 MACH, parameter size is always a multiple of 16
|
||||
// bytes to keep the stack 16 byte aligned. So we need to round
|
||||
// number of parameters up to multiple of 4.
|
||||
param_size = 8 * sizeof(char *);
|
||||
#else
|
||||
// For both windows Intel64 ABI, and the IA32 windows and
|
||||
// linux ABIs, space is reserved on the stack for all these
|
||||
// parameters. param_size is 6 * size of a stack slot.
|
||||
param_size = 6 * sizeof(char *);
|
||||
#endif
|
||||
|
||||
// now make top_ptr point above the params, or if
|
||||
// distance_from_sp_to_alloca_area is not zero, make
|
||||
// it point above that area. When non-zero,
|
||||
// distance_from_sp_to_alloca area is expected to contain
|
||||
// the parameter space, so we only add one or the other,
|
||||
// not both.
|
||||
top_ptr += (distance_from_sp_to_alloca_area != 0) ?
|
||||
distance_from_sp_to_alloca_area : param_size;
|
||||
|
||||
size_t cur_fm_size = (top_ptr - old_esp);
|
||||
// both esp and ebp move up by the amount being released
char *new_esp = old_esp + full_size;
|
||||
char *new_ebp = old_ebp + full_size;
|
||||
|
||||
copy_frame_up_and_move_bp(new_esp, old_esp, cur_fm_size, new_ebp);
|
||||
setsp(new_esp);
|
||||
}
|
||||
else {
|
||||
// p was allocated on stack frame, but that is
|
||||
// no longer the current stack frame. Need to adjust the
|
||||
// saved esp that is somewhere in the cilk runtime so that
|
||||
// on sync, esp will be cut back correctly.
|
||||
vla_free_from_original_stack(sf, full_size);
|
||||
}
|
||||
}
|
||||
else {
|
||||
vla_internal_heap_free(t, full_size);
|
||||
}
|
||||
#else // Not __INTEL_COMPILER
|
||||
// Not supported unless we can figure out how to get the size of the frame
|
||||
#endif
|
||||
}
|
72
libcilkrts/runtime/config/x86/os-fence.h
Normal file
72
libcilkrts/runtime/config/x86/os-fence.h
Normal file
|
@ -0,0 +1,72 @@
|
|||
/* os-fence.h -*-C++-*-
|
||||
*
|
||||
*************************************************************************
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2009-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
**************************************************************************/
|
||||
|
||||
/* gcc before 4.4 does not implement __sync_synchronize properly */
|
||||
#if (__ICC >= 1110 && !(__MIC__ || __MIC2__))                           \
    || (!defined __ICC && __GNUC__ * 10 + __GNUC_MINOR__ > 43)
#   define HAVE_SYNC_INTRINSICS 1
#endif


/*
 * void __cilkrts_fence(void)
 *
 * Full memory fence.
 *
 * Executes an MFENCE instruction (or an equivalent) to serialize all load
 * and store instructions that were issued prior to the MFENCE instruction.
 * This serializing operation guarantees that every load and store
 * instruction that precedes the MFENCE instruction is globally visible
 * before any load or store instruction that follows the MFENCE instruction.
 * The MFENCE instruction is ordered with respect to all load and store
 * instructions, other MFENCE instructions, any SFENCE and LFENCE
 * instructions, and any serializing instructions (such as the CPUID
 * instruction).
 *
 * Depending on the compiler this expands to __sync_synchronize(), to a
 * locked add of zero to the word at the top of the stack, or to an
 * out-of-line function.
 */
#ifdef HAVE_SYNC_INTRINSICS
#   define __cilkrts_fence() __sync_synchronize()
#elif defined __ICC || defined __GNUC__
    /* mfence is a strict subset of lock add but takes longer on many
     * processors. */
// #   define __cilkrts_fence() __asm__ volatile ("mfence")
    /* On MIC, fence seems to be completely unnecessary.
     * Just for simplicity of 1st implementation, it defaults to x86 */
    /* The stack pointer is named per word size: rsp is not a valid register
     * name when assembling 32-bit code.  The "memory" clobber stops the
     * compiler from moving memory accesses across the fence; "cc" records
     * that addl rewrites the flags. */
#   if defined __i386__ || defined __i386 || defined _M_IX86
#       define __cilkrts_fence() \
            __asm__ volatile ("lock addl $0,(%%esp)" : : : "memory", "cc")
#   else
#       define __cilkrts_fence() \
            __asm__ volatile ("lock addl $0,(%%rsp)" : : : "memory", "cc")
#   endif
// #elif defined _WIN32
// #   pragma intrinsic(_ReadWriteBarrier)
// #   define __cilkrts_fence() _ReadWriteBarrier()
#else
COMMON_SYSDEP void __cilkrts_fence(void); ///< MFENCE instruction
#endif
|
123
libcilkrts/runtime/config/x86/os-unix-sysdep.c
Normal file
123
libcilkrts/runtime/config/x86/os-unix-sysdep.c
Normal file
|
@ -0,0 +1,123 @@
|
|||
/* os-unix-sysdep.c -*-C-*-
|
||||
*
|
||||
*************************************************************************
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2009-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*************************************************************************
|
||||
*
|
||||
* This file contains system-specific code for Unix systems
|
||||
*/
|
||||
|
||||
#include "os.h"
|
||||
#include "sysdep.h"
|
||||
#include <internal/abi.h>
|
||||
|
||||
// On x86 processors (but not MIC processors), the compiler generated code to
|
||||
// save the FP state (rounding mode and the like) before calling setjmp. We
|
||||
// will need to restore that state when we resume.
|
||||
#ifndef __MIC__
|
||||
# if defined(__i386__) || defined(__x86_64)
|
||||
# define RESTORE_X86_FP_STATE
|
||||
# endif // defined(__i386__) || defined(__x86_64)
|
||||
#endif // __MIC__
|
||||
|
||||
/* timer support */
|
||||
/*
 * Read the processor's time-stamp counter and return it as one 64-bit tick
 * count.  Targets other than IA-32 / Intel64 have no implementation and
 * always get 0 (with a compile-time warning).
 */
COMMON_SYSDEP unsigned long long __cilkrts_getticks(void)
{
#if defined __i386__ || defined __x86_64
    unsigned lo_word, hi_word;
    unsigned long long ticks;

    /* rdtsc leaves the low half in eax and the high half in edx */
    __asm__ volatile("rdtsc" : "=a" (lo_word), "=d" (hi_word));

    ticks = (unsigned long long)hi_word;
    ticks <<= 32;
    ticks |= (unsigned long long)lo_word;
    return ticks;
#else
#   warning "unimplemented cycle counter"
    return 0;
#endif
}
|
||||
|
||||
// Brief processor-level pause. The implementation is picked at compile time:
// Intel compiler intrinsics when __ICC >= 1110, otherwise the x86 "pause"
// instruction; on any other target the body is empty and a warning is issued.
COMMON_SYSDEP void __cilkrts_short_pause(void)
|
||||
{
|
||||
#if __ICC >= 1110
|
||||
# if __MIC__ || __MIC2__
|
||||
_mm_delay_32(16); // stall for 16 cycles
|
||||
# else
|
||||
_mm_pause();
|
||||
# endif
|
||||
#elif defined __i386__ || defined __x86_64
|
||||
__asm__("pause");
|
||||
#else
|
||||
# warning __cilkrts_short_pause empty
|
||||
#endif
|
||||
}
|
||||
|
||||
// Store x into *ptr and return the value *ptr held before the store.
COMMON_SYSDEP int __cilkrts_xchg(volatile int *ptr, int x)
|
||||
{
|
||||
#if defined __i386__ || defined __x86_64
|
||||
/* asm statement here works around icc bugs */
|
||||
// x is both input and output (the "0" constraint ties it to operand 0);
// %a1 makes the assembler treat ptr as a memory address.
__asm__("xchgl %0,%a1" :"=r" (x) : "r" (ptr), "0" (x) :"memory");
|
||||
#else
|
||||
x = __sync_lock_test_and_set(ptr, x);
|
||||
#endif
|
||||
return x;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Restore the floating point state that is stored in a stack frame at each
|
||||
* spawn. This should be called each time a frame is resumed.
|
||||
*
|
||||
* Only valid for IA32 and Intel64 processors.
|
||||
*/
|
||||
// Reload the floating point control state held in sf->mxcsr and sf->fpcsr.
// Compiles to an empty function unless RESTORE_X86_FP_STATE is defined.
void restore_x86_fp_state (__cilkrts_stack_frame *sf) {
|
||||
#ifdef RESTORE_X86_FP_STATE
|
||||
// ldmxcsr reloads MXCSR from sf->mxcsr, fnclex clears pending x87
// exceptions, and fldcw reloads the x87 control word from sf->fpcsr.
__asm__ ( "ldmxcsr %0\n\t"
|
||||
"fnclex\n\t"
|
||||
"fldcw %1"
|
||||
:
|
||||
: "m" (sf->mxcsr), "m" (sf->fpcsr));
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/*
 * Save the floating point control state into a stack frame so that
 * restore_x86_fp_state() can reinstate it later.
 *
 * sf - frame to save into; nothing is written unless the frame version
 *      encoded in sf->flags is at least 1.
 *
 * Compiles to an empty function unless RESTORE_X86_FP_STATE is defined.
 */
void sysdep_save_fp_ctrl_state(__cilkrts_stack_frame *sf)
{
    // If we're not going to restore, don't bother saving it
#ifdef RESTORE_X86_FP_STATE
    if (CILK_FRAME_VERSION_VALUE(sf->flags) >= 1)
    {
        __asm__ ("stmxcsr %0" : "=m" (sf->mxcsr));
        // Save the x87 *control* word: restore_x86_fp_state() reloads this
        // field with fldcw, so storing the status word (fnstsw) here would
        // install status bits as the control word on resume.
        __asm__ ("fnstcw %0" : "=m" (sf->fpcsr));
    }
#endif
}
|
||||
|
222
libcilkrts/runtime/doxygen-layout.xml
Normal file
222
libcilkrts/runtime/doxygen-layout.xml
Normal file
|
@ -0,0 +1,222 @@
|
|||
<doxygenlayout version="1.0">
|
||||
|
||||
<!--
|
||||
# @copyright
|
||||
# Copyright (C) 2011-2013, Intel Corporation
|
||||
# All rights reserved.
|
||||
#
|
||||
# @copyright
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in
|
||||
# the documentation and/or other materials provided with the
|
||||
# distribution.
|
||||
# * Neither the name of Intel Corporation nor the names of its
|
||||
# contributors may be used to endorse or promote products derived
|
||||
# from this software without specific prior written permission.
|
||||
#
|
||||
# @copyright
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
-->
|
||||
|
||||
<!-- Navigation index tabs for HTML output -->
|
||||
<navindex>
|
||||
<tab type="mainpage" visible="yes" title=""/>
|
||||
<tab type="pages" visible="yes" title="" intro=""/>
|
||||
<tab type="modules" visible="yes" title="" intro=""/>
|
||||
<tab type="namespaces" visible="yes" title="">
|
||||
<tab type="namespaces" visible="yes" title="" intro=""/>
|
||||
<tab type="namespacemembers" visible="yes" title="" intro=""/>
|
||||
</tab>
|
||||
<tab type="classes" visible="yes" title="Classes, Structs and Unions">
|
||||
<tab type="classes" visible="yes" title="Classes, Structs and Unions" intro=""/>
|
||||
<tab type="classindex" visible="$ALPHABETICAL_INDEX" title=""/>
|
||||
<tab type="hierarchy" visible="yes" title="" intro=""/>
|
||||
<tab type="classmembers" visible="yes" title="" intro=""/>
|
||||
</tab>
|
||||
<tab type="files" visible="yes" title="">
|
||||
<tab type="files" visible="yes" title="" intro=""/>
|
||||
<tab type="globals" visible="yes" title="" intro=""/>
|
||||
</tab>
|
||||
<tab type="globals" visible="yes" title="Global Functions" intro=""/>
|
||||
<tab type="dirs" visible="yes" title="" intro=""/>
|
||||
<tab type="examples" visible="yes" title="" intro=""/>
|
||||
</navindex>
|
||||
|
||||
<!-- Layout definition for a class page -->
|
||||
<class>
|
||||
<briefdescription visible="yes"/>
|
||||
<includes visible="$SHOW_INCLUDE_FILES"/>
|
||||
<inheritancegraph visible="$CLASS_GRAPH"/>
|
||||
<collaborationgraph visible="$COLLABORATION_GRAPH"/>
|
||||
<allmemberslink visible="yes"/>
|
||||
<memberdecl>
|
||||
<nestedclasses visible="yes" title=""/>
|
||||
<publictypes title=""/>
|
||||
<publicslots title=""/>
|
||||
<signals title=""/>
|
||||
<publicmethods title=""/>
|
||||
<publicstaticmethods title=""/>
|
||||
<publicattributes title=""/>
|
||||
<publicstaticattributes title=""/>
|
||||
<protectedtypes title=""/>
|
||||
<protectedslots title=""/>
|
||||
<protectedmethods title=""/>
|
||||
<protectedstaticmethods title=""/>
|
||||
<protectedattributes title=""/>
|
||||
<protectedstaticattributes title=""/>
|
||||
<packagetypes title=""/>
|
||||
<packagemethods title=""/>
|
||||
<packagestaticmethods title=""/>
|
||||
<packageattributes title=""/>
|
||||
<packagestaticattributes title=""/>
|
||||
<properties title=""/>
|
||||
<events title=""/>
|
||||
<privatetypes title=""/>
|
||||
<privateslots title=""/>
|
||||
<privatemethods title=""/>
|
||||
<privatestaticmethods title=""/>
|
||||
<privateattributes title=""/>
|
||||
<privatestaticattributes title=""/>
|
||||
<friends title=""/>
|
||||
<related title="" subtitle=""/>
|
||||
<membergroups visible="yes"/>
|
||||
</memberdecl>
|
||||
<detaileddescription title=""/>
|
||||
<memberdef>
|
||||
<typedefs title=""/>
|
||||
<enums title=""/>
|
||||
<constructors title=""/>
|
||||
<functions title=""/>
|
||||
<related title=""/>
|
||||
<variables title=""/>
|
||||
<properties title=""/>
|
||||
<events title=""/>
|
||||
</memberdef>
|
||||
<usedfiles visible="$SHOW_USED_FILES"/>
|
||||
<authorsection visible="yes"/>
|
||||
</class>
|
||||
|
||||
<!-- Layout definition for a namespace page -->
|
||||
<namespace>
|
||||
<briefdescription visible="yes"/>
|
||||
<memberdecl>
|
||||
<nestednamespaces visible="yes" title=""/>
|
||||
<classes visible="yes" title=""/>
|
||||
<typedefs title=""/>
|
||||
<enums title=""/>
|
||||
<functions title=""/>
|
||||
<variables title=""/>
|
||||
<membergroups visible="yes"/>
|
||||
</memberdecl>
|
||||
<detaileddescription title=""/>
|
||||
<memberdef>
|
||||
<typedefs title=""/>
|
||||
<enums title=""/>
|
||||
<functions title=""/>
|
||||
<variables title=""/>
|
||||
</memberdef>
|
||||
<authorsection visible="yes"/>
|
||||
</namespace>
|
||||
|
||||
<!-- Layout definition for a file page -->
|
||||
<file>
|
||||
<briefdescription visible="no"/>
|
||||
<includegraph visible="$INCLUDE_GRAPH"/>
|
||||
<includedbygraph visible="$INCLUDED_BY_GRAPH"/>
|
||||
<detaileddescription title="Description"/>
|
||||
<includes visible="no"/>
|
||||
<sourcelink visible="yes"/>
|
||||
<memberdecl>
|
||||
<classes visible="yes" title="Structures and Classes"/>
|
||||
<namespaces visible="yes" title=""/>
|
||||
<defines title=""/>
|
||||
<typedefs title=""/>
|
||||
<enums title=""/>
|
||||
<functions title=""/>
|
||||
<variables title=""/>
|
||||
<membergroups visible="yes"/>
|
||||
</memberdecl>
|
||||
<memberdef>
|
||||
<defines title=""/>
|
||||
<typedefs title=""/>
|
||||
<enums title=""/>
|
||||
<functions title=""/>
|
||||
<variables title=""/>
|
||||
</memberdef>
|
||||
<authorsection/>
|
||||
</file>
|
||||
|
||||
<!-- Layout definition for a group page -->
|
||||
<group>
|
||||
<briefdescription visible="yes"/>
|
||||
<groupgraph visible="$GROUP_GRAPHS"/>
|
||||
<memberdecl>
|
||||
<classes visible="yes" title=""/>
|
||||
<namespaces visible="yes" title=""/>
|
||||
<dirs visible="yes" title=""/>
|
||||
<nestedgroups visible="yes" title=""/>
|
||||
<files visible="yes" title=""/>
|
||||
<defines title=""/>
|
||||
<typedefs title=""/>
|
||||
<enums title=""/>
|
||||
<enumvalues title=""/>
|
||||
<functions title=""/>
|
||||
<variables title=""/>
|
||||
<signals title=""/>
|
||||
<publicslots title=""/>
|
||||
<protectedslots title=""/>
|
||||
<privateslots title=""/>
|
||||
<events title=""/>
|
||||
<properties title=""/>
|
||||
<friends title=""/>
|
||||
<membergroups visible="yes"/>
|
||||
</memberdecl>
|
||||
<detaileddescription title=""/>
|
||||
<memberdef>
|
||||
<pagedocs/>
|
||||
<inlineclasses title=""/>
|
||||
<defines title=""/>
|
||||
<typedefs title=""/>
|
||||
<enums title=""/>
|
||||
<enumvalues title=""/>
|
||||
<functions title=""/>
|
||||
<variables title=""/>
|
||||
<signals title=""/>
|
||||
<publicslots title=""/>
|
||||
<protectedslots title=""/>
|
||||
<privateslots title=""/>
|
||||
<events title=""/>
|
||||
<properties title=""/>
|
||||
<friends title=""/>
|
||||
</memberdef>
|
||||
<authorsection visible="yes"/>
|
||||
</group>
|
||||
|
||||
<!-- Layout definition for a directory page -->
|
||||
<directory>
|
||||
<briefdescription visible="yes"/>
|
||||
<directorygraph visible="yes"/>
|
||||
<memberdecl>
|
||||
<dirs visible="yes"/>
|
||||
<files visible="yes"/>
|
||||
</memberdecl>
|
||||
<detaileddescription title=""/>
|
||||
</directory>
|
||||
</doxygenlayout>
|
1774
libcilkrts/runtime/doxygen.cfg
Normal file
1774
libcilkrts/runtime/doxygen.cfg
Normal file
File diff suppressed because it is too large
Load diff
597
libcilkrts/runtime/except-gcc.cpp
Normal file
597
libcilkrts/runtime/except-gcc.cpp
Normal file
|
@ -0,0 +1,597 @@
|
|||
/* except-gcc.cpp -*-C++-*-
|
||||
*
|
||||
*************************************************************************
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2009-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
**************************************************************************/
|
||||
|
||||
#include "except-gcc.h"
|
||||
#include "except.h"
|
||||
#include "sysdep.h"
|
||||
#include "bug.h"
|
||||
#include "local_state.h"
|
||||
#include "full_frame.h"
|
||||
#include "scheduler.h"
|
||||
#include "frame_malloc.h"
|
||||
#include "pedigrees.h"
|
||||
|
||||
#include <stdint.h>
|
||||
#include <typeinfo>
|
||||
|
||||
#define DEBUG_EXCEPTIONS 0
|
||||
|
||||
/* Exception state captured from a worker's C++ runtime so that it can be
   parked (on a worker or on a full frame) and reinstalled later. */
struct pending_exception_info
|
||||
{
|
||||
/* Record the given exception and rethrow flag, copy the pointed-to
   globals into runtime_state, then zero the caller's globals. */
void make(__cxa_eh_globals *, _Unwind_Exception *, bool);
|
||||
/* Delete the active exception (if any) and every exception left on the
   saved caught chain.  Does not free the record itself. */
void destruct();
|
||||
/* True when there is no active exception, no rethrow request, no caught
   chain and a zero uncaught count. */
bool empty() const;
|
||||
/* Assert that an active exception implies a positive uncaught count. */
void check() const;
|
||||
/* Active exception at time of suspend. */
|
||||
_Unwind_Exception *active;
|
||||
/* If true the most recently caught exception is to be rethrown
|
||||
on resume. This handling is technically incorrect but allows
|
||||
running without compiler support; the proper standards-compliant
|
||||
method is to save the exception in the previous field. */
|
||||
bool rethrow;
|
||||
/* Copy of the C++ runtime's per-thread exception globals (caught chain
   and uncaught count) taken by make(). */
struct __cxa_eh_globals runtime_state;
|
||||
};
|
||||
|
||||
void pending_exception_info::check() const
|
||||
{
|
||||
if (active)
|
||||
CILK_ASSERT((int)runtime_state.uncaughtExceptions > 0);
|
||||
}
|
||||
|
||||
/* Fill in this record from the live C++ runtime state.

   state_in   - the runtime's exception globals; copied into runtime_state
                and then reset to "no caught, no uncaught exceptions".
   exc_in     - exception that was active when the strand stopped, or 0.
   rethrow_in - stored verbatim in the rethrow flag. */
void pending_exception_info::make(__cxa_eh_globals *state_in,
                                  _Unwind_Exception *exc_in, bool rethrow_in)
{
    this->active = exc_in;
    this->rethrow = rethrow_in;

    /* Snapshot first, then wipe the live copy so the runtime starts clean. */
    this->runtime_state = *state_in;
    state_in->uncaughtExceptions = 0;
    state_in->caughtExceptions = 0;

#if CILK_LIB_DEBUG
    this->check();
#endif
}
|
||||
|
||||
bool
|
||||
pending_exception_info::empty() const
|
||||
{
|
||||
return !active && !rethrow && !runtime_state.caughtExceptions &&
|
||||
!runtime_state.uncaughtExceptions;
|
||||
}
|
||||
|
||||
#if DEBUG_EXCEPTIONS
#include <stdio.h>
/* Debug helper: write a short description of info into out (at most len
   bytes): "[empty]", "[rethrow <caught chain>]" or "[throw <active>]". */
static void
decode_exceptions(char *out, size_t len, struct pending_exception_info *info)
{
    if (info->empty()) {
        snprintf(out, len, "[empty]");
        return;
    }

    if (info->rethrow) {
        snprintf(out, len, "[rethrow %p]",
                 info->runtime_state.caughtExceptions);
        return;
    }

    snprintf(out, len, "[throw %p]", (void *)info->active);
}
#endif
|
||||
|
||||
static void
|
||||
save_exception_info(__cilkrts_worker *w,
|
||||
__cxa_eh_globals *state,
|
||||
_Unwind_Exception *exc,
|
||||
bool rethrow,
|
||||
const char *why)
|
||||
{
|
||||
struct pending_exception_info *info =
|
||||
(struct pending_exception_info *)__cilkrts_frame_malloc(w, sizeof (struct pending_exception_info));
|
||||
CILK_ASSERT(info);
|
||||
info->make(state, exc, rethrow);
|
||||
|
||||
#if DEBUG_EXCEPTIONS
|
||||
{
|
||||
char buf[40];
|
||||
decode_exceptions(buf, sizeof buf, info);
|
||||
fprintf(stderr, "make exception info W%u %p %s (%s)\n",
|
||||
w->self, info, buf, why);
|
||||
}
|
||||
#endif
|
||||
|
||||
CILK_ASSERT(w->l->pending_exception == 0);
|
||||
w->l->pending_exception = info;
|
||||
}
|
||||
|
||||
#if DEBUG_EXCEPTIONS
#include <stdio.h> /* DEBUG */

/* Debug helper: render four frame flags as a NUL-terminated string in
   out.  Each position holds the flag's letter (S, U, D, X) when the bit
   is set in flags and '_' otherwise.  Five bytes of out are written. */
static void decode_flags(int flags, char out[9])
{
    char *cursor = out;

    *cursor++ = (flags & CILK_FRAME_STOLEN)    ? 'S' : '_';
    *cursor++ = (flags & CILK_FRAME_UNSYNCHED) ? 'U' : '_';
    *cursor++ = (flags & CILK_FRAME_DETACHED)  ? 'D' : '_';
    *cursor++ = (flags & CILK_FRAME_EXCEPTING) ? 'X' : '_';
    *cursor = '\0';
}
#endif
|
||||
|
||||
/* __cilkrts_return_exception (named __cilkrts_save_except in the debug traces below) is called from the runtime epilogue
|
||||
when a function is returning with an exception pending.
|
||||
|
||||
If the function has a parent to which it could return normally,
|
||||
return and have the caller call _Unwind_Resume, the same as if
|
||||
an exception filter had not matched.
|
||||
|
||||
Otherwise save the exception in the worker.
|
||||
|
||||
If this is a return from an ordinary call that must go through
|
||||
the runtime, the assembly epilogue must have saved the call-saved
|
||||
register state in the parent frame. */
|
||||
|
||||
extern "C"
|
||||
CILK_ABI_THROWS_VOID
|
||||
__cilkrts_return_exception(__cilkrts_stack_frame *sf)
|
||||
{
|
||||
__cilkrts_worker *w = sf->worker;
|
||||
_Unwind_Exception *exc = (_Unwind_Exception *)sf->except_data;
|
||||
|
||||
CILK_ASSERT(sf->flags & CILK_FRAME_DETACHED);
|
||||
sf->flags &= ~CILK_FRAME_DETACHED;
|
||||
|
||||
/*
|
||||
* If we are in replay mode, and a steal occurred during the recording
|
||||
* phase, stall till a steal actually occurs.
|
||||
*/
|
||||
replay_wait_for_steal_if_parent_was_stolen(w);
|
||||
|
||||
/* If this is to be an abnormal return, save the active exception. */
|
||||
if (!__cilkrts_pop_tail(w)) {
|
||||
/* Write a record to the replay log for an attempt to return to a
|
||||
stolen parent. This must be done before the exception handler
|
||||
invokes __cilkrts_leave_frame which will bump the pedigree so
|
||||
the replay_wait_for_steal_if_parent_was_stolen() above will match on
|
||||
replay */
|
||||
replay_record_orphaned(w);
|
||||
|
||||
/* Now that the record/replay stuff is done, update the pedigree */
|
||||
update_pedigree_on_leave_frame(w, sf);
|
||||
|
||||
/* Inline pop_frame; this may not be needed. */
|
||||
w->current_stack_frame = sf->call_parent;
|
||||
sf->call_parent = 0;
|
||||
__cxa_eh_globals *state = __cxa_get_globals();
|
||||
|
||||
#if DEBUG_EXCEPTIONS
|
||||
fflush(stdout);
|
||||
char decoded[9];
|
||||
decode_flags(sf->flags, decoded);
|
||||
fprintf(stderr, "__cilkrts_save_except W%u sf %p/%s exc %p [%u %p] suspend\n",
|
||||
w->self, sf, decoded, exc,
|
||||
state->uncaughtExceptions,
|
||||
state->caughtExceptions);
|
||||
#endif
|
||||
|
||||
/* Like __cilkrts_save_exception_state except for setting the
|
||||
rethrow flag. */
|
||||
save_exception_info(w, state, exc, exc == NULL, "save_except");
|
||||
{
|
||||
full_frame *ff = w->l->frame_ff;
|
||||
CILK_ASSERT(NULL == ff->pending_exception);
|
||||
ff->pending_exception = w->l->pending_exception;
|
||||
w->l->pending_exception = NULL;
|
||||
}
|
||||
__cilkrts_exception_from_spawn(w, sf); /* does not return */
|
||||
}
|
||||
/* This code path is taken when the parent is attached. It is on
|
||||
the same stack and part of the same full frame. The caller is
|
||||
cleaning up the Cilk frame during unwind and will reraise the
|
||||
exception */
|
||||
|
||||
/* Now that the record/replay stuff is done, update the pedigree */
|
||||
update_pedigree_on_leave_frame(w, sf);
|
||||
|
||||
#if DEBUG_EXCEPTIONS /* DEBUG ONLY */
|
||||
{
|
||||
__cxa_eh_globals *state = __cxa_get_globals();
|
||||
|
||||
fflush(stdout);
|
||||
char decoded[9];
|
||||
decode_flags(sf->flags, decoded);
|
||||
fprintf(stderr, "__cilkrts_save_except W%d %p/%s %p->%p [%u %p] escape\n",
|
||||
w->self, sf, decoded, exc,
|
||||
exc ? to_cxx(exc)->nextException : 0,
|
||||
state->uncaughtExceptions,
|
||||
state->caughtExceptions);
|
||||
|
||||
/* XXX This is triggering in the user thread which gets an exception
|
||||
from somewhere but does not get the corresponding runtime exception
|
||||
state.
|
||||
XXX There might be two or more uncaught exceptions. Test could be
|
||||
(uncaught != 0) == (exc != 0). First, design tests to see if that
|
||||
case is otherwise handled correctly. And what if there's an uncaught
|
||||
exception that does not belong to this function? I.e. this is a return
|
||||
from spawn in a destructor. */
|
||||
if (exc)
|
||||
CILK_ASSERT((int)state->uncaughtExceptions > 0);
|
||||
/*CILK_ASSERT(state->uncaughtExceptions == (exc != 0));*/
|
||||
}
|
||||
#endif
|
||||
|
||||
/* The parent is attached so this exception can be propagated normally. */
|
||||
return;
|
||||
}
|
||||
|
||||
/* Save the exception state into the full frame, which is exiting
|
||||
or suspending. */
|
||||
extern "C"
|
||||
void __cilkrts_save_exception_state(__cilkrts_worker *w, full_frame *ff)
|
||||
{
|
||||
save_exception_info(w, __cxa_get_globals(), 0, false, "undo-detach");
|
||||
CILK_ASSERT(NULL == ff->pending_exception);
|
||||
ff->pending_exception = w->l->pending_exception;
|
||||
w->l->pending_exception = NULL;
|
||||
}
|
||||
|
||||
/* __cilkrts_c_sync_except is like __cilkrts_c_sync except that it
|
||||
saves exception state. __cilkrts_c_sync never returns here and
|
||||
always reinstalls the saved exception state.
|
||||
|
||||
This function must be used because a parent of this function may
|
||||
be propagating an uncaught exception. The uncaught exception
|
||||
count must be saved by the child and passed back to the parent. */
|
||||
|
||||
extern "C"
|
||||
NORETURN __cilkrts_c_sync_except (__cilkrts_worker *w, __cilkrts_stack_frame *sf)
|
||||
{
|
||||
__cxa_eh_globals *state = __cxa_get_globals();
|
||||
_Unwind_Exception *exc = (_Unwind_Exception *)sf->except_data;
|
||||
|
||||
CILK_ASSERT((sf->flags & (CILK_FRAME_UNSYNCHED|CILK_FRAME_EXCEPTING)) ==
|
||||
(CILK_FRAME_UNSYNCHED|CILK_FRAME_EXCEPTING));
|
||||
sf->flags &= ~CILK_FRAME_EXCEPTING;
|
||||
|
||||
#if DEBUG_EXCEPTIONS
|
||||
fflush(stdout);
|
||||
char decoded[9];
|
||||
decode_flags(sf->flags, decoded);
|
||||
if (exc)
|
||||
fprintf(stderr, "__cilkrts_sync_except W%u %p/%s %p->%p [%u %p]\n",
|
||||
w->self, sf, decoded, exc,
|
||||
to_cxx(exc)->nextException,
|
||||
state->uncaughtExceptions,
|
||||
state->caughtExceptions);
|
||||
else
|
||||
fprintf(stderr, "__cilkrts_sync_except W%d %p/%s none [%u %p]\n",
|
||||
w->self, sf, decoded,
|
||||
state->uncaughtExceptions,
|
||||
state->caughtExceptions);
|
||||
#endif
|
||||
|
||||
/* Here the identity of an rethrown exception is always known.
|
||||
If exc is NULL this call is only to preserve parent state. */
|
||||
save_exception_info(w, state, exc, false, "sync_except");
|
||||
#if 0
|
||||
{
|
||||
full_frame *ff = w->l->frame_ff;
|
||||
CILK_ASSERT(NULL == ff->pending_exception);
|
||||
ff->pending_exception = w->l->pending_exception;
|
||||
w->l->pending_exception = NULL;
|
||||
}
|
||||
#endif
|
||||
CILK_ASSERT(!std::uncaught_exception());
|
||||
__cilkrts_c_sync(w, sf);
|
||||
}
|
||||
|
||||
void
|
||||
pending_exception_info::destruct()
|
||||
{
|
||||
if (active) {
|
||||
#if DEBUG_EXCEPTIONS
|
||||
fprintf(stderr, "destroy exception info %p %p\n", this, active);
|
||||
#endif
|
||||
_Unwind_DeleteException(active);
|
||||
active = 0;
|
||||
} else {
|
||||
#if DEBUG_EXCEPTIONS
|
||||
fprintf(stderr, "destroy exception info %p\n", this);
|
||||
#endif
|
||||
}
|
||||
while (runtime_state.caughtExceptions) {
|
||||
__cxa_exception *exc = runtime_state.caughtExceptions;
|
||||
runtime_state.caughtExceptions = exc->nextException;
|
||||
#if DEBUG_EXCEPTIONS
|
||||
fprintf(stderr, "destroy caught exception %p\n", this);
|
||||
#endif
|
||||
_Unwind_DeleteException(&exc->unwindHeader);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* __cilkrts_merge_pending_exceptions
|
||||
*
|
||||
* Merge the right exception record into the left. The left is logically
|
||||
* earlier.
|
||||
*
|
||||
* The active exception of E is
|
||||
* E->active if it is non-NULL (in which case E->rethrow is false)
|
||||
* unresolved if E->active is NULL and E->rethrow is true
|
||||
* nil if E->active is NULL and E->rethrow is false
|
||||
*
|
||||
* The merged active exception is the left active exception if it is not
|
||||
* nil, otherwise the right.
|
||||
*
|
||||
* On entry the left state is synched and can not have an unresolved
|
||||
* exception. The merge may result in an unresolved exception.
|
||||
*
|
||||
* Due to scoping rules at most one of the caught exception lists is
|
||||
* non-NULL.
|
||||
*/
|
||||
|
||||
struct pending_exception_info *
|
||||
__cilkrts_merge_pending_exceptions (
|
||||
__cilkrts_worker *w,
|
||||
struct pending_exception_info *left,
|
||||
struct pending_exception_info *right)
|
||||
{
|
||||
/* If we've only got one exception, return it */
|
||||
|
||||
if (NULL == left) {
|
||||
#if DEBUG_EXCEPTIONS
|
||||
if (right) {
|
||||
char buf[40];
|
||||
decode_exceptions(buf, sizeof buf, right);
|
||||
fprintf(stderr, "__cilkrts merge W%u nil %p -> %p %s\n",
|
||||
w->self, right, right, buf);
|
||||
}
|
||||
#endif
|
||||
return right;
|
||||
}
|
||||
|
||||
if (NULL == right) {
|
||||
#if DEBUG_EXCEPTIONS
|
||||
if (left) {
|
||||
char buf[40];
|
||||
decode_exceptions(buf, sizeof buf, left);
|
||||
fprintf(stderr, "__cilkrts merge W%u %p nil -> %p %s\n",
|
||||
w->self, left, left, buf);
|
||||
}
|
||||
#endif
|
||||
return left;
|
||||
}
|
||||
|
||||
#if CILK_LIB_DEBUG
|
||||
/*volatile struct pending_exception_info left_in = *left, right_in = *right;*/
|
||||
left->check();
|
||||
right->check();
|
||||
#endif
|
||||
|
||||
#if DEBUG_EXCEPTIONS
|
||||
{
|
||||
char buf1[40], buf2[40];
|
||||
decode_exceptions(buf1, sizeof buf1, left);
|
||||
decode_exceptions(buf2, sizeof buf2, right);
|
||||
fprintf(stderr, "__cilkrts merge W%u %p %s %p %s\n",
|
||||
w->self, left, buf1, right, buf2);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* It should not be possible for both left and right to
|
||||
have accumulated catch blocks.
|
||||
|
||||
The left exception record may always have a catch
|
||||
chain it kept when its parent was stolen.
|
||||
|
||||
If they are siblings, the right sibling should not
|
||||
have accumulated any net catches. (Catch is lexically
|
||||
scoped.)
|
||||
|
||||
If the right frame is a parent, it should not have entered
|
||||
a catch block without syncing first. If it spawned in a
|
||||
catch block, the child got its catch. */
|
||||
__cxa_exception *caught = left->runtime_state.caughtExceptions;
|
||||
if (caught)
|
||||
CILK_ASSERT(!right->runtime_state.caughtExceptions);
|
||||
else {
|
||||
CILK_ASSERT(!left->rethrow);
|
||||
left->rethrow = right->rethrow;
|
||||
left->runtime_state.caughtExceptions = caught = right->runtime_state.caughtExceptions;
|
||||
right->runtime_state.caughtExceptions = NULL;
|
||||
}
|
||||
|
||||
/* Merge the uncaught exception and count of uncaught exceptions. */
|
||||
const unsigned int right_uncaught = right->runtime_state.uncaughtExceptions;
|
||||
if (!left->active){
|
||||
left->active = right->active; /* could be NULL */
|
||||
right->active = 0;
|
||||
left->runtime_state.uncaughtExceptions += right_uncaught;
|
||||
if (left->active)
|
||||
/* assert is C++ exception */
|
||||
/*CILK_ASSERT(__cxxabiv1::__is_gxx_exception_class(left->active->exception_class))*/;
|
||||
} else {
|
||||
/* Subtract 1 if the right exception is being destructed. */
|
||||
left->runtime_state.uncaughtExceptions += right_uncaught - (right->active != 0);
|
||||
}
|
||||
|
||||
right->destruct();
|
||||
__cilkrts_frame_free(w, right, sizeof *right);
|
||||
|
||||
/* If there is no state left, return NULL. */
|
||||
if (left->empty()) {
|
||||
left->destruct();
|
||||
__cilkrts_frame_free(w, left, sizeof *left);
|
||||
left = NULL;
|
||||
}
|
||||
|
||||
#if CILK_LIB_DEBUG
|
||||
if (left)
|
||||
left->check();
|
||||
#endif
|
||||
|
||||
return left;
|
||||
}
|
||||
|
||||
#if 0
/* Disabled.  __cilkrts_c_resume_except was the entry point used by the
   assembly-language restart code when a resumed frame had a pending
   exception.

   Any handler-count negation needed for a rethrow has already been done
   by the time the throw was resolved.

   The assembly runtime is responsible for making the throw unwind to the
   sync, spawn, or other place where the exception belongs.  (That should
   not happen after a spawn, but nothing here relies on a steal being
   exception-free.)

   The Intel stack-based system does not use this function. */
extern "C"
void __cilkrts_c_resume_except (_Unwind_Exception *exc)
{
#if DEBUG_EXCEPTIONS
    fprintf(stderr, "resume exception %p\n", exc);
#endif
    /* Reaching the next line means the raise failed and returned. */
    _Unwind_Reason_Code reason = _Unwind_RaiseException(exc);
    __cilkrts_bug ("Cilk runtime error: failed to reinstate suspended exception %p (%d)\n", exc, reason);
}
#endif
|
||||
|
||||
/* Restore the caught exception chain. This assumes no C++ exception
|
||||
code will run before the frame is resumed. If there is no exception
|
||||
to be resumed free the object. */
|
||||
|
||||
extern "C"
|
||||
void __cilkrts_setup_for_execution_sysdep(__cilkrts_worker *w, full_frame *ff)
|
||||
{
|
||||
// ASSERT: We own w->lock and ff->lock || P == 1
|
||||
|
||||
__cxa_eh_globals *state = __cxa_get_globals ();
|
||||
struct pending_exception_info *info = w->l->pending_exception;
|
||||
|
||||
if (info == NULL)
|
||||
return;
|
||||
|
||||
w->l->pending_exception = 0;
|
||||
|
||||
#if DEBUG_EXCEPTIONS
|
||||
_Unwind_Exception *exc = info->active;
|
||||
if (exc) {
|
||||
fflush(stdout);
|
||||
fprintf(stderr, "__cilkrts_resume_except W%u %p->%p [%u %p]\n",
|
||||
w->self, exc,
|
||||
to_cxx(exc)->nextException,
|
||||
info->runtime_state.uncaughtExceptions,
|
||||
info->runtime_state.caughtExceptions);
|
||||
/*CILK_ASSERT(info->runtime_state.uncaughtExceptions > 0);*/
|
||||
}
|
||||
#endif
|
||||
|
||||
if (state->uncaughtExceptions || state->caughtExceptions)
|
||||
__cilkrts_bug("W%u: resuming with non-empty prior exception state %u %p\n", state->uncaughtExceptions, state->caughtExceptions);
|
||||
|
||||
*state = info->runtime_state;
|
||||
info->runtime_state.caughtExceptions = 0;
|
||||
info->runtime_state.uncaughtExceptions = 0;
|
||||
|
||||
if (info->rethrow) {
|
||||
info->rethrow = false;
|
||||
/* Resuming function will rethrow. Runtime calls
|
||||
std::terminate if there is no caught exception. */
|
||||
ff->call_stack->flags |= CILK_FRAME_EXCEPTING;
|
||||
}
|
||||
if (info->active) {
|
||||
ff->call_stack->flags |= CILK_FRAME_EXCEPTING;
|
||||
ff->call_stack->except_data = info->active;
|
||||
info->active = 0;
|
||||
}
|
||||
|
||||
if (info->empty()) {
|
||||
info->destruct();
|
||||
__cilkrts_frame_free(w, info, sizeof *info);
|
||||
w->l->pending_exception = NULL;
|
||||
}
|
||||
|
||||
#if CILK_LIB_DEBUG
|
||||
if (ff->call_stack->except_data)
|
||||
CILK_ASSERT(std::uncaught_exception());
|
||||
#endif
|
||||
}
|
||||
|
||||
#if 0
/* Disabled.  Moves the worker's parked active exception into sf and marks
   the frame as excepting; with nothing parked it clears the excepting
   mark instead.  Always returns 0. */
extern "C"
struct pending_exception_info *__cilkrts_get_exception(__cilkrts_worker *w,
                                                       __cilkrts_stack_frame *sf)
{
    struct pending_exception_info *pending = w->l->pending_exception;

    if (pending == NULL) {
        sf->flags &= ~CILK_FRAME_EXCEPTING;
        return 0;
    }

    w->l->pending_exception = NULL;

    /* Detach the active exception so destruct() does not delete it; it
       goes into the frame. */
    _Unwind_Exception *exc = pending->active;
    pending->active = NULL;

    pending->destruct();
    __cilkrts_frame_free(w, pending, sizeof *pending);
    pending = 0;

    sf->flags |= CILK_FRAME_EXCEPTING;
    sf->exception = exc;
    return 0;
}
#endif
|
||||
|
||||
extern "C"
|
||||
void __attribute__((nonnull)) __cilkrts_gcc_rethrow(__cilkrts_stack_frame *sf)
|
||||
{
|
||||
#ifdef __CYGWIN__
|
||||
// Cygwin doesn't support exceptions, so _Unwind_Resume isn't available
|
||||
// Which means we can't support exceptions either
|
||||
__cilkrts_bug("The Cygwin implementation of the Intel Cilk Plus runtime doesn't support exceptions\n");
|
||||
#else
|
||||
if (sf->except_data) {
|
||||
#if CILK_LIB_DEBUG
|
||||
CILK_ASSERT(std::uncaught_exception());
|
||||
#endif
|
||||
_Unwind_Resume ((_Unwind_Exception *)sf->except_data);
|
||||
} else {
|
||||
throw;
|
||||
}
|
||||
#endif // __CYGWIN__
|
||||
}
|
||||
|
||||
/* End except-gcc.cpp */
|
||||
|
146
libcilkrts/runtime/except-gcc.h
Normal file
146
libcilkrts/runtime/except-gcc.h
Normal file
|
@ -0,0 +1,146 @@
|
|||
/* except-gcc.h -*-C++-*-
|
||||
*
|
||||
*************************************************************************
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2009-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
**************************************************************************/
|
||||
|
||||
/**
|
||||
* @file except-gcc.h
|
||||
*
|
||||
* @brief ABI for gcc exception handling.
|
||||
*
|
||||
* @par Origin
|
||||
* The code below is generally copied from the Intel Itanium ABI (Intel
|
||||
* download 245370).
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_EXCEPT_GCC_DOT_H
|
||||
#define INCLUDED_EXCEPT_GCC_DOT_H
|
||||
|
||||
#ifndef __cplusplus
|
||||
# error except-gcc.h should be used in C++ code only.
|
||||
#endif
|
||||
|
||||
#include <cilk/common.h>
|
||||
#include <exception>
|
||||
#include <typeinfo>
|
||||
|
||||
struct __cxa_exception;
|
||||
|
||||
__CILKRTS_BEGIN_EXTERN_C
|
||||
|
||||
/** Unwind reason code (Itanium ABI 6.1.2.1) */
|
||||
typedef enum _Unwind_Reason_Code {
|
||||
_URC_NO_REASON = 0,
|
||||
_URC_FOREIGN_EXCEPTION_CAUGHT = 1,
|
||||
_URC_FATAL_PHASE2_ERROR = 2,
|
||||
_URC_FATAL_PHASE1_ERROR = 3,
|
||||
_URC_NORMAL_STOP = 4,
|
||||
_URC_END_OF_STACK = 5,
|
||||
_URC_HANDLER_FOUND = 6,
|
||||
_URC_INSTALL_CONTEXT = 7,
|
||||
_URC_CONTINUE_UNWIND = 8
|
||||
} _Unwind_Reason_Code;
|
||||
|
||||
typedef struct _Unwind_Exception _Unwind_Exception;
|
||||
|
||||
/** Exception cleanup function pointer (Itanium ABI 6.1.2.2) */
|
||||
typedef void (*_Unwind_Exception_Cleanup_Fn)(_Unwind_Reason_Code reason,
|
||||
_Unwind_Exception *exc);
|
||||
|
||||
/**
|
||||
* @brief Exception unwinding information
|
||||
*
|
||||
* This is copied from the Intel Itanium ABI except that the
|
||||
* private fields are declared unsigned long for binary
|
||||
* compatibility with gcc/g++ on 32 bit machines.
|
||||
*/
|
||||
struct _Unwind_Exception
|
||||
{
|
||||
uint64_t exception_class;
|
||||
_Unwind_Exception_Cleanup_Fn exception_cleanup;
|
||||
unsigned long private_1;
|
||||
unsigned long private_2;
|
||||
};
|
||||
|
||||
/** Throw or rethrow an exception */
|
||||
_Unwind_Reason_Code
|
||||
_Unwind_RaiseException(_Unwind_Exception *exception_object);
|
||||
|
||||
/** Resume an exception other than by rethrowing it. */
|
||||
void _Unwind_Resume(_Unwind_Exception *exception_object);
|
||||
|
||||
/** Delete an exception object */
|
||||
void _Unwind_DeleteException(_Unwind_Exception *exception_object);
|
||||
|
||||
/**
|
||||
* C++ exception ABI.
|
||||
* The following declarations are from
|
||||
*
|
||||
* http://www.codesourcery.com/public/cxx-abi/abi-eh.html#cxx-abi
|
||||
*/
|
||||
|
||||
/* Header the C++ runtime keeps for a thrown exception, as declared by the
   C++ exception ABI referenced above.  The member order is part of the
   binary layout and must not be changed. */
struct __cxa_exception {
|
||||
std::type_info * exceptionType;
|
||||
void (*exceptionDestructor)(void *);
|
||||
std::unexpected_handler unexpectedHandler;
|
||||
std::terminate_handler terminateHandler;
|
||||
/* Link to the next entry when exceptions are chained together (for
   example on the caughtExceptions list in __cxa_eh_globals). */
__cxa_exception * nextException;
|
||||
|
||||
int handlerCount;
|
||||
int handlerSwitchValue;
|
||||
const char * actionRecord;
|
||||
const char * languageSpecificData;
|
||||
void * catchTemp;
|
||||
void * adjustedPtr;
|
||||
|
||||
/* Must remain the final member: to_cxx() locates the enclosing
   __cxa_exception by stepping back from the end of this header. */
_Unwind_Exception unwindHeader;
|
||||
};
|
||||
|
||||
/* Map an unwind header back to the __cxa_exception that contains it.
   The header is the last member of __cxa_exception, so the end of the
   header is also the end of the enclosing object; stepping back one
   whole __cxa_exception from there gives its start. */
static inline __cxa_exception *to_cxx(_Unwind_Exception *e)
{
    _Unwind_Exception *header_end = e + 1;
    __cxa_exception *object_end = (__cxa_exception *)header_end;

    return object_end - 1;
}
|
||||
|
||||
typedef struct __cxa_eh_globals {
|
||||
__cxa_exception *caughtExceptions;
|
||||
unsigned int uncaughtExceptions;
|
||||
} __cxa_eh_globals;
|
||||
|
||||
__cxa_eh_globals*__cxa_get_globals(void) throw();
|
||||
|
||||
__CILKRTS_END_EXTERN_C
|
||||
|
||||
#endif // ! defined(INCLUDED_EXCEPT_GCC_DOT_H)
|
123
libcilkrts/runtime/except.h
Normal file
123
libcilkrts/runtime/except.h
Normal file
|
@ -0,0 +1,123 @@
|
|||
/* except.h -*-C++-*-
|
||||
*
|
||||
*************************************************************************
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2009-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
**************************************************************************/
|
||||
|
||||
/**
|
||||
* @file except.h
|
||||
*
|
||||
* @brief Common definitions for the various implementations of exception
|
||||
* handling.
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_EXCEPT_DOT_H
|
||||
#define INCLUDED_EXCEPT_DOT_H
|
||||
|
||||
#include <cilk/common.h>
|
||||
#include <internal/abi.h>
|
||||
#include "full_frame.h"
|
||||
|
||||
__CILKRTS_BEGIN_EXTERN_C
|
||||
|
||||
/**
|
||||
* OS-dependent information about an exception that's being moved between
|
||||
* strands.
|
||||
*/
|
||||
typedef struct pending_exception_info pending_exception_info;
|
||||
|
||||
/**
|
||||
* Merge the right exception record into the left. The left is logically
|
||||
* earlier.
|
||||
*
|
||||
* On entry the left state is synched and can not have an unresolved
|
||||
* exception. The merge may result in an unresolved exception.
|
||||
*
|
||||
* If there is both a right and left exception, the right exception will
|
||||
* be disposed of in preference to the left exception, destructing the
|
||||
* exception object.
|
||||
*
|
||||
* @param w The worker that is preparing to resume execution.
|
||||
* @param left_exception The exception that would have happened earlier
|
||||
* if the code executed serially. Can be NULL if the left strand has not
|
||||
* raised an exception.
|
||||
* @param right_exception The exception that would have happened later
|
||||
* if the code executed serially. Can be NULL if the right strand has not
|
||||
* raised an exception.
|
||||
*
|
||||
* @return NULL if both the right and left exceptions are NULL. This
|
||||
* indicates that there are no pending exceptions.
|
||||
* @return The pending exception that is to be raised to continue searching
|
||||
* for a catch block to handle the exception.
|
||||
*/
|
||||
COMMON_SYSDEP
|
||||
struct pending_exception_info *__cilkrts_merge_pending_exceptions(
|
||||
__cilkrts_worker *w,
|
||||
pending_exception_info *left_exception,
|
||||
pending_exception_info *right_exception);
|
||||
|
||||
/**
|
||||
* Move the exception information from the worker to the full_frame.
|
||||
*
|
||||
* @param w The worker which is suspending work on a full_frame.
|
||||
* @param ff The full_frame which is being suspended.
|
||||
*/
|
||||
COMMON_SYSDEP
|
||||
void __cilkrts_save_exception_state(__cilkrts_worker *w,
|
||||
full_frame *ff);
|
||||
|
||||
/**
|
||||
* Function to delete pending exception. This will delete the
|
||||
* exception object and then free the stack/fiber.
|
||||
*
|
||||
* @param w The worker we're running on.
|
||||
* @param pei The pending exception to be deleted
|
||||
* @param delete_object Unused. Should always be 1.
|
||||
*/
|
||||
void delete_exception_obj (__cilkrts_worker *w,
|
||||
struct pending_exception_info *pei,
|
||||
int delete_object);
|
||||
|
||||
#ifndef _WIN32
|
||||
/* gcc-style exception handling */
|
||||
NON_COMMON NORETURN __cilkrts_c_sync_except(__cilkrts_worker *w,
|
||||
__cilkrts_stack_frame *sf);
|
||||
NON_COMMON void __attribute__((nonnull))
|
||||
__cilkrts_gcc_rethrow(__cilkrts_stack_frame *sf);
|
||||
#endif
|
||||
|
||||
__CILKRTS_END_EXTERN_C
|
||||
|
||||
#endif // ! defined(INCLUDED_EXCEPT_DOT_H)
|
462
libcilkrts/runtime/frame_malloc.c
Normal file
462
libcilkrts/runtime/frame_malloc.c
Normal file
|
@ -0,0 +1,462 @@
|
|||
/* frame_malloc.c -*-C-*-
|
||||
*
|
||||
*************************************************************************
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2009-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
**************************************************************************/
|
||||
|
||||
#include "frame_malloc.h"
|
||||
#include "bug.h"
|
||||
#include "local_state.h"
|
||||
#include "cilk_malloc.h"
|
||||
|
||||
#ifndef __VXWORKS__
|
||||
#include <memory.h>
|
||||
#endif
|
||||
|
||||
/* #define USE_MMAP 1 */
|
||||
#if USE_MMAP
|
||||
#define __USE_MISC 1
|
||||
#include <sys/mman.h>
|
||||
#include <errno.h>
|
||||
#endif
|
||||
|
||||
// Define to fill the stack frame header with the fill character when pushing
|
||||
// it on a free list. Note that this should be #ifdef'd out when checked in!
|
||||
|
||||
#ifdef _DEBUG
|
||||
#define HEADER_FILL_CHAR 0xbf
|
||||
#endif
|
||||
|
||||
// HEADER_FILL_CHAR should not be defined when checked in, so put out a warning
|
||||
// message if this is a release build
|
||||
|
||||
#if defined(NDEBUG) && defined (HEADER_FILL_CHAR)
|
||||
#pragma message ("Warning: HEADER_FILL_CHAR defined for a release build")
|
||||
#endif
|
||||
|
||||
static void allocate_batch(__cilkrts_worker *w, int bucket, size_t size);
|
||||
|
||||
#ifndef _WIN32
|
||||
|
||||
/* Size in bytes of the blocks served by each bucket, indexed by bucket
   number (smallest bucket first). */
const unsigned short __cilkrts_bucket_sizes[FRAME_MALLOC_NBUCKETS] = {
    64, 128, 256, 512, 1024, 2048
};
|
||||
|
||||
#define FRAME_MALLOC_BUCKET_TO_SIZE(bucket) __cilkrts_bucket_sizes[bucket]
|
||||
|
||||
/* threshold above which we use slow malloc */
|
||||
#define FRAME_MALLOC_MAX_SIZE 2048
|
||||
|
||||
#else // _WIN32
|
||||
|
||||
/* Note that this must match the implementation of framesz_to_bucket in
|
||||
* asmilator/layout.ml! */
|
||||
#define FRAME_MALLOC_BUCKET_TO_SIZE(bucket) ((size_t)(64 << (bucket)))
|
||||
|
||||
/* threshold above which we use slow malloc */
|
||||
#define FRAME_MALLOC_MAX_SIZE \
|
||||
FRAME_MALLOC_BUCKET_TO_SIZE(FRAME_MALLOC_NBUCKETS - 1)
|
||||
|
||||
#endif // _WIN32
|
||||
|
||||
/* utility procedures */
|
||||
static void push(struct free_list **b, struct free_list *p)
|
||||
{
|
||||
#ifdef HEADER_FILL_CHAR
|
||||
memset (p, HEADER_FILL_CHAR, FRAME_MALLOC_BUCKET_TO_SIZE(0));
|
||||
#endif
|
||||
/* cons! onto free list */
|
||||
p->cdr = *b;
|
||||
*b = p;
|
||||
}
|
||||
|
||||
static struct free_list *pop(struct free_list **b)
|
||||
{
|
||||
struct free_list *p = *b;
|
||||
if (p)
|
||||
*b = p->cdr;
|
||||
return p;
|
||||
}
|
||||
|
||||
/*************************************************************
|
||||
global allocator:
|
||||
*************************************************************/
|
||||
/* request slightly less than 2^K from the OS, which after malloc
|
||||
overhead and alignment should end up filling each VM page almost
|
||||
completely. 128 is a guess of the total malloc overhead and cache
|
||||
line alignment */
|
||||
#define FRAME_MALLOC_CHUNK (32 * 1024 - 128)
|
||||
|
||||
/** Node in the list of pool chunks: extend_global_pool() adds one per chunk and global cleanup frees both the chunk and the node */
|
||||
struct pool_cons {
|
||||
char *p; /**< Start of the chunk recorded by this node */
|
||||
struct pool_cons *cdr; /**< Remainder of the list */
|
||||
};
|
||||
|
||||
/* Obtain a new FRAME_MALLOC_CHUNK-byte chunk via __cilkrts_malloc(), make it
   the region that pool_begin/pool_end describe, and record it at the head of
   pool_list so that global cleanup can release it later. */
static void extend_global_pool(global_state_t *g)
{
    /* FIXME: memalign to a cache line? */
    struct pool_cons *node =
        (struct pool_cons *)__cilkrts_malloc(sizeof(*node));
    char *chunk = (char *)__cilkrts_malloc((size_t)FRAME_MALLOC_CHUNK);

    g->frame_malloc.pool_begin = chunk;
    g->frame_malloc.pool_end = chunk + FRAME_MALLOC_CHUNK;
    g->frame_malloc.allocated_from_os += FRAME_MALLOC_CHUNK;

    node->p = chunk;
    node->cdr = g->frame_malloc.pool_list;
    g->frame_malloc.pool_list = node;
}
|
||||
|
||||
/* the size is already canonicalized at this point */
|
||||
static struct free_list *global_alloc(global_state_t *g, int bucket)
|
||||
{
|
||||
struct free_list *mem;
|
||||
size_t size;
|
||||
|
||||
CILK_ASSERT(bucket < FRAME_MALLOC_NBUCKETS);
|
||||
size = FRAME_MALLOC_BUCKET_TO_SIZE(bucket);
|
||||
g->frame_malloc.allocated_from_global_pool += size;
|
||||
|
||||
if (!(mem = pop(&g->frame_malloc.global_free_list[bucket]))) {
|
||||
|
||||
CILK_ASSERT(g->frame_malloc.pool_begin <= g->frame_malloc.pool_end);
|
||||
if (g->frame_malloc.pool_begin + size > g->frame_malloc.pool_end) {
|
||||
/* We waste the fragment of pool. */
|
||||
g->frame_malloc.wasted +=
|
||||
g->frame_malloc.pool_end - g->frame_malloc.pool_begin;
|
||||
extend_global_pool(g);
|
||||
}
|
||||
mem = (struct free_list *)g->frame_malloc.pool_begin;
|
||||
g->frame_malloc.pool_begin += size;
|
||||
}
|
||||
|
||||
return mem;
|
||||
}
|
||||
|
||||
/* Return block MEM, which belongs to BUCKET, to that bucket's global free
   list and subtract its size from allocated_from_global_pool.

   BUCKET must lie in [0, FRAME_MALLOC_NBUCKETS); both bounds are asserted
   so that a negative index cannot reach the free-list array. */
static void global_free(global_state_t *g, void *mem, int bucket)
{
    size_t size;

    CILK_ASSERT(bucket >= 0 && bucket < FRAME_MALLOC_NBUCKETS);
    size = FRAME_MALLOC_BUCKET_TO_SIZE(bucket);
    g->frame_malloc.allocated_from_global_pool -= size;

    push(&g->frame_malloc.global_free_list[bucket], mem);
}
|
||||
|
||||
/* Put the frame allocator state inside G into its initial, empty
   configuration: no pool chunks, empty global free lists, zeroed counters,
   leak checking on, and the default batch size / potential limit. */
void __cilkrts_frame_malloc_global_init(global_state_t *g)
{
    int bucket;

    __cilkrts_mutex_init(&g->frame_malloc.lock);

    /* Nothing has been obtained from the OS yet. */
    g->frame_malloc.pool_list = 0;
    g->frame_malloc.pool_begin = 0;
    g->frame_malloc.pool_end = 0;
    for (bucket = 0; bucket < FRAME_MALLOC_NBUCKETS; ++bucket)
        g->frame_malloc.global_free_list[bucket] = 0;

    /* Tuning parameters. */
    g->frame_malloc.batch_size = 8000;
    g->frame_malloc.potential_limit = 4 * g->frame_malloc.batch_size;
    g->frame_malloc.check_for_leaks = 1;

    /* Accounting. */
    g->frame_malloc.allocated_from_os = 0;
    g->frame_malloc.allocated_from_global_pool = 0;
    g->frame_malloc.wasted = 0;
}
|
||||
|
||||
// Counts how many bytes are in the global free list.
|
||||
static size_t count_memory_in_global_list(global_state_t *g)
|
||||
{
|
||||
|
||||
// Count the memory remaining in the global free list.
|
||||
size_t size_remaining_in_global_list = 0;
|
||||
int i;
|
||||
for (i = 0; i < FRAME_MALLOC_NBUCKETS; ++i) {
|
||||
struct free_list *p;
|
||||
size_t size_in_bucket = 0;
|
||||
p = g->frame_malloc.global_free_list[i];
|
||||
|
||||
while (p) {
|
||||
size_in_bucket += FRAME_MALLOC_BUCKET_TO_SIZE(i);
|
||||
p = p->cdr;
|
||||
}
|
||||
size_remaining_in_global_list += size_in_bucket;
|
||||
}
|
||||
return size_remaining_in_global_list;
|
||||
}
|
||||
|
||||
|
||||
/* Tear down the frame allocator state inside G: optionally sanity-check the
   global free lists, release every pool chunk, destroy the lock, and (when
   leak checking is on) report a leak if any bytes handed out from the global
   pool were never returned. */
void __cilkrts_frame_malloc_global_cleanup(global_state_t *g)
{
    struct pool_cons *chunk;

    if (g->frame_malloc.check_for_leaks) {
        // TBD: This check is weak.  Short of memory corruption, the free
        // lists cannot hold more memory than was allocated from the OS.
        // Ideally we would verify that *all* of the memory is back in the
        // global free list, but the runtime itself appears to use some
        // memory that is not being tracked.
        size_t free_bytes = count_memory_in_global_list(g);

        if (free_bytes > g->frame_malloc.allocated_from_os) {
            __cilkrts_bug("\nError. The Cilk runtime data structures may have been corrupted.\n");
        }
    }

    // Release each pool chunk together with the list node that records it.
    for (chunk = g->frame_malloc.pool_list;
         chunk;
         chunk = g->frame_malloc.pool_list) {
        g->frame_malloc.pool_list = chunk->cdr;
        __cilkrts_free(chunk->p);
        __cilkrts_free(chunk);
    }

    __cilkrts_mutex_destroy(0, &g->frame_malloc.lock);

    // Everything moved from the global pool into workers must have been
    // returned to the global pool by now.
    if (g->frame_malloc.check_for_leaks
        && (g->frame_malloc.allocated_from_global_pool != 0))
    {
        __cilkrts_bug("\n"
                      "---------------------------" "\n"
                      " MEMORY LEAK DETECTED!!! " "\n"
                      "---------------------------" "\n"
                      "\n"
                      );
    }
}
|
||||
|
||||
/*************************************************************
|
||||
per-worker allocator
|
||||
*************************************************************/
|
||||
/* allocate a batch of frames of size SIZE from the global pool and
|
||||
store them in the worker's free list */
|
||||
static void allocate_batch(__cilkrts_worker *w, int bucket, size_t size)
|
||||
{
|
||||
global_state_t *g = w->g;
|
||||
|
||||
__cilkrts_mutex_lock(w, &g->frame_malloc.lock); {
|
||||
#if USE_MMAP
|
||||
char *p = mmap(0, 12288, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
|
||||
if (p == MAP_FAILED)
|
||||
__cilkrts_bug("mmap failed %d", errno);
|
||||
assert(size < 4096);
|
||||
assert(p != MAP_FAILED);
|
||||
mprotect(p, 4096, PROT_NONE);
|
||||
mprotect(p + 8192, 4096, PROT_NONE);
|
||||
w->l->bucket_potential[bucket] += size;
|
||||
push(&w->l->free_list[bucket], (struct free_list *)(p + 8192 - size));
|
||||
#else
|
||||
size_t bytes_allocated = 0;
|
||||
do {
|
||||
w->l->bucket_potential[bucket] += size;
|
||||
bytes_allocated += size;
|
||||
push(&w->l->free_list[bucket], global_alloc(g, bucket));
|
||||
} while (bytes_allocated < g->frame_malloc.batch_size);
|
||||
#endif
|
||||
} __cilkrts_mutex_unlock(w, &g->frame_malloc.lock);
|
||||
|
||||
}
|
||||
|
||||
/* Trim worker W's free list for BUCKET (blocks of SIZE bytes).  The walk
   stops once roughly half of the bucket's current potential has been
   counted; everything after the node where it stops is given back to the
   global allocator under the global lock.  The bucket's potential is reset
   to the number of bytes counted during the walk. */
static void gc_bucket(__cilkrts_worker *w, int bucket, size_t size)
{
    struct free_list *keep_tail, *victim;
    global_state_t *g = w->g;
    size_t pot = w->l->bucket_potential[bucket];
    size_t kept = 0;

    /* Keep up to POT/2 elements in the free list.  The cost of
       counting up to POT/2 is amortized against POT. */
    keep_tail = w->l->free_list[bucket];
    while (keep_tail && 2 * kept < pot) {
        keep_tail = keep_tail->cdr;
        kept += size;
    }
    w->l->bucket_potential[bucket] = kept;

    if (!keep_tail)
        return;     /* list ended before the threshold: nothing to free */

    /* Free the rest of the list.  The cost of grabbing the lock is
       amortized against POT/2; the cost of traversing the rest of the
       list is amortized against the free operation that put each
       element on the list. */
    __cilkrts_mutex_lock(w, &g->frame_malloc.lock);
    {
        while ((victim = pop(&keep_tail->cdr))) {
#if USE_MMAP
            munmap((char *)victim + size - 8192, 12288);
#else
            global_free(g, victim, bucket);
#endif
        }
    }
    __cilkrts_mutex_unlock(w, &g->frame_malloc.lock);
}
|
||||
|
||||
// Free all the memory in this bucket for the specified worker,
|
||||
// returning it to the global pool's free list.
|
||||
static void move_bucket_to_global_free_list(__cilkrts_worker *w,
|
||||
int bucket)
|
||||
{
|
||||
struct free_list *p, *q;
|
||||
global_state_t *g = w->g;
|
||||
p = w->l->free_list[bucket];
|
||||
|
||||
if (p) {
|
||||
__cilkrts_mutex_lock(w, &g->frame_malloc.lock); {
|
||||
while ((q = pop(&p))) {
|
||||
#if USE_MMAP
|
||||
size_t size = FRAME_MALLOC_BUCKET_TO_SIZE(bucket);
|
||||
munmap((char *)q + size - 8192, 12288);
|
||||
#else
|
||||
global_free(g, q, bucket);
|
||||
#endif
|
||||
}
|
||||
} __cilkrts_mutex_unlock(w, &g->frame_malloc.lock);
|
||||
}
|
||||
|
||||
// I'm not sure this does anything useful now, since
|
||||
// the worker is about to be destroyed. But why not?
|
||||
w->l->bucket_potential[bucket] = 0;
|
||||
}
|
||||
|
||||
static int bucket_of_size(size_t size)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < FRAME_MALLOC_NBUCKETS; ++i)
|
||||
if (size <= FRAME_MALLOC_BUCKET_TO_SIZE(i))
|
||||
return i;
|
||||
|
||||
CILK_ASSERT(0 /* can't happen */);
|
||||
return -1;
|
||||
}
|
||||
|
||||
size_t __cilkrts_frame_malloc_roundup(size_t size)
|
||||
{
|
||||
if (size > FRAME_MALLOC_MAX_SIZE) {
|
||||
/* nothing, leave it alone */
|
||||
} else {
|
||||
int bucket = bucket_of_size(size);
|
||||
size = FRAME_MALLOC_BUCKET_TO_SIZE(bucket);
|
||||
}
|
||||
return size;
|
||||
}
|
||||
|
||||
size_t __cilkrts_size_of_bucket(int bucket)
|
||||
{
|
||||
CILK_ASSERT(bucket >= 0 && bucket < FRAME_MALLOC_NBUCKETS);
|
||||
return FRAME_MALLOC_BUCKET_TO_SIZE(bucket);
|
||||
}
|
||||
|
||||
/* Allocate SIZE bytes on behalf of worker W.  Requests with no worker, or
   larger than FRAME_MALLOC_MAX_SIZE, go straight to __cilkrts_malloc().
   Otherwise the request is served from the worker's free list for the
   matching bucket, refilling that list from the global pool as needed. */
void *__cilkrts_frame_malloc(__cilkrts_worker *w, size_t size)
{
    int bucket;
    void *mem;

    /* if too large, or if no worker, fall back to __cilkrts_malloc() */
    if (!w || size > FRAME_MALLOC_MAX_SIZE) {
        NOTE_INTERVAL(w, INTERVAL_FRAME_ALLOC_LARGE);
        return __cilkrts_malloc(size);
    }

    START_INTERVAL(w, INTERVAL_FRAME_ALLOC); {
        bucket = bucket_of_size(size);
        size = FRAME_MALLOC_BUCKET_TO_SIZE(bucket);

        for (;;) {
            mem = pop(&w->l->free_list[bucket]);
            if (mem)
                break;

            /* local list is empty: get a batch of frames from the
               global pool and try again */
            START_INTERVAL(w, INTERVAL_FRAME_ALLOC_GLOBAL) {
                allocate_batch(w, bucket, size);
            } STOP_INTERVAL(w, INTERVAL_FRAME_ALLOC_GLOBAL);
        }
    } STOP_INTERVAL(w, INTERVAL_FRAME_ALLOC);

    return mem;
}
|
||||
|
||||
/* Release block P0 of SIZE bytes on behalf of worker W.  Blocks with no
   worker, or larger than FRAME_MALLOC_MAX_SIZE, are handed to
   __cilkrts_free().  Otherwise the block is pushed on the worker's free list
   for the matching bucket, and the bucket is garbage-collected once its
   potential exceeds the global potential_limit. */
void __cilkrts_frame_free(__cilkrts_worker *w, void *p0, size_t size)
{
    int bucket;
    struct free_list *block = (struct free_list *)p0;

    /* if too large, or if no worker, fall back to __cilkrts_free() */
    if (!w || size > FRAME_MALLOC_MAX_SIZE) {
        NOTE_INTERVAL(w, INTERVAL_FRAME_FREE_LARGE);
        __cilkrts_free(block);
        return;
    }

#if CILK_LIB_DEBUG
    *(volatile long *)w;
#endif

    START_INTERVAL(w, INTERVAL_FRAME_FREE); {
        bucket = bucket_of_size(size);
        size = FRAME_MALLOC_BUCKET_TO_SIZE(bucket);

        /* Account for the block, then put it on the local list. */
        w->l->bucket_potential[bucket] += size;
        push(&w->l->free_list[bucket], block);

        if (w->l->bucket_potential[bucket] >
            w->g->frame_malloc.potential_limit) {
            START_INTERVAL(w, INTERVAL_FRAME_FREE_GLOBAL) {
                gc_bucket(w, bucket, size);
            } STOP_INTERVAL(w, INTERVAL_FRAME_FREE_GLOBAL);
        }
    } STOP_INTERVAL(w, INTERVAL_FRAME_FREE);
}
|
||||
|
||||
/* Start worker W with an empty memory cache: every bucket has an empty free
   list and zero potential. */
void __cilkrts_frame_malloc_per_worker_init(__cilkrts_worker *w)
{
    local_state *l = w->l;
    int bucket = 0;

    while (bucket < FRAME_MALLOC_NBUCKETS) {
        l->bucket_potential[bucket] = 0;
        l->free_list[bucket] = 0;
        ++bucket;
    }
}
|
||||
|
||||
// Hand every bucket of worker W's memory cache back to the global pool, so
// the memory does not become unreachable (leak) when the worker is
// destroyed.
void __cilkrts_frame_malloc_per_worker_cleanup(__cilkrts_worker *w)
{
    int bucket;

    for (bucket = 0; bucket < FRAME_MALLOC_NBUCKETS; ++bucket)
        move_bucket_to_global_free_list(w, bucket);
}
|
||||
|
||||
/*
|
||||
Local Variables: **
|
||||
c-file-style:"bsd" **
|
||||
c-basic-offset:4 **
|
||||
indent-tabs-mode:nil **
|
||||
End: **
|
||||
*/
|
205
libcilkrts/runtime/frame_malloc.h
Normal file
205
libcilkrts/runtime/frame_malloc.h
Normal file
|
@ -0,0 +1,205 @@
|
|||
/* frame_malloc.h -*-C++-*-
|
||||
*
|
||||
*************************************************************************
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2009-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
**************************************************************************/
|
||||
|
||||
/**
|
||||
* @file frame_malloc.h
|
||||
*
|
||||
* @brief The frame allocation routines manage memory in a per-worker pool.
|
||||
*
|
||||
* The name "frame malloc" refers to an earlier implementation of Cilk which
|
||||
* allocated frames from the heap using this allocator.
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_FRAME_MALLOC_DOT_H
|
||||
#define INCLUDED_FRAME_MALLOC_DOT_H
|
||||
|
||||
#include "worker_mutex.h"
|
||||
#include "rts-common.h"
|
||||
#include <internal/abi.h> // __cilkrts_worker
|
||||
|
||||
#ifdef __cplusplus
|
||||
# include <cstddef>
|
||||
#else
|
||||
# include <stddef.h>
|
||||
#endif
|
||||
|
||||
__CILKRTS_BEGIN_EXTERN_C
|
||||
|
||||
/**
|
||||
* Number of buckets. Gives us buckets to hold 64, 128, 256, 512, 1024
|
||||
* and 2048 bytes
|
||||
*/
|
||||
#define FRAME_MALLOC_NBUCKETS 6
|
||||
|
||||
/** Layout of a block while it is unallocated: the start of the block itself is reused as the free-list link */
|
||||
struct free_list {
|
||||
/** Pointer to the next free block on the same list (null at the end of the list) */
|
||||
struct free_list *cdr;
|
||||
};
|
||||
|
||||
/** Frame allocator cache state: pool chunks, free-list buckets and accounting. NOTE(review): the original comment said "per-worker", but frame_malloc.c reaches fields with these names through global_state_t (g->frame_malloc) -- presumably this is the shared global cache; confirm against the global state definition. */
|
||||
struct __cilkrts_frame_cache
|
||||
{
|
||||
/** Mutex to serialize access to the pool and the global free lists */
|
||||
struct mutex lock;
|
||||
|
||||
/** Linked list of the pool chunks allocated so far (struct pool_cons nodes) */
|
||||
struct pool_cons *pool_list;
|
||||
|
||||
/** Low bound of the memory still available in the current pool chunk */
|
||||
char *pool_begin;
|
||||
|
||||
/** High bound (one past the end) of the current pool chunk */
|
||||
char *pool_end;
|
||||
|
||||
/** Global free-list buckets, one list head per bucket size */
|
||||
struct free_list *global_free_list[FRAME_MALLOC_NBUCKETS];
|
||||
|
||||
/**
|
||||
* How many bytes to obtain at once from the global pool
|
||||
* (approximately)
|
||||
*/
|
||||
size_t batch_size;
|
||||
|
||||
/** Garbage-collect a bucket when its potential exceeds the limit */
|
||||
size_t potential_limit;
|
||||
|
||||
/** If TRUE, check for memory leaks at the end of execution */
|
||||
int check_for_leaks;
|
||||
|
||||
/** Bytes of memory allocated from the OS by the global cache */
|
||||
size_t allocated_from_os;
|
||||
|
||||
/** Bytes left unused at the end of pool chunks because the remainder was too small for the requested block */
|
||||
size_t wasted;
|
||||
|
||||
/** Bytes handed out from the global cache and not yet returned to it */
|
||||
size_t allocated_from_global_pool;
|
||||
|
||||
};
|
||||
|
||||
/**
|
||||
* Allocate memory from the per-worker pool. If the size is too large, or
|
||||
* if we're given a NULL worker, the memory is allocated using
|
||||
* __cilkrts_malloc().
|
||||
*
|
||||
* @param w The worker to allocate the memory from.
|
||||
* @param size The number of bytes to allocate.
|
||||
*
|
||||
* @return pointer to allocated memory block.
|
||||
*/
|
||||
COMMON_PORTABLE
|
||||
void *__cilkrts_frame_malloc(__cilkrts_worker *w,
|
||||
size_t size) cilk_nothrow;
|
||||
|
||||
/**
|
||||
* Return memory to the per-worker pool. If the size is too large, or
|
||||
* if we're given a NULL worker, the memory is freed using
|
||||
* __cilkrts_free().
|
||||
*
|
||||
* @param w The worker whose pool the memory is returned to.
|
||||
* @param p The memory block to be released.
|
||||
* @param size The size of the block, in bytes.
|
||||
*/
|
||||
COMMON_PORTABLE
|
||||
void __cilkrts_frame_free(__cilkrts_worker *w,
|
||||
void* p,
|
||||
size_t size) cilk_nothrow;
|
||||
|
||||
/**
|
||||
* Destroy the global cache stored in the global state, freeing all memory
|
||||
* to the global heap. Checks whether any memory has been allocated but
|
||||
* not freed.
|
||||
*
|
||||
* @param g The global state.
|
||||
*/
|
||||
COMMON_PORTABLE
|
||||
void __cilkrts_frame_malloc_global_cleanup(global_state_t *g);
|
||||
|
||||
/**
|
||||
* Initialize a worker's memory cache. Initially it is empty.
|
||||
*
|
||||
* @param w The worker whose memory cache is to be initialized.
|
||||
*/
|
||||
COMMON_PORTABLE
|
||||
void __cilkrts_frame_malloc_per_worker_init(__cilkrts_worker *w);
|
||||
|
||||
/**
|
||||
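* Move every block held in the worker's memory cache back to the global
|
||||
* free lists, so the memory is not stranded when the worker is destroyed.
|
||||
*
|
||||
* NOTE(review): the implementation in frame_malloc.c does this unconditionally and never reads check_for_leaks; confirm whether callers gate the call on that flag.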
|
||||
*
|
||||
* @param w The worker whose memory cache is to be cleaned up.
|
||||
*/
|
||||
COMMON_PORTABLE
|
||||
void __cilkrts_frame_malloc_per_worker_cleanup(__cilkrts_worker *w);
|
||||
|
||||
/**
|
||||
* Round a number of bytes to the size of the smallest bucket that will
|
||||
* hold it. If the size is bigger than the largest bucket, the value is
|
||||
* unchanged.
|
||||
*
|
||||
* @param size Number of bytes to be rounded up to the nearest bucket size.
|
||||
*
|
||||
* @return The size of the smallest bucket that will hold the specified bytes.
|
||||
*/
|
||||
COMMON_PORTABLE
|
||||
size_t __cilkrts_frame_malloc_roundup(size_t size) cilk_nothrow;
|
||||
|
||||
/**
|
||||
* Return the number of bytes that can fit into a bucket.
|
||||
*
|
||||
* Preconditions:
|
||||
* - The index must be in the range 0 to FRAME_MALLOC_NBUCKETS - 1, inclusive
|
||||
*
|
||||
* @param bucket Index of the bucket to be sized.
|
||||
*/
|
||||
COMMON_PORTABLE
|
||||
size_t __cilkrts_size_of_bucket(int bucket) cilk_nothrow;
|
||||
|
||||
/**
|
||||
* Initialize the global memory cache.
|
||||
*
|
||||
* @param g The global state.
|
||||
*/
|
||||
COMMON_PORTABLE
|
||||
void __cilkrts_frame_malloc_global_init(global_state_t *g);
|
||||
|
||||
__CILKRTS_END_EXTERN_C
|
||||
|
||||
#endif // ! defined(INCLUDED_FRAME_MALLOC_DOT_H)
|
181
libcilkrts/runtime/full_frame.c
Normal file
181
libcilkrts/runtime/full_frame.c
Normal file
|
@ -0,0 +1,181 @@
|
|||
/* full_frame.c -*-C++-*-
|
||||
*
|
||||
*************************************************************************
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2010-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
#include "full_frame.h"
|
||||
#include "stats.h"
|
||||
#include "os.h"
|
||||
#include "bug.h"
|
||||
#include "jmpbuf.h"
|
||||
#include "frame_malloc.h"
|
||||
|
||||
COMMON_PORTABLE
|
||||
full_frame *__cilkrts_make_full_frame(__cilkrts_worker *w,
|
||||
__cilkrts_stack_frame *sf)
|
||||
{
|
||||
full_frame *ff;
|
||||
|
||||
START_INTERVAL(w, INTERVAL_ALLOC_FULL_FRAME) {
|
||||
ff = (full_frame *)__cilkrts_frame_malloc(w, sizeof(*ff));
|
||||
__cilkrts_mutex_init(&ff->lock);
|
||||
|
||||
ff->full_frame_magic_0 = FULL_FRAME_MAGIC_0;
|
||||
ff->join_counter = 0;
|
||||
ff->parent = 0;
|
||||
ff->rightmost_child = 0;
|
||||
ff->left_sibling = ff->right_sibling = 0;
|
||||
ff->call_stack = sf;
|
||||
ff->is_call_child = 0;
|
||||
ff->simulated_stolen = 0;
|
||||
ff->children_reducer_map = ff->right_reducer_map = 0;
|
||||
ff->pending_exception =
|
||||
ff->child_pending_exception =
|
||||
ff->right_pending_exception = NULL;
|
||||
|
||||
ff->sync_sp = 0;
|
||||
#ifdef _WIN32
|
||||
ff->exception_sp = 0;
|
||||
ff->trylevel = (unsigned long)-1;
|
||||
ff->registration = 0;
|
||||
#endif
|
||||
ff->frame_size = 0;
|
||||
ff->fiber_self = 0;
|
||||
ff->fiber_child = 0;
|
||||
|
||||
ff->sync_master = 0;
|
||||
|
||||
/*__cilkrts_init_full_frame_sysdep(w, ff);*/
|
||||
ff->full_frame_magic_1 = FULL_FRAME_MAGIC_1;
|
||||
} STOP_INTERVAL(w, INTERVAL_ALLOC_FULL_FRAME);
|
||||
return ff;
|
||||
}
|
||||
|
||||
COMMON_PORTABLE void __cilkrts_put_stack(full_frame *ff,
|
||||
__cilkrts_stack_frame *sf)
|
||||
{
|
||||
/* When suspending frame ff prior to stealing it, __cilkrts_put_stack is
|
||||
* used to store the stack pointer for eventual sync. When suspending
|
||||
* frame ff prior to a sync, __cilkrts_put_stack is called to re-establish
|
||||
* the sync stack pointer, offsetting it by any change in the stack depth
|
||||
* that occured between the spawn and the sync.
|
||||
* Although it is not usually meaningful to add two pointers, the value of
|
||||
* ff->sync_sp at the time of this call is really an integer, not a
|
||||
* pointer.
|
||||
*/
|
||||
ptrdiff_t sync_sp_i = (ptrdiff_t) ff->sync_sp;
|
||||
char* sp = (char*) __cilkrts_get_sp(sf);
|
||||
|
||||
ff->sync_sp = sp + sync_sp_i;
|
||||
|
||||
DBGPRINTF("%d- __cilkrts_put_stack - adjust (+) sync "
|
||||
"stack of full frame %p (+sp: %p) to %p\n",
|
||||
__cilkrts_get_tls_worker()->self, ff, sp, ff->sync_sp);
|
||||
}
|
||||
|
||||
COMMON_PORTABLE void __cilkrts_take_stack(full_frame *ff, void *sp)
{
    /* Called when frame ff is resumed.  After a steal, this subtracts the
     * new stack pointer from the stored one and keeps the resulting offset
     * in ff->sync_sp.  After a sync, the new stack pointer is subtracted
     * from itself, which leaves ff->sync_sp at zero (null).
     *
     * The two pointers are not part of the same contiguous chunk of memory,
     * but the flat memory model lets us subtract them and obtain a usable
     * offset, which is then stored back in pointer form.
     */
    char *new_sp = (char *) sp;
    ptrdiff_t offset = ff->sync_sp - new_sp;

    ff->sync_sp = (char *) offset;

    DBGPRINTF("%d- __cilkrts_take_stack - adjust (-) sync "
              "stack of full frame %p to %p (-sp: %p)\n",
              __cilkrts_get_tls_worker()->self, ff, ff->sync_sp, sp);
}
|
||||
|
||||
COMMON_PORTABLE void __cilkrts_adjust_stack(full_frame *ff, size_t size)
{
    /* __cilkrts_adjust_stack() is used to deallocate a Variable Length
     * Array by adding its size to ff->sync_sp.
     *
     * ff->sync_sp is the value maintained by __cilkrts_put_stack() and
     * __cilkrts_take_stack(); this function only moves it forward by `size`
     * bytes and does not otherwise interpret it.
     */
    ff->sync_sp = ff->sync_sp + size;

    /* `size` is a size_t, so it must not be passed for a plain "%x"
     * (unsigned int) conversion.  It is cast to unsigned long and printed
     * with "%lx", which does not rely on C99 "%zx" support in the
     * underlying printf.
     */
    DBGPRINTF("%d- __cilkrts_adjust_stack - adjust (+) sync "
              "stack of full frame %p to %p (+ size: 0x%lx)\n",
              __cilkrts_get_tls_worker()->self, ff, ff->sync_sp,
              (unsigned long) size);
}
|
||||
|
||||
COMMON_PORTABLE
|
||||
void __cilkrts_destroy_full_frame(__cilkrts_worker *w, full_frame *ff)
|
||||
{
|
||||
validate_full_frame(ff);
|
||||
CILK_ASSERT(ff->children_reducer_map == 0);
|
||||
CILK_ASSERT(ff->right_reducer_map == 0);
|
||||
CILK_ASSERT(NULL == ff->pending_exception);
|
||||
CILK_ASSERT(NULL == ff->child_pending_exception);
|
||||
CILK_ASSERT(NULL == ff->right_pending_exception);
|
||||
__cilkrts_mutex_destroy(w, &ff->lock);
|
||||
__cilkrts_frame_free(w, ff, sizeof(*ff));
|
||||
}
|
||||
|
||||
COMMON_PORTABLE void validate_full_frame(full_frame *ff)
{
    /* Debugging aid: both magic numbers stamped at creation time must
     * still be intact; otherwise the runtime state is considered corrupt.
     */
    if (ff->full_frame_magic_0 == FULL_FRAME_MAGIC_0 &&
        ff->full_frame_magic_1 == FULL_FRAME_MAGIC_1)
        return;

    abort_because_rts_is_corrupted();
}
|
||||
|
||||
/* Acquire the lock of full frame FF on behalf of worker W.  The frame's
 * magic numbers are checked first.
 */
void __cilkrts_frame_lock(__cilkrts_worker *w, full_frame *ff)
{
    validate_full_frame(ff);

    __cilkrts_mutex_lock(w, &ff->lock);
}
|
||||
|
||||
/* Release the lock of full frame FF on behalf of worker W. */
void __cilkrts_frame_unlock(__cilkrts_worker *w, full_frame *ff)
{
    __cilkrts_mutex_unlock(w, &ff->lock);
}
|
||||
|
||||
/* End full_frame.c */
|
493
libcilkrts/runtime/full_frame.h
Normal file
493
libcilkrts/runtime/full_frame.h
Normal file
|
@ -0,0 +1,493 @@
|
|||
/* full_frame.h -*-C++-*-
|
||||
*
|
||||
*************************************************************************
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2009-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
**************************************************************************/
|
||||
|
||||
#ifndef INCLUDED_FULL_FRAME_DOT_H
|
||||
#define INCLUDED_FULL_FRAME_DOT_H
|
||||
|
||||
|
||||
#include "rts-common.h"
|
||||
#include "worker_mutex.h"
|
||||
|
||||
#include <cilk/common.h>
|
||||
#include <internal/abi.h>
|
||||
#include <stddef.h>
|
||||
#include "cilk_fiber.h"
|
||||
|
||||
__CILKRTS_BEGIN_EXTERN_C
|
||||
|
||||
/** Magic numbers for full_frame, used for debugging */
|
||||
typedef unsigned long long ff_magic_t;
|
||||
|
||||
/* COMMON_SYSDEP */ struct pending_exception_info; /* opaque */
|
||||
|
||||
/*************************************************************
|
||||
Full frames
|
||||
*************************************************************/
|
||||
|
||||
/**
|
||||
* @file full_frame.h
|
||||
* @brief A full frame includes additional information such as a join
|
||||
* counter and parent frame.
|
||||
* @defgroup FullFrames Full Frames
|
||||
* A full frame includes additional information such as a join
|
||||
* counter and parent frame.
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* Convenience typedef so we don't have to specify "struct full_frame"
|
||||
* all over the code. Putting it before the structure definition allows
|
||||
* us to use the typedef within the structure itself
|
||||
*/
|
||||
typedef struct full_frame full_frame;
|
||||
|
||||
/**
|
||||
* @brief A full frame includes additional information such as a join
|
||||
* counter and parent frame.
|
||||
*
|
||||
* The frame at the top of a worker's stack is promoted into a "full"
|
||||
* frame, which carries additional information, such as join counter
|
||||
* and parent frame. Full frames can be suspended at a sync, in which
|
||||
* case they lie somewhere in memory and do not belong to any
|
||||
* worker.
|
||||
*
|
||||
* Full frames are in contrast to the entries in the worker's deque which
|
||||
* are only represented by a pointer to their __cilkrts_stack_frame.
|
||||
*
|
||||
* At any instant, we say that a full frame ff is either "suspended",
|
||||
* or "owned" by some worker w.
|
||||
*
|
||||
* More precisely, we say that a worker w owns a frame ff under one of
|
||||
* the following conditions:
|
||||
*
|
||||
* 1. Creation: Worker w has just created ff, but not yet linked ff
|
||||
* into the tree of full frames. This situation can occur when a
|
||||
* worker is unrolling a call stack to promote a
|
||||
* __cilkrts_stack_frame to a full_frame.
|
||||
* 2. Executing frame: We have w->l->frame_ff == ff, i.e., ff is the
|
||||
* currently executing frame for w.
|
||||
* 3. Next frame: We have w->l->next_frame_ff == ff, i.e., ff is the
|
||||
* next frame that w is about to execute.
|
||||
* 4. Resume execution: Worker w has popped ff from
|
||||
* w->l->next_frame_ff, and is about to resume execution of ff.
|
||||
* 5. Dying leaf: Worker w has finished executing a frame ff
|
||||
* that is a leaf of the tree of full frames, and is in the process
|
||||
* of unlinking "ff" from the tree.
|
||||
*
|
||||
* Otherwise, the frame ff is suspended, and has no owner.
|
||||
* Note that work-stealing changes the owner of a full frame from the
|
||||
* victim to the thief.
|
||||
*
|
||||
* Using this notion of ownership, we classify the fields of a full
|
||||
* frame into one of several categories:
|
||||
*
|
||||
* 1. Local:
|
||||
* These fields are accessed only by the owner of the full frame.
|
||||
* Because a frame can have only one owner at a time, these fields
|
||||
* can be modified without any (additional) locking or
|
||||
* synchronization, assuming the correct synchronization for
|
||||
* changing the ownership of full frame (e.g., on a successful
|
||||
* steal) is already in place.
|
||||
*
|
||||
* 2. Constant (i.e., read-only):
|
||||
* This field is constant for the lifetime of the full frame.
|
||||
* No locks are needed to access this field.
|
||||
* Technically, a field could be read-only and local, but we assume
|
||||
* it is shared.
|
||||
*
|
||||
* 3. Self-locked:
|
||||
* To access this field in the frame ff, a worker should acquire
|
||||
* the lock on ff.
|
||||
* A self-locked field is conceptually "shared" between the worker
|
||||
* that owns frame ff (which is a child) and the worker that
|
||||
* owns the frame ff->parent (which is the parent of ff).
|
||||
*
|
||||
* 4. Parent-locked:
|
||||
* To access this field in the frame ff, a worker should
|
||||
* acquire the lock on ff->parent.
|
||||
* A parent-locked field is conceptually "shared" between the worker
|
||||
* that owns frame ff, and a worker that either owns the
|
||||
* parent frame (ff->parent) or owns a sibling frame of ff (i.e.,
|
||||
* any child of ff->parent).
|
||||
*
|
||||
* 5. Synchronization
|
||||
* A field used explicitly for synchronization (i.e., locks).
|
||||
*/
|
||||
|
||||
/* COMMON_PORTABLE */
|
||||
struct full_frame
|
||||
{
|
||||
/**
|
||||
* Value to detect writes off the beginning of a full_frame.
|
||||
*/
|
||||
# define FULL_FRAME_MAGIC_0 ((ff_magic_t)0x361e710b9597d553ULL)
|
||||
|
||||
/**
|
||||
* Field to detect writes off the beginning of a full_frame. Must be
|
||||
* FULL_FRAME_MAGIC_0.
|
||||
* [constant]
|
||||
*/
|
||||
ff_magic_t full_frame_magic_0;
|
||||
|
||||
/**
|
||||
* Used to serialize access to this full_frame
|
||||
* [synchronization]
|
||||
*/
|
||||
struct mutex lock;
|
||||
|
||||
/**
|
||||
* Count of outstanding children running in parallel
|
||||
* [self-locked]
|
||||
*/
|
||||
int join_counter;
|
||||
|
||||
/**
|
||||
* If TRUE: frame was called by the parent.
|
||||
* If FALSE: frame was spawned by parent.
|
||||
* [constant]
|
||||
*/
|
||||
int is_call_child;
|
||||
|
||||
/**
|
||||
* TRUE if this frame is the loot of a simulated steal.
|
||||
*
|
||||
* This situation never happens in normal execution. However,
|
||||
* when running under cilkscreen, a worker may promote frames and
|
||||
* then immediately suspend them, in order to simulate an
|
||||
* execution on an infinite number of processors where all spawns
|
||||
* are stolen. In this case, the frame is marked as the loot of a fake
|
||||
* steal.
|
||||
* [local]
|
||||
*/
|
||||
int simulated_stolen;
|
||||
|
||||
/**
|
||||
* Caller of this full_frame
|
||||
* [constant]
|
||||
*/
|
||||
full_frame *parent;
|
||||
|
||||
/**
|
||||
* Doubly-linked list of children. The serial execution order is
|
||||
* by definition from left to right. Because of how we do work
|
||||
* stealing, the parent is always to the right of all its
|
||||
* children.
|
||||
*
|
||||
* For a frame ff, we lock the ff->parent to follow the sibling
|
||||
* links for ff.
|
||||
*
|
||||
* [parent-locked]
|
||||
*/
|
||||
full_frame *left_sibling;
|
||||
|
||||
/**
|
||||
* @copydoc left_sibling
|
||||
*/
|
||||
full_frame *right_sibling;
|
||||
|
||||
/**
|
||||
* Pointer to rightmost child
|
||||
*
|
||||
* [self-locked]
|
||||
*/
|
||||
full_frame *rightmost_child;
|
||||
|
||||
/**
|
||||
* Call stack associated with this frame.
|
||||
* Set and reset in make_unrunnable and make_runnable
|
||||
*
|
||||
* [self-locked]
|
||||
*/
|
||||
__cilkrts_stack_frame *call_stack;
|
||||
|
||||
/**
|
||||
* Accumulated reducers of children
|
||||
*
|
||||
* [self-locked]
|
||||
*/
|
||||
struct cilkred_map *children_reducer_map;
|
||||
|
||||
/**
|
||||
* Accumulated reducers of right siblings that have already
|
||||
* terminated
|
||||
*
|
||||
* [parent-locked]
|
||||
*/
|
||||
struct cilkred_map *right_reducer_map;
|
||||
|
||||
/**
|
||||
* Exception that needs to be passed to our parent
|
||||
*
|
||||
* [local]
|
||||
*
|
||||
* TBD: verify that the exception code satisfies this requirement.
|
||||
*/
|
||||
struct pending_exception_info *pending_exception;
|
||||
|
||||
/**
|
||||
* Exception from one of our children
|
||||
*
|
||||
* [self-locked]
|
||||
*/
|
||||
struct pending_exception_info *child_pending_exception;
|
||||
|
||||
/**
|
||||
* Exception from any right siblings
|
||||
*
|
||||
* [parent-locked]
|
||||
*/
|
||||
struct pending_exception_info *right_pending_exception;
|
||||
|
||||
/**
|
||||
* Stack pointer to restore on sync.
|
||||
* [local]
|
||||
*/
|
||||
char *sync_sp;
|
||||
|
||||
#ifdef _WIN32
|
||||
/**
|
||||
* Stack pointer to restore on exception.
|
||||
* [local]
|
||||
*/
|
||||
char *exception_sp;
|
||||
|
||||
/**
|
||||
* Exception trylevel at steal
|
||||
* [local]
|
||||
*
|
||||
* TBD: this field is set but not read?
|
||||
*/
|
||||
unsigned long trylevel;
|
||||
|
||||
/**
|
||||
* Exception registration head pointer to restore on sync.
|
||||
* [local]
|
||||
*/
|
||||
unsigned long registration;
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Size of frame to match sync sp
|
||||
* [local]
|
||||
* TBD: obsolete field only used in debugging?
|
||||
*/
|
||||
ptrdiff_t frame_size;
|
||||
|
||||
/**
|
||||
* Allocated fibers that need to be freed. The fibers work
|
||||
* like a reducer. The leftmost frame may have @c fiber_self
|
||||
* null and owner non-null.
|
||||
*
|
||||
* [local]
|
||||
* TBD: verify exception code satisfies this requirement.
|
||||
*/
|
||||
cilk_fiber *fiber_self;
|
||||
|
||||
/**
|
||||
* Allocated fibers that need to be freed. The fibers work
|
||||
* like a reducer. The leftmost frame may have @c fiber_self
|
||||
* null and owner non-null.
|
||||
*
|
||||
* [self-locked]
|
||||
*/
|
||||
cilk_fiber *fiber_child;
|
||||
|
||||
/**
|
||||
* If the sync_master is set, this function can only be sync'd by the team
|
||||
* leader, who first entered Cilk. This is set by the first worker to steal
|
||||
* from the user worker.
|
||||
*
|
||||
* [self-locked]
|
||||
*/
|
||||
__cilkrts_worker *sync_master;
|
||||
|
||||
/**
|
||||
* Value to detect writes off the end of a full_frame.
|
||||
*/
|
||||
# define FULL_FRAME_MAGIC_1 ((ff_magic_t)0x189986dcc7aee1caULL)
|
||||
|
||||
/**
|
||||
* Field to detect writes off the end of a full_frame. Must be
|
||||
* FULL_FRAME_MAGIC_1.
|
||||
*
|
||||
* [constant]
|
||||
*/
|
||||
ff_magic_t full_frame_magic_1;
|
||||
};
|
||||
|
||||
/* The functions __cilkrts_put_stack and __cilkrts_take_stack keep track of
|
||||
* changes in the stack's depth between the point at which a frame is
|
||||
* stolen and when it is resumed at a sync. A stolen frame typically goes
|
||||
* through the following phase changes:
|
||||
*
|
||||
* 1. Suspend frame while stealing it.
|
||||
* 2. Resume stolen frame at beginning of continuation
|
||||
* 3. Suspend stolen frame at a sync
|
||||
* 4. Resume frame (no longer marked stolen) after the sync
|
||||
*
|
||||
* When the frame is suspended (steps 1 and 3), __cilkrts_put_stack is called to
|
||||
* establish the stack pointer for the sync. When the frame is resumed (steps
|
||||
* 2 and 4), __cilkrts_take_stack is called to indicate the stack pointer
|
||||
* (which may be on a different stack) at
|
||||
* the point of resume. If the stack pointer changes between steps 2 and 3,
|
||||
* e.g., as a result of pushing 4 bytes onto the stack,
|
||||
* the offset is reflected in the value of ff->sync_sp after step 3 relative to
|
||||
* its value after step 1 (e.g., the value of ff->sync_sp after step 3 would be
|
||||
* 4 less than its value after step 1, for a down-growing stack).
|
||||
*
|
||||
* Imp detail: The actual call chains for each of these phase-change events is:
|
||||
*
|
||||
* 1. unroll_call_stack -> make_unrunnable -> __cilkrts_put_stack
|
||||
* 2. do_work -> __cilkrts_resume -> __cilkrts_take_stack
|
||||
* 3. do_sync -> disown -> make_unrunnable -> __cilkrts_put_stack
|
||||
* 4. __cilkrts_resume -> __cilkrts_take_stack
|
||||
*
|
||||
* (The above is a changeable implementation detail. The resume sequence, in
|
||||
* particular, is more complex on some operating systems.)
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Records the stack pointer within the @c sf stack frame as the
|
||||
* current stack pointer at the point of suspending full frame @c ff.
|
||||
*
|
||||
* @pre @c ff->sync_sp must be either null or contain the result of a prior call to
|
||||
* @c __cilkrts_take_stack().
|
||||
* @pre If @c ff->sync_sp is not null, then @c SP(sf) must refer to the same stack as
|
||||
* the @c sp argument to the prior call to @c __cilkrts_take_stack().
|
||||
*
|
||||
|
||||
* @post If @c ff->sync_sp was null before the call, then @c
|
||||
* ff->sync_sp will be set to @c SP(sf).
|
||||
* @post Otherwise, @c ff->sync_sp will be restored to the value it had just prior
|
||||
* to the last call to @c __cilkrts_take_stack(), except offset by any change
|
||||
* in the stack pointer between the call to @c __cilkrts_take_stack() and
|
||||
* this call to @c __cilkrts_put_stack().
|
||||
*
|
||||
* @param ff The full frame that is being suspended.
|
||||
* @param sf The @c __cilkrts_stack_frame that is being suspended. The stack
|
||||
* pointer will be taken from the jmpbuf contained within this
|
||||
* @c __cilkrts_stack_frame.
|
||||
*/
|
||||
COMMON_PORTABLE void __cilkrts_put_stack(full_frame *ff,
|
||||
__cilkrts_stack_frame *sf);
|
||||
|
||||
/**
|
||||
* @brief Records the stack pointer @c sp as the stack pointer at the point of
|
||||
* resuming execution on full frame @c ff.
|
||||
*
|
||||
* The value of @c sp may be on a different stack than the original
|
||||
* value recorded for the stack pointer using __cilkrts_put_stack().
|
||||
*
|
||||
* @pre @c ff->sync_sp must contain a value set by @c __cilkrts_put_stack().
|
||||
*
|
||||
* @post @c ff->sync_sp contains an *integer* value used to compute a change in the
|
||||
* stack pointer upon the next call to @c __cilkrts_put_stack().
|
||||
* @post If @c sp equals @c ff->sync_sp, then @c ff->sync_sp is set to null.
|
||||
*
|
||||
* @param ff The full frame that is being resumed.
|
||||
* @param sp The stack pointer for the stack the function is being resumed on.
|
||||
*/
|
||||
COMMON_PORTABLE void __cilkrts_take_stack(full_frame *ff, void *sp);
|
||||
|
||||
/*
|
||||
* @brief Adjust the stack to deallocate a Variable Length Array
|
||||
*
|
||||
* @param ff The full frame that is being adjusted.
|
||||
* @param size The size of the array being deallocated from the stack
|
||||
*/
|
||||
COMMON_PORTABLE void __cilkrts_adjust_stack(full_frame *ff, size_t size);
|
||||
|
||||
/**
|
||||
* @brief Allocates and initializes a full_frame.
|
||||
*
|
||||
* @param w The memory for the full_frame will be allocated out of the
|
||||
* worker's pool.
|
||||
* @param sf The @c __cilkrts_stack_frame which will be saved as the call_stack
|
||||
* for this full_frame.
|
||||
*
|
||||
* @return The newly allocated and initialized full_frame.
|
||||
*/
|
||||
COMMON_PORTABLE
|
||||
full_frame *__cilkrts_make_full_frame(__cilkrts_worker *w,
|
||||
__cilkrts_stack_frame *sf);
|
||||
|
||||
/**
|
||||
* @brief Deallocates a full_frame.
|
||||
*
|
||||
* @param w The memory for the full_frame will be returned to the worker's pool.
|
||||
* @param ff The full_frame to be deallocated.
|
||||
*/
|
||||
COMMON_PORTABLE
|
||||
void __cilkrts_destroy_full_frame(__cilkrts_worker *w, full_frame *ff);
|
||||
|
||||
/**
|
||||
* @brief Performs sanity checks to check the integrity of a full_frame.
|
||||
*
|
||||
* @param ff The full_frame to be validated.
|
||||
*/
|
||||
COMMON_PORTABLE void validate_full_frame(full_frame *ff);
|
||||
|
||||
/**
|
||||
* @brief Locks the mutex contained in a full_frame.
|
||||
*
|
||||
* The full_frame is validated before the runtime attempts to lock it.
|
||||
*
|
||||
* @post @c ff->lock will be owned by @c w.
|
||||
*
|
||||
* @param w The worker that will own the full_frame. If the runtime is
|
||||
* collecting stats, the intervals will be attributed to the worker.
|
||||
* @param ff The full_frame containing the mutex to be locked.
|
||||
*/
|
||||
COMMON_PORTABLE void __cilkrts_frame_lock(__cilkrts_worker *w,
|
||||
full_frame *ff);
|
||||
|
||||
/**
|
||||
* @brief Unlocks the mutex contained in a full_frame.
|
||||
*
|
||||
* @pre @c ff->lock must be owned by @c w.
|
||||
*
|
||||
* @param w The worker that currently owns the full_frame.
|
||||
* @param ff The full_frame containing the mutex to be unlocked.
|
||||
*/
|
||||
COMMON_PORTABLE void __cilkrts_frame_unlock(__cilkrts_worker *w,
|
||||
full_frame *ff);
|
||||
/** @} */
|
||||
|
||||
__CILKRTS_END_EXTERN_C
|
||||
|
||||
#endif // ! defined(INCLUDED_FULL_FRAME_DOT_H)
|
628
libcilkrts/runtime/global_state.cpp
Normal file
628
libcilkrts/runtime/global_state.cpp
Normal file
|
@ -0,0 +1,628 @@
|
|||
/* global_state.cpp -*-C++-*-
|
||||
*
|
||||
*************************************************************************
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2009-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
**************************************************************************/
|
||||
|
||||
#include "global_state.h"
|
||||
#include "os.h"
|
||||
#include "bug.h"
|
||||
#include "metacall_impl.h"
|
||||
#include "stats.h"
|
||||
#include "cilk/cilk_api.h"
|
||||
#include "cilk_malloc.h"
|
||||
#include "record-replay.h"
|
||||
|
||||
#include <algorithm> // For max()
|
||||
#include <cstring>
|
||||
#include <cstdlib>
|
||||
#include <climits>
|
||||
#include <cerrno>
|
||||
|
||||
#ifdef _WIN32
|
||||
# include <wchar.h>
|
||||
#endif
|
||||
|
||||
// TBD: There is a race when multiple threads try to initialize the
|
||||
// user_settable_values??
|
||||
//
|
||||
// Set to true if the user settable values portion of the global state
|
||||
// singleton is initialized, even if the rest of the singleton is not
|
||||
// initialized.
|
||||
int cilkg_user_settable_values_initialized = false;
|
||||
|
||||
namespace {
|
||||
|
||||
// Single copy of the global state. Zero-filled until
|
||||
// cilkg_get_user_settable_values() is called and partially-zero-filled until
|
||||
// cilkg_init_global_state() is called. The first field is filled in with
|
||||
// the size of a void* for the debugger and must be valid before initialization
|
||||
global_state_t global_state_singleton =
|
||||
{
|
||||
sizeof(void *), // addr_size
|
||||
};
|
||||
|
||||
|
||||
// Variables that need to export C-style names
|
||||
extern "C"
|
||||
{
|
||||
// Pointer to the global state singleton.
|
||||
global_state_t *cilkg_singleton_ptr = NULL;
|
||||
|
||||
// __cilkrts_global_state is exported and referenced by the debugger.
|
||||
// The debugger expects it to be valid when the module loads.
|
||||
// CILK_EXPORT_DATA
|
||||
global_state_t *__cilkrts_global_state = &global_state_singleton;
|
||||
}
|
||||
|
||||
// Compares two null-terminated narrow strings and reports whether they are
// character-for-character identical. Both pointers are handed straight to
// strcmp, so neither may be null.
inline bool strmatch(const char* a, const char* b)
{
    const int order = std::strcmp(a, b);
    return order == 0;
}
|
||||
|
||||
// Returns the integer value represented by the null-terminated string at 's'.
// The base is auto-detected by strtol (decimal, "0x" hexadecimal, leading-"0"
// octal).
//
// 'errno' is cleared before parsing. On return it is:
//   - 0      if a number was parsed,
//   - ERANGE if the number does not fit in a long (as reported by strtol),
//   - EINVAL if 's' contains no digits at all; 0 is returned in that case.
inline long to_long(const char* s)
{
    char *end = 0;

    errno = 0;
    const long result = std::strtol(s, &end, 0);

    // strtol is not required to set errno when nothing could be converted,
    // so flag that case explicitly. Otherwise a non-numeric string would be
    // indistinguishable from a literal "0" for callers that inspect errno.
    if (end == s)
        errno = EINVAL;

    return result;
}
|
||||
|
||||
#ifdef _WIN32
|
||||
// Compares two null-terminated wide-character strings and reports whether
// they are character-for-character identical.
inline bool strmatch(const wchar_t* a, const wchar_t* b)
{
    const int order = wcscmp(a, b);
    return order == 0;
}
|
||||
|
||||
// Returns true if the multi-byte character string at 'a' represents the same
|
||||
// character sequence as the wide-character string at 'b'. The behavior is
|
||||
// undefined if 'a' contains more than 30 multi-byte characters.
|
||||
bool strmatch(const char* a, const wchar_t* b)
|
||||
{
|
||||
// Convert 'a' to wide-characters, then compare.
|
||||
wchar_t wa[31];
|
||||
std::size_t count;
|
||||
errno_t err = mbstowcs_s(&count, wa, a, 30);
|
||||
CILK_ASSERT(0 == err);
|
||||
if (err) return false;
|
||||
return strmatch(wa, b);
|
||||
}
|
||||
|
||||
// Returns true if the wide-character string at 'a' represents the same
|
||||
// character sequence as the multi-byte character string at 'b'. The behavior
|
||||
// id undefined if 'b' contains more than 30 multi-byte characters.
|
||||
inline
|
||||
bool strmatch(const wchar_t* a, const char* b)
|
||||
{
|
||||
return strmatch(b, a);
|
||||
}
|
||||
|
||||
|
||||
// Returns the integer value represented by the null-terminated wide-char
// string at 's'. The base is auto-detected by wcstol (decimal, "0x"
// hexadecimal, leading-"0" octal).
//
// 'errno' is cleared before parsing. On return it is:
//   - 0      if a number was parsed,
//   - ERANGE if the number does not fit in a long (as reported by wcstol),
//   - EINVAL if 's' contains no digits at all; 0 is returned in that case.
inline long to_long(const wchar_t* s)
{
    wchar_t *end = 0;

    errno = 0;
    const long result = wcstol(s, &end, 0);

    // wcstol is not required to set errno when nothing could be converted,
    // so flag that case explicitly. Otherwise a non-numeric string would be
    // indistinguishable from a literal L"0" for callers that inspect errno.
    if (end == s)
        errno = EINVAL;

    return result;
}
|
||||
#endif
|
||||
|
||||
// Check if Cilkscreen or other sequential ptool wants to force reducers.
|
||||
bool always_force_reduce()
|
||||
{
|
||||
// Metacall *looks* like a no-op. volatile needed to keep compiler from
|
||||
// optimizing away variable.
|
||||
volatile char not_force_reduce = '\377';
|
||||
__cilkrts_metacall(METACALL_TOOL_SYSTEM, HYPER_ZERO_IF_FORCE_REDUCE,
|
||||
const_cast<char*>(¬_force_reduce));
|
||||
return ! not_force_reduce;
|
||||
}
|
||||
|
||||
// Stores the boolean value represented by the null-terminated string at 'val'
|
||||
// into the integer object at 'out'. Returns '__CILKRTS_SET_PARAM_SUCCESS' if
|
||||
// 'val' is "true", "false", "0" or "1" and '__CILKRTS_SET_PARAM_INVALID'
|
||||
// otherwise.
|
||||
template <typename INT_T, typename CHAR_T>
|
||||
int store_bool(INT_T *out, const CHAR_T *val)
|
||||
{
|
||||
static const char* const s_zero = "0";
|
||||
static const char* const s_one = "1";
|
||||
static const char* const s_true = "true";
|
||||
static const char* const s_false = "false";
|
||||
|
||||
if (val == 0)
|
||||
return __CILKRTS_SET_PARAM_INVALID;
|
||||
|
||||
if (strmatch(s_false, val) || strmatch(s_zero, val)) {
|
||||
*out = 0;
|
||||
return __CILKRTS_SET_PARAM_SUCCESS;
|
||||
}
|
||||
|
||||
if (strmatch(s_true, val) || strmatch(s_one, val)) {
|
||||
*out = 1;
|
||||
return __CILKRTS_SET_PARAM_SUCCESS;
|
||||
}
|
||||
|
||||
return __CILKRTS_SET_PARAM_INVALID;
|
||||
}
|
||||
|
||||
// Stores the integer value represented by the null-terminated string at 'val'
|
||||
// into the integer object at 'out', restricting the result to the range 'min'
|
||||
// to 'max', inclusive. Returns '__CILKRTS_SET_PARAM_SUCCESS' if the conversion
|
||||
// succeeds and is in range, '__CILKRTS_SET_PARAM_XRANGE' if the conversion
|
||||
// succeeds but is out of range, and '__CILKRTS_SET_PARAM_INVALID' otherwise. In
|
||||
// the case of any error, '*out' is unchanged.
|
||||
template <typename INT_T, typename CHAR_T>
|
||||
int store_int(INT_T *out, const CHAR_T *val, INT_T min, INT_T max)
|
||||
{
|
||||
errno = 0;
|
||||
long val_as_long = to_long(val);
|
||||
if (val_as_long == 0 && errno != 0)
|
||||
return __CILKRTS_SET_PARAM_INVALID;
|
||||
if (val_as_long < min || val_as_long == LONG_MIN)
|
||||
return __CILKRTS_SET_PARAM_XRANGE;
|
||||
else if (val_as_long > max || val_as_long == LONG_MAX)
|
||||
return __CILKRTS_SET_PARAM_XRANGE;
|
||||
|
||||
*out = val_as_long;
|
||||
return __CILKRTS_SET_PARAM_SUCCESS;
|
||||
}
|
||||
|
||||
// Implementaton of cilkg_set_param templatized on character type.
|
||||
// Windows will instantiate with both char and wchar_t.
|
||||
// Note that g must have its user settable values set, but need not be fully
|
||||
// initialized.
|
||||
template <class CHAR_T>
|
||||
int set_param_imp(global_state_t* g, const CHAR_T* param, const CHAR_T* value)
|
||||
{
|
||||
static const char* const s_force_reduce = "force reduce";
|
||||
static const char* const s_nworkers = "nworkers";
|
||||
static const char* const s_max_user_workers = "max user workers";
|
||||
static const char* const s_local_stacks = "local stacks";
|
||||
static const char* const s_shared_stacks = "shared stacks";
|
||||
static const char* const s_nstacks = "nstacks";
|
||||
static const char* const s_stack_size = "stack size";
|
||||
|
||||
// We must have a parameter and a value
|
||||
if (0 == param)
|
||||
return __CILKRTS_SET_PARAM_INVALID;
|
||||
if (0 == value)
|
||||
return __CILKRTS_SET_PARAM_INVALID;
|
||||
|
||||
if (strmatch(param, s_force_reduce))
|
||||
{
|
||||
// Sets whether we force a reduce operation at every sync. Useful for
|
||||
// debugging reducers. Off by default. Overridden by Cilkscreen
|
||||
//
|
||||
// Documented in cilk_api_<os>.h
|
||||
if (always_force_reduce())
|
||||
// Force reduce is set by cilkscreen. User cannot change it.
|
||||
return __CILKRTS_SET_PARAM_LATE;
|
||||
|
||||
return store_bool(&g->force_reduce, value);
|
||||
}
|
||||
else if (strmatch(param, s_nworkers))
|
||||
{
|
||||
// Set the total number of workers. Overrides count of cores we get
|
||||
// from the OS and the setting of the CILK_NWORKERS environment
|
||||
// variable. Setting to 0 indicates that the default worker count
|
||||
// should be used.
|
||||
//
|
||||
// Documented in cilk_api_<os>.h
|
||||
if (cilkg_singleton_ptr)
|
||||
return __CILKRTS_SET_PARAM_LATE;
|
||||
|
||||
// Fetch the number of cores. There must be at last 1, since we're
|
||||
// executing on *something*, aren't we!?
|
||||
int hardware_cpu_count = __cilkrts_hardware_cpu_count();
|
||||
CILK_ASSERT(hardware_cpu_count > 0);
|
||||
|
||||
int max_cpu_count = 16 * hardware_cpu_count;
|
||||
if (__cilkrts_running_under_sequential_ptool())
|
||||
{
|
||||
hardware_cpu_count = 1;
|
||||
max_cpu_count = 1;
|
||||
}
|
||||
// Allow a value of 0, which means "set to hardware thread count".
|
||||
int ret = store_int(&g->P, value, 0, max_cpu_count);
|
||||
if (0 == g->P)
|
||||
g->P = hardware_cpu_count;
|
||||
return ret;
|
||||
}
|
||||
else if (strmatch(param, s_max_user_workers))
|
||||
{
|
||||
// ** UNDOCUMENTED **
|
||||
//
|
||||
// Sets the number of slots allocated for user worker threads
|
||||
int hardware_cpu_count = __cilkrts_hardware_cpu_count();
|
||||
CILK_ASSERT (hardware_cpu_count > 0);
|
||||
|
||||
return store_int(&g->max_user_workers, value, 1,
|
||||
16 * hardware_cpu_count);
|
||||
}
|
||||
else if (strmatch(param, s_local_stacks))
|
||||
{
|
||||
// ** UNDOCUMENTED **
|
||||
//
|
||||
// Number of stacks we'll hold in the per-worker stack cache. Maximum
|
||||
// value is 42. See __cilkrts_make_global_state for details.
|
||||
return store_int(&g->fiber_pool_size, value, 0, 42);
|
||||
}
|
||||
else if (strmatch(param, s_shared_stacks))
|
||||
{
|
||||
// ** UNDOCUMENTED **
|
||||
//
|
||||
// Maximum number of stacks we'll hold in the global stack
|
||||
// cache. Maximum value is 42. See __cilkrts_make_global_state for
|
||||
// details.
|
||||
return store_int(&g->global_fiber_pool_size, value, 0, 42);
|
||||
}
|
||||
else if (strmatch(param, s_nstacks))
|
||||
{
|
||||
// Sets the maximum number of stacks permitted at one time. If the
|
||||
// runtime reaches this maximum, it will cease to allocate stacks and
|
||||
// the app will lose parallelism. 0 means unlimited. Default is
|
||||
// unlimited. Minimum is twice the number of worker threads, though
|
||||
// that cannot be tested at this time.
|
||||
//
|
||||
// Undocumented at this time, though there are plans to expose it.
|
||||
// The current implentation is for Linux debugging only and is not
|
||||
// robust enough for users.
|
||||
if (cilkg_singleton_ptr)
|
||||
return __CILKRTS_SET_PARAM_LATE;
|
||||
return store_int<unsigned>(&g->max_stacks, value, 0, INT_MAX);
|
||||
}
|
||||
else if (strmatch(param, s_stack_size))
|
||||
{
|
||||
// ** UNDOCUMENTED **
|
||||
//
|
||||
// Sets the size (in bytes) of the stacks that Cilk creates.
|
||||
// Can only be set before the runtime starts.
|
||||
if (cilkg_singleton_ptr)
|
||||
return __CILKRTS_SET_PARAM_LATE;
|
||||
|
||||
// Maximum value that can be parsed is MAX_INT (32-bit).
|
||||
int ret = store_int<size_t>(&g->stack_size, value, 0, INT_MAX);
|
||||
|
||||
// Process the value the user set (or 0 if the user didn't set
|
||||
// anything) into something nice for the current OS. This
|
||||
// processing is done immediately and stored into
|
||||
// g->stack_size so that a call to get stack size will return
|
||||
// the value that the runtime will actually use.
|
||||
g->stack_size = cilkos_validate_stack_size(g->stack_size);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
// If got here, then didn't match any of the strings
|
||||
return __CILKRTS_SET_PARAM_UNIMP;
|
||||
}
|
||||
|
||||
inline
|
||||
int calc_max_user_workers(global_state_t *g)
|
||||
{
|
||||
// If it's been set by the user, give back what we got
|
||||
if (g->max_user_workers > 0)
|
||||
return g->max_user_workers;
|
||||
|
||||
// Calculate it
|
||||
return std::max(3, g->P * 2);
|
||||
}
|
||||
|
||||
} // end unnamed namespace
|
||||
|
||||
__CILKRTS_BEGIN_EXTERN_C
|
||||
|
||||
/**
|
||||
* @brief Returns the global state object. If called for the first time,
|
||||
* initializes the user-settable values in the global state, but does not
|
||||
* initialize the rest of the structure.
|
||||
*/
|
||||
global_state_t* cilkg_get_user_settable_values()
|
||||
{
|
||||
// Environment variable value. More than big enough for a 64-bit signed
|
||||
// integer.
|
||||
char envstr[24];
|
||||
|
||||
// Abbreviating &global_state_singleton as g is not only shorter, it also
|
||||
// facilitates grepping for the string "g->", which appears ubiquitously
|
||||
// in the runtime code.
|
||||
global_state_t* g = &global_state_singleton;
|
||||
|
||||
// TBD: We need synchronization around this loop to prevent
|
||||
// multiple threads from initializing this data.
|
||||
if (! cilkg_user_settable_values_initialized)
|
||||
{
|
||||
size_t len;
|
||||
|
||||
// Preserve stealing disabled since it may have been set by the
|
||||
// debugger
|
||||
int stealing_disabled = g->stealing_disabled;
|
||||
|
||||
// All fields will be zero until set. In particular
|
||||
std::memset(g, 0, sizeof(global_state_t));
|
||||
|
||||
// Fetch the number of cores. There must be at least 1, since we're
|
||||
// executing on *something*, aren't we!?
|
||||
int hardware_cpu_count = __cilkrts_hardware_cpu_count();
|
||||
CILK_ASSERT(hardware_cpu_count > 0);
|
||||
|
||||
bool under_ptool = __cilkrts_running_under_sequential_ptool();
|
||||
if (under_ptool)
|
||||
hardware_cpu_count = 1;
|
||||
|
||||
g->stealing_disabled = stealing_disabled;
|
||||
g->under_ptool = under_ptool;
|
||||
g->force_reduce = 0; // Default Off
|
||||
g->P = hardware_cpu_count; // Defaults to hardware CPU count
|
||||
g->max_user_workers = 0; // 0 unless set by user
|
||||
g->fiber_pool_size = 7; // Arbitrary default
|
||||
|
||||
g->global_fiber_pool_size = 3 * 3* g->P; // Arbitrary default
|
||||
// 3*P was the default size of the worker array (including
|
||||
// space for extra user workers). This parameter was chosen
|
||||
// to match previous versions of the runtime.
|
||||
|
||||
if (4 == sizeof(void *))
|
||||
g->max_stacks = 1200; // Only 1GB on 32-bit machines
|
||||
else
|
||||
g->max_stacks = 2400; // 2GB on 64-bit machines
|
||||
|
||||
// If we have 2400 1MB stacks, that is 2 gb. If we reach this
|
||||
// limit on a single-socket machine, we may have other
|
||||
// problems. Is 2400 too small for large multicore machines?
|
||||
|
||||
// TBD(jsukha, 11/27/2012): I set this limit on stacks to be a
|
||||
// value independent of P. When running on a Xeon Phi with
|
||||
// small values of P, I recall seeing a few microbenchmarks
|
||||
// (e.g., fib) where a limit of 10*P seemed to be
|
||||
// unnecessarily slowing things down.
|
||||
//
|
||||
// That being said, the code has changed sufficiently that
|
||||
// this observation may no longer be true.
|
||||
//
|
||||
// Note: in general, the worst-case number of stacks required
|
||||
// for a Cilk computation with spawn depth "d" on P workers is
|
||||
// O(Pd). Code with unbalanced recursion may run into issues
|
||||
// with this stack usage.
|
||||
|
||||
g->max_steal_failures = 128; // TBD: depend on max_workers?
|
||||
g->stack_size = 0; // 0 unless set by the user
|
||||
|
||||
// Assume no record or replay log for now
|
||||
g->record_replay_file_name = NULL;
|
||||
g->record_or_replay = RECORD_REPLAY_NONE; // set by user
|
||||
|
||||
if (always_force_reduce())
|
||||
g->force_reduce = true;
|
||||
else if (cilkos_getenv(envstr, sizeof(envstr), "CILK_FORCE_REDUCE"))
|
||||
store_bool(&g->force_reduce, envstr);
|
||||
|
||||
if (under_ptool)
|
||||
g->P = 1; // Ignore environment variable if under cilkscreen
|
||||
else if (cilkos_getenv(envstr, sizeof(envstr), "CILK_NWORKERS"))
|
||||
// Set P to environment variable, but limit to no less than 1
|
||||
// and no more than 16 times the number of hardware threads.
|
||||
store_int(&g->P, envstr, 1, 16 * hardware_cpu_count);
|
||||
|
||||
if (cilkos_getenv(envstr, sizeof(envstr), "CILK_MAX_USER_WORKERS"))
|
||||
// Set max_user_workers to environment variable, but limit to no
|
||||
// less than 1 and no more 16 times the number of hardware
|
||||
// threads. If not specified, defaults (somewhat arbitrarily) to
|
||||
// the larger of 3 and twice the number of hardware threads.
|
||||
store_int(&g->max_user_workers, envstr, 1, 16*hardware_cpu_count);
|
||||
|
||||
if (cilkos_getenv(envstr, sizeof(envstr), "CILK_STEAL_FAILURES"))
|
||||
// Set the number of times a worker should fail to steal before
|
||||
// it looks to see whether it should suspend itself.
|
||||
store_int<unsigned>(&g->max_steal_failures, envstr, 1, INT_MAX);
|
||||
|
||||
// Compute the total number of workers to allocate. Subtract one from
|
||||
// nworkers and user workers so that the first user worker isn't
|
||||
// factored in twice.
|
||||
//
|
||||
// total_workers must be computed now to support __cilkrts_get_total_workers
|
||||
g->total_workers = g->P + calc_max_user_workers(g) - 1;
|
||||
|
||||
#ifdef CILK_RECORD_REPLAY
|
||||
// RecordReplay: See if we've been asked to replay a log
|
||||
len = cilkos_getenv(envstr, 0, "CILK_REPLAY_LOG");
|
||||
if (len > 0)
|
||||
{
|
||||
len += 1; // Allow for trailing NUL
|
||||
g->record_or_replay = REPLAY_LOG;
|
||||
g->record_replay_file_name = (char *)__cilkrts_malloc(len);
|
||||
cilkos_getenv(g->record_replay_file_name, len, "CILK_REPLAY_LOG");
|
||||
}
|
||||
|
||||
// RecordReplay: See if we've been asked to record a log
|
||||
len = cilkos_getenv(envstr, 0, "CILK_RECORD_LOG");
|
||||
if (len > 0)
|
||||
{
|
||||
if (RECORD_REPLAY_NONE != g->record_or_replay)
|
||||
cilkos_warning("CILK_RECORD_LOG ignored since CILK_REPLAY_LOG is defined.\n");
|
||||
else
|
||||
{
|
||||
len += 1; // Allow for trailing NUL
|
||||
g->record_or_replay = RECORD_LOG;
|
||||
g->record_replay_file_name = (char *)__cilkrts_malloc(len);
|
||||
cilkos_getenv(g->record_replay_file_name, len, "CILK_RECORD_LOG");
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
cilkg_user_settable_values_initialized = true;
|
||||
}
|
||||
|
||||
return g;
|
||||
}
|
||||
|
||||
int cilkg_calc_total_workers()
|
||||
{
|
||||
global_state_t* g = cilkg_get_user_settable_values();
|
||||
|
||||
// Compute the total number of workers to allocate. Subtract one from
|
||||
// nworkers and user workers so that the first user worker isn't
|
||||
// factored in twice.
|
||||
return g->P + calc_max_user_workers(g) - 1;
|
||||
}
|
||||
|
||||
// Should be called while holding the global lock.
|
||||
global_state_t* cilkg_init_global_state()
|
||||
{
|
||||
if (cilkg_singleton_ptr)
|
||||
return cilkg_singleton_ptr;
|
||||
|
||||
// Get partially-initialized global state.
|
||||
global_state_t* g = cilkg_get_user_settable_values();
|
||||
|
||||
if (g->max_stacks > 0) {
|
||||
|
||||
// nstacks is currently honored on non-Windows systems only.
|
||||
|
||||
// Set an upper bound on the number of stacks that are allocated. If
|
||||
// nstacks is set, each worker gets up to one stack in its cache so that
|
||||
// no one worker can hog all of the free stacks and keep work from being
|
||||
// stolen by the other workers.
|
||||
|
||||
// nstacks corresponds to the number of stacks that will be allocated by
|
||||
// the runtime apart from the initial stack created for each thread by
|
||||
// the system. Therefore, if a user asks for n stacks, and there are
|
||||
// p workers created, the total number of stacks is actually n + p.
|
||||
|
||||
// This feature is primarily for MIC which has flat memory
|
||||
// instead of virtual addresses and tends to run out really quickly.
|
||||
// It is not implemented for Windows and it's non-intuitive
|
||||
// interaction with the local stack cache is specifically to help out
|
||||
// MIC.
|
||||
|
||||
// About max_stacks / P stacks, except we require at least 1
|
||||
// per pool.
|
||||
if (((int)g->max_stacks / g->P) < g->fiber_pool_size)
|
||||
g->fiber_pool_size = g->max_stacks / g->P;
|
||||
|
||||
if (g->fiber_pool_size <= 0) {
|
||||
g->fiber_pool_size = 1;
|
||||
}
|
||||
|
||||
if ((int)g->max_stacks < g->P)
|
||||
g->max_stacks = g->P;
|
||||
|
||||
g->global_fiber_pool_size = g->P * (g->fiber_pool_size+1);
|
||||
}
|
||||
|
||||
// Number of bytes/address - validation for debugger integration
|
||||
g->addr_size = sizeof(void *);
|
||||
|
||||
__cilkrts_init_stats(&g->stats);
|
||||
|
||||
__cilkrts_frame_malloc_global_init(g);
|
||||
|
||||
g->Q = 0;
|
||||
g->total_workers = cilkg_calc_total_workers();
|
||||
g->system_workers = g->P - 1; // system_workers is here for the debugger.
|
||||
g->work_done = 0;
|
||||
g->workers_running = 0;
|
||||
g->ltqsize = 1024; /* FIXME */
|
||||
|
||||
g->stack_size = cilkos_validate_stack_size(g->stack_size);
|
||||
g->failure_to_allocate_stack = 0;
|
||||
|
||||
|
||||
return g;
|
||||
}
|
||||
|
||||
/**
 * @brief Make the global state @a g visible through both
 * __cilkrts_global_state and cilkg_singleton_ptr.
 */
void cilkg_publish_global_state(global_state_t* g)
{
    // TBD: which one of these needs to be executed first?  I say
    // cilkg_singleton_ptr needs to be set last, with a mfence in
    // between, since it is the flag that cilkg_is_published() is
    // checking for.
    __cilkrts_global_state = g;

    __cilkrts_fence();

    cilkg_singleton_ptr = g;
}
|
||||
|
||||
void cilkg_deinit_global_state()
|
||||
{
|
||||
cilkg_singleton_ptr = NULL;
|
||||
__cilkrts_global_state = NULL;
|
||||
}
|
||||
|
||||
int cilkg_is_published(void)
|
||||
{
|
||||
return NULL != cilkg_singleton_ptr;
|
||||
}
|
||||
|
||||
int cilkg_set_param(const char* param, const char* value)
|
||||
{
|
||||
return set_param_imp(cilkg_get_user_settable_values(), param, value);
|
||||
}
|
||||
|
||||
#ifdef _WIN32
/**
 * @brief Wide-character counterpart of cilkg_set_param(); compiled only
 * when _WIN32 is defined.
 *
 * Forwards to set_param_imp() with the user-settable global state.
 */
int cilkg_set_param_w(const wchar_t* param, const wchar_t* value)
{
    global_state_t* g = cilkg_get_user_settable_values();

    return set_param_imp(g, param, value);
}
#endif
|
||||
|
||||
extern "C++" {
|
||||
// C++ scheduler function (that may throw exceptions)
|
||||
typedef void cpp_scheduler_t(__cilkrts_worker *w);
|
||||
}
|
||||
|
||||
void __cilkrts_run_scheduler_with_exceptions(__cilkrts_worker *w)
|
||||
{
|
||||
global_state_t* g = cilkg_get_global_state();
|
||||
CILK_ASSERT(g->scheduler);
|
||||
|
||||
cpp_scheduler_t* scheduler = (cpp_scheduler_t*) g->scheduler;
|
||||
|
||||
try {
|
||||
scheduler(w);
|
||||
} catch (...) {
|
||||
__cilkrts_bug("Exception escaped Cilk context");
|
||||
}
|
||||
}
|
||||
|
||||
__CILKRTS_END_EXTERN_C
|
||||
|
||||
/* End global_state.cpp */
|
417
libcilkrts/runtime/global_state.h
Normal file
417
libcilkrts/runtime/global_state.h
Normal file
|
@ -0,0 +1,417 @@
|
|||
/* global_state.h -*-C++-*-
|
||||
*
|
||||
*************************************************************************
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2009-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
**************************************************************************/
|
||||
|
||||
/**
|
||||
* @file global_state.h
|
||||
*
|
||||
* @brief The global_state_t structure contains most of the global context
|
||||
* maintained by the Intel Cilk runtime.
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_GLOBAL_STATE_DOT_H
|
||||
#define INCLUDED_GLOBAL_STATE_DOT_H
|
||||
|
||||
#include <cilk/common.h>
|
||||
|
||||
#include "frame_malloc.h"
|
||||
#include "stats.h"
|
||||
#include "bug.h"
|
||||
#include "cilk_fiber.h"
|
||||
|
||||
__CILKRTS_BEGIN_EXTERN_C
|
||||
|
||||
/**
|
||||
* Non-null place-holder for a stack handle that has no meaningful value.
|
||||
*/
|
||||
#define PLACEHOLDER_FIBER ((cilk_fiber *) -2)
|
||||
|
||||
/**
 * States for record_or_replay
 */
enum record_replay_t {
    RECORD_REPLAY_NONE,     /**< Not recording or replaying a log */
    RECORD_LOG,             /**< Recording a log for replay later */
    REPLAY_LOG              /**< Replay a log recorded earlier */
};
|
||||
|
||||
/**
|
||||
* @brief The global state is a structure that is shared by all workers in
|
||||
* Cilk.
|
||||
*
|
||||
* Make the structure ready for use by calling
|
||||
* cilkg_init_global_state() and then cilkg_publish_global_state().
|
||||
*
|
||||
* The same global lock should be held while both of these methods are
|
||||
* called. These methods are split because it is useful to execute
|
||||
* other runtime initialization code in between.
|
||||
*
|
||||
* After cilkg_publish_global_state() has completed, Cilk runtime
|
||||
* methods may call cilkg_get_global_state() to look at the published
|
||||
* value without holding the global lock.
|
||||
*
|
||||
* Finally, clean up the global state by calling
|
||||
* cilkg_deinit_global_state(). This method should be called only
|
||||
* after all calls to cilkg_get_global_state() have completed, and
|
||||
* while holding the global lock.
|
||||
*
|
||||
* Before initialization and after deinitialization, the fields in the
|
||||
* global state have unspecified values, except for a few special
|
||||
* fields labeled "USER SETTING", which can be read and written before
|
||||
* initialization and after deinitialization.
|
||||
*/
|
||||
|
||||
struct global_state_t { /* COMMON_PORTABLE */
|
||||
|
||||
/* Fields described as "(fixed)" should not be changed after
|
||||
* initialization.
|
||||
*/
|
||||
|
||||
/*************************************************************************
|
||||
* Note that debugger integration must reach into the
|
||||
* global state! The debugger integration is depending on the
|
||||
* offsets of the addr_size, system_workers, total_workers,
|
||||
* stealing_disabled, sysdep, and workers. If these offsets change, the
|
||||
* debugger integration library will need to be changed to match!!!
|
||||
*************************************************************************/
|
||||
|
||||
int addr_size; ///< Number of bytes for an address, used by debugger (fixed)
|
||||
|
||||
int system_workers; ///< Number of system workers (fixed)
|
||||
|
||||
/**
|
||||
* @brief USER SETTING: Maximum number of user workers that can be
|
||||
* bound to cilk workers.
|
||||
*
|
||||
* 0 unless set by user. Call cilkg_calc_max_user_workers to get
|
||||
* the value.
|
||||
*/
|
||||
int max_user_workers;
|
||||
|
||||
int total_workers; ///< Total number of worker threads allocated (fixed)
|
||||
|
||||
int workers_running; ///< True when system workers have beens started */
|
||||
|
||||
/// Set by debugger to disable stealing (fixed)
|
||||
int stealing_disabled;
|
||||
|
||||
/// System-dependent part of the global state
|
||||
struct global_sysdep_state *sysdep;
|
||||
|
||||
/// Array of worker structures.
|
||||
__cilkrts_worker **workers;
|
||||
|
||||
/******* END OF DEBUGGER-INTEGRATION FIELDS ***************/
|
||||
|
||||
/// Number of frames in each worker's lazy task queue
|
||||
__STDNS size_t ltqsize;
|
||||
|
||||
/**
|
||||
* @brief USER SETTING: Force all possible reductions.
|
||||
*
|
||||
* TRUE if running a p-tool that requires reducers to call the reduce()
|
||||
* method even if no actual stealing occurs.
|
||||
*
|
||||
* When set to TRUE, runtime will simulate steals, forcing calls to the
|
||||
* the reduce() methods of reducers.
|
||||
*
|
||||
*/
|
||||
int force_reduce;
|
||||
|
||||
/// USER SETTING: Per-worker fiber pool size
|
||||
int fiber_pool_size;
|
||||
|
||||
/// USER SETTING: Global fiber pool size
|
||||
int global_fiber_pool_size;
|
||||
|
||||
/**
|
||||
* @brief TRUE when workers should exit scheduling loop so we can
|
||||
* shut down the runtime and free the global state.
|
||||
*
|
||||
* @note @c work_done will be checked *FREQUENTLY* in the scheduling loop
|
||||
* by idle workers. We need to ensure that it's not in a cache line which
|
||||
* may be invalidated by other cores. The surrounding fields are either
|
||||
* constant after initialization or not used until shutdown (stats) so we
|
||||
* should be OK.
|
||||
*/
|
||||
volatile int work_done;
|
||||
|
||||
int under_ptool; ///< True when running under a serial PIN tool
|
||||
|
||||
statistics stats; ///< Statistics on use of runtime
|
||||
|
||||
/**
|
||||
* @brief USER SETTING: Maximum number of stacks the runtime will
|
||||
* allocate (apart from those created by the OS when worker
|
||||
* threads are created).
|
||||
*
|
||||
* If max_stacks == 0,there is no pre-defined maximum.
|
||||
*/
|
||||
unsigned max_stacks;
|
||||
|
||||
/// Size of each stack
|
||||
size_t stack_size;
|
||||
|
||||
/// Global cache for per-worker memory
|
||||
struct __cilkrts_frame_cache frame_malloc;
|
||||
|
||||
/// Global fiber pool
|
||||
cilk_fiber_pool fiber_pool;
|
||||
|
||||
|
||||
/**
|
||||
* @brief Track whether the runtime has failed to allocate a
|
||||
* stack.
|
||||
*
|
||||
* Setting this flag prevents multiple warnings from being
|
||||
* issued.
|
||||
*/
|
||||
int failure_to_allocate_stack;
|
||||
|
||||
/**
|
||||
* @brief USER SETTING: indicate record or replay log.
|
||||
* Set to NULL if not used in this run.
|
||||
*/
|
||||
char *record_replay_file_name;
|
||||
|
||||
/**
|
||||
* @brief Record/replay state.
|
||||
* Valid states are:
|
||||
* RECORD_REPLAY_NONE - Not recording or replaying a log
|
||||
* RECORD_LOG - Recording a log for replay later
|
||||
* REPLAY_LOG - Replay a log recorded earlier
|
||||
*/
|
||||
enum record_replay_t record_or_replay;
|
||||
|
||||
/**
|
||||
* @brief Buffer to force max_steal_failures to appear on a
|
||||
* different cache line from the previous member variables.
|
||||
*
|
||||
* This padding is needed because max_steal_failures is read
|
||||
* constantly and other modified values in the global state will
|
||||
* cause thrashing.
|
||||
*/
|
||||
char cache_buf[64];
|
||||
|
||||
/**
|
||||
* @brief Maximum number of times a thread should fail to steal
|
||||
* before checking if Cilk is shutting down.
|
||||
*/
|
||||
unsigned int max_steal_failures;
|
||||
|
||||
/// Pointer to scheduler entry point
|
||||
void (*scheduler)(__cilkrts_worker *w);
|
||||
|
||||
/**
|
||||
* @brief Buffer to force P and Q to appear on a different cache
|
||||
* line from the previous member variables.
|
||||
*/
|
||||
char cache_buf_2[64];
|
||||
|
||||
int P; ///< USER SETTING: number of system workers + 1 (fixed)
|
||||
int Q; ///< Number of user threads currently bound to workers
|
||||
};
|
||||
|
||||
/**
|
||||
 * @brief Initialize the global state object.  This method must
|
||||
* complete before referencing any fields in the global state, except
|
||||
* those specified as "user-settable values".
|
||||
*/
|
||||
global_state_t* cilkg_init_global_state();
|
||||
|
||||
/**
|
||||
* @brief Publish the global state object, so that
|
||||
* cilkg_is_published can return true.
|
||||
*
|
||||
* @param g - the global state created by cilkg_init_global_state() to
|
||||
* publish.
|
||||
*
|
||||
* After the global state object has been published, a thread should
|
||||
* not modify this state unless it has exclusive access (i.e., holds
|
||||
* the global lock).
|
||||
*/
|
||||
void cilkg_publish_global_state(global_state_t* g);
|
||||
|
||||
/**
|
||||
* @brief Return true if the global state has been fully initialized
|
||||
* and published, and has not been deinitialized.
|
||||
*/
|
||||
int cilkg_is_published(void);
|
||||
|
||||
/**
|
||||
* @brief De-initializes the global state object. Must be called to free
|
||||
* resources when the global state is no longer needed.
|
||||
*/
|
||||
void cilkg_deinit_global_state(void);
|
||||
|
||||
/**
|
||||
* @brief Returns the global state object. Result is valid only if the
|
||||
* global state has been published (see cilkg_publish_global_state()).
|
||||
*/
|
||||
static inline
|
||||
global_state_t* cilkg_get_global_state(void)
|
||||
{
|
||||
// "private" extern declaration:
|
||||
extern global_state_t *cilkg_singleton_ptr;
|
||||
|
||||
__CILKRTS_ASSERT(cilkg_singleton_ptr); // Debug only
|
||||
return cilkg_singleton_ptr;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @brief Implementation of __cilkrts_set_params.
|
||||
*
|
||||
* Set user controllable parameters
|
||||
* @param param - string specifying parameter to be set
|
||||
* @param value - string specifying new value
|
||||
* @returns One of: CILKG_SET_PARAM_SUCCESS ( = 0),
|
||||
* CILKG_SET_PARAM_UNIMP, CILKG_SET_PARAM_XRANGE,
|
||||
* CILKG_SET_PARAM_INVALID, or CILKG_SET_PARAM_LATE.
|
||||
*
|
||||
* @attention The wide character version __cilkrts_set_param_w() is available
|
||||
* only on Windows.
|
||||
*
|
||||
* Allowable parameter names:
|
||||
*
|
||||
* - "nworkers" - number of processors that should run Cilk code.
|
||||
* The value is a string of digits to be parsed by strtol.
|
||||
*
|
||||
* - "force reduce" - test reducer callbacks by allocating new views
|
||||
* for every spawn within which a reducer is accessed. This can
|
||||
* significantly reduce performance. The value is "1" or "true"
|
||||
* to enable, "0" or "false" to disable.
|
||||
* @warning Enabling "force reduce" when running with more than a single
|
||||
* worker is currently broken.
|
||||
*
|
||||
* - "max user workers" - (Not publicly documented) Sets the number of slots
|
||||
* allocated for user worker threads
|
||||
*
|
||||
* - "local stacks" - (Not publicly documented) Number of stacks we'll hold in
|
||||
* the per-worker stack cache. Range 1 .. 42. See
|
||||
* cilkg_init_global_state for details.
|
||||
*
|
||||
* - "shared stacks" - (Not publicly documented) Maximum number of stacks
|
||||
* we'll hold in the global stack cache. Maximum value is 42. See
|
||||
* __cilkrts_make_global_state for details
|
||||
*
|
||||
* - "nstacks" - (Not publicly documented at this time, though it may be
|
||||
* exposed in the future) Sets the maximum number of stacks permitted at one
|
||||
* time. If the runtime reaches this maximum, it will cease to allocate
|
||||
 * stacks and the app will lose parallelism.  0 means unlimited.  The default
 * installed by cilkg_get_user_settable_values() is 1200 when pointers are
 * 4 bytes wide and 2400 otherwise.  Minimum is twice the number of worker
 * threads, though that
|
||||
* cannot be tested at this time.
|
||||
*/
|
||||
int cilkg_set_param(const char* param, const char* value);
|
||||
#ifdef _WIN32
|
||||
/**
|
||||
* @brief Implementation of __cilkrts_set_params for Unicode characters on
|
||||
* Windows. See the documentation on @ref cilkg_set_param for more details.
|
||||
*
|
||||
* Set user controllable parameters
|
||||
* @param param - string specifying parameter to be set
|
||||
* @param value - string specifying new value
|
||||
* @returns One of: CILKG_SET_PARAM_SUCCESS ( = 0),
|
||||
* CILKG_SET_PARAM_UNIMP, CILKG_SET_PARAM_XRANGE,
|
||||
* CILKG_SET_PARAM_INVALID, or CILKG_SET_PARAM_LATE.
|
||||
*/
|
||||
int cilkg_set_param_w(const wchar_t* param, const wchar_t* value);
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @brief implementation of __cilkrts_get_nworkers()
|
||||
*/
|
||||
static inline
|
||||
int cilkg_get_nworkers(void)
|
||||
{
|
||||
// "private" extern declaration
|
||||
extern global_state_t* cilkg_get_user_settable_values(void);
|
||||
return cilkg_get_user_settable_values()->P;
|
||||
}
|
||||
|
||||
/**
 * @brief implementation of __cilkrts_get_total_workers()
 *
 * @return The value computed by cilkg_calc_total_workers().
 */
static inline
int cilkg_get_total_workers(void)
{
    // "private" extern declaration
    extern int cilkg_calc_total_workers(void);

    // This number can fluctuate until initialization, so it is computed
    // from scratch on every call.
    const int total = cilkg_calc_total_workers();
    return total;
}
|
||||
|
||||
/**
|
||||
* @brief implementation of __cilkrts_get_force_reduce()
|
||||
*/
|
||||
static inline
|
||||
int cilkg_get_force_reduce(void)
|
||||
{
|
||||
// "private" extern declaration
|
||||
extern global_state_t* cilkg_get_user_settable_values(void);
|
||||
return cilkg_get_user_settable_values()->force_reduce;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief implementation of __cilkrts_get_stack_size()
|
||||
*/
|
||||
static inline
|
||||
size_t cilkg_get_stack_size(void)
|
||||
{
|
||||
// "private" extern declaration
|
||||
extern global_state_t* cilkg_get_user_settable_values(void);
|
||||
return cilkg_get_user_settable_values()->stack_size;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Run the scheduler function stored in the global_state
|
||||
*
|
||||
* Look up the scheduler function in global_state and run it. Report a fatal
|
||||
* error if an exception escapes the scheduler function.
|
||||
*
|
||||
* @param w - Worker structure to associate with the current thread.
|
||||
*
|
||||
* @attention The scheduler field of the global state must be set before this
|
||||
* function is called.
|
||||
*/
|
||||
void __cilkrts_run_scheduler_with_exceptions(__cilkrts_worker *w);
|
||||
|
||||
__CILKRTS_END_EXTERN_C
|
||||
|
||||
#endif // ! defined(INCLUDED_GLOBAL_STATE_DOT_H)
|
48
libcilkrts/runtime/jmpbuf.c
Normal file
48
libcilkrts/runtime/jmpbuf.c
Normal file
|
@ -0,0 +1,48 @@
|
|||
/* jmpbuf.c -*-C-*-
|
||||
*
|
||||
*************************************************************************
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2009-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
**************************************************************************/
|
||||
|
||||
#include "jmpbuf.h"
|
||||
|
||||
/*
|
||||
* C99 requires that every inline function with external linkage have
|
||||
* one extern declaration in the program.
|
||||
*/
|
||||
extern char *__cilkrts_get_sp(__cilkrts_stack_frame *sf);
|
||||
extern ptrdiff_t __cilkrts_get_frame_size(__cilkrts_stack_frame *sf);
|
||||
|
||||
/* End jmpbuf.c */
|
136
libcilkrts/runtime/jmpbuf.h
Normal file
136
libcilkrts/runtime/jmpbuf.h
Normal file
|
@ -0,0 +1,136 @@
|
|||
/* jmpbuf.h -*-C++-*-
|
||||
*
|
||||
*************************************************************************
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2009-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
**************************************************************************/
|
||||
|
||||
/**
|
||||
* @file jmpbuf.h
|
||||
*
|
||||
* @brief Macros and functions to access the _JUMP_BUFFER initialized by a
|
||||
* call to CILK_SETJMP before a cilk_spawn or cilk_sync. The definition of
|
||||
* CILK_SETJMP and CILK_LONGJMP are OS dependent and in abi.h
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_JMPBUF_DOT_H
|
||||
#define INCLUDED_JMPBUF_DOT_H
|
||||
|
||||
#include <cilk/common.h>
|
||||
#include <internal/abi.h>
|
||||
#include <stddef.h>
|
||||
#include <setjmp.h>
|
||||
|
||||
#if 0 /* defined CILK_USE_C_SETJMP && defined JB_RSP */
|
||||
# define JMPBUF_SP(ctx) (ctx)[0].__jmpbuf[JB_RSP]
|
||||
# define JMPBUF_FP(ctx) (ctx)[0].__jmpbuf[JB_RBP]
|
||||
# define JMPBUF_PC(ctx) (ctx)[0].__jmpbuf[JB_PC]
|
||||
#elif 0 /* defined CILK_USE_C_SETJMP && defined JB_SP */
|
||||
# define JMPBUF_SP(ctx) (ctx)[0].__jmpbuf[JB_SP]
|
||||
# define JMPBUF_FP(ctx) (ctx)[0].__jmpbuf[JB_BP]
|
||||
# define JMPBUF_PC(ctx) (ctx)[0].__jmpbuf[JB_PC]
|
||||
#elif defined _WIN64
|
||||
# define JMPBUF_SP(ctx) ((_JUMP_BUFFER*)(&(ctx)))->Rsp
|
||||
# define JMPBUF_FP(ctx) ((_JUMP_BUFFER*)(&(ctx)))->Rbp
|
||||
# define JMPBUF_PC(ctx) ((_JUMP_BUFFER*)(&(ctx)))->Rip
|
||||
#elif defined _WIN32
|
||||
/** Fetch stack pointer from a __cilkrts_stack_frame */
|
||||
# define JMPBUF_SP(ctx) (ctx).Esp
|
||||
/** Fetch frame pointer from a __cilkrts_stack_frame */
|
||||
# define JMPBUF_FP(ctx) (ctx).Ebp
|
||||
/** Fetch program counter from a __cilkrts_stack_frame */
|
||||
# define JMPBUF_PC(ctx) (ctx).Eip
|
||||
#else /* defined __GNUC__ || defined __ICC */
|
||||
/* word 0 is frame address
|
||||
* word 1 is resume address
|
||||
* word 2 is stack address */
|
||||
# define JMPBUF_FP(ctx) (ctx)[0]
|
||||
# define JMPBUF_PC(ctx) (ctx)[1]
|
||||
# define JMPBUF_SP(ctx) (ctx)[2]
|
||||
#endif
|
||||
|
||||
/**
|
||||
 * @brief Get frame pointer from jump buffer in __cilkrts_stack_frame.
|
||||
*/
|
||||
#define FP(SF) JMPBUF_FP((SF)->ctx)
|
||||
|
||||
/**
|
||||
 * @brief Get program counter from jump buffer in __cilkrts_stack_frame.
|
||||
*/
|
||||
#define PC(SF) JMPBUF_PC((SF)->ctx)
|
||||
|
||||
/**
|
||||
 * @brief Get stack pointer from jump buffer in __cilkrts_stack_frame.
|
||||
*/
|
||||
#define SP(SF) JMPBUF_SP((SF)->ctx)
|
||||
|
||||
|
||||
__CILKRTS_BEGIN_EXTERN_C
|
||||
|
||||
/**
|
||||
* Fetch the stack pointer from a __cilkrts_stack_frame. The jmpbuf was
|
||||
* initialized before a cilk_spawn or cilk_sync.
|
||||
*
|
||||
* @param sf __cilkrts_stack_frame containing the jmpbuf.
|
||||
*
|
||||
* @return the stack pointer from the ctx.
|
||||
*/
|
||||
inline char *__cilkrts_get_sp(__cilkrts_stack_frame *sf)
|
||||
{
|
||||
return (char *)SP(sf);
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate the frame size from __cilkrts_stack_frame. The jmpbuf was
|
||||
* initialized before a cilk_spawn or cilk_sync.
|
||||
*
|
||||
* @warning Returning an arbitrary value on Windows!
|
||||
*
|
||||
* @param sf __cilkrts_stack_frame containing the jmpbuf.
|
||||
*
|
||||
* @return the stack pointer from the ctx.
|
||||
*/
|
||||
inline ptrdiff_t __cilkrts_get_frame_size(__cilkrts_stack_frame *sf)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
if (0 == SP(sf))
|
||||
return 256; // Arbitrary!
|
||||
#endif
|
||||
return (ptrdiff_t)FP(sf) - (ptrdiff_t)SP(sf);
|
||||
}
|
||||
|
||||
__CILKRTS_END_EXTERN_C
|
||||
|
||||
#endif // ! defined(INCLUDED_JMPBUF_DOT_H)
|
369
libcilkrts/runtime/linux-symbols.ver
Normal file
369
libcilkrts/runtime/linux-symbols.ver
Normal file
|
@ -0,0 +1,369 @@
|
|||
/*
|
||||
*************************************************************************
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2009-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
**************************************************************************/
|
||||
|
||||
CILKABI0
|
||||
{
|
||||
global:
|
||||
__cilkrts_bind_thread;
|
||||
__cilkrts_cilk_for_32;
|
||||
__cilkrts_cilk_for_64;
|
||||
__cilkrts_debugger_notification;
|
||||
__cilkrts_dump_stats;
|
||||
__cilkrts_end_cilk;
|
||||
__cilkrts_enter_frame;
|
||||
__cilkrts_enter_frame_fast;
|
||||
__cilkrts_get_force_reduce;
|
||||
__cilkrts_get_nworkers;
|
||||
__cilkrts_get_tls_worker;
|
||||
__cilkrts_get_tls_worker_fast;
|
||||
__cilkrts_get_total_workers;
|
||||
__cilkrts_get_worker_number;
|
||||
__cilkrts_global_state;
|
||||
__cilkrts_hyper_create;
|
||||
__cilkrts_hyper_destroy;
|
||||
__cilkrts_hyper_lookup;
|
||||
__cilkrts_hyperobject_alloc;
|
||||
__cilkrts_hyperobject_dealloc;
|
||||
__cilkrts_hyperobject_noop_destroy;
|
||||
__cilkrts_init;
|
||||
__cilkrts_irml_version;
|
||||
__cilkrts_leave_frame;
|
||||
__cilkrts_metacall;
|
||||
__cilkrts_rethrow;
|
||||
__cilkrts_return_exception;
|
||||
__cilkrts_set_param;
|
||||
__cilkrts_sync;
|
||||
__cilkrts_synched;
|
||||
__cilkrts_worker_stub;
|
||||
local: *;
|
||||
};
|
||||
|
||||
CILKABI1
|
||||
{
|
||||
global:
|
||||
__cilkrts_bind_thread_1;
|
||||
__cilkrts_bump_loop_rank;
|
||||
__cilkrts_bump_loop_rank_internal;
|
||||
__cilkrts_bump_worker_rank;
|
||||
__cilkrts_bump_worker_rank_internal;
|
||||
__cilkrts_enter_frame_1;
|
||||
__cilkrts_enter_frame_fast_1;
|
||||
__cilkrts_get_pedigree_info;
|
||||
__cilkrts_get_pedigree_internal;
|
||||
__cilkrts_get_sf;
|
||||
__cilkrts_get_stack_size;
|
||||
__cilkrts_get_worker_rank;
|
||||
__cilkrts_save_fp_ctrl_state;
|
||||
__cilkrts_stack_alloc;
|
||||
__cilkrts_stack_free;
|
||||
__cilkrts_watch_stack;
|
||||
} CILKABI0;
|
||||
|
||||
CILKLIB1.02
|
||||
{
|
||||
global:
|
||||
cilk_c_reducer_max_identity_char;
|
||||
cilk_c_reducer_max_identity_double;
|
||||
cilk_c_reducer_max_identity_float;
|
||||
cilk_c_reducer_max_identity_int;
|
||||
cilk_c_reducer_max_identity_long;
|
||||
cilk_c_reducer_max_identity_longdouble;
|
||||
cilk_c_reducer_max_identity_longlong;
|
||||
cilk_c_reducer_max_identity_schar;
|
||||
cilk_c_reducer_max_identity_short;
|
||||
cilk_c_reducer_max_identity_uchar;
|
||||
cilk_c_reducer_max_identity_uint;
|
||||
cilk_c_reducer_max_identity_ulong;
|
||||
cilk_c_reducer_max_identity_ulonglong;
|
||||
cilk_c_reducer_max_identity_unsigned;
|
||||
cilk_c_reducer_max_identity_ushort;
|
||||
cilk_c_reducer_max_identity_wchar_t;
|
||||
cilk_c_reducer_max_index_identity_char;
|
||||
cilk_c_reducer_max_index_identity_double;
|
||||
cilk_c_reducer_max_index_identity_float;
|
||||
cilk_c_reducer_max_index_identity_int;
|
||||
cilk_c_reducer_max_index_identity_long;
|
||||
cilk_c_reducer_max_index_identity_longdouble;
|
||||
cilk_c_reducer_max_index_identity_longlong;
|
||||
cilk_c_reducer_max_index_identity_schar;
|
||||
cilk_c_reducer_max_index_identity_short;
|
||||
cilk_c_reducer_max_index_identity_uchar;
|
||||
cilk_c_reducer_max_index_identity_uint;
|
||||
cilk_c_reducer_max_index_identity_ulong;
|
||||
cilk_c_reducer_max_index_identity_ulonglong;
|
||||
cilk_c_reducer_max_index_identity_unsigned;
|
||||
cilk_c_reducer_max_index_identity_ushort;
|
||||
cilk_c_reducer_max_index_identity_wchar_t;
|
||||
cilk_c_reducer_max_index_reduce_char;
|
||||
cilk_c_reducer_max_index_reduce_double;
|
||||
cilk_c_reducer_max_index_reduce_float;
|
||||
cilk_c_reducer_max_index_reduce_int;
|
||||
cilk_c_reducer_max_index_reduce_long;
|
||||
cilk_c_reducer_max_index_reduce_longdouble;
|
||||
cilk_c_reducer_max_index_reduce_longlong;
|
||||
cilk_c_reducer_max_index_reduce_schar;
|
||||
cilk_c_reducer_max_index_reduce_short;
|
||||
cilk_c_reducer_max_index_reduce_uchar;
|
||||
cilk_c_reducer_max_index_reduce_uint;
|
||||
cilk_c_reducer_max_index_reduce_ulong;
|
||||
cilk_c_reducer_max_index_reduce_ulonglong;
|
||||
cilk_c_reducer_max_index_reduce_unsigned;
|
||||
cilk_c_reducer_max_index_reduce_ushort;
|
||||
cilk_c_reducer_max_index_reduce_wchar_t;
|
||||
cilk_c_reducer_max_reduce_char;
|
||||
cilk_c_reducer_max_reduce_double;
|
||||
cilk_c_reducer_max_reduce_float;
|
||||
cilk_c_reducer_max_reduce_int;
|
||||
cilk_c_reducer_max_reduce_long;
|
||||
cilk_c_reducer_max_reduce_longdouble;
|
||||
cilk_c_reducer_max_reduce_longlong;
|
||||
cilk_c_reducer_max_reduce_schar;
|
||||
cilk_c_reducer_max_reduce_short;
|
||||
cilk_c_reducer_max_reduce_uchar;
|
||||
cilk_c_reducer_max_reduce_uint;
|
||||
cilk_c_reducer_max_reduce_ulong;
|
||||
cilk_c_reducer_max_reduce_ulonglong;
|
||||
cilk_c_reducer_max_reduce_unsigned;
|
||||
cilk_c_reducer_max_reduce_ushort;
|
||||
cilk_c_reducer_max_reduce_wchar_t;
|
||||
cilk_c_reducer_min_identity_char;
|
||||
cilk_c_reducer_min_identity_double;
|
||||
cilk_c_reducer_min_identity_float;
|
||||
cilk_c_reducer_min_identity_int;
|
||||
cilk_c_reducer_min_identity_long;
|
||||
cilk_c_reducer_min_identity_longdouble;
|
||||
cilk_c_reducer_min_identity_longlong;
|
||||
cilk_c_reducer_min_identity_schar;
|
||||
cilk_c_reducer_min_identity_short;
|
||||
cilk_c_reducer_min_identity_uchar;
|
||||
cilk_c_reducer_min_identity_uint;
|
||||
cilk_c_reducer_min_identity_ulong;
|
||||
cilk_c_reducer_min_identity_ulonglong;
|
||||
cilk_c_reducer_min_identity_unsigned;
|
||||
cilk_c_reducer_min_identity_ushort;
|
||||
cilk_c_reducer_min_identity_wchar_t;
|
||||
cilk_c_reducer_min_index_identity_char;
|
||||
cilk_c_reducer_min_index_identity_double;
|
||||
cilk_c_reducer_min_index_identity_float;
|
||||
cilk_c_reducer_min_index_identity_int;
|
||||
cilk_c_reducer_min_index_identity_long;
|
||||
cilk_c_reducer_min_index_identity_longdouble;
|
||||
cilk_c_reducer_min_index_identity_longlong;
|
||||
cilk_c_reducer_min_index_identity_schar;
|
||||
cilk_c_reducer_min_index_identity_short;
|
||||
cilk_c_reducer_min_index_identity_uchar;
|
||||
cilk_c_reducer_min_index_identity_uint;
|
||||
cilk_c_reducer_min_index_identity_ulong;
|
||||
cilk_c_reducer_min_index_identity_ulonglong;
|
||||
cilk_c_reducer_min_index_identity_unsigned;
|
||||
cilk_c_reducer_min_index_identity_ushort;
|
||||
cilk_c_reducer_min_index_identity_wchar_t;
|
||||
cilk_c_reducer_min_index_reduce_char;
|
||||
cilk_c_reducer_min_index_reduce_double;
|
||||
cilk_c_reducer_min_index_reduce_float;
|
||||
cilk_c_reducer_min_index_reduce_int;
|
||||
cilk_c_reducer_min_index_reduce_long;
|
||||
cilk_c_reducer_min_index_reduce_longdouble;
|
||||
cilk_c_reducer_min_index_reduce_longlong;
|
||||
cilk_c_reducer_min_index_reduce_schar;
|
||||
cilk_c_reducer_min_index_reduce_short;
|
||||
cilk_c_reducer_min_index_reduce_uchar;
|
||||
cilk_c_reducer_min_index_reduce_uint;
|
||||
cilk_c_reducer_min_index_reduce_ulong;
|
||||
cilk_c_reducer_min_index_reduce_ulonglong;
|
||||
cilk_c_reducer_min_index_reduce_unsigned;
|
||||
cilk_c_reducer_min_index_reduce_ushort;
|
||||
cilk_c_reducer_min_index_reduce_wchar_t;
|
||||
cilk_c_reducer_min_reduce_char;
|
||||
cilk_c_reducer_min_reduce_double;
|
||||
cilk_c_reducer_min_reduce_float;
|
||||
cilk_c_reducer_min_reduce_int;
|
||||
cilk_c_reducer_min_reduce_long;
|
||||
cilk_c_reducer_min_reduce_longdouble;
|
||||
cilk_c_reducer_min_reduce_longlong;
|
||||
cilk_c_reducer_min_reduce_schar;
|
||||
cilk_c_reducer_min_reduce_short;
|
||||
cilk_c_reducer_min_reduce_uchar;
|
||||
cilk_c_reducer_min_reduce_uint;
|
||||
cilk_c_reducer_min_reduce_ulong;
|
||||
cilk_c_reducer_min_reduce_ulonglong;
|
||||
cilk_c_reducer_min_reduce_unsigned;
|
||||
cilk_c_reducer_min_reduce_ushort;
|
||||
cilk_c_reducer_min_reduce_wchar_t;
|
||||
cilk_c_reducer_opadd_identity_char;
|
||||
cilk_c_reducer_opadd_identity_double;
|
||||
cilk_c_reducer_opadd_identity_float;
|
||||
cilk_c_reducer_opadd_identity_int;
|
||||
cilk_c_reducer_opadd_identity_long;
|
||||
cilk_c_reducer_opadd_identity_longdouble;
|
||||
cilk_c_reducer_opadd_identity_longlong;
|
||||
cilk_c_reducer_opadd_identity_schar;
|
||||
cilk_c_reducer_opadd_identity_short;
|
||||
cilk_c_reducer_opadd_identity_uchar;
|
||||
cilk_c_reducer_opadd_identity_uint;
|
||||
cilk_c_reducer_opadd_identity_ulong;
|
||||
cilk_c_reducer_opadd_identity_ulonglong;
|
||||
cilk_c_reducer_opadd_identity_unsigned;
|
||||
cilk_c_reducer_opadd_identity_ushort;
|
||||
cilk_c_reducer_opadd_identity_wchar_t;
|
||||
cilk_c_reducer_opadd_reduce_char;
|
||||
cilk_c_reducer_opadd_reduce_double;
|
||||
cilk_c_reducer_opadd_reduce_float;
|
||||
cilk_c_reducer_opadd_reduce_int;
|
||||
cilk_c_reducer_opadd_reduce_long;
|
||||
cilk_c_reducer_opadd_reduce_longdouble;
|
||||
cilk_c_reducer_opadd_reduce_longlong;
|
||||
cilk_c_reducer_opadd_reduce_schar;
|
||||
cilk_c_reducer_opadd_reduce_short;
|
||||
cilk_c_reducer_opadd_reduce_uchar;
|
||||
cilk_c_reducer_opadd_reduce_uint;
|
||||
cilk_c_reducer_opadd_reduce_ulong;
|
||||
cilk_c_reducer_opadd_reduce_ulonglong;
|
||||
cilk_c_reducer_opadd_reduce_unsigned;
|
||||
cilk_c_reducer_opadd_reduce_ushort;
|
||||
cilk_c_reducer_opadd_reduce_wchar_t;
|
||||
cilk_c_reducer_opand_identity_char;
|
||||
cilk_c_reducer_opand_identity_int;
|
||||
cilk_c_reducer_opand_identity_long;
|
||||
cilk_c_reducer_opand_identity_longlong;
|
||||
cilk_c_reducer_opand_identity_schar;
|
||||
cilk_c_reducer_opand_identity_short;
|
||||
cilk_c_reducer_opand_identity_uchar;
|
||||
cilk_c_reducer_opand_identity_uint;
|
||||
cilk_c_reducer_opand_identity_ulong;
|
||||
cilk_c_reducer_opand_identity_ulonglong;
|
||||
cilk_c_reducer_opand_identity_unsigned;
|
||||
cilk_c_reducer_opand_identity_ushort;
|
||||
cilk_c_reducer_opand_identity_wchar_t;
|
||||
cilk_c_reducer_opand_reduce_char;
|
||||
cilk_c_reducer_opand_reduce_int;
|
||||
cilk_c_reducer_opand_reduce_long;
|
||||
cilk_c_reducer_opand_reduce_longlong;
|
||||
cilk_c_reducer_opand_reduce_schar;
|
||||
cilk_c_reducer_opand_reduce_short;
|
||||
cilk_c_reducer_opand_reduce_uchar;
|
||||
cilk_c_reducer_opand_reduce_uint;
|
||||
cilk_c_reducer_opand_reduce_ulong;
|
||||
cilk_c_reducer_opand_reduce_ulonglong;
|
||||
cilk_c_reducer_opand_reduce_unsigned;
|
||||
cilk_c_reducer_opand_reduce_ushort;
|
||||
cilk_c_reducer_opand_reduce_wchar_t;
|
||||
cilk_c_reducer_opmul_identity_char;
|
||||
cilk_c_reducer_opmul_identity_double;
|
||||
cilk_c_reducer_opmul_identity_float;
|
||||
cilk_c_reducer_opmul_identity_int;
|
||||
cilk_c_reducer_opmul_identity_long;
|
||||
cilk_c_reducer_opmul_identity_longdouble;
|
||||
cilk_c_reducer_opmul_identity_longlong;
|
||||
cilk_c_reducer_opmul_identity_schar;
|
||||
cilk_c_reducer_opmul_identity_short;
|
||||
cilk_c_reducer_opmul_identity_uchar;
|
||||
cilk_c_reducer_opmul_identity_uint;
|
||||
cilk_c_reducer_opmul_identity_ulong;
|
||||
cilk_c_reducer_opmul_identity_ulonglong;
|
||||
cilk_c_reducer_opmul_identity_unsigned;
|
||||
cilk_c_reducer_opmul_identity_ushort;
|
||||
cilk_c_reducer_opmul_identity_wchar_t;
|
||||
cilk_c_reducer_opmul_reduce_char;
|
||||
cilk_c_reducer_opmul_reduce_double;
|
||||
cilk_c_reducer_opmul_reduce_float;
|
||||
cilk_c_reducer_opmul_reduce_int;
|
||||
cilk_c_reducer_opmul_reduce_long;
|
||||
cilk_c_reducer_opmul_reduce_longdouble;
|
||||
cilk_c_reducer_opmul_reduce_longlong;
|
||||
cilk_c_reducer_opmul_reduce_schar;
|
||||
cilk_c_reducer_opmul_reduce_short;
|
||||
cilk_c_reducer_opmul_reduce_uchar;
|
||||
cilk_c_reducer_opmul_reduce_uint;
|
||||
cilk_c_reducer_opmul_reduce_ulong;
|
||||
cilk_c_reducer_opmul_reduce_ulonglong;
|
||||
cilk_c_reducer_opmul_reduce_unsigned;
|
||||
cilk_c_reducer_opmul_reduce_ushort;
|
||||
cilk_c_reducer_opmul_reduce_wchar_t;
|
||||
cilk_c_reducer_opor_identity_char;
|
||||
cilk_c_reducer_opor_identity_int;
|
||||
cilk_c_reducer_opor_identity_long;
|
||||
cilk_c_reducer_opor_identity_longlong;
|
||||
cilk_c_reducer_opor_identity_schar;
|
||||
cilk_c_reducer_opor_identity_short;
|
||||
cilk_c_reducer_opor_identity_uchar;
|
||||
cilk_c_reducer_opor_identity_uint;
|
||||
cilk_c_reducer_opor_identity_ulong;
|
||||
cilk_c_reducer_opor_identity_ulonglong;
|
||||
cilk_c_reducer_opor_identity_unsigned;
|
||||
cilk_c_reducer_opor_identity_ushort;
|
||||
cilk_c_reducer_opor_identity_wchar_t;
|
||||
cilk_c_reducer_opor_reduce_char;
|
||||
cilk_c_reducer_opor_reduce_int;
|
||||
cilk_c_reducer_opor_reduce_long;
|
||||
cilk_c_reducer_opor_reduce_longlong;
|
||||
cilk_c_reducer_opor_reduce_schar;
|
||||
cilk_c_reducer_opor_reduce_short;
|
||||
cilk_c_reducer_opor_reduce_uchar;
|
||||
cilk_c_reducer_opor_reduce_uint;
|
||||
cilk_c_reducer_opor_reduce_ulong;
|
||||
cilk_c_reducer_opor_reduce_ulonglong;
|
||||
cilk_c_reducer_opor_reduce_unsigned;
|
||||
cilk_c_reducer_opor_reduce_ushort;
|
||||
cilk_c_reducer_opor_reduce_wchar_t;
|
||||
cilk_c_reducer_opxor_identity_char;
|
||||
cilk_c_reducer_opxor_identity_int;
|
||||
cilk_c_reducer_opxor_identity_long;
|
||||
cilk_c_reducer_opxor_identity_longlong;
|
||||
cilk_c_reducer_opxor_identity_schar;
|
||||
cilk_c_reducer_opxor_identity_short;
|
||||
cilk_c_reducer_opxor_identity_uchar;
|
||||
cilk_c_reducer_opxor_identity_uint;
|
||||
cilk_c_reducer_opxor_identity_ulong;
|
||||
cilk_c_reducer_opxor_identity_ulonglong;
|
||||
cilk_c_reducer_opxor_identity_unsigned;
|
||||
cilk_c_reducer_opxor_identity_ushort;
|
||||
cilk_c_reducer_opxor_identity_wchar_t;
|
||||
cilk_c_reducer_opxor_reduce_char;
|
||||
cilk_c_reducer_opxor_reduce_int;
|
||||
cilk_c_reducer_opxor_reduce_long;
|
||||
cilk_c_reducer_opxor_reduce_longlong;
|
||||
cilk_c_reducer_opxor_reduce_schar;
|
||||
cilk_c_reducer_opxor_reduce_short;
|
||||
cilk_c_reducer_opxor_reduce_uchar;
|
||||
cilk_c_reducer_opxor_reduce_uint;
|
||||
cilk_c_reducer_opxor_reduce_ulong;
|
||||
cilk_c_reducer_opxor_reduce_ulonglong;
|
||||
cilk_c_reducer_opxor_reduce_unsigned;
|
||||
cilk_c_reducer_opxor_reduce_ushort;
|
||||
cilk_c_reducer_opxor_reduce_wchar_t;
|
||||
};
|
68
libcilkrts/runtime/local_state.c
Normal file
68
libcilkrts/runtime/local_state.c
Normal file
|
@ -0,0 +1,68 @@
|
|||
/* local_state.c -*-C++-*-
|
||||
*
|
||||
*************************************************************************
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2010-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
#include "local_state.h"
|
||||
#include "bug.h"
|
||||
#include "full_frame.h"
|
||||
|
||||
/*
 * Run the deferred "post suspend" action recorded in the worker's local
 * state.  The pending function, the worker's full frame and the suspended
 * stack frame are taken out of w->l (and the slots reset) before the
 * function is invoked with them.
 */
void run_scheduling_stack_fcn(__cilkrts_worker *w)
{
    scheduling_stack_fcn_t fcn;
    full_frame *ff2;
    __cilkrts_stack_frame *suspended_sf;

    // Snapshot everything the pending function needs before resetting
    // the corresponding slots in the worker's local state.
    fcn          = w->l->post_suspend;
    ff2          = w->l->frame_ff;
    suspended_sf = w->l->suspended_stack;

    w->l->post_suspend    = NULL;
    w->l->suspended_stack = NULL;

    // Clearing w->l->frame_ff conceptually ends w's ownership of the
    // full frame.  Ownership is next taken (possibly by a different
    // worker) at a provably_good_steal on that frame.
    w->l->frame_ff = NULL;

    CILK_ASSERT(fcn);
    CILK_ASSERT(ff2);
    fcn(w, ff2, suspended_sf);

    // The scheduling stack function must leave the worker without a
    // full frame, exactly as it was before the call.
    CILK_ASSERT(NULL == w->l->frame_ff);
}
|
||||
|
||||
/* End local_state.c */
|
424
libcilkrts/runtime/local_state.h
Normal file
424
libcilkrts/runtime/local_state.h
Normal file
|
@ -0,0 +1,424 @@
|
|||
/* local_state.h -*-C++-*-
|
||||
*
|
||||
*************************************************************************
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2009-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
**************************************************************************/
|
||||
|
||||
/**
|
||||
* @file local_state.h
|
||||
*
|
||||
* @brief The local_state structure contains additional OS-independent
|
||||
* information that's associated with a worker, but doesn't need to be visible
|
||||
* to the code generated by the compiler.
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_LOCAL_STATE_DOT_H
|
||||
#define INCLUDED_LOCAL_STATE_DOT_H
|
||||
|
||||
#include <internal/abi.h>
|
||||
#include "worker_mutex.h"
|
||||
#include "global_state.h"
|
||||
#include "record-replay.h"
|
||||
#include "signal_node.h"
|
||||
|
||||
#include <setjmp.h>
|
||||
#include <stddef.h>
|
||||
#include <stdio.h>
|
||||
|
||||
|
||||
#ifndef _WIN32
|
||||
# include <pthread.h>
|
||||
#endif
|
||||
|
||||
__CILKRTS_BEGIN_EXTERN_C
|
||||
|
||||
/* Opaque types. */
|
||||
|
||||
struct full_frame;
|
||||
struct free_list;
|
||||
struct pending_exception_info;
|
||||
/// Opaque type for replay entry.
|
||||
typedef struct replay_entry_t replay_entry_t;
|
||||
|
||||
/**
|
||||
* @brief Magic numbers for local_state, used for debugging
|
||||
*/
|
||||
typedef unsigned long long ls_magic_t;
|
||||
|
||||
/**
|
||||
* @brief Scheduling stack function: A function that is decided on the program stack,
|
||||
* but that must be executed on the scheduling stack.
|
||||
*/
|
||||
typedef void (*scheduling_stack_fcn_t) (__cilkrts_worker *w,
|
||||
struct full_frame *ff,
|
||||
__cilkrts_stack_frame *sf);
|
||||
|
||||
/**
|
||||
* @brief Type of this worker.
|
||||
**/
|
||||
typedef enum cilk_worker_type
|
||||
{
|
||||
WORKER_FREE, ///< Unused worker - available to be bound to user threads
|
||||
WORKER_SYSTEM, ///< Worker created by runtime - able to steal from any worker
|
||||
WORKER_USER ///< User thread - able to steal only from team members
|
||||
} cilk_worker_type;
|
||||
|
||||
|
||||
/**
|
||||
* @brief The local_state structure contains additional OS-independent
|
||||
* information that's associated with a worker, but doesn't need to be
|
||||
* visible to the compiler.
|
||||
*
|
||||
* No compiler-generated code should need to know the layout of this
|
||||
* structure.
|
||||
*
|
||||
* The fields of this struct can be classified as either local or
|
||||
* shared.
|
||||
*
|
||||
* Local: This field is only accessed by the thread bound to this
|
||||
* worker struct. Local fields can be freely accessed without
|
||||
* acquiring locks.
|
||||
*
|
||||
* Shared: This field may be accessed by multiple worker threads.
|
||||
* Accesses to shared fields usually require locks, except in
|
||||
* special situations where one can prove that locks are
|
||||
* unnecessary.
|
||||
*
|
||||
* The fields of this can also be classified as "read-only" if the
|
||||
* field does not change after it is initialized. Otherwise, the
|
||||
* field is "read/write". Read-only fields do not require locks to
|
||||
* access (ignoring the synchronization that might be needed for
|
||||
* initialization if this can occur in parallel).
|
||||
*
|
||||
* Finally, we explicitly classify some fields as "synchronization"
|
||||
* fields if they are used as part of a synchronization protocol in
|
||||
* the runtime. These variables are generally shared and read/write.
|
||||
* Mostly, this category includes lock variables and other variables
|
||||
* that are involved in synchronization protocols (i.e., the THE
|
||||
* protocol).
|
||||
*/
|
||||
struct local_state /* COMMON_PORTABLE */
|
||||
{
|
||||
/** This value should be in the first field in any local_state */
|
||||
# define WORKER_MAGIC_0 ((ls_magic_t)0xe0831a4a940c60b8ULL)
|
||||
|
||||
/**
|
||||
* Should be WORKER_MAGIC_0 or the local_state has been corrupted
|
||||
* This magic field is shared because it is read on lock acquisitions.
|
||||
*
|
||||
* [shared read-only]
|
||||
*/
|
||||
ls_magic_t worker_magic_0;
|
||||
|
||||
/**
|
||||
* Mutex used to serialize access to the local_state
|
||||
* Synchronization field. [shared read/write]
|
||||
*/
|
||||
struct mutex lock;
|
||||
|
||||
/**
|
||||
* Flag that indicates that the worker is interested in grabbing
|
||||
* LOCK, and thus thieves should leave the worker alone.
|
||||
* Written only by self, may be read by others.
|
||||
*
|
||||
* Synchronization field. [shared read/write]
|
||||
*/
|
||||
int do_not_steal;
|
||||
|
||||
/**
|
||||
* Lock that all thieves grab in order to compete for the right
|
||||
* to disturb this worker.
|
||||
*
|
||||
* Synchronization field. [shared read/write]
|
||||
*/
|
||||
struct mutex steal_lock;
|
||||
|
||||
/**
|
||||
* Full frame that the worker is working on.
|
||||
*
|
||||
* While a worker w is executing, a thief may change
|
||||
* w->l->frame_ff (on a successful steal) after acquiring w's
|
||||
* lock.
|
||||
*
|
||||
* Unlocked accesses to w->l->frame_ff are safe (by w itself) when
|
||||
* w's deque is empty, or when stealing from w has been disabled.
|
||||
*
|
||||
* [shared read/write]
|
||||
*/
|
||||
struct full_frame *frame_ff;
|
||||
|
||||
/**
|
||||
* Full frame that the worker will be working on next
|
||||
*
|
||||
* This field is normally local for a worker w. Another worker v
|
||||
* may modify w->l->next_frame_ff, however, in the special case
|
||||
* when v is returning a frame to a user thread w since w is the
|
||||
* team leader.
|
||||
*
|
||||
* [shared read/write]
|
||||
*/
|
||||
struct full_frame *next_frame_ff;
|
||||
|
||||
/**
|
||||
* This is set iff this is a WORKER_USER and there has been a steal. It
|
||||
* points to the first frame that was stolen since the team was last fully
|
||||
* sync'd. Only this worker may continue past a sync in this function.
|
||||
*
|
||||
* This field is set by a thief for a victim that is a user
|
||||
* thread, while holding the victim's lock.
|
||||
* It can be cleared without a lock by the worker that will
|
||||
* continue executing past the sync.
|
||||
*
|
||||
* [shared read/write]
|
||||
*/
|
||||
struct full_frame *last_full_frame;
|
||||
|
||||
/**
|
||||
* Team on which this worker is a participant. When a user worker enters,
|
||||
* its team is its own worker struct and it can never change teams. When a
|
||||
* system worker steals, it adopts the team of its victim.
|
||||
*
|
||||
* When a system worker w steals, it reads victim->l->team and
|
||||
* joins this team. w->l->team is constant until the next time w
|
||||
* returns control to the runtime.
|
||||
* We must acquire the worker lock to change w->l->team.
|
||||
*
|
||||
* @note This field is 64-byte aligned because it is the first in
|
||||
* the group of shared read-only fields. We want this group to
|
||||
* fall on a different cache line from the previous group, which
|
||||
* is shared read-write.
|
||||
*
|
||||
* [shared read-only]
|
||||
*/
|
||||
__attribute__((aligned(64)))
|
||||
__cilkrts_worker *team;
|
||||
|
||||
/**
|
||||
* Type of this worker
|
||||
*
|
||||
* This field changes only when a worker binds or unbinds.
|
||||
* Otherwise, the field is read-only while the worker is bound.
|
||||
*
|
||||
* [shared read-only]
|
||||
*/
|
||||
cilk_worker_type type;
|
||||
|
||||
/**
|
||||
* Lazy task queue of this worker - an array of pointers to stack frames.
|
||||
*
|
||||
* Read-only because deques are a fixed size in the current
|
||||
* implementation.
|
||||
*
|
||||
* @note This field is 64-byte aligned because it is the first in
|
||||
* the group of local fields. We want this group to fall on a
|
||||
* different cache line from the previous group, which is shared
|
||||
* read-only.
|
||||
*
|
||||
* [local read-only]
|
||||
*/
|
||||
__attribute__((aligned(64)))
|
||||
__cilkrts_stack_frame **ltq;
|
||||
|
||||
/**
|
||||
* Pool of fibers waiting to be reused.
|
||||
* [local read/write]
|
||||
*/
|
||||
cilk_fiber_pool fiber_pool;
|
||||
|
||||
/**
|
||||
* The fiber for the scheduling stacks.
|
||||
* [local read/write]
|
||||
*/
|
||||
cilk_fiber* scheduling_fiber;
|
||||
|
||||
/**
|
||||
* Saved pointer to the leaf node in thread-local storage, when a
|
||||
* user thread is imported. This pointer gets set to a
|
||||
* meaningful value when binding a user thread, and cleared on
|
||||
* unbind.
|
||||
*
|
||||
* [local read/write]
|
||||
*/
|
||||
__cilkrts_pedigree* original_pedigree_leaf;
|
||||
|
||||
/**
|
||||
* State of the random number generator
|
||||
*
|
||||
* [local read/write]
|
||||
*/
|
||||
unsigned rand_seed;
|
||||
|
||||
/**
|
||||
* Function to execute after transferring onto the scheduling stack.
|
||||
*
|
||||
* [local read/write]
|
||||
*/
|
||||
scheduling_stack_fcn_t post_suspend;
|
||||
|
||||
/**
|
||||
* __cilkrts_stack_frame we suspended when we transferred onto the
|
||||
* scheduling stack.
|
||||
*
|
||||
* [local read/write]
|
||||
*/
|
||||
__cilkrts_stack_frame *suspended_stack;
|
||||
|
||||
/**
|
||||
* cilk_fiber that should be freed after returning from a
|
||||
* spawn with a stolen parent or after stalling at a sync.
|
||||
|
||||
* We calculate the stack to free when executing a reduction on
|
||||
* the user stack, but we can not actually release the stack
|
||||
* until control longjmps onto a runtime scheduling stack.
|
||||
*
|
||||
* This field is used to pass information to the runtime across
|
||||
* the longjmp onto the scheduling stack.
|
||||
*
|
||||
* [local read/write]
|
||||
*/
|
||||
cilk_fiber* fiber_to_free;
|
||||
|
||||
/**
|
||||
* Saved exception object for an exception that is being passed to
|
||||
* our parent
|
||||
*
|
||||
* [local read/write]
|
||||
*/
|
||||
struct pending_exception_info *pending_exception;
|
||||
|
||||
/**
|
||||
* Buckets for the memory allocator
|
||||
*
|
||||
* [local read/write]
|
||||
*/
|
||||
struct free_list *free_list[FRAME_MALLOC_NBUCKETS];
|
||||
|
||||
/**
|
||||
* Potential function for the memory allocator
|
||||
*
|
||||
* [local read/write]
|
||||
*/
|
||||
size_t bucket_potential[FRAME_MALLOC_NBUCKETS];
|
||||
|
||||
/**
|
||||
* Support for statistics
|
||||
*
|
||||
* Useful only when CILK_PROFILE is compiled in.
|
||||
* [local read/write]
|
||||
*/
|
||||
statistics* stats;
|
||||
|
||||
/**
|
||||
* Count indicates number of failures since last successful steal. This is
|
||||
* used by the scheduler to reduce contention on shared flags.
|
||||
*
|
||||
* [local read/write]
|
||||
*/
|
||||
unsigned int steal_failure_count;
|
||||
|
||||
/**
|
||||
* 1 if work was stolen from another worker. When true, this will flag
|
||||
* setup_for_execution_pedigree to increment the pedigree when we resume
|
||||
* execution to match the increment that would have been done on a return
|
||||
* from a spawn helper.
|
||||
*
|
||||
* [local read/write]
|
||||
*/
|
||||
int work_stolen;
|
||||
|
||||
/**
|
||||
* File pointer for record or replay
|
||||
* Does FILE * work on Windows?
|
||||
* During record, the file will be opened in write-only mode.
|
||||
* During replay, the file will be opened in read-only mode.
|
||||
*
|
||||
* [local read/write]
|
||||
*/
|
||||
FILE *record_replay_fptr;
|
||||
|
||||
/**
|
||||
* Root of array of replay entries - NULL if we're not replaying a log
|
||||
*
|
||||
* [local read/write]
|
||||
*/
|
||||
replay_entry_t *replay_list_root;
|
||||
|
||||
/**
|
||||
* Current replay entry - NULL if we're not replaying a log
|
||||
*
|
||||
* [local read/write]
|
||||
*/
|
||||
replay_entry_t *replay_list_entry;
|
||||
|
||||
/**
|
||||
* Separate the signal_node from other things in the local_state by the
|
||||
* sizeof a cache line for performance reasons.
|
||||
*
|
||||
* unused
|
||||
*/
|
||||
char buf[64];
|
||||
|
||||
/**
|
||||
* Signal object for waking/sleeping the worker. This should be a pointer
|
||||
* to avoid the possibility of caching problems.
|
||||
*
|
||||
* [shared read-only]
|
||||
*/
|
||||
signal_node_t *signal_node;
|
||||
|
||||
/** This value should be in the last field in any local_state */
|
||||
# define WORKER_MAGIC_1 ((ls_magic_t)0x16164afb0ea0dff9ULL)
|
||||
|
||||
/**
|
||||
* Should be WORKER_MAGIC_1 or the local_state has been corrupted
|
||||
* This magic field is shared because it is read on lock acquisitions.
|
||||
* [shared read-only]
|
||||
*/
|
||||
ls_magic_t worker_magic_1;
|
||||
};
|
||||
|
||||
/**
|
||||
* Perform cleanup according to the function set before the longjmp().
|
||||
*
|
||||
* Call this after longjmp() has completed and the worker is back on a
|
||||
* scheduling stack.
|
||||
*
|
||||
* @param w __cilkrts_worker currently executing.
|
||||
*/
|
||||
void run_scheduling_stack_fcn(__cilkrts_worker *w);
|
||||
|
||||
__CILKRTS_END_EXTERN_C
|
||||
|
||||
#endif // ! defined(INCLUDED_LOCAL_STATE_DOT_H)
|
318
libcilkrts/runtime/mac-symbols.txt
Normal file
318
libcilkrts/runtime/mac-symbols.txt
Normal file
|
@ -0,0 +1,318 @@
|
|||
# Exported symbol list:
|
||||
___cilkrts_bind_thread
|
||||
___cilkrts_bind_thread_1
|
||||
___cilkrts_bump_loop_rank
|
||||
___cilkrts_bump_loop_rank_internal
|
||||
___cilkrts_bump_worker_rank
|
||||
___cilkrts_bump_worker_rank_internal
|
||||
___cilkrts_cilk_for_32
|
||||
___cilkrts_cilk_for_64
|
||||
___cilkrts_debugger_notification
|
||||
___cilkrts_dump_stats
|
||||
___cilkrts_end_cilk
|
||||
___cilkrts_enter_frame
|
||||
___cilkrts_enter_frame_1
|
||||
___cilkrts_enter_frame_fast
|
||||
___cilkrts_enter_frame_fast_1
|
||||
___cilkrts_get_force_reduce
|
||||
___cilkrts_get_nworkers
|
||||
___cilkrts_get_pedigree_info
|
||||
___cilkrts_get_pedigree_internal
|
||||
___cilkrts_get_sf
|
||||
___cilkrts_get_stack_size
|
||||
___cilkrts_get_tls_worker
|
||||
___cilkrts_get_tls_worker_fast
|
||||
___cilkrts_get_total_workers
|
||||
___cilkrts_get_worker_number
|
||||
___cilkrts_get_worker_rank
|
||||
___cilkrts_global_state
|
||||
___cilkrts_hyper_create
|
||||
___cilkrts_hyper_destroy
|
||||
___cilkrts_hyper_lookup
|
||||
___cilkrts_hyperobject_alloc
|
||||
___cilkrts_hyperobject_dealloc
|
||||
___cilkrts_hyperobject_noop_destroy
|
||||
___cilkrts_init
|
||||
___cilkrts_irml_version
|
||||
___cilkrts_leave_frame
|
||||
___cilkrts_metacall
|
||||
___cilkrts_rethrow
|
||||
___cilkrts_return_exception
|
||||
___cilkrts_save_fp_ctrl_state
|
||||
___cilkrts_set_param
|
||||
___cilkrts_stack_alloc
|
||||
___cilkrts_stack_free
|
||||
___cilkrts_sync
|
||||
___cilkrts_synched
|
||||
___cilkrts_watch_stack
|
||||
___cilkrts_worker_stub
|
||||
_cilk_c_reducer_max_identity_char
|
||||
_cilk_c_reducer_max_identity_double
|
||||
_cilk_c_reducer_max_identity_float
|
||||
_cilk_c_reducer_max_identity_int
|
||||
_cilk_c_reducer_max_identity_long
|
||||
_cilk_c_reducer_max_identity_longdouble
|
||||
_cilk_c_reducer_max_identity_longlong
|
||||
_cilk_c_reducer_max_identity_schar
|
||||
_cilk_c_reducer_max_identity_short
|
||||
_cilk_c_reducer_max_identity_uchar
|
||||
_cilk_c_reducer_max_identity_uint
|
||||
_cilk_c_reducer_max_identity_ulong
|
||||
_cilk_c_reducer_max_identity_ulonglong
|
||||
_cilk_c_reducer_max_identity_unsigned
|
||||
_cilk_c_reducer_max_identity_ushort
|
||||
_cilk_c_reducer_max_identity_wchar_t
|
||||
_cilk_c_reducer_max_index_identity_char
|
||||
_cilk_c_reducer_max_index_identity_double
|
||||
_cilk_c_reducer_max_index_identity_float
|
||||
_cilk_c_reducer_max_index_identity_int
|
||||
_cilk_c_reducer_max_index_identity_long
|
||||
_cilk_c_reducer_max_index_identity_longdouble
|
||||
_cilk_c_reducer_max_index_identity_longlong
|
||||
_cilk_c_reducer_max_index_identity_schar
|
||||
_cilk_c_reducer_max_index_identity_short
|
||||
_cilk_c_reducer_max_index_identity_uchar
|
||||
_cilk_c_reducer_max_index_identity_uint
|
||||
_cilk_c_reducer_max_index_identity_ulong
|
||||
_cilk_c_reducer_max_index_identity_ulonglong
|
||||
_cilk_c_reducer_max_index_identity_unsigned
|
||||
_cilk_c_reducer_max_index_identity_ushort
|
||||
_cilk_c_reducer_max_index_identity_wchar_t
|
||||
_cilk_c_reducer_max_index_reduce_char
|
||||
_cilk_c_reducer_max_index_reduce_double
|
||||
_cilk_c_reducer_max_index_reduce_float
|
||||
_cilk_c_reducer_max_index_reduce_int
|
||||
_cilk_c_reducer_max_index_reduce_long
|
||||
_cilk_c_reducer_max_index_reduce_longdouble
|
||||
_cilk_c_reducer_max_index_reduce_longlong
|
||||
_cilk_c_reducer_max_index_reduce_schar
|
||||
_cilk_c_reducer_max_index_reduce_short
|
||||
_cilk_c_reducer_max_index_reduce_uchar
|
||||
_cilk_c_reducer_max_index_reduce_uint
|
||||
_cilk_c_reducer_max_index_reduce_ulong
|
||||
_cilk_c_reducer_max_index_reduce_ulonglong
|
||||
_cilk_c_reducer_max_index_reduce_unsigned
|
||||
_cilk_c_reducer_max_index_reduce_ushort
|
||||
_cilk_c_reducer_max_index_reduce_wchar_t
|
||||
_cilk_c_reducer_max_reduce_char
|
||||
_cilk_c_reducer_max_reduce_double
|
||||
_cilk_c_reducer_max_reduce_float
|
||||
_cilk_c_reducer_max_reduce_int
|
||||
_cilk_c_reducer_max_reduce_long
|
||||
_cilk_c_reducer_max_reduce_longdouble
|
||||
_cilk_c_reducer_max_reduce_longlong
|
||||
_cilk_c_reducer_max_reduce_schar
|
||||
_cilk_c_reducer_max_reduce_short
|
||||
_cilk_c_reducer_max_reduce_uchar
|
||||
_cilk_c_reducer_max_reduce_uint
|
||||
_cilk_c_reducer_max_reduce_ulong
|
||||
_cilk_c_reducer_max_reduce_ulonglong
|
||||
_cilk_c_reducer_max_reduce_unsigned
|
||||
_cilk_c_reducer_max_reduce_ushort
|
||||
_cilk_c_reducer_max_reduce_wchar_t
|
||||
_cilk_c_reducer_min_identity_char
|
||||
_cilk_c_reducer_min_identity_double
|
||||
_cilk_c_reducer_min_identity_float
|
||||
_cilk_c_reducer_min_identity_int
|
||||
_cilk_c_reducer_min_identity_long
|
||||
_cilk_c_reducer_min_identity_longdouble
|
||||
_cilk_c_reducer_min_identity_longlong
|
||||
_cilk_c_reducer_min_identity_schar
|
||||
_cilk_c_reducer_min_identity_short
|
||||
_cilk_c_reducer_min_identity_uchar
|
||||
_cilk_c_reducer_min_identity_uint
|
||||
_cilk_c_reducer_min_identity_ulong
|
||||
_cilk_c_reducer_min_identity_ulonglong
|
||||
_cilk_c_reducer_min_identity_unsigned
|
||||
_cilk_c_reducer_min_identity_ushort
|
||||
_cilk_c_reducer_min_identity_wchar_t
|
||||
_cilk_c_reducer_min_index_identity_char
|
||||
_cilk_c_reducer_min_index_identity_double
|
||||
_cilk_c_reducer_min_index_identity_float
|
||||
_cilk_c_reducer_min_index_identity_int
|
||||
_cilk_c_reducer_min_index_identity_long
|
||||
_cilk_c_reducer_min_index_identity_longdouble
|
||||
_cilk_c_reducer_min_index_identity_longlong
|
||||
_cilk_c_reducer_min_index_identity_schar
|
||||
_cilk_c_reducer_min_index_identity_short
|
||||
_cilk_c_reducer_min_index_identity_uchar
|
||||
_cilk_c_reducer_min_index_identity_uint
|
||||
_cilk_c_reducer_min_index_identity_ulong
|
||||
_cilk_c_reducer_min_index_identity_ulonglong
|
||||
_cilk_c_reducer_min_index_identity_unsigned
|
||||
_cilk_c_reducer_min_index_identity_ushort
|
||||
_cilk_c_reducer_min_index_identity_wchar_t
|
||||
_cilk_c_reducer_min_index_reduce_char
|
||||
_cilk_c_reducer_min_index_reduce_double
|
||||
_cilk_c_reducer_min_index_reduce_float
|
||||
_cilk_c_reducer_min_index_reduce_int
|
||||
_cilk_c_reducer_min_index_reduce_long
|
||||
_cilk_c_reducer_min_index_reduce_longdouble
|
||||
_cilk_c_reducer_min_index_reduce_longlong
|
||||
_cilk_c_reducer_min_index_reduce_schar
|
||||
_cilk_c_reducer_min_index_reduce_short
|
||||
_cilk_c_reducer_min_index_reduce_uchar
|
||||
_cilk_c_reducer_min_index_reduce_uint
|
||||
_cilk_c_reducer_min_index_reduce_ulong
|
||||
_cilk_c_reducer_min_index_reduce_ulonglong
|
||||
_cilk_c_reducer_min_index_reduce_unsigned
|
||||
_cilk_c_reducer_min_index_reduce_ushort
|
||||
_cilk_c_reducer_min_index_reduce_wchar_t
|
||||
_cilk_c_reducer_min_reduce_char
|
||||
_cilk_c_reducer_min_reduce_double
|
||||
_cilk_c_reducer_min_reduce_float
|
||||
_cilk_c_reducer_min_reduce_int
|
||||
_cilk_c_reducer_min_reduce_long
|
||||
_cilk_c_reducer_min_reduce_longdouble
|
||||
_cilk_c_reducer_min_reduce_longlong
|
||||
_cilk_c_reducer_min_reduce_schar
|
||||
_cilk_c_reducer_min_reduce_short
|
||||
_cilk_c_reducer_min_reduce_uchar
|
||||
_cilk_c_reducer_min_reduce_uint
|
||||
_cilk_c_reducer_min_reduce_ulong
|
||||
_cilk_c_reducer_min_reduce_ulonglong
|
||||
_cilk_c_reducer_min_reduce_unsigned
|
||||
_cilk_c_reducer_min_reduce_ushort
|
||||
_cilk_c_reducer_min_reduce_wchar_t
|
||||
_cilk_c_reducer_opadd_identity_char
|
||||
_cilk_c_reducer_opadd_identity_double
|
||||
_cilk_c_reducer_opadd_identity_float
|
||||
_cilk_c_reducer_opadd_identity_int
|
||||
_cilk_c_reducer_opadd_identity_long
|
||||
_cilk_c_reducer_opadd_identity_longdouble
|
||||
_cilk_c_reducer_opadd_identity_longlong
|
||||
_cilk_c_reducer_opadd_identity_schar
|
||||
_cilk_c_reducer_opadd_identity_short
|
||||
_cilk_c_reducer_opadd_identity_uchar
|
||||
_cilk_c_reducer_opadd_identity_uint
|
||||
_cilk_c_reducer_opadd_identity_ulong
|
||||
_cilk_c_reducer_opadd_identity_ulonglong
|
||||
_cilk_c_reducer_opadd_identity_unsigned
|
||||
_cilk_c_reducer_opadd_identity_ushort
|
||||
_cilk_c_reducer_opadd_identity_wchar_t
|
||||
_cilk_c_reducer_opadd_reduce_char
|
||||
_cilk_c_reducer_opadd_reduce_double
|
||||
_cilk_c_reducer_opadd_reduce_float
|
||||
_cilk_c_reducer_opadd_reduce_int
|
||||
_cilk_c_reducer_opadd_reduce_long
|
||||
_cilk_c_reducer_opadd_reduce_longdouble
|
||||
_cilk_c_reducer_opadd_reduce_longlong
|
||||
_cilk_c_reducer_opadd_reduce_schar
|
||||
_cilk_c_reducer_opadd_reduce_short
|
||||
_cilk_c_reducer_opadd_reduce_uchar
|
||||
_cilk_c_reducer_opadd_reduce_uint
|
||||
_cilk_c_reducer_opadd_reduce_ulong
|
||||
_cilk_c_reducer_opadd_reduce_ulonglong
|
||||
_cilk_c_reducer_opadd_reduce_unsigned
|
||||
_cilk_c_reducer_opadd_reduce_ushort
|
||||
_cilk_c_reducer_opadd_reduce_wchar_t
|
||||
_cilk_c_reducer_opand_identity_char
|
||||
_cilk_c_reducer_opand_identity_int
|
||||
_cilk_c_reducer_opand_identity_long
|
||||
_cilk_c_reducer_opand_identity_longlong
|
||||
_cilk_c_reducer_opand_identity_schar
|
||||
_cilk_c_reducer_opand_identity_short
|
||||
_cilk_c_reducer_opand_identity_uchar
|
||||
_cilk_c_reducer_opand_identity_uint
|
||||
_cilk_c_reducer_opand_identity_ulong
|
||||
_cilk_c_reducer_opand_identity_ulonglong
|
||||
_cilk_c_reducer_opand_identity_unsigned
|
||||
_cilk_c_reducer_opand_identity_ushort
|
||||
_cilk_c_reducer_opand_identity_wchar_t
|
||||
_cilk_c_reducer_opand_reduce_char
|
||||
_cilk_c_reducer_opand_reduce_int
|
||||
_cilk_c_reducer_opand_reduce_long
|
||||
_cilk_c_reducer_opand_reduce_longlong
|
||||
_cilk_c_reducer_opand_reduce_schar
|
||||
_cilk_c_reducer_opand_reduce_short
|
||||
_cilk_c_reducer_opand_reduce_uchar
|
||||
_cilk_c_reducer_opand_reduce_uint
|
||||
_cilk_c_reducer_opand_reduce_ulong
|
||||
_cilk_c_reducer_opand_reduce_ulonglong
|
||||
_cilk_c_reducer_opand_reduce_unsigned
|
||||
_cilk_c_reducer_opand_reduce_ushort
|
||||
_cilk_c_reducer_opand_reduce_wchar_t
|
||||
_cilk_c_reducer_opmul_identity_char
|
||||
_cilk_c_reducer_opmul_identity_double
|
||||
_cilk_c_reducer_opmul_identity_float
|
||||
_cilk_c_reducer_opmul_identity_int
|
||||
_cilk_c_reducer_opmul_identity_long
|
||||
_cilk_c_reducer_opmul_identity_longdouble
|
||||
_cilk_c_reducer_opmul_identity_longlong
|
||||
_cilk_c_reducer_opmul_identity_schar
|
||||
_cilk_c_reducer_opmul_identity_short
|
||||
_cilk_c_reducer_opmul_identity_uchar
|
||||
_cilk_c_reducer_opmul_identity_uint
|
||||
_cilk_c_reducer_opmul_identity_ulong
|
||||
_cilk_c_reducer_opmul_identity_ulonglong
|
||||
_cilk_c_reducer_opmul_identity_unsigned
|
||||
_cilk_c_reducer_opmul_identity_ushort
|
||||
_cilk_c_reducer_opmul_identity_wchar_t
|
||||
_cilk_c_reducer_opmul_reduce_char
|
||||
_cilk_c_reducer_opmul_reduce_double
|
||||
_cilk_c_reducer_opmul_reduce_float
|
||||
_cilk_c_reducer_opmul_reduce_int
|
||||
_cilk_c_reducer_opmul_reduce_long
|
||||
_cilk_c_reducer_opmul_reduce_longdouble
|
||||
_cilk_c_reducer_opmul_reduce_longlong
|
||||
_cilk_c_reducer_opmul_reduce_schar
|
||||
_cilk_c_reducer_opmul_reduce_short
|
||||
_cilk_c_reducer_opmul_reduce_uchar
|
||||
_cilk_c_reducer_opmul_reduce_uint
|
||||
_cilk_c_reducer_opmul_reduce_ulong
|
||||
_cilk_c_reducer_opmul_reduce_ulonglong
|
||||
_cilk_c_reducer_opmul_reduce_unsigned
|
||||
_cilk_c_reducer_opmul_reduce_ushort
|
||||
_cilk_c_reducer_opmul_reduce_wchar_t
|
||||
_cilk_c_reducer_opor_identity_char
|
||||
_cilk_c_reducer_opor_identity_int
|
||||
_cilk_c_reducer_opor_identity_long
|
||||
_cilk_c_reducer_opor_identity_longlong
|
||||
_cilk_c_reducer_opor_identity_schar
|
||||
_cilk_c_reducer_opor_identity_short
|
||||
_cilk_c_reducer_opor_identity_uchar
|
||||
_cilk_c_reducer_opor_identity_uint
|
||||
_cilk_c_reducer_opor_identity_ulong
|
||||
_cilk_c_reducer_opor_identity_ulonglong
|
||||
_cilk_c_reducer_opor_identity_unsigned
|
||||
_cilk_c_reducer_opor_identity_ushort
|
||||
_cilk_c_reducer_opor_identity_wchar_t
|
||||
_cilk_c_reducer_opor_reduce_char
|
||||
_cilk_c_reducer_opor_reduce_int
|
||||
_cilk_c_reducer_opor_reduce_long
|
||||
_cilk_c_reducer_opor_reduce_longlong
|
||||
_cilk_c_reducer_opor_reduce_schar
|
||||
_cilk_c_reducer_opor_reduce_short
|
||||
_cilk_c_reducer_opor_reduce_uchar
|
||||
_cilk_c_reducer_opor_reduce_uint
|
||||
_cilk_c_reducer_opor_reduce_ulong
|
||||
_cilk_c_reducer_opor_reduce_ulonglong
|
||||
_cilk_c_reducer_opor_reduce_unsigned
|
||||
_cilk_c_reducer_opor_reduce_ushort
|
||||
_cilk_c_reducer_opor_reduce_wchar_t
|
||||
_cilk_c_reducer_opxor_identity_char
|
||||
_cilk_c_reducer_opxor_identity_int
|
||||
_cilk_c_reducer_opxor_identity_long
|
||||
_cilk_c_reducer_opxor_identity_longlong
|
||||
_cilk_c_reducer_opxor_identity_schar
|
||||
_cilk_c_reducer_opxor_identity_short
|
||||
_cilk_c_reducer_opxor_identity_uchar
|
||||
_cilk_c_reducer_opxor_identity_uint
|
||||
_cilk_c_reducer_opxor_identity_ulong
|
||||
_cilk_c_reducer_opxor_identity_ulonglong
|
||||
_cilk_c_reducer_opxor_identity_unsigned
|
||||
_cilk_c_reducer_opxor_identity_ushort
|
||||
_cilk_c_reducer_opxor_identity_wchar_t
|
||||
_cilk_c_reducer_opxor_reduce_char
|
||||
_cilk_c_reducer_opxor_reduce_int
|
||||
_cilk_c_reducer_opxor_reduce_long
|
||||
_cilk_c_reducer_opxor_reduce_longlong
|
||||
_cilk_c_reducer_opxor_reduce_schar
|
||||
_cilk_c_reducer_opxor_reduce_short
|
||||
_cilk_c_reducer_opxor_reduce_uchar
|
||||
_cilk_c_reducer_opxor_reduce_uint
|
||||
_cilk_c_reducer_opxor_reduce_ulong
|
||||
_cilk_c_reducer_opxor_reduce_ulonglong
|
||||
_cilk_c_reducer_opxor_reduce_unsigned
|
||||
_cilk_c_reducer_opxor_reduce_ushort
|
||||
_cilk_c_reducer_opxor_reduce_wchar_t
|
167
libcilkrts/runtime/metacall_impl.c
Normal file
167
libcilkrts/runtime/metacall_impl.c
Normal file
|
@ -0,0 +1,167 @@
|
|||
/* metacall_impl.c -*-C-*-
|
||||
*
|
||||
*************************************************************************
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2009-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
**************************************************************************/
|
||||
|
||||
#include "metacall_impl.h"
|
||||
|
||||
NOINLINE
|
||||
CILK_API_VOID
|
||||
__cilkrts_metacall(unsigned int tool, unsigned int code, void *data)
|
||||
{
|
||||
#ifdef ENABLE_NOTIFY_ZC_INTRINSIC
|
||||
// The metacall type, code and data are packed together into a single
|
||||
// struct which will be interpreted by the tool. This function is the
|
||||
// one and only use of a "cilkscreen_metacall" annotation
|
||||
metacall_data_t d = { tool, code, data };
|
||||
|
||||
// Note that Inspector uses probe mode, and is implementing the metacall
|
||||
// interface to force the runtime to run with a single worker. So
|
||||
// __cilkrts_metacall must use __notify_intrinsic instead of
|
||||
// __notify_zc_intrinsic
|
||||
__notify_intrinsic("cilkscreen_metacall", &d);
|
||||
#endif // ENABLE_NOTIFY_ZC_INTRINSIC
|
||||
}
|
||||
|
||||
int __cilkrts_running_under_sequential_ptool(void)
|
||||
{
|
||||
static int running_under_sequential_ptool = -1;
|
||||
volatile char c = ~0;
|
||||
|
||||
// If we haven't been called before, see if we're running under Cilkscreen
|
||||
// or Cilkview
|
||||
if (-1 == running_under_sequential_ptool)
|
||||
{
|
||||
// metacall #2 writes 0 in C if we are running under
|
||||
// a p-tools that requires serial execution, and is a
|
||||
// no-op otherwise
|
||||
//
|
||||
// Note that removing the volatile is required to prevent the compiler
|
||||
// from assuming that the value has not changed
|
||||
__cilkrts_metacall(METACALL_TOOL_SYSTEM,
|
||||
HYPER_ZERO_IF_SEQUENTIAL_PTOOL, (void *)&c);
|
||||
|
||||
running_under_sequential_ptool = (0 == c);
|
||||
}
|
||||
|
||||
return running_under_sequential_ptool;
|
||||
}
|
||||
|
||||
/*
|
||||
* __cilkrts_cilkscreen_establish_c_stack
|
||||
*
|
||||
* Notify Cilkscreen of the extent of the stack
|
||||
*/
|
||||
|
||||
void __cilkrts_cilkscreen_establish_c_stack(char *begin, char *end)
|
||||
{
|
||||
char *limits[2] = {begin, end};
|
||||
|
||||
__cilkrts_metacall(METACALL_TOOL_SYSTEM, HYPER_ESTABLISH_C_STACK, limits);
|
||||
}
|
||||
|
||||
#ifdef WORKSPAN // Workspan stuff - remove when we're sure what we can drop

/*
 * NOTE(review): every call in this section passes two arguments, while
 * __cilkrts_metacall() is declared with three (tool, code, data).  The
 * section presumably predates the "tool" parameter and is not expected to
 * compile if WORKSPAN is defined -- confirm the intended tool id before
 * enabling it.
 */

/* Issue the HYPER_WORKSPAN_START metacall. */
void __cilkview_workspan_start(void) {
    __cilkrts_metacall(HYPER_WORKSPAN_START, 0);
}

/* Issue the HYPER_WORKSPAN_STOP metacall. */
void __cilkview_workspan_stop(void) {
    __cilkrts_metacall(HYPER_WORKSPAN_STOP, 0);
}

/* Issue the HYPER_WORKSPAN_DUMP metacall, passing str as the payload. */
void __cilkview_workspan_dump(const char *str) {
    __cilkrts_metacall(HYPER_WORKSPAN_DUMP, (void*)str);
}

/* Issue the HYPER_WORKSPAN_RESET metacall. */
void __cilkview_workspan_reset(void) {
    __cilkrts_metacall(HYPER_WORKSPAN_RESET, 0);
}

/* Issue the HYPER_USE_DEFAULT_GRAIN metacall. */
void __cilkview_use_default_grain(void) {
    __cilkrts_metacall(HYPER_USE_DEFAULT_GRAIN, 0);
}

/*
 * Issue the HYPER_WORKSPAN_QUERY metacall.  The payload is a two-element
 * array holding the values pointer and the address of size.
 */
void __cilkview_get_workspan_data(unsigned long long *values, int size)
{
    void *data[2];

    /* reset counters to zero in case we are not running under
       a p-tool.  Only values[0] is cleared here. */
    values[0] = 0;

    data[0] = (void*) values;
    data[1] = (void*) &size;
    __cilkrts_metacall(HYPER_WORKSPAN_QUERY, &data);
}

/*
 * Clear *flag, then issue the HYPER_WORKSPAN_CONNECTED metacall with flag
 * as the payload.
 */
void __cilkview_workspan_connected (int *flag) {
    *flag = 0;
    __cilkrts_metacall(HYPER_WORKSPAN_CONNECTED, (void *)flag);
}

/* Issue the HYPER_WORKSPAN_SUSPEND metacall. */
void __cilkview_workspan_suspend(void) {
    __cilkrts_metacall(HYPER_WORKSPAN_SUSPEND, 0);
}

/* Issue the HYPER_WORKSPAN_RESUME metacall. */
void __cilkview_workspan_resume(void) {
    __cilkrts_metacall(HYPER_WORKSPAN_RESUME, 0);
}

/* deprecated interfaces */

/* Same metacall as __cilkview_workspan_start. */
void __cilkometer_workspan_start(void) {
    __cilkrts_metacall(HYPER_WORKSPAN_START, 0);
}

/* Same metacall as __cilkview_workspan_stop. */
void __cilkometer_workspan_stop(void) {
    __cilkrts_metacall(HYPER_WORKSPAN_STOP, 0);
}

/* Same metacall as __cilkview_workspan_dump. */
void __cilkometer_workspan_dump(const char *str) {
    __cilkrts_metacall(HYPER_WORKSPAN_DUMP, (void*)str);
}

/* Same metacall as __cilkview_workspan_reset. */
void __cilkometer_workspan_reset(void) {
    __cilkrts_metacall(HYPER_WORKSPAN_RESET, 0);
}

#endif // WORKSPAN
|
||||
|
||||
/* End metacall_impl.c */
|
123
libcilkrts/runtime/metacall_impl.h
Normal file
123
libcilkrts/runtime/metacall_impl.h
Normal file
|
@ -0,0 +1,123 @@
|
|||
/* metacall_impl.h -*-C++-*-
|
||||
*
|
||||
*************************************************************************
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2010-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
/**
|
||||
* @file metacall_impl.h
|
||||
*
|
||||
* @brief Meta-function calls to be used within the Cilk runtime system.
|
||||
*
|
||||
* These differ from the macros in cilkscreen.h and cilkview.h because they go
|
||||
* through the __cilkrts_metacall interface, which ensures that the operation
|
||||
* is performed even when instrumentation is disabled.
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_CILKRTS_METACALL_H
|
||||
#define INCLUDED_CILKRTS_METACALL_H
|
||||
|
||||
#include "rts-common.h"
|
||||
#include <internal/metacall.h>
|
||||
#include <cilk/common.h>
|
||||
|
||||
__CILKRTS_BEGIN_EXTERN_C
|
||||
|
||||
/**
|
||||
* This function is effectively an unconditional call from the runtime into
|
||||
* a tool. It is used for operations that must be performed by the tool,
|
||||
* even when the tool is not instrumenting. For example, Cilkscreen always
|
||||
* recognizes the address of this function and performs the action specified
|
||||
* in the contained metadata.
|
||||
*
|
||||
* Note that this function MUST NOT BE INLINED within the runtime. This must
|
||||
* be the ONLY instance of the cilkscreen_metacall metadata.
|
||||
*/
|
||||
CILK_API_VOID
|
||||
__cilkrts_metacall(unsigned int tool, unsigned int code, void *data);
|
||||
|
||||
/**
|
||||
* Return non-zero if running under Cilkscreen or Cilkview
|
||||
*/
|
||||
COMMON_PORTABLE
|
||||
int __cilkrts_running_under_sequential_ptool(void);
|
||||
|
||||
/**
|
||||
* Disable Cilkscreen instrumentation
|
||||
*/
|
||||
#define __cilkrts_cilkscreen_disable_instrumentation() \
|
||||
__cilkrts_metacall(METACALL_TOOL_SYSTEM, HYPER_DISABLE_INSTRUMENTATION, 0)
|
||||
|
||||
/**
|
||||
* Enable Cilkscreen instrumentation
|
||||
*/
|
||||
#define __cilkrts_cilkscreen_enable_instrumentation() \
|
||||
__cilkrts_metacall(METACALL_TOOL_SYSTEM, HYPER_ENABLE_INSTRUMENTATION, 0)
|
||||
|
||||
/**
|
||||
* Set the worker on entering runtime.
|
||||
*
|
||||
* @attention Deprecated in favor of __cilkrts_cilkscreen_ignore_block. The
|
||||
* begin/enter pairs in the current metadata mean Cilkscreen no longer has to
|
||||
* have improper knowledge of the __cilkrts_worker or __cilkrts_stack_frame
|
||||
* structures.
|
||||
*/
|
||||
#define __cilkrts_cilkscreen_establish_worker(w) \
|
||||
__cilkrts_metacall(METACALL_TOOL_SYSTEM, HYPER_ESTABLISH_WORKER, w)
|
||||
|
||||
/**
|
||||
* Notify Cilkscreen of the extent of the stack.
|
||||
*
|
||||
* @param[in] begin Start (low address) of stack
|
||||
* @param[in] end One past high address of stack
|
||||
*/
|
||||
void __cilkrts_cilkscreen_establish_c_stack(char *begin, char *end);
|
||||
|
||||
/**
 * Tell tools to ignore a block of memory - currently the global state and
 * memory allocated for workers.
 *
 * Expands to exactly one statement (do { ... } while (0)), so it can be used
 * as the sole body of an if/else and must be followed by a semicolon.  Each
 * argument is evaluated once.
 *
 * @param[in] _begin First bound of the block; converted to void *.
 * @param[in] _end   Second bound of the block; converted to void *.
 */
#define __cilkrts_cilkscreen_ignore_block(_begin, _end)             \
    do {                                                            \
        void *ignore_block_bounds_[2] = { (_begin), (_end) };       \
        __cilkrts_metacall(METACALL_TOOL_SYSTEM,                    \
                           HYPER_IGNORE_MEMORY_BLOCK,               \
                           ignore_block_bounds_);                   \
    } while (0)
|
||||
|
||||
__CILKRTS_END_EXTERN_C
|
||||
|
||||
#endif /* ! defined(INCLUDED_CILKRTS_METACALL_H) */
|
508
libcilkrts/runtime/os-unix.c
Normal file
508
libcilkrts/runtime/os-unix.c
Normal file
|
@ -0,0 +1,508 @@
|
|||
/* os-unix.c -*-C-*-
|
||||
*
|
||||
*************************************************************************
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2009-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
**************************************************************************/
|
||||
|
||||
#ifdef __linux__
|
||||
// define _GNU_SOURCE before *any* #include.
|
||||
// Even <stdint.h> will break later #includes if this macro is not
|
||||
// already defined when it is #included.
|
||||
# define _GNU_SOURCE
|
||||
#endif
|
||||
|
||||
#include "os.h"
|
||||
#include "bug.h"
|
||||
#include "cilk_malloc.h"
|
||||
#include <internal/abi.h>
|
||||
|
||||
#if defined __linux__
|
||||
# include <sys/sysinfo.h>
|
||||
# include <sys/syscall.h>
|
||||
#elif defined __APPLE__
|
||||
# include <sys/sysctl.h>
|
||||
// Uses sysconf(_SC_NPROCESSORS_ONLN) in verbose output
|
||||
#elif defined __FreeBSD__
|
||||
// No additional include files
|
||||
#elif defined __CYGWIN__
|
||||
// Cygwin on Windows - no additional include files
|
||||
#elif defined __VXWORKS__
|
||||
# include <vxWorks.h>
|
||||
# include <vxCpuLib.h>
|
||||
# include <taskLib.h>
|
||||
#else
|
||||
# error "Unsupported OS"
|
||||
#endif
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <stddef.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <pthread.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
|
||||
|
||||
// /* Thread-local storage */
|
||||
// #ifdef _WIN32
|
||||
// typedef unsigned cilkos_tls_key_t;
|
||||
// #else
|
||||
// typedef pthread_key_t cilkos_tls_key_t;
|
||||
// #endif
|
||||
// cilkos_tls_key_t cilkos_allocate_tls_key();
|
||||
// void cilkos_set_tls_pointer(cilkos_tls_key_t key, void* ptr);
|
||||
// void* cilkos_get_tls_pointer(cilkos_tls_key_t key);
|
||||
|
||||
#if !defined CILK_WORKER_TLS
|
||||
static int cilk_keys_defined;
|
||||
static pthread_key_t worker_key, pedigree_leaf_key, tbb_interop_key;
|
||||
|
||||
#if SUPPORT_GET_CURRENT_FIBER > 0
|
||||
static pthread_key_t fiber_key;
|
||||
#endif
|
||||
|
||||
static void *serial_worker;
|
||||
|
||||
|
||||
// This destructor is called when a pthread dies to deallocate the
|
||||
// pedigree node.
|
||||
static void __cilkrts_pedigree_leaf_destructor(void* pedigree_tls_ptr)
|
||||
{
|
||||
__cilkrts_pedigree* pedigree_tls
|
||||
= (__cilkrts_pedigree*)pedigree_tls_ptr;
|
||||
if (pedigree_tls) {
|
||||
// Assert that we have either one or two nodes
|
||||
// left in the pedigree chain.
|
||||
// If we have more, then something is going wrong...
|
||||
CILK_ASSERT(!pedigree_tls->parent || !pedigree_tls->parent->parent);
|
||||
__cilkrts_free(pedigree_tls);
|
||||
}
|
||||
}
|
||||
|
||||
void __cilkrts_init_tls_variables(void)
|
||||
{
|
||||
int status;
|
||||
/* This will be called once in serial execution before any
|
||||
Cilk parallelism so we do not need to worry about races
|
||||
on cilk_keys_defined. */
|
||||
if (cilk_keys_defined)
|
||||
return;
|
||||
status = pthread_key_create(&worker_key, NULL);
|
||||
CILK_ASSERT (status == 0);
|
||||
status = pthread_key_create(&pedigree_leaf_key,
|
||||
__cilkrts_pedigree_leaf_destructor);
|
||||
CILK_ASSERT (status == 0);
|
||||
status = pthread_key_create(&tbb_interop_key, NULL);
|
||||
CILK_ASSERT (status == 0);
|
||||
|
||||
#if SUPPORT_GET_CURRENT_FIBER > 0
|
||||
status = pthread_key_create(&fiber_key, NULL);
|
||||
CILK_ASSERT (status == 0);
|
||||
#endif
|
||||
cilk_keys_defined = 1;
|
||||
return;
|
||||
}
|
||||
|
||||
COMMON_SYSDEP
|
||||
void* cilkos_get_current_thread_id(void)
|
||||
{
|
||||
return (void*)pthread_self();
|
||||
}
|
||||
|
||||
|
||||
CILK_ABI_WORKER_PTR __cilkrts_get_tls_worker()
|
||||
{
|
||||
if (__builtin_expect(cilk_keys_defined, 1))
|
||||
return (__cilkrts_worker *)pthread_getspecific(worker_key);
|
||||
else
|
||||
return serial_worker;
|
||||
|
||||
}
|
||||
|
||||
CILK_ABI_WORKER_PTR __cilkrts_get_tls_worker_fast()
|
||||
{
|
||||
return (__cilkrts_worker *)pthread_getspecific(worker_key);
|
||||
}
|
||||
|
||||
COMMON_SYSDEP
|
||||
__cilk_tbb_stack_op_thunk *__cilkrts_get_tls_tbb_interop(void)
|
||||
{
|
||||
if (__builtin_expect(cilk_keys_defined, 1))
|
||||
return (__cilk_tbb_stack_op_thunk *)
|
||||
pthread_getspecific(tbb_interop_key);
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
// This counter should be updated atomically.
|
||||
static int __cilkrts_global_pedigree_tls_counter = -1;
|
||||
|
||||
COMMON_SYSDEP
|
||||
__cilkrts_pedigree *__cilkrts_get_tls_pedigree_leaf(int create_new)
|
||||
{
|
||||
__cilkrts_pedigree *pedigree_tls;
|
||||
if (__builtin_expect(cilk_keys_defined, 1)) {
|
||||
pedigree_tls =
|
||||
(struct __cilkrts_pedigree *)pthread_getspecific(pedigree_leaf_key);
|
||||
}
|
||||
else {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!pedigree_tls && create_new) {
|
||||
// This call creates two nodes, X and Y.
|
||||
// X == pedigree_tls[0] is the leaf node, which gets copied
|
||||
// in and out of a user worker w when w binds and unbinds.
|
||||
// Y == pedigree_tls[1] is the root node,
|
||||
// which is a constant node that represents the user worker
|
||||
// thread w.
|
||||
pedigree_tls = (__cilkrts_pedigree*)
|
||||
__cilkrts_malloc(2 * sizeof(__cilkrts_pedigree));
|
||||
|
||||
// This call sets the TLS pointer to the new node.
|
||||
__cilkrts_set_tls_pedigree_leaf(pedigree_tls);
|
||||
|
||||
pedigree_tls[0].rank = 0;
|
||||
pedigree_tls[0].parent = &pedigree_tls[1];
|
||||
|
||||
// Create Y, whose rank begins as the global counter value.
|
||||
pedigree_tls[1].rank =
|
||||
__sync_add_and_fetch(&__cilkrts_global_pedigree_tls_counter, 1);
|
||||
|
||||
pedigree_tls[1].parent = NULL;
|
||||
CILK_ASSERT(pedigree_tls[1].rank != -1);
|
||||
}
|
||||
return pedigree_tls;
|
||||
}
|
||||
|
||||
#if SUPPORT_GET_CURRENT_FIBER > 0
|
||||
COMMON_SYSDEP
|
||||
cilk_fiber_sysdep* cilkos_get_tls_cilk_fiber(void)
|
||||
{
|
||||
if (__builtin_expect(cilk_keys_defined, 1))
|
||||
return (cilk_fiber_sysdep *)pthread_getspecific(fiber_key);
|
||||
else
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
COMMON_SYSDEP
|
||||
void __cilkrts_set_tls_worker(__cilkrts_worker *w)
|
||||
{
|
||||
if (__builtin_expect(cilk_keys_defined, 1)) {
|
||||
int status;
|
||||
status = pthread_setspecific(worker_key, w);
|
||||
CILK_ASSERT (status == 0);
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
serial_worker = w;
|
||||
}
|
||||
}
|
||||
|
||||
COMMON_SYSDEP
|
||||
void __cilkrts_set_tls_tbb_interop(__cilk_tbb_stack_op_thunk *t)
|
||||
{
|
||||
if (__builtin_expect(cilk_keys_defined, 1)) {
|
||||
int status;
|
||||
status = pthread_setspecific(tbb_interop_key, t);
|
||||
CILK_ASSERT (status == 0);
|
||||
return;
|
||||
}
|
||||
abort();
|
||||
}
|
||||
|
||||
COMMON_SYSDEP
|
||||
void __cilkrts_set_tls_pedigree_leaf(__cilkrts_pedigree* pedigree_leaf)
|
||||
{
|
||||
if (__builtin_expect(cilk_keys_defined, 1)) {
|
||||
int status;
|
||||
status = pthread_setspecific(pedigree_leaf_key, pedigree_leaf);
|
||||
CILK_ASSERT (status == 0);
|
||||
return;
|
||||
}
|
||||
abort();
|
||||
}
|
||||
|
||||
#if SUPPORT_GET_CURRENT_FIBER > 0
|
||||
COMMON_SYSDEP
|
||||
void cilkos_set_tls_cilk_fiber(cilk_fiber_sysdep* fiber)
|
||||
{
|
||||
if (__builtin_expect(cilk_keys_defined, 1)) {
|
||||
int status;
|
||||
status = pthread_setspecific(fiber_key, fiber);
|
||||
CILK_ASSERT (status == 0);
|
||||
return;
|
||||
}
|
||||
abort();
|
||||
}
|
||||
#endif
|
||||
|
||||
#else
|
||||
/* Variant built when CILK_WORKER_TLS is defined: the pthread-key based TLS
   above is compiled out, so there is nothing to initialize. */
void __cilkrts_init_tls_variables(void)
{
    /* Intentionally empty. */
}
|
||||
#endif
|
||||
|
||||
#if defined (__linux__) && ! defined(ANDROID)
|
||||
/*
 * Return the kernel thread id of the caller (not the process id).  With MIC
 * offload, several threads may enter Cilk, each with its own affinity, so
 * the per-thread id is the one needed.
 */
static pid_t linux_gettid(void)
{
    const pid_t tid = syscall(SYS_gettid);

    return tid;
}
|
||||
|
||||
/*
|
||||
* On Linux we look at the thread affinity mask and restrict ourself to one
|
||||
* thread for each of the hardware contexts to which we are bound.
|
||||
* Therefore if user does
|
||||
* % taskset 0-1 cilkProgram
|
||||
* # restrict execution to hardware contexts zero and one
|
||||
* the Cilk program will only use two threads even if it is running on a
|
||||
* machine that has 32 hardware contexts.
|
||||
* This is the right thing to do, because the threads are restricted to two
|
||||
* hardware contexts by the affinity mask set by taskset, and if we were to
|
||||
* create extra threads they would simply oversubscribe the hardware resources
|
||||
* we can use.
|
||||
* This is particularly important on MIC in offload mode, where the affinity
|
||||
* mask is set by the offload library to force the offload code away from
|
||||
* cores that have offload support threads running on them.
|
||||
*/
|
||||
static int linux_get_affinity_count (int tid)
|
||||
{
|
||||
cpu_set_t process_mask;
|
||||
|
||||
// Extract the thread affinity mask
|
||||
int err = sched_getaffinity (tid, sizeof(process_mask),&process_mask);
|
||||
|
||||
if (0 != err)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
// We have extracted the mask OK, so now we can count the number of threads
|
||||
// in it. This is linear in the maximum number of CPUs available, We
|
||||
// could do a logarithmic version, if we assume the format of the mask,
|
||||
// but it's not really worth it. We only call this at thread startup
|
||||
// anyway.
|
||||
int available_procs = 0;
|
||||
int i;
|
||||
for (i = 0; i < CPU_SETSIZE; i++)
|
||||
{
|
||||
if (CPU_ISSET(i, &process_mask))
|
||||
{
|
||||
available_procs++;
|
||||
}
|
||||
}
|
||||
|
||||
return available_procs;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* __cilkrts_hardware_cpu_count
|
||||
*
|
||||
* Returns the number of available CPUs on this hardware. This is architecture-
|
||||
* specific.
|
||||
*/
|
||||
|
||||
COMMON_SYSDEP int __cilkrts_hardware_cpu_count(void)
{
#if defined ANDROID
    // Android: number of online processors.
    return sysconf (_SC_NPROCESSORS_ONLN);
#elif defined __MIC__
    /// HACK: Usually, the 3rd and 4th hyperthreads are not beneficial
    /// on KNC.  Also, ignore the last core.
    int online = sysconf (_SC_NPROCESSORS_ONLN);

    return online/2 - 2;
#elif defined __linux__
    // Prefer the size of this thread's affinity mask.  Fall back to the
    // online processor count when the mask could not be read.
    const int affinity_count = linux_get_affinity_count(linux_gettid());

    if (0 != affinity_count)
        return affinity_count;
    return sysconf (_SC_NPROCESSORS_ONLN);
#elif defined __APPLE__
    // Ask sysctl for hw.ncpu.
    int cmd[2] = { CTL_HW, HW_NCPU };
    int count = 0;
    size_t len = sizeof count;
    int status = sysctl(cmd, 2, &count, &len, 0, 0);

    assert(status >= 0);
    assert((unsigned)count == count);

    return count;
#elif defined __FreeBSD__ || defined __CYGWIN__
    // Just the number of online processors.
    return sysconf(_SC_NPROCESSORS_ONLN);
#elif defined __VXWORKS__
    // One bit per enabled CPU in the value returned by vxCpuEnabledGet().
    return __builtin_popcount( vxCpuEnabledGet() );
#else
#error "Unknown architecture"
#endif
}
|
||||
|
||||
/* Sleep briefly: usleep(1) everywhere except VxWorks, which delays the task
   by one tick via taskDelay(1). */
COMMON_SYSDEP void __cilkrts_sleep(void)
{
#ifndef __VXWORKS__
    usleep(1);
#else
    taskDelay(1);
#endif
}
|
||||
|
||||
/* Give up the processor so another thread can run.
 *
 * Every platform except MIC yields through the POSIX sched_yield().  The
 * previous fallback branch called pthread_yield(), a non-standard interface
 * that glibc has deprecated in favor of sched_yield(), so all yielding
 * platforms now use the same standard call.
 */
COMMON_SYSDEP void __cilkrts_yield(void)
{
#if __APPLE__ || __FreeBSD__ || __VXWORKS__ || !defined(__MIC__)
    sched_yield();
#else
    // On MIC, pthread_yield() really trashes things.  Arch's measurements
    // showed that calling _mm_delay_32() (or doing nothing) was a better
    // option.  Delaying 1024 clock cycles is a reasonable compromise between
    // giving up the processor and latency starting up when work becomes
    // available.
    _mm_delay_32(1024);
#endif
}
|
||||
|
||||
/* Copy the value of environment variable 'varname' into 'value'.
 *
 * value    Destination buffer; must not be NULL.
 * vallen   Size of 'value' in bytes; may be 0.
 * varname  Name of the variable to look up; must not be NULL.
 *
 * Returns the length of the value (excluding the terminator) when the value
 * and its terminator fit in 'vallen' bytes.  When they do not fit, 'value'
 * is left untouched and the required size (length + 1) is returned.  When
 * the variable is not set, 'value' becomes the empty string (if there is
 * room for the terminator) and 0 is returned.
 */
COMMON_SYSDEP __STDNS size_t cilkos_getenv(char* value, __STDNS size_t vallen,
                                           const char* varname)
{
    CILK_ASSERT(value);
    CILK_ASSERT(varname);

    const char* envstr = getenv(varname);
    if (!envstr)
    {
        // Only write the terminator if the buffer has room for it.
        if (vallen > 0)
            value[0] = '\0';
        return 0;
    }

    size_t len = strlen(envstr);

    // len + 1 bytes are needed.  Compare without subtracting from vallen:
    // "vallen - 1" wraps to a huge value when vallen is 0 and would let
    // the copy below overrun the buffer.
    if (len >= vallen)
        return len + 1;

    memcpy(value, envstr, len + 1);
    return len;
}
|
||||
|
||||
/*
|
||||
* Unrecoverable error: Print an error message and abort execution.
|
||||
*/
|
||||
COMMON_SYSDEP void cilkos_error(const char *fmt, ...)
|
||||
{
|
||||
va_list l;
|
||||
fflush(NULL);
|
||||
fprintf(stderr, "Cilk error: ");
|
||||
va_start(l, fmt);
|
||||
vfprintf(stderr, fmt, l);
|
||||
va_end(l);
|
||||
fprintf(stderr, "Exiting.\n");
|
||||
fflush(stderr);
|
||||
|
||||
abort();
|
||||
}
|
||||
|
||||
/*
|
||||
* Print a warning message and return.
|
||||
*/
|
||||
COMMON_SYSDEP void cilkos_warning(const char *fmt, ...)
|
||||
{
|
||||
va_list l;
|
||||
fflush(NULL);
|
||||
fprintf(stderr, "Cilk warning: ");
|
||||
va_start(l, fmt);
|
||||
vfprintf(stderr, fmt, l);
|
||||
va_end(l);
|
||||
fflush(stderr);
|
||||
}
|
||||
|
||||
/* Constructor run when the library is loaded: sets up the TLS variables.
   (The debugger "runtime loaded" notification that used to be issued here
   is disabled.) */
static void __attribute__((constructor)) init_once(void)
{
    __cilkrts_init_tls_variables();
}
|
||||
|
||||
|
||||
#define PAGE 4096
#define CILK_MIN_STACK_SIZE (4*PAGE)
// Default size for the stacks that we create in Cilk for Unix.
#define CILK_DEFAULT_STACK_SIZE 0x100000

// The default must itself be page aligned.  Check it at compile time rather
// than on every call.
#if (CILK_DEFAULT_STACK_SIZE % PAGE) != 0
# error "CILK_DEFAULT_STACK_SIZE must be a multiple of PAGE"
#endif

/*
 * Convert the user's specified stack size into a "reasonable" value
 * for this OS.
 *
 *   0                          -> CILK_DEFAULT_STACK_SIZE
 *   1 .. CILK_MIN_STACK_SIZE   -> CILK_MIN_STACK_SIZE
 *   anything larger            -> rounded up to a multiple of PAGE
 *
 * If rounding up would exceed the range of size_t, the value is rounded
 * down to the largest page multiple instead of wrapping around to 0.
 */
size_t cilkos_validate_stack_size(size_t specified_stack_size) {
    size_t remainder;
    size_t rounded_down;

    // Zero means "not specified": use the default.
    if (specified_stack_size == 0)
        return CILK_DEFAULT_STACK_SIZE;

    // Round values in between 0 and CILK_MIN_STACK_SIZE up to
    // CILK_MIN_STACK_SIZE.
    if (specified_stack_size <= CILK_MIN_STACK_SIZE)
        return CILK_MIN_STACK_SIZE;

    remainder = specified_stack_size % PAGE;
    if (remainder == 0)
        return specified_stack_size;

    // Round up to the next page boundary, unless adding a page would wrap.
    rounded_down = specified_stack_size - remainder;
    if (rounded_down > (size_t)-1 - PAGE)
        return rounded_down;
    return rounded_down + PAGE;
}
|
||||
|
||||
/* Atomically add x to *p and return the updated value of *p. */
long cilkos_atomic_add(volatile long* p, long x)
{
    const long updated = __sync_add_and_fetch(p, x);

    return updated;
}
|
||||
|
||||
/* End os-unix.c */
|
236
libcilkrts/runtime/os.h
Normal file
236
libcilkrts/runtime/os.h
Normal file
|
@ -0,0 +1,236 @@
|
|||
/* os.h -*-C++-*-
|
||||
*
|
||||
*************************************************************************
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2009-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
**************************************************************************/
|
||||
|
||||
/**
|
||||
* @file os.h
|
||||
*
|
||||
* @brief Low-level operating-system dependent facilities, not dependent on
|
||||
* any Cilk facilities.
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_OS_DOT_H
|
||||
#define INCLUDED_OS_DOT_H
|
||||
|
||||
#include "rts-common.h"
|
||||
#include "cilk/common.h"
|
||||
#include "cilk-tbb-interop.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
# include <cstddef>
|
||||
#else
|
||||
# include <stddef.h>
|
||||
#endif
|
||||
|
||||
__CILKRTS_BEGIN_EXTERN_C
|
||||
|
||||
|
||||
// /* Thread-local storage */
|
||||
// #ifdef _WIN32
|
||||
// typedef unsigned cilkos_tls_key_t;
|
||||
// #else
|
||||
// typedef pthread_key_t cilkos_tls_key_t;
|
||||
// #endif
|
||||
// cilkos_tls_key_t cilkos_allocate_tls_key();
|
||||
// void cilkos_set_tls_pointer(cilkos_tls_key_t key, void* ptr);
|
||||
// void* cilkos_get_tls_pointer(cilkos_tls_key_t key);
|
||||
|
||||
/* The RTS assumes that some thread-local state exists that stores the
|
||||
worker and reducer map currently associated with a thread. These routines
|
||||
manipulate this state. */
|
||||
|
||||
/** @brief Thread-local state for cilk fibers. */
|
||||
typedef struct cilk_fiber_sysdep cilk_fiber_sysdep;
|
||||
|
||||
/** @brief Initialize all TLS variables for Cilk. */
|
||||
COMMON_SYSDEP void __cilkrts_init_tls_variables(void);
|
||||
|
||||
/** @brief Set worker struct in TLS. */
|
||||
COMMON_SYSDEP
|
||||
void __cilkrts_set_tls_worker(__cilkrts_worker *w) cilk_nothrow;
|
||||
|
||||
/** @brief Get stack_op for TBB-interop structures from TLS. */
|
||||
COMMON_SYSDEP
|
||||
__cilk_tbb_stack_op_thunk *__cilkrts_get_tls_tbb_interop(void);
|
||||
|
||||
/** @brief Set stack_op for TBB-interop structures in TLS. */
|
||||
COMMON_SYSDEP
|
||||
void __cilkrts_set_tls_tbb_interop(__cilk_tbb_stack_op_thunk *t);
|
||||
|
||||
/**
|
||||
* @brief Get the pointer to the pedigree leaf node from TLS.
|
||||
*
|
||||
* Function to get a pointer to the thread's pedigree leaf node. This
|
||||
* pointer can be NULL.
|
||||
*/
|
||||
COMMON_SYSDEP
|
||||
__cilkrts_pedigree * __cilkrts_get_tls_pedigree_leaf(int create_new);
|
||||
|
||||
/**
|
||||
* @brief Sets the pointer to the pedigree leaf node in TLS.
|
||||
*
|
||||
* If the previous pointer value was not NULL, it is the caller's
|
||||
* responsibility to ensure that previous pointer value is saved and
|
||||
* freed.
|
||||
*
|
||||
* @param pedigree_leaf The leaf node to store into TLS.
|
||||
*/
|
||||
COMMON_SYSDEP
|
||||
void __cilkrts_set_tls_pedigree_leaf(__cilkrts_pedigree* pedigree_leaf);
|
||||
|
||||
|
||||
#if SUPPORT_GET_CURRENT_FIBER > 0
|
||||
/**
|
||||
* @brief Get the cilk_fiber from TLS.
|
||||
*/
|
||||
COMMON_SYSDEP
|
||||
cilk_fiber_sysdep* cilkos_get_tls_cilk_fiber(void);
|
||||
|
||||
/**
|
||||
* @brief Set the cilk_fiber in TLS.
|
||||
*
|
||||
* @param fiber The fiber to store into TLS.
|
||||
*/
|
||||
COMMON_SYSDEP
|
||||
void cilkos_set_tls_cilk_fiber(cilk_fiber_sysdep* fiber);
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @brief Function for returning the current thread id.
|
||||
* @warning This function is useful for debugging purposes only.
|
||||
*/
|
||||
COMMON_SYSDEP
|
||||
void* cilkos_get_current_thread_id(void);
|
||||
|
||||
/** @brief Return number of CPUs supported by this hardware, using whatever definition
|
||||
of CPU is considered appropriate. */
|
||||
COMMON_SYSDEP int __cilkrts_hardware_cpu_count(void);
|
||||
|
||||
/** @brief Get current value of timer */
|
||||
COMMON_SYSDEP unsigned long long __cilkrts_getticks(void);
|
||||
|
||||
/* Machine instructions */
|
||||
|
||||
/// Stall execution for a few cycles.
|
||||
COMMON_SYSDEP void __cilkrts_short_pause(void);
|
||||
/// Wrapper for xchg instruction
|
||||
COMMON_SYSDEP int __cilkrts_xchg(volatile int *ptr, int x);
|
||||
|
||||
// Defines __cilkrts_fence - A macro for x86, a function call for other
|
||||
// architectures
|
||||
#include "os-fence.h"
|
||||
|
||||
COMMON_SYSDEP void __cilkrts_sleep(void); ///< Sleep briefly
|
||||
COMMON_SYSDEP void __cilkrts_yield(void); ///< Yield quantum
|
||||
|
||||
/**
|
||||
* @brief Gets environment variable 'varname' and copy its value into 'value'.
|
||||
*
|
||||
* If the entire value, including the null terminator fits into 'vallen'
|
||||
* bytes, then returns the length of the value excluding the null. Otherwise,
|
||||
* leaves the contents of 'value' undefined and returns the number of
|
||||
* characters needed to store the environment variable's value, *including*
|
||||
* the null terminator.
|
||||
*
|
||||
* @param value Buffer to store value.
|
||||
* @param vallen Length of value buffer
|
||||
* @param varname Name of the environment variable.
|
||||
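 * @return Length of the variable's value (excluding the null) when it fits in 'value'; otherwise the number of bytes required, including the null. Returns 0 if the variable is not set.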
|
||||
*/
|
||||
COMMON_SYSDEP __STDNS size_t cilkos_getenv(char* value, __STDNS size_t vallen,
|
||||
const char* varname);
|
||||
|
||||
/**
|
||||
* @brief Unrecoverable error: Print an error message and abort execution.
|
||||
*/
|
||||
COMMON_SYSDEP void cilkos_error(const char *fmt, ...);
|
||||
|
||||
/**
|
||||
* @brief Print a warning message and return.
|
||||
*/
|
||||
COMMON_SYSDEP void cilkos_warning(const char *fmt, ...);
|
||||
|
||||
/**
|
||||
* @brief Convert the user's specified stack size into a "reasonable"
|
||||
* value for the current OS.
|
||||
*
|
||||
* @param specified_stack_size User-specified stack size.
|
||||
* @return New stack size value, modified for the OS.
|
||||
*/
|
||||
COMMON_SYSDEP size_t cilkos_validate_stack_size(size_t specified_stack_size);
|
||||
|
||||
/**
|
||||
* @brief Atomic addition: computes *p += x.
|
||||
*
|
||||
* @param p Pointer to value to update
|
||||
 * @param x Value to add to *p.
 * @return The value of *p after the addition.
|
||||
*/
|
||||
COMMON_SYSDEP long cilkos_atomic_add(volatile long* p, long x);
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
/**
|
||||
* @brief Windows-only low-level functions for processor groups.
|
||||
*/
|
||||
typedef struct _GROUP_AFFINITY GROUP_AFFINITY;
|
||||
|
||||
/**
|
||||
* @brief Probe the executing OS to see if it supports processor
|
||||
* groups. These functions are expected to be available in Windows 7
|
||||
* or later.
|
||||
*/
|
||||
void win_init_processor_groups(void);
|
||||
|
||||
unsigned long win_get_active_processor_count(unsigned short GroupNumber);
|
||||
unsigned short win_get_active_processor_group_count(void);
|
||||
int win_set_thread_group_affinity(/*HANDLE*/ void* hThread,
|
||||
const GROUP_AFFINITY *GroupAffinity,
|
||||
GROUP_AFFINITY* PreviousGroupAffinity);
|
||||
|
||||
/**
|
||||
* @brief Cleans up any state allocated in TLS.
|
||||
*
|
||||
* Only defined for Windows because Linux calls destructors for each
|
||||
* thread-local variable.
|
||||
*/
|
||||
void __cilkrts_per_thread_tls_cleanup(void);
|
||||
|
||||
#endif // _WIN32
|
||||
|
||||
__CILKRTS_END_EXTERN_C
|
||||
|
||||
#endif // ! defined(INCLUDED_OS_DOT_H)
|
193
libcilkrts/runtime/os_mutex-unix.c
Normal file
193
libcilkrts/runtime/os_mutex-unix.c
Normal file
|
@ -0,0 +1,193 @@
|
|||
/* os_mutex-unix.c -*-C-*-
|
||||
*
|
||||
*************************************************************************
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2009-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
**************************************************************************/
|
||||
|
||||
#include "os_mutex.h"
|
||||
#include "bug.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <errno.h>
|
||||
#include <pthread.h>
|
||||
|
||||
// contains notification macros for VTune.
|
||||
#include "cilk-ittnotify.h"
|
||||
|
||||
/*
|
||||
* OS Mutex functions.
|
||||
*
|
||||
* Not to be confused with the spinlock mutexes implemented in cilk_mutex.c
|
||||
*/
|
||||
|
||||
/// OS-level mutex: a thin wrapper around a pthreads mutex.
struct os_mutex {
    /// The underlying pthreads mutex.
    pthread_mutex_t mutex;
};
|
||||
|
||||
// Unix implementation of the global OS mutex. This will be created by the
|
||||
// first call to global_os_mutex_lock() and *NEVER* destroyed. On gcc-based
|
||||
// systems there's no way to guarantee the ordering of constructors and
|
||||
// destructors, so we can't be guaranteed that our destructor for a static
|
||||
// object will be called *after* any static destructors that may use Cilk
|
||||
// in the user's application
|
||||
static struct os_mutex *global_os_mutex = NULL;
|
||||
|
||||
/* Sometimes during shared library load malloc doesn't work.
|
||||
To handle that case, preallocate space for one mutex. */
|
||||
static struct os_mutex static_mutex;
|
||||
static int static_mutex_used;
|
||||
|
||||
struct os_mutex *__cilkrts_os_mutex_create(void)
|
||||
{
|
||||
int status;
|
||||
struct os_mutex *mutex = (struct os_mutex *)malloc(sizeof(struct os_mutex));
|
||||
pthread_mutexattr_t attr;
|
||||
|
||||
ITT_SYNC_CREATE(mutex, "OS Mutex");
|
||||
|
||||
if (!mutex) {
|
||||
if (static_mutex_used) {
|
||||
__cilkrts_bug("Cilk RTS library initialization failed");
|
||||
} else {
|
||||
static_mutex_used = 1;
|
||||
mutex = &static_mutex;
|
||||
}
|
||||
}
|
||||
|
||||
status = pthread_mutexattr_init(&attr);
|
||||
CILK_ASSERT (status == 0);
|
||||
#if defined DEBUG || CILK_LIB_DEBUG
|
||||
#ifdef PTHREAD_MUTEX_ERRORCHECK
|
||||
status = pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK);
|
||||
#else
|
||||
status = pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK_NP);
|
||||
#endif
|
||||
CILK_ASSERT (status == 0);
|
||||
#endif
|
||||
status = pthread_mutex_init (&mutex->mutex, &attr);
|
||||
CILK_ASSERT (status == 0);
|
||||
pthread_mutexattr_destroy(&attr);
|
||||
|
||||
return mutex;
|
||||
}
|
||||
|
||||
void __cilkrts_os_mutex_lock(struct os_mutex *p)
|
||||
{
|
||||
int status;
|
||||
status = pthread_mutex_lock (&p->mutex);
|
||||
ITT_SYNC_ACQUIRED(p);
|
||||
if (__builtin_expect(status, 0) == 0)
|
||||
return;
|
||||
if (status == EDEADLK)
|
||||
__cilkrts_bug("Cilk runtime error: deadlock acquiring mutex %p\n",
|
||||
p);
|
||||
else
|
||||
__cilkrts_bug("Cilk runtime error %d acquiring mutex %p\n",
|
||||
status, p);
|
||||
}
|
||||
|
||||
int __cilkrts_os_mutex_trylock(struct os_mutex *p)
|
||||
{
|
||||
int status;
|
||||
status = pthread_mutex_trylock (&p->mutex);
|
||||
return (status == 0);
|
||||
}
|
||||
|
||||
void __cilkrts_os_mutex_unlock(struct os_mutex *p)
|
||||
{
|
||||
int status;
|
||||
ITT_SYNC_RELEASING(p);
|
||||
status = pthread_mutex_unlock (&p->mutex);
|
||||
CILK_ASSERT(status == 0);
|
||||
}
|
||||
|
||||
void __cilkrts_os_mutex_destroy(struct os_mutex *p)
|
||||
{
|
||||
pthread_mutex_destroy (&p->mutex);
|
||||
if (p == &static_mutex) {
|
||||
static_mutex_used = 0;
|
||||
} else {
|
||||
free(p);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* create_global_os_mutex
|
||||
*
|
||||
* Function used with pthread_once to initialize the global OS mutex. Since
|
||||
* pthread_once requires a function which takes no parameters and has no
|
||||
* return value, the global OS mutex will be stored in the static (global
|
||||
* to the compilation unit) variable "global_os_mutex."
|
||||
*
|
||||
*
|
||||
* global_os_mutex will never be destroyed.
|
||||
*/
|
||||
static void create_global_os_mutex(void)
|
||||
{
|
||||
CILK_ASSERT(NULL == global_os_mutex);
|
||||
global_os_mutex = __cilkrts_os_mutex_create();
|
||||
}
|
||||
|
||||
void global_os_mutex_lock(void)
|
||||
{
|
||||
// pthread_once_t used with pthread_once to guarantee that
|
||||
// create_global_os_mutex() is only called once
|
||||
static pthread_once_t global_os_mutex_is_initialized = PTHREAD_ONCE_INIT;
|
||||
|
||||
// Execute create_global_os_mutex once in a thread-safe manner
|
||||
// Note that create_global_os_mutex returns the mutex in the static
|
||||
// (global to the module) variable "global_os_mutex"
|
||||
pthread_once(&global_os_mutex_is_initialized,
|
||||
create_global_os_mutex);
|
||||
|
||||
// We'd better have allocated a global_os_mutex
|
||||
CILK_ASSERT(NULL != global_os_mutex);
|
||||
|
||||
// Acquire the global OS mutex
|
||||
__cilkrts_os_mutex_lock(global_os_mutex);
|
||||
}
|
||||
|
||||
void global_os_mutex_unlock(void)
|
||||
{
|
||||
// We'd better have allocated a global_os_mutex. This means you should
|
||||
// have called global_os_mutex_lock() before calling
|
||||
// global_os_mutex_unlock(), but this is the only check for it.
|
||||
CILK_ASSERT(NULL != global_os_mutex);
|
||||
|
||||
// Release the global OS mutex
|
||||
__cilkrts_os_mutex_unlock(global_os_mutex);
|
||||
}
|
||||
|
||||
/* End os_mutex-unix.c */
|
135
libcilkrts/runtime/os_mutex.h
Normal file
135
libcilkrts/runtime/os_mutex.h
Normal file
|
@ -0,0 +1,135 @@
|
|||
/* os_mutex.h -*-C++-*-
|
||||
*
|
||||
*************************************************************************
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2009-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
**************************************************************************/
|
||||
|
||||
/**
|
||||
* @file os_mutex.h
|
||||
*
|
||||
* @brief Portable interface to operating-system mutexes.
|
||||
*
|
||||
* Do not confuse os_mutex with Cilk runtime-specific spinlock mutexes.
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_OS_MUTEX_DOT_H
|
||||
#define INCLUDED_OS_MUTEX_DOT_H
|
||||
|
||||
#include <cilk/common.h>
|
||||
#include "rts-common.h"
|
||||
|
||||
__CILKRTS_BEGIN_EXTERN_C
|
||||
|
||||
/// Opaque type
|
||||
typedef struct os_mutex os_mutex;
|
||||
|
||||
/**
|
||||
* Allocate and initialize an os_mutex
|
||||
*
|
||||
* @return A pointer to the initialized os_mutex
|
||||
*/
|
||||
COMMON_SYSDEP os_mutex* __cilkrts_os_mutex_create(void);
|
||||
|
||||
/**
|
||||
* Acquire the os_mutex for exclusive use
|
||||
*
|
||||
* @param m The os_mutex that is to be acquired.
|
||||
*/
|
||||
COMMON_SYSDEP void __cilkrts_os_mutex_lock(os_mutex *m);
|
||||
|
||||
/**
|
||||
* Try to acquire the os_mutex.
|
||||
*
|
||||
* @param m The os_mutex to try to acquire
|
||||
* @return 0 if the lock acquire failed
|
||||
* @return nonzero if the lock was acquired
|
||||
*/
|
||||
COMMON_SYSDEP int __cilkrts_os_mutex_trylock(os_mutex *m);
|
||||
|
||||
/**
|
||||
* Release the os_mutex
|
||||
*
|
||||
* @param m The os_mutex that is to be released.
|
||||
*/
|
||||
COMMON_SYSDEP void __cilkrts_os_mutex_unlock(os_mutex *m);
|
||||
|
||||
/**
|
||||
* Release any resources and deallocate the os_mutex.
|
||||
*
|
||||
* @param m The os_mutex that is to be deallocated.
|
||||
*/
|
||||
COMMON_SYSDEP void __cilkrts_os_mutex_destroy(os_mutex *m);
|
||||
|
||||
/**
|
||||
* Acquire the global os_mutex for exclusive use. The global os_mutex
|
||||
* will be initialized the first time this function is called in a
|
||||
* thread-safe manner.
|
||||
*/
|
||||
COMMON_SYSDEP void global_os_mutex_lock();
|
||||
|
||||
/**
|
||||
* Release the global os_mutex. global_os_mutex_lock() must have been
|
||||
* called first.
|
||||
*/
|
||||
COMMON_SYSDEP void global_os_mutex_unlock();
|
||||
|
||||
|
||||
#ifdef _MSC_VER
|
||||
|
||||
/**
|
||||
* @brief Create the global OS mutex - Windows only.
|
||||
*
|
||||
* On Windows we use DllMain() to create the global OS mutex when cilkrts20.dll
|
||||
* is loaded. As opposed to Linux/MacOS where we use pthread_once to implement
|
||||
* a singleton since there are no guarantees about constructor or destructor
|
||||
* ordering between shared objects.
|
||||
*/
|
||||
NON_COMMON void global_os_mutex_create();
|
||||
|
||||
/**
|
||||
* @brief Destroy the global OS mutex - Windows only
|
||||
*
|
||||
* On Windows we use DllMain() to destroy the global OS mutex when
|
||||
* cilkrts20.dll is unloaded. As opposed to Linux/MacOS where we cannot
|
||||
* know when it's safe to destroy the global OS mutex since there are no
|
||||
* guarantees about constructor or destructor ordering.
|
||||
*/
|
||||
NON_COMMON void global_os_mutex_destroy();
|
||||
|
||||
#endif // _MSC_VER
|
||||
|
||||
__CILKRTS_END_EXTERN_C
|
||||
|
||||
#endif // ! defined(INCLUDED_OS_MUTEX_DOT_H)
|
112
libcilkrts/runtime/pedigrees.c
Normal file
112
libcilkrts/runtime/pedigrees.c
Normal file
|
@ -0,0 +1,112 @@
|
|||
/* pedigrees.c -*-C-*-
|
||||
*
|
||||
*************************************************************************
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2007-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
#include "pedigrees.h"
|
||||
#include "local_state.h"
|
||||
|
||||
/*************************************************************
|
||||
Pedigree API code.
|
||||
*************************************************************/
|
||||
|
||||
/*
|
||||
* C99 requires that every inline function with external linkage have one
|
||||
* extern declaration in the program (with the inline definition in scope).
|
||||
*/
|
||||
COMMON_PORTABLE
|
||||
extern void update_pedigree_on_leave_frame(__cilkrts_worker *w,
|
||||
__cilkrts_stack_frame *sf);
|
||||
|
||||
/*
 * Thin wrapper: hands the supplied leaf node straight to
 * __cilkrts_set_tls_pedigree_leaf() without inspecting it.
 */
void __cilkrts_set_pedigree_leaf(__cilkrts_pedigree *leaf)
{
    __cilkrts_set_tls_pedigree_leaf(leaf);
}
|
||||
|
||||
/*
 * Copy the leaf obtained from __cilkrts_get_tls_pedigree_leaf() into the
 * worker's own pedigree node, remember the original leaf so that
 * save_pedigree_leaf_from_user_worker() can restore it, and repoint the
 * TLS leaf at the worker's copy.  Only valid for WORKER_USER workers.
 */
void load_pedigree_leaf_into_user_worker(__cilkrts_worker *w)
{
    __cilkrts_pedigree *tls_leaf;

    CILK_ASSERT(w->l->type == WORKER_USER);

    /* Snapshot the leaf returned by the TLS getter into the worker. */
    tls_leaf = __cilkrts_get_tls_pedigree_leaf(1);
    w->pedigree = *tls_leaf;

    /* Nothing may already be stashed; keep the old leaf so it can be
       restored when the worker is unbound. */
    CILK_ASSERT(NULL == w->l->original_pedigree_leaf);
    w->l->original_pedigree_leaf = tls_leaf;

    /* From now on the TLS leaf refers to the worker's copy. */
    __cilkrts_set_tls_pedigree_leaf(&w->pedigree);

    /* The new pedigree root must have a parent, and that parent must be
       the end of the chain. */
    CILK_ASSERT(NULL != w->pedigree.parent);
    CILK_ASSERT(NULL == w->pedigree.parent->parent);
}
|
||||
|
||||
/*
 * Reverse of load_pedigree_leaf_into_user_worker(): write the worker's
 * current rank back into the leaf that was stashed at load time, make
 * that leaf the TLS leaf again, and clear the stash.  Only valid for
 * WORKER_USER workers.
 */
void save_pedigree_leaf_from_user_worker(__cilkrts_worker *w)
{
    __cilkrts_pedigree *saved_leaf;

    CILK_ASSERT(w->l->type == WORKER_USER);

    /*
     * The leaf currently in TLS should be this worker's pedigree node.
     * That check is expensive, so it stays disabled:
     *   CILK_ASSERT(&w->pedigree == __cilkrts_get_tls_pedigree_leaf(0));
     */
    saved_leaf = w->l->original_pedigree_leaf;
    CILK_ASSERT(NULL != saved_leaf);

    /*
     * TODO: the worker should finish with a pedigree node that points to
     * the same root that was looked up at load time, i.e.
     *   CILK_ASSERT(w->l->original_pedigree_leaf->next == w->pedigree.parent);
     * The assert is disabled so that exceptions (without pedigrees) work:
     * reading the pedigree after an exception is caught currently fails
     * because the pedigree chain is not restored correctly.
     */
    saved_leaf->rank = w->pedigree.rank;

    /* Put the original leaf back into TLS. */
    __cilkrts_set_tls_pedigree_leaf(saved_leaf);

    /* Clear the worker's stash for paranoia. */
    w->l->original_pedigree_leaf = NULL;
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
Local Variables: **
|
||||
c-file-style:"bsd" **
|
||||
c-basic-offset:4 **
|
||||
indent-tabs-mode:nil **
|
||||
End: **
|
||||
*/
|
130
libcilkrts/runtime/pedigrees.h
Normal file
130
libcilkrts/runtime/pedigrees.h
Normal file
|
@ -0,0 +1,130 @@
|
|||
/* pedigrees.h -*-C++-*-
|
||||
*
|
||||
*************************************************************************
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2009-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
**************************************************************************/
|
||||
|
||||
#ifndef INCLUDED_PEDIGREES_DOT_H
|
||||
#define INCLUDED_PEDIGREES_DOT_H
|
||||
|
||||
|
||||
#include <cilk/common.h>
|
||||
#include <internal/abi.h>
|
||||
|
||||
#include "rts-common.h"
|
||||
#include "global_state.h"
|
||||
#include "os.h"
|
||||
|
||||
__CILKRTS_BEGIN_EXTERN_C
|
||||
|
||||
/**
|
||||
* @file pedigrees.h
|
||||
*
|
||||
* @brief pedigrees.h declares common routines related to pedigrees
|
||||
* and the pedigree API.
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @brief Sets the leaf pedigree node for the current user thread.
|
||||
*
|
||||
* A typical implementation stores this pedigree node in thread-local
|
||||
* storage.
|
||||
*
|
||||
* Preconditions:
|
||||
* - Current thread should be a user thread.
|
||||
*
|
||||
* @param leaf The pedigree node to store as a leaf.
|
||||
*/
|
||||
COMMON_PORTABLE
|
||||
void __cilkrts_set_pedigree_leaf(__cilkrts_pedigree* leaf);
|
||||
|
||||
|
||||
/**
|
||||
* Load the pedigree leaf node from thread-local storage into the
|
||||
* current user worker. This method should execute as a part of
|
||||
* binding the user thread to a worker.
|
||||
*
|
||||
* Preconditions:
|
||||
*
|
||||
* - w should be the worker for the current thread
|
||||
* - w should be a user thread.
|
||||
*/
|
||||
COMMON_PORTABLE
|
||||
void load_pedigree_leaf_into_user_worker(__cilkrts_worker *w);
|
||||
|
||||
/**
|
||||
* Save the pedigree leaf node from the worker into thread-local
|
||||
* storage. This method should execute as part of unbinding a user
|
||||
* thread from a worker.
|
||||
*
|
||||
* Preconditions:
|
||||
*
|
||||
* - w should be the worker for the current thread
|
||||
* - w should be a user thread.
|
||||
*/
|
||||
COMMON_PORTABLE
|
||||
void save_pedigree_leaf_from_user_worker(__cilkrts_worker *w);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Update pedigree for a worker when leaving a frame.
|
||||
*
|
||||
* If this is the frame of a spawn helper (indicated by the
|
||||
* CILK_FRAME_DETACHED flag) we must update the pedigree. The
|
||||
* pedigree points to nodes allocated on the stack. Failing to
|
||||
* update it will result in a accvio/segfault if the pedigree is
|
||||
* walked. This must happen for all spawn helper frames, even if
|
||||
* we're processing an exception.
|
||||
*/
|
||||
COMMON_PORTABLE
|
||||
inline void update_pedigree_on_leave_frame(__cilkrts_worker *w,
|
||||
__cilkrts_stack_frame *sf)
|
||||
{
|
||||
// Update the worker's pedigree information if this is an ABI 1 or later
|
||||
// frame
|
||||
if (CILK_FRAME_VERSION_VALUE(sf->flags) >= 1)
|
||||
{
|
||||
w->pedigree.rank = sf->spawn_helper_pedigree.rank + 1;
|
||||
w->pedigree.parent = sf->spawn_helper_pedigree.parent;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
__CILKRTS_END_EXTERN_C
|
||||
|
||||
#endif // ! defined(INCLUDED_PEDIGREES_DOT_H)
|
770
libcilkrts/runtime/record-replay.cpp
Normal file
770
libcilkrts/runtime/record-replay.cpp
Normal file
|
@ -0,0 +1,770 @@
|
|||
/* record-replay.cpp -*-C++-*-
|
||||
*
|
||||
*************************************************************************
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2012-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
/*
|
||||
* Implementation of the record/replay functionality for Cilk Plus
|
||||
*/
|
||||
|
||||
#include <cstring>
|
||||
#include <vector>
|
||||
#include <stdlib.h>
|
||||
|
||||
// clang is really strict about printf formats, so use the annoying integer
|
||||
// printf macros.  Unfortunately they're not available on Windows
|
||||
#ifdef _WIN32
|
||||
#define PRIu64 "llu"
|
||||
#else
|
||||
#define __STDC_FORMAT_MACROS 1
|
||||
#include <inttypes.h>
|
||||
#endif
|
||||
|
||||
#include "record-replay.h"
|
||||
#include "bug.h"
|
||||
#include "internal/abi.h"
|
||||
#include "local_state.h"
|
||||
#include "full_frame.h"
|
||||
#include "global_state.h"
|
||||
#include "cilk_malloc.h"
|
||||
#include "os.h" // for cilkos_error()
|
||||
|
||||
#if RECORD_ON_REPLAY
|
||||
#pragma message ("*** Record on Replay is enabled!")
|
||||
#endif
|
||||
|
||||
// Defined to write sequence number to the logs. Note that you cannot
|
||||
// diff logs with sequence numbers because the numbers may increment in
|
||||
// different orders.
|
||||
//#define INCLUDE_SEQUENCE_NUMBER 1
|
||||
|
||||
const int PED_VERSION = 1; // Log recording version
|
||||
|
||||
// Kinds of record that can appear in a replay log
enum ped_type_t
{
    ped_type_unknown  = 0,  // Record type string was not recognized
    ped_type_steal    = 1,  // "Steal" record
    ped_type_sync     = 2,  // "Sync" record
    ped_type_orphaned = 3,  // "Orphaned" record
    ped_type_last     = 4   // Flags end of the list
};
|
||||
|
||||
// Log type strings
|
||||
#define PED_TYPE_STR_STEAL "Steal"
|
||||
#define PED_TYPE_STR_SYNC "Sync"
|
||||
#define PED_TYPE_STR_WORKERS "Workers"
|
||||
#define PED_TYPE_STR_ORPHANED "Orphaned"
|
||||
|
||||
#define PED_TYPE_SIZE 16 // Buffer size for the type of pedigree. Must
|
||||
// hold largest pedigree record type string.
|
||||
#define PEDIGREE_BUFF_SIZE 512 // Buffer size for the string representation
|
||||
// of a pedigree.
|
||||
|
||||
/**
|
||||
* Data we store for a replay log entry
|
||||
*/
|
||||
typedef struct replay_entry_t
|
||||
{
|
||||
uint64_t *m_reverse_pedigree; /**< Reverse pedigree for replay log entry */
|
||||
ped_type_t m_type; /**< Type of replay log entry */
|
||||
int16_t m_pedigree_len; /**< Number of terms in reverse pedigree */
|
||||
int16_t m_value; /**< Victim for STEALs, 0 if matching steal found for ORPHANs */
|
||||
|
||||
/**
|
||||
* Load data read from the log into the entry
|
||||
*/
|
||||
bool load(const char *type, const char *pedigee_str, int32_t value1, int32_t value2)
|
||||
{
|
||||
// Convert the type into an enum
|
||||
if (0 == strcmp(type, PED_TYPE_STR_STEAL))
|
||||
{
|
||||
m_type = ped_type_steal;
|
||||
m_value = (int16_t)value1; // Victim
|
||||
}
|
||||
else
|
||||
{
|
||||
m_value = -1; // Victim not valid
|
||||
if (0 == strcmp(type, PED_TYPE_STR_SYNC))
|
||||
m_type = ped_type_sync;
|
||||
else if (0 == strcmp(type, PED_TYPE_STR_ORPHANED))
|
||||
m_type = ped_type_orphaned;
|
||||
else
|
||||
{
|
||||
m_type = ped_type_unknown;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Parse the pedigree
|
||||
m_pedigree_len = 0;
|
||||
|
||||
const char *p = pedigee_str;
|
||||
char *end;
|
||||
|
||||
uint64_t temp_pedigree[PEDIGREE_BUFF_SIZE/2];
|
||||
|
||||
while(1)
|
||||
{
|
||||
temp_pedigree[m_pedigree_len++] = (uint64_t)strtol(p, &end, 10);
|
||||
if ('\0' == *end)
|
||||
break;
|
||||
p = end + 1;
|
||||
}
|
||||
|
||||
// Allocate memory to hold the pedigree.
|
||||
// Copy the pedigree in reverse order since that's the order we'll
|
||||
// traverse it
|
||||
m_reverse_pedigree =
|
||||
(uint64_t *)__cilkrts_malloc(sizeof(int64_t) * m_pedigree_len);
|
||||
for (int n = 0; n < m_pedigree_len; n++)
|
||||
m_reverse_pedigree[n] = temp_pedigree[(m_pedigree_len - 1) - n];
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Match this entry against the data supplied. This includes walking the
|
||||
* pedigree from the specified node.
|
||||
*/
|
||||
bool match (ped_type_t type, const __cilkrts_pedigree *node, int victim = -1)
|
||||
{
|
||||
int i = 0;
|
||||
|
||||
// If the type isn't what they're seeking, we don't have a match
|
||||
if (type != m_type)
|
||||
return false;
|
||||
|
||||
// If we're looking for a STEAL, then the victim must match
|
||||
if ((type == ped_type_steal) && (victim != m_value))
|
||||
return false;
|
||||
|
||||
// Compare the current pedigree against what was recorded
|
||||
while ((NULL != node) && (i < m_pedigree_len))
|
||||
{
|
||||
// If we've got a pedigree rank difference, then we don't have
|
||||
// a match
|
||||
if (node->rank != m_reverse_pedigree[i])
|
||||
return false;
|
||||
node = node->parent;
|
||||
i++;
|
||||
}
|
||||
|
||||
// Make sure we exhausted both the pedigree chain and the recorded
|
||||
// pedigree
|
||||
return ((NULL == node) && (i == m_pedigree_len));
|
||||
}
|
||||
|
||||
/**
|
||||
* Advance to the next entry, skipping any ORPHANED records we didn't see
|
||||
* a matching STEAL for
|
||||
*/
|
||||
replay_entry_t *next_entry()
|
||||
{
|
||||
replay_entry_t *entry = this;
|
||||
|
||||
// You can't go beyond the end
|
||||
if (ped_type_last == entry->m_type)
|
||||
return entry;
|
||||
|
||||
// Advance to the next entry
|
||||
entry++;
|
||||
|
||||
// Skip any ORPHANED records that don't have a matching steal. We
|
||||
// initialized the value field to -1 for ORPHANED. After loading all
|
||||
// the log data, we iterated through all the STEAL records setting the
|
||||
// matching ORPHANED record's value field to 0. So if an ORPHANED
|
||||
// record's value field is still -1, it doesn't have a matching STEAL
|
||||
// record, and I don't know why we chose not to return from the
|
||||
// spawned function.
|
||||
while ((ped_type_orphaned == entry->m_type) && (-1 == entry->m_value))
|
||||
{
|
||||
entry++;
|
||||
}
|
||||
|
||||
return entry;
|
||||
}
|
||||
|
||||
/**
|
||||
* Release any allocated resources
|
||||
*/
|
||||
void unload()
|
||||
{
|
||||
__cilkrts_free(m_reverse_pedigree);
|
||||
m_reverse_pedigree = NULL;
|
||||
}
|
||||
|
||||
} replay_entry_t;
|
||||
|
||||
__CILKRTS_BEGIN_EXTERN_C
|
||||
|
||||
/**
|
||||
* Walk the pedigree and generate a string representation with underscores
|
||||
* between terms. Currently does a recursive walk to generate a forward
|
||||
* pedigree.
|
||||
*
|
||||
* @param p The buffer that is to be filled. Assumed to be PEDIGREE_BUFF_SIZE
|
||||
* characters long
|
||||
* @param pnode The initial pedigree term to be written.
|
||||
*
|
||||
* @return A pointer into the pedigree string buffer after a term has been
|
||||
* written.
|
||||
*/
|
||||
static
|
||||
char * walk_pedigree_nodes(char *p, const __cilkrts_pedigree *pnode)
|
||||
{
|
||||
CILK_ASSERT(pnode);
|
||||
if (pnode->parent)
|
||||
{
|
||||
p = walk_pedigree_nodes(p, pnode->parent);
|
||||
p += sprintf(p, "_");
|
||||
}
|
||||
|
||||
return p + sprintf(p, "%" PRIu64, pnode->rank);
|
||||
}
|
||||
|
||||
/**
|
||||
* Write a record to a replay log file.
|
||||
*
|
||||
* @param w The worker we're writing the pedigree for.
|
||||
* @param type The type of the pedigree record, as a string
|
||||
* @param initial_node The initial pedigree node to be written, or NULL if
|
||||
* there is no pedigree for this record type.
|
||||
* @param i1 First integer value to be written to the record.
|
||||
* @param i2 Second integer value to be written to the record. Only applies
|
||||
* to STEAL records. Defaults to -1 (unused). The second value is always
|
||||
* written to make parsing easier.
|
||||
*/
|
||||
static
|
||||
void write_to_replay_log (__cilkrts_worker *w, const char *type,
|
||||
const __cilkrts_pedigree *initial_node,
|
||||
int i1 = -1, int i2 = -1)
|
||||
{
|
||||
char pedigree[PEDIGREE_BUFF_SIZE];
|
||||
|
||||
// If we don't have an initial pedigree node, just use "0" to fill the slot
|
||||
if (NULL == initial_node)
|
||||
strcpy(pedigree, "0");
|
||||
else
|
||||
walk_pedigree_nodes(pedigree, initial_node);
|
||||
|
||||
#ifndef INCLUDE_SEQUENCE_NUMBER
|
||||
// Simply write the record
|
||||
fprintf(w->l->record_replay_fptr, "%s %s %d %d\n",
|
||||
type, pedigree, i1, i2);
|
||||
#else
|
||||
// Write the record with a sequence number. The sequence number should
|
||||
// always be the last term, and ignored on read
|
||||
|
||||
static long volatile seq_num = 0;
|
||||
long write_num;
|
||||
|
||||
// Atomic increment functions are compiler/OS-specific
|
||||
#ifdef _WIN32
|
||||
write_num = _InterlockedIncrement(&seq_num);
|
||||
#else /* GCC */
|
||||
write_num = __sync_add_and_fetch(&seq_num, 1);
|
||||
#endif // _WIN32
|
||||
|
||||
fprintf(w->l->record_replay_fptr, "%s %s %d %d %ld\n",
|
||||
type, pedigree, i1, i2, write_num);
|
||||
#endif // INCLUDE_SEQUENCE_NUMBER
|
||||
|
||||
fflush(w->l->record_replay_fptr);
|
||||
}
|
||||
|
||||
/**
|
||||
* Record data for a successful steal.
|
||||
*
|
||||
* The pedigree for a STEAL record is the pedigree of the stolen frame.
|
||||
*
|
||||
* @note It's assumed that replay_record_steal() has already checked that we're
|
||||
* recording a log and that the record/replay functionality has not been
|
||||
* compiled out.
|
||||
*
|
||||
* @param w The worker stealing a frame.
|
||||
* @param victim_id The ID of the worker which had it's frame stolen.
|
||||
*/
|
||||
void replay_record_steal_internal(__cilkrts_worker *w, int32_t victim_id)
|
||||
{
|
||||
// Follow the pedigree chain using worker's stack frame
|
||||
CILK_ASSERT(w->l->next_frame_ff);
|
||||
CILK_ASSERT(w->l->next_frame_ff->call_stack);
|
||||
|
||||
// Record steal: STEAL pedigree victim_id thief_id
|
||||
write_to_replay_log (w, PED_TYPE_STR_STEAL,
|
||||
&(w->l->next_frame_ff->call_stack->parent_pedigree),
|
||||
victim_id);
|
||||
}
|
||||
|
||||
/**
|
||||
* Record data for the worker that continues from a sync
|
||||
*
|
||||
* The pedigree for a SYNC record is the pedigree at the sync.
|
||||
*
|
||||
* @note It's assumed that replay_record_sync() has already checked that we're
|
||||
* recording a log and that the record/replay functionality has not been
|
||||
* compiled out.
|
||||
*
|
||||
* @param w The worker continuing from a sync.
|
||||
*/
|
||||
void replay_record_sync_internal(__cilkrts_worker *w)
|
||||
{
|
||||
// Record sync: SYNC pedigree last_worker_id
|
||||
write_to_replay_log (w, PED_TYPE_STR_SYNC, &w->pedigree);
|
||||
}
|
||||
|
||||
/**
|
||||
* Record the pedigree of an attempt to return to a stolen parent
|
||||
*
|
||||
* The pedigree for an ORPHANED record is the pedigree of our parent
|
||||
*
|
||||
* @note It's assumed that replay_record_orphaned() has already checked that
|
||||
* we're recording a log and that the record/replay functionality has not
|
||||
* been compiled out.
|
||||
*
|
||||
* @param w The worker continuing noting that it has been orphaned.
|
||||
*/
|
||||
void replay_record_orphaned_internal(__cilkrts_worker *w)
|
||||
{
|
||||
// Record steal: ORPHANED pedigree self
|
||||
write_to_replay_log (w, PED_TYPE_STR_ORPHANED, w->pedigree.parent);
|
||||
}
|
||||
|
||||
/**
|
||||
* Attempt to match a SYNC record. We have a match when this worker was
|
||||
* recorded returning from the current call to __cilkrts_sync() with the
|
||||
* same pedigree and this was the worker that continued from the sync, since
|
||||
* it was the last to sync.
|
||||
*
|
||||
* If we find a match, the caller is expected to stall it is the last worker
|
||||
* to reach a sync so it will be the worker to continue from the sync.
|
||||
*
|
||||
* @note It's assumed that replay_match_sync_pedigree() has already returned
|
||||
* if we're not replaying a log, or if record/replay functionality has
|
||||
* been compiled out.
|
||||
*
|
||||
* @param w The worker we're checking to see if we've got a match
|
||||
*/
|
||||
int replay_match_sync_pedigree_internal(__cilkrts_worker *w)
|
||||
{
|
||||
// Return true if we have a match
|
||||
if (w->l->replay_list_entry->match(ped_type_sync, &w->pedigree))
|
||||
return 1;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Advance to the next log entry from a SYNC record. Consume the current
|
||||
* SYNC record on this worker and advance to the next one.
|
||||
*
|
||||
* @note It's assumed that replay_advance_from_sync() has already returned if
|
||||
* we're not replaying a log, or if record/replay functionality has been
|
||||
* compiled out.
|
||||
*
|
||||
* @param w The worker whose replay log we're advancing.
|
||||
*/
|
||||
void replay_advance_from_sync_internal (__cilkrts_worker *w)
|
||||
{
|
||||
// The current replay entry must be a SYNC
|
||||
CILK_ASSERT(ped_type_sync == w->l->replay_list_entry->m_type);
|
||||
|
||||
// Advance to the next entry
|
||||
w->l->replay_list_entry = w->l->replay_list_entry->next_entry();
|
||||
}
|
||||
|
||||
/**
|
||||
* Called from random_steal() to override the ID of the randomly chosen victim
|
||||
* worker which this worker will attempt to steal from. Returns the worker id
|
||||
* of the next victim this worker was recorded stealing from, or -1 if the
|
||||
* next record in the log is not a STEAL.
|
||||
*
|
||||
* @note This call does NOT attempt to match the pedigree. That will be done
|
||||
* by replay_match_victim_pedigree() after random_steal() has locked the victim
|
||||
* worker.
|
||||
*
|
||||
* @param w The __cilkrts_worker we're executing on. The worker's replay log
|
||||
* is checked for a STEAL record. If we've got one, the stolen worker ID is
|
||||
* returned.
|
||||
*
|
||||
* @return -1 if the next record is not a STEAL
|
||||
* @return recorded stolen worker ID if we've got a matching STEAL record
|
||||
*/
|
||||
int replay_get_next_recorded_victim_internal(__cilkrts_worker *w)
|
||||
{
|
||||
// If the next record isn't a STEAL, abort the attempt to steal work
|
||||
if (ped_type_steal != w->l->replay_list_entry->m_type)
|
||||
return -1;
|
||||
|
||||
// Return the victim's worker ID from the STEAL record. We'll check
|
||||
// the pedigree after random_steal has locked the victim worker.
|
||||
return w->l->replay_list_entry->m_value;
|
||||
}
|
||||
|
||||
/**
|
||||
* Called from random_steal() to determine if we have a STEAL record that
|
||||
* matches the pedigree at the head of the victim worker. If we do have a
|
||||
* match, the STEAL record is consumed.
|
||||
*
|
||||
* @note It's assumed that replay_match_victim_pedigree() has already returned if
|
||||
* we're not replaying a log, or if record/replay functionality has been
|
||||
* compiled out.
|
||||
*
|
||||
* @return 1 if we have a match
|
||||
* @return 0 if the current replay record isn't a STEAL record, or the victim
|
||||
* isn't correct, or the pedigree doesn't match.
|
||||
*/
|
||||
int replay_match_victim_pedigree_internal(__cilkrts_worker *w, __cilkrts_worker *victim)
|
||||
{
|
||||
// If we don't have a match, return 0
|
||||
if (! w->l->replay_list_entry->match(ped_type_steal,
|
||||
&((*victim->head)->parent_pedigree),
|
||||
victim->self))
|
||||
return 0;
|
||||
|
||||
// Consume this entry
|
||||
w->l->replay_list_entry = w->l->replay_list_entry->next_entry();
|
||||
|
||||
// Return success
|
||||
return 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* If the frame we're about to return to was recorded as being stolen,
|
||||
* stall until it is.
|
||||
*
|
||||
* @note It's assumed that replay_wait_for_steal_if_parent_was_stolen() has
|
||||
* already returned if we're not replaying a log, or if record/replay
|
||||
* functionality has been compiled out.
|
||||
*
|
||||
* @param w The worker we're executing on.
|
||||
*/
|
||||
void replay_wait_for_steal_if_parent_was_stolen_internal(__cilkrts_worker *w)
|
||||
{
|
||||
// If our parent wasn't recorded orphanen, return now
|
||||
if (! w->l->replay_list_entry->match (ped_type_orphaned,
|
||||
w->pedigree.parent))
|
||||
return;
|
||||
|
||||
// Stall until our parent is stolen. Note that we're comparing head
|
||||
// and tail, not head and exc. The steal is not completed until tail
|
||||
// is modified.
|
||||
while (!((w->tail - 1) < w->head))
|
||||
__cilkrts_sleep();
|
||||
|
||||
// Consume the entry
|
||||
w->l->replay_list_entry = w->l->replay_list_entry->next_entry();
|
||||
}
|
||||
|
||||
/**
|
||||
* Allocate memory for the list of logged events.
|
||||
*
|
||||
* This function will read through the file and count the number of records
|
||||
* so it can estimate how big a buffer to allocate for the array or replay
|
||||
* entries. It will then rewind the file to the beginning so it can be
|
||||
* loaded into memory.
|
||||
*
|
||||
* @param w The worker we're loading the file for.
|
||||
* @param f The file of replay data we're scanning.
|
||||
*/
|
||||
static
|
||||
void allocate_replay_list(__cilkrts_worker *w, FILE *f)
|
||||
{
|
||||
// Count the number of entries - yeah, it's a hack, but it lets me
|
||||
// allocate the space all at once instead of in chunks
|
||||
char buf[1024];
|
||||
int entries = 1; // Include "LAST" node
|
||||
|
||||
while (! feof(f))
|
||||
{
|
||||
if (fgets(buf, 1024, f))
|
||||
{
|
||||
// Skip the Workers record - should only be in file for Worker 0
|
||||
if (0 != strncmp(PED_TYPE_STR_WORKERS, buf, sizeof(PED_TYPE_STR_WORKERS)-1))
|
||||
entries++;
|
||||
}
|
||||
}
|
||||
|
||||
w->l->replay_list_root =
|
||||
(replay_entry_t *)__cilkrts_malloc(entries * sizeof(replay_entry_t));
|
||||
w->l->replay_list_root[entries - 1].m_type = ped_type_last;
|
||||
|
||||
// Reset the file to the beginning
|
||||
rewind(f);
|
||||
}
|
||||
|
||||
/**
|
||||
* Load the replay log for a worker into memory.
|
||||
*
|
||||
* @param w The worker we're loading the replay for.
|
||||
*/
|
||||
static
|
||||
void load_recorded_log(__cilkrts_worker *w)
|
||||
{
|
||||
char ped_type[PED_TYPE_SIZE];
|
||||
char ped_str[PEDIGREE_BUFF_SIZE];
|
||||
int32_t i1 = -1, i2 = -1;
|
||||
int fret;
|
||||
char local_replay_file_name[512];
|
||||
FILE *f;
|
||||
|
||||
// Open the log for reading
|
||||
sprintf(local_replay_file_name, "%s%d.cilklog", w->g->record_replay_file_name, w->self);
|
||||
f = fopen(local_replay_file_name, "r");
|
||||
|
||||
// Make sure we found a log!
|
||||
CILK_ASSERT (NULL != f);
|
||||
|
||||
// Initialize the replay_list
|
||||
allocate_replay_list(w, f);
|
||||
replay_entry_t *entry = w->l->replay_list_root;
|
||||
|
||||
// Read the data out and add it to our tables
|
||||
while (! feof(f))
|
||||
{
|
||||
#ifndef INCLUDE_SEQUENCE_NUMBER
|
||||
fret = fscanf(f, "%s %s %d %d\n", ped_type, ped_str, &i1, &i2);
|
||||
if(EOF == fret)
|
||||
break;
|
||||
|
||||
// We must have read 4 fields
|
||||
CILK_ASSERT(4 == fret);
|
||||
#else
|
||||
int32_t write_num;
|
||||
fret = fscanf(f, "%s %s %d %d %d\n", ped_type, ped_str,
|
||||
&i1, &i2, &write_num);
|
||||
if(EOF == fret)
|
||||
break;
|
||||
|
||||
// We must have read 5 fields
|
||||
CILK_ASSERT(5 == fret);
|
||||
#endif // INCLUDE_SEQUENCE_NUMBER
|
||||
|
||||
// Load the data into the entry
|
||||
if (0 == strcmp(ped_type, PED_TYPE_STR_WORKERS))
|
||||
{
|
||||
// Verify we're replaying with the same number of workers we recorded with
|
||||
if (i1 != w->g->P)
|
||||
{
|
||||
// Fatal error - does not return
|
||||
cilkos_error("Cannot continue replay: number of workers(%d) doesn't match "
|
||||
"that from the recording(%d).\n", w->g->P, i1);
|
||||
}
|
||||
|
||||
// Verify that we understand this version of the pedigree file
|
||||
if (PED_VERSION != i2)
|
||||
{
|
||||
// Fatal error - does not return
|
||||
cilkos_error("Pedigree file version %d doesn't match current "
|
||||
"version %d - cannot continue.\n",
|
||||
i2, PED_VERSION);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
entry->load(ped_type, ped_str, i1, i2);
|
||||
entry++;
|
||||
}
|
||||
}
|
||||
|
||||
// Make sure we've filled the allocated memory. We initialized the last
|
||||
// entry in
|
||||
CILK_ASSERT(ped_type_last == entry->m_type);
|
||||
w->l->replay_list_entry = w->l->replay_list_root;
|
||||
|
||||
// Close the log and return
|
||||
fclose(f);
|
||||
}
|
||||
|
||||
/**
|
||||
* Scan a recorded log to match STEALs againsted ORPHANED records.
|
||||
*
|
||||
* @param g Cilk Runtime global state. Passed to access the worker array so
|
||||
* we can scan a worker's ORPHANED entries for one that matches a STEAL entry.
|
||||
* @param entry The root of a replay_list for a worker.
|
||||
*/
|
||||
static
|
||||
void scan_for_matching_steals(global_state_t *g, replay_entry_t *entry)
|
||||
{
|
||||
// Iterate over all of the entries
|
||||
while (ped_type_last != entry->m_type)
|
||||
{
|
||||
// Look for STEALs. That will tell us which worker the frame was
|
||||
// stolen from
|
||||
if (ped_type_steal == entry->m_type)
|
||||
{
|
||||
bool found = false;
|
||||
|
||||
// Validate the worker ID and make sure we've got a list
|
||||
CILK_ASSERT((entry->m_value >= 0) && (entry->m_value < g->total_workers));
|
||||
replay_entry_t *victim_entry = g->workers[entry->m_value]->l->replay_list_root;
|
||||
CILK_ASSERT(NULL != victim_entry);
|
||||
|
||||
// Scan the victim's list for the matching ORPHANED record
|
||||
while ((ped_type_last != victim_entry->m_type) && ! found)
|
||||
{
|
||||
if (ped_type_orphaned == victim_entry->m_type)
|
||||
{
|
||||
if (entry->m_pedigree_len == victim_entry->m_pedigree_len)
|
||||
{
|
||||
if (0 == memcmp(entry->m_reverse_pedigree,
|
||||
victim_entry->m_reverse_pedigree,
|
||||
entry->m_pedigree_len * sizeof(int64_t)))
|
||||
{
|
||||
// Note that this ORPHANED record has a matching steal
|
||||
victim_entry->m_value = 0;
|
||||
found = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
victim_entry++;
|
||||
}
|
||||
}
|
||||
entry++;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Initialize per-worker data for record or replay - See record-replay.h
|
||||
* for full routine header.
|
||||
*/
|
||||
void replay_init_workers(global_state_t *g)
|
||||
{
|
||||
int i;
|
||||
char worker_file_name[512];
|
||||
|
||||
// If we're not recording or replaying a log, we're done. All of the
|
||||
// fields in the global_state_t or local_state_t are already initialized
|
||||
// to default values.
|
||||
if (RECORD_REPLAY_NONE == g->record_or_replay)
|
||||
return;
|
||||
|
||||
// If we're replaying a log, read each worker's log and construct the
|
||||
// in-memory log
|
||||
if (REPLAY_LOG == g->record_or_replay)
|
||||
{
|
||||
// Read all of the data
|
||||
for (i = 0; i < g->total_workers; ++i)
|
||||
{
|
||||
// This function will also initialize and fill the worker's
|
||||
// replay list
|
||||
load_recorded_log(g->workers[i]);
|
||||
}
|
||||
|
||||
// Scan for orphans with no matching steal. Mark them so they'll be
|
||||
// skipped as we advance through the log.
|
||||
for (i = 0; i < g->total_workers; ++i)
|
||||
{
|
||||
scan_for_matching_steals(g, g->workers[i]->l->replay_list_root);
|
||||
}
|
||||
|
||||
// If we're recording the logs while replaying, create the log files.
|
||||
// This will only be used for debugging. Create the logs in the
|
||||
// current directory. It should be as good a place as any...
|
||||
#if RECORD_ON_REPLAY
|
||||
for(i = 0; i < g->total_workers; ++i)
|
||||
{
|
||||
__cilkrts_worker *w = g->workers[i];
|
||||
sprintf(worker_file_name, "replay_log_%d.cilklog", w->self);
|
||||
w->l->record_replay_fptr = fopen(worker_file_name, "w+");
|
||||
CILK_ASSERT(NULL != w->l->record_replay_fptr);
|
||||
}
|
||||
|
||||
// Record the number of workers, file version in Worker 0's file
|
||||
write_to_replay_log (g->workers[0], PED_TYPE_STR_WORKERS, NULL, g->P, PED_VERSION);
|
||||
#endif // RECORD_ON_REPLAY
|
||||
}
|
||||
|
||||
// If we're recording, create the log files
|
||||
if (RECORD_LOG == g->record_or_replay)
|
||||
{
|
||||
for(i = 0; i < g->total_workers; ++i)
|
||||
{
|
||||
__cilkrts_worker *w = g->workers[i];
|
||||
sprintf(worker_file_name, "%s%d.cilklog",
|
||||
g->record_replay_file_name,
|
||||
w->self);
|
||||
w->l->record_replay_fptr = fopen(worker_file_name, "w+");
|
||||
CILK_ASSERT(NULL != w->l->record_replay_fptr);
|
||||
}
|
||||
|
||||
// Record the number of workers, file version in Worker 0's file
|
||||
write_to_replay_log (g->workers[0], PED_TYPE_STR_WORKERS, NULL, g->P, PED_VERSION);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Do any necessary cleanup for the logs - See record-replay.h for full
|
||||
* routine header.
|
||||
*/
|
||||
void replay_term(global_state_t *g)
|
||||
{
|
||||
// Free memory for the record/replay log file name, if we've got one
|
||||
if (g->record_replay_file_name)
|
||||
__cilkrts_free(g->record_replay_file_name);
|
||||
|
||||
// Per-worker cleanup
|
||||
for(int i = 0; i < g->total_workers; ++i)
|
||||
{
|
||||
__cilkrts_worker *w = g->workers[i];
|
||||
|
||||
// Close the log files, if we've opened them
|
||||
if(w->l->record_replay_fptr)
|
||||
fclose(w->l->record_replay_fptr);
|
||||
|
||||
if (w->l->replay_list_root)
|
||||
{
|
||||
// We should have consumed the entire list
|
||||
CILK_ASSERT(ped_type_last == w->l->replay_list_entry->m_type);
|
||||
|
||||
replay_entry_t *entry = w->l->replay_list_root;
|
||||
while (ped_type_last != entry->m_type)
|
||||
{
|
||||
// Free the pedigree memory for each entry
|
||||
entry->unload();
|
||||
entry++;
|
||||
}
|
||||
__cilkrts_free(w->l->replay_list_root);
|
||||
w->l->replay_list_root = NULL;
|
||||
w->l->replay_list_entry = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__CILKRTS_END_EXTERN_C
|
432
libcilkrts/runtime/record-replay.h
Normal file
432
libcilkrts/runtime/record-replay.h
Normal file
|
@ -0,0 +1,432 @@
|
|||
/* record_replay.h -*-C++-*-
|
||||
*
|
||||
*************************************************************************
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2012-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
/**
|
||||
* @file record-replay.h
|
||||
*
|
||||
* @brief record-replay.h and .cpp encapsulate most of the functionality to
|
||||
* record and play back a Cilk Plus application.
|
||||
*
|
||||
* Recording is directed by the setting of the CILK_RECORD_LOG environment
|
||||
* variable. If it's defined, the value specifies the root we'll use to
|
||||
* generate files for each worker using the following format string:
|
||||
* "%s%d.cilklog", where the integer is the value of w->self.
|
||||
*
|
||||
* Replay is directed by the setting of the CILK_REPLAY_LOG environment
|
||||
* variable, interpreted the same way as CILK_RECORD_LOG. If both
|
||||
* CILK_RECORD_LOG and CILK_REPLAY_LOG are defined, a warning will be given
|
||||
* and the attempt to record a log will be ignored.
|
||||
*
|
||||
* Recording is relatively straightforward. We write all information about a
|
||||
* worker to a per-worker file.
|
||||
*
|
||||
* Each pedigree record consists of the following fields. All fields must be
|
||||
* present in every record to make parsing easy.
|
||||
* - Type - A string identifying the pedigree record. See the PED_TYPE_STR_
|
||||
* macros for the currently defined values.
|
||||
* - Pedigree - A string of pedigree values, with underscores between
|
||||
* adjacent values.
|
||||
* - i1 - Record type-specific value. -1 if not used.
|
||||
* - i2 - Record type-specific value. -1 if not used.
|
||||
*
|
||||
* WORKERS record - only written to the file for worker 0. Note that this is
|
||||
* the first worker in the workers array. Worker 0 is the first system worker,
|
||||
* *NOT* a user worker.
|
||||
* - Type: "Workers"
|
||||
* - Pedigree: Always "0" - ignored
|
||||
* - i1: Number of workers (g->P) when we recorded the log. A mismatch when
|
||||
* we attempt to replay the log will result in aborting the execution.
|
||||
* - i2: Log version number - Specified by PED_VERSION in record-replay.cpp
|
||||
*
|
||||
* STEAL record - written after a successful steal.
|
||||
* - Type: "Steal"
|
||||
* - Pedigree: Pedigree of stolen frame
|
||||
* - i1: Worker the frame was stolen from
|
||||
* - i2: -1
|
||||
*
|
||||
* SYNC record - written after a worker continues from a sync.
|
||||
* - Type: "Sync"
|
||||
* - Pedigree: Pedigree of sync. Note that this is the pedigree *before*
|
||||
* the pedigree is incremented in setup_for_execution_pedigree().
|
||||
* - i1: -1
|
||||
* - i2: -1
|
||||
*
|
||||
* ORPHANED record - saved on a return to a stolen parent.
|
||||
* - Type: "Orphaned"
|
||||
* - Pedigree: Pedigree of the parent frame *before* the pedigree is
|
||||
* incremented by the return
|
||||
* - i1: -1
|
||||
* - i2: -1
|
||||
*
|
||||
* On replay, the data is loaded into a per-worker array, and the data is
|
||||
* consumed in order as needed.
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_RECORD_REPLAY_DOT_H
|
||||
#define INCLUDED_RECORD_REPLAY_DOT_H
|
||||
|
||||
#include "cilk/common.h"
|
||||
#include "global_state.h"
|
||||
|
||||
/**
|
||||
* Define CILK_RECORD_REPLAY to enable record/replay functionality. If
|
||||
* CILK_RECORD_REPLAY is not defined, all of the record/replay functions in
|
||||
* record-replay.h will be stubbed out. Since they're declared as inline
|
||||
* functions, the resulting build should have no performance impact due to
|
||||
* the implementation of record/replay.
|
||||
*/
|
||||
#define CILK_RECORD_REPLAY 1
|
||||
|
||||
/**
|
||||
* Define RECORD_ON_REPLAY=1 to write logs when we're replaying a log. This
|
||||
* should only be needed when debugging the replay functionality. This should
|
||||
* always be defined as 0 when record-replay.h is checked in.
|
||||
*/
|
||||
#define RECORD_ON_REPLAY 0
|
||||
|
||||
__CILKRTS_BEGIN_EXTERN_C
|
||||
|
||||
#ifdef CILK_RECORD_REPLAY
|
||||
// Declarations of internal record/replay functions. The inlined versions
|
||||
// further down do some preliminary testing (like if we're not recording or
|
||||
// replaying) and will stub out the functionality if we've compiled out the
|
||||
// record/replay feature
|
||||
int replay_match_sync_pedigree_internal(__cilkrts_worker *w);
|
||||
void replay_wait_for_steal_if_parent_was_stolen_internal(__cilkrts_worker *w);
|
||||
void replay_record_steal_internal(__cilkrts_worker *w, int32_t victim_id);
|
||||
void replay_record_sync_internal(__cilkrts_worker *w);
|
||||
void replay_record_orphaned_internal(__cilkrts_worker *w);
|
||||
int replay_match_victim_pedigree_internal(__cilkrts_worker *w, __cilkrts_worker *victim);
|
||||
void replay_advance_from_sync_internal (__cilkrts_worker *w);
|
||||
int replay_get_next_recorded_victim_internal(__cilkrts_worker *w);
|
||||
#endif // CILK_RECORD_REPLAY
|
||||
|
||||
// Publicly defined record/replay API
|
||||
|
||||
/**
|
||||
* If we're replaying a log, wait for our parent to be stolen if it was when
|
||||
* the log was recorded. If record/replay is compiled out, this is a noop.
|
||||
*
|
||||
* @param w The __cilkrts_worker we're executing on. The worker's replay
|
||||
* list will be checked for a ORPHANED record with a matching pedigree. If
|
||||
* there is a match, the ORPHANED record will be consumed.
|
||||
*/
|
||||
#ifdef CILK_RECORD_REPLAY
|
||||
__CILKRTS_INLINE
|
||||
void replay_wait_for_steal_if_parent_was_stolen(__cilkrts_worker *w)
|
||||
{
|
||||
// Only check if we're replaying a log
|
||||
if (REPLAY_LOG == w->g->record_or_replay)
|
||||
replay_wait_for_steal_if_parent_was_stolen_internal(w);
|
||||
}
|
||||
#else
|
||||
__CILKRTS_INLINE
|
||||
void replay_wait_for_steal_if_parent_was_stolen(__cilkrts_worker *w)
|
||||
{
|
||||
// If record/replay is disabled, we never wait
|
||||
}
|
||||
#endif // CILK_RECORD_REPLAY
|
||||
|
||||
/**
|
||||
* Called from random_steal() to override the ID of the randomly chosen victim
|
||||
* worker which this worker will attempt to steal from. Returns the worker id
|
||||
* of the next victim this worker was recorded stealing from, or -1 if the
|
||||
* next record in the log is not a STEAL.
|
||||
*
|
||||
* @note This call does NOT attempt to match the pedigree. That will be done
|
||||
* by replay_match_victim_pedigree() after random_steal() has locked the victim
|
||||
* worker.
|
||||
*
|
||||
* @param w The __cilkrts_worker we're executing on. The worker's replay log
|
||||
* is checked for a STEAL record. If we've got one, the stolen worker ID is
|
||||
* returned.
|
||||
* @param id The randomly chosen victim worker ID. If we're not replaying a
|
||||
* log, or if record/replay has been compiled out, this is the value that
|
||||
* will be returned.
|
||||
*
|
||||
* @return id if we're not replaying a log
|
||||
* @return -1 if the next record is not a STEAL
|
||||
* @return recorded stolen worker ID if we've got a matching STEAL record
|
||||
*/
|
||||
#ifdef CILK_RECORD_REPLAY
|
||||
__CILKRTS_INLINE
|
||||
int replay_get_next_recorded_victim(__cilkrts_worker *w, int id)
|
||||
{
|
||||
// Only check if we're replaying a log
|
||||
if (REPLAY_LOG == w->g->record_or_replay)
|
||||
return replay_get_next_recorded_victim_internal(w);
|
||||
else
|
||||
return id;
|
||||
}
|
||||
#else
|
||||
__CILKRTS_INLINE
|
||||
int replay_get_next_recorded_victim(__cilkrts_worker *w, int id)
|
||||
{
|
||||
// Record/replay is disabled. Always return the original worker id
|
||||
return id;
|
||||
}
|
||||
#endif // CILK_RECORD_REPLAY
|
||||
|
||||
/**
|
||||
* Initialize per-worker data for record/replay. A noop if record/replay
|
||||
* is disabled, or if we're not recording or replaying anything.
|
||||
*
|
||||
* If we're recording a log, this will ready us to create the per-worker
|
||||
* logs.
|
||||
*
|
||||
* If we're replaying a log, this will read the logs into the per-worker
|
||||
* structures.
|
||||
*
|
||||
* @param g Cilk runtime global state
|
||||
*/
|
||||
void replay_init_workers(global_state_t *g);
|
||||
|
||||
/**
|
||||
* Record a record on a successful steal. A noop if record/replay is
|
||||
* diabled, or if we're not recording anything
|
||||
*
|
||||
* @param w The __cilkrts_worker we're executing on. The pedigree of
|
||||
* the stolen frame will be walked to generate the STEAL record.
|
||||
*
|
||||
* @param victim_id The worker ID of the worker w stole from.
|
||||
*/
|
||||
#ifdef CILK_RECORD_REPLAY
|
||||
__CILKRTS_INLINE
|
||||
void replay_record_steal(__cilkrts_worker *w, int32_t victim_id)
|
||||
{
|
||||
#if RECORD_ON_REPLAY
|
||||
// If we're recording on replay, write the record if we're recording or
|
||||
// replaying
|
||||
if (RECORD_REPLAY_NONE == w->g->record_or_replay)
|
||||
return;
|
||||
#else
|
||||
// Only write the record if we're recording
|
||||
if (RECORD_LOG != w->g->record_or_replay)
|
||||
return;
|
||||
#endif
|
||||
|
||||
replay_record_steal_internal(w, victim_id);
|
||||
}
|
||||
#else
|
||||
__CILKRTS_INLINE
|
||||
void replay_record_steal(__cilkrts_worker *w, int32_t victim_id)
|
||||
{
|
||||
}
|
||||
#endif // CILK_RECORD_REPLAY
|
||||
|
||||
/**
|
||||
* Record a record when continuing after a sync. A noop if record/replay is
|
||||
* diabled, or if we're not recording anything, or if the sync was abandoned,
|
||||
* meaning this isn't the worker that continues from the sync.
|
||||
*
|
||||
* @param w The __cilkrts_worker for we're executing on. The pedigree of
|
||||
* the sync-ing frame will be walked to generate the SYNC record.
|
||||
*
|
||||
* @param continuing True if this worker will be continuing from the
|
||||
* cilk_sync. A SYNC record will only be generated if continuing is true.
|
||||
*/
|
||||
#ifdef CILK_RECORD_REPLAY
|
||||
__CILKRTS_INLINE
|
||||
void replay_record_sync(__cilkrts_worker *w, int continuing)
|
||||
{
|
||||
// If this was not the last worker to the syn, return
|
||||
if (! continuing)
|
||||
return;
|
||||
|
||||
#if RECORD_ON_REPLAY
|
||||
// If we're recording on replay, write the record if we're recording or
|
||||
// replaying
|
||||
if (RECORD_REPLAY_NONE == w->g->record_or_replay)
|
||||
return;
|
||||
#else
|
||||
// Only write the record if we're recording
|
||||
if (RECORD_LOG != w->g->record_or_replay)
|
||||
return;
|
||||
#endif
|
||||
|
||||
replay_record_sync_internal(w);
|
||||
}
|
||||
#else
|
||||
__CILKRTS_INLINE
|
||||
void replay_record_sync(__cilkrts_worker *w, int abandoned)
|
||||
{
|
||||
}
|
||||
#endif // CILK_RECORD_REPLAY
|
||||
|
||||
/**
|
||||
* Record a record on a return to a stolen parent. A noop if record/replay is
|
||||
* diabled, or if we're not recording anything.
|
||||
*
|
||||
* @param w The __cilkrts_worker for we're executing on. The pedigree of the
|
||||
* frame that has discovered that its parent has been stolken will be walked
|
||||
* to generate the ORPHANED record.
|
||||
*/
|
||||
#ifdef CILK_RECORD_REPLAY
|
||||
__CILKRTS_INLINE
|
||||
void replay_record_orphaned(__cilkrts_worker *w)
|
||||
{
|
||||
#if RECORD_ON_REPLAY
|
||||
// If we're recording on replay, write the record if we're recording or
|
||||
// replaying
|
||||
if (RECORD_REPLAY_NONE == w->g->record_or_replay)
|
||||
return;
|
||||
#else
|
||||
// Only write the record if we're recording
|
||||
if (RECORD_LOG != w->g->record_or_replay)
|
||||
return;
|
||||
#endif
|
||||
|
||||
replay_record_orphaned_internal(w);
|
||||
}
|
||||
#else
|
||||
__CILKRTS_INLINE
|
||||
void replay_record_orphaned(__cilkrts_worker *w)
|
||||
{
|
||||
}
|
||||
#endif // CILK_RECORD_REPLAY
|
||||
|
||||
/**
|
||||
* Test whether the frame at the head of the victim matches the pedigree of
|
||||
* the frame that was recorded being stolen. Called in random steal to verify
|
||||
* that we're about to steal the correct frame.
|
||||
*
|
||||
* @param w The __cilkrts_worker for we're executing on. The current worker
|
||||
* is needed to find the replay entry to be checked.
|
||||
*
|
||||
* @param victim The __cilkrts_worker for we're proposing to steal a frame
|
||||
* from. The victim's head entry is
|
||||
* is needed to find the replay entry to be checked.
|
||||
*
|
||||
* @return 0 if we're replaying a log and the victim's pedigree does NOT match
|
||||
* the next frame the worker is expected to steal.
|
||||
*
|
||||
* @return 1 in all other cases to indicate that the steal attempt should
|
||||
* continue
|
||||
*/
|
||||
#ifdef CILK_RECORD_REPLAY
|
||||
__CILKRTS_INLINE
|
||||
int replay_match_victim_pedigree(__cilkrts_worker *w, __cilkrts_worker *victim)
|
||||
{
|
||||
// We're not replaying a log. The victim is always acceptable
|
||||
if (REPLAY_LOG != w->g->record_or_replay)
|
||||
return 1;
|
||||
|
||||
// Return 1 if the victim's pedigree matches the frame the worker stole
|
||||
// when we recorded the log
|
||||
return replay_match_victim_pedigree_internal(w, victim);
|
||||
}
|
||||
#else
|
||||
__CILKRTS_INLINE
|
||||
int replay_match_victim_pedigree(__cilkrts_worker *w, __cilkrts_worker *victim)
|
||||
{
|
||||
// Record/replay is disabled. The victim is always acceptable
|
||||
return 1;
|
||||
}
|
||||
#endif // CILK_RECORD_REPLAY
|
||||
|
||||
/**
|
||||
* Test whether the current replay entry is a sync record matching the
|
||||
* worker's pedigree.
|
||||
*
|
||||
* @param w The __cilkrts_worker for we're executing on.
|
||||
*
|
||||
* @return 1 if the current replay entry matches the current pedigree.
|
||||
* @return 0 if there's no match, or if we're not replaying a log.
|
||||
*/
|
||||
#ifdef CILK_RECORD_REPLAY
|
||||
__CILKRTS_INLINE
|
||||
int replay_match_sync_pedigree(__cilkrts_worker *w)
|
||||
{
|
||||
// If we're not replaying, assume no match
|
||||
if (REPLAY_LOG != w->g->record_or_replay)
|
||||
return 0;
|
||||
|
||||
return replay_match_sync_pedigree_internal(w);
|
||||
}
|
||||
#else
|
||||
__CILKRTS_INLINE
|
||||
int replay_match_sync_pedigree(__cilkrts_worker *w)
|
||||
{
|
||||
// Record/replay is disabled. Assume no match
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Marks a sync record seen, advancing to the next record in the replay list.
|
||||
*
|
||||
* This function will only advance to the next record if:
|
||||
* - Record/replay hasn't been compiled out AND
|
||||
* - We're replaying a log AND
|
||||
* - A match was found AND
|
||||
* - The sync is not being abandoned
|
||||
*
|
||||
* @param w The __cilkrts_worker for we're executing on.
|
||||
* @param match_found The value returned by replay_match_sync_pedigree(). If
|
||||
* match_found is false, nothing is done.
|
||||
* @param continuing Flag indicating whether this worker will continue from
|
||||
* the sync (it's the last worker to the sync) or if it will abandon the work
|
||||
* and go to the scheduling loop to look for more work it can steal.
|
||||
*/
|
||||
#ifdef CILK_RECORD_REPLAY
|
||||
__CILKRTS_INLINE
|
||||
void replay_advance_from_sync(__cilkrts_worker *w, int match_found, int continuing)
|
||||
{
|
||||
// If we're replaying a log, and the current sync wasn't abandoned, and we
|
||||
// found a match in the log, mark the sync record seen.
|
||||
if ((REPLAY_LOG == w->g->record_or_replay) && match_found && continuing)
|
||||
replay_advance_from_sync_internal(w);
|
||||
}
|
||||
#else
|
||||
__CILKRTS_INLINE
|
||||
void replay_advance_from_sync(__cilkrts_worker *w, int match_found, int continuing)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Release any resources used to read or write a replay log.
|
||||
*
|
||||
* @param g Cilk runtime global state
|
||||
*/
|
||||
void replay_term(global_state_t *g);
|
||||
|
||||
__CILKRTS_END_EXTERN_C
|
||||
|
||||
#endif // ! defined(INCLUDED_RECORD_REPLAY_DOT_H)
|
1012
libcilkrts/runtime/reducer_impl.cpp
Normal file
1012
libcilkrts/runtime/reducer_impl.cpp
Normal file
File diff suppressed because it is too large
Load diff
128
libcilkrts/runtime/reducer_impl.h
Normal file
128
libcilkrts/runtime/reducer_impl.h
Normal file
|
@ -0,0 +1,128 @@
|
|||
/* reducer_impl.h -*-C++-*-
|
||||
*
|
||||
*************************************************************************
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2009-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
**************************************************************************/
|
||||
|
||||
/**
|
||||
* @file reducer_impl.h
|
||||
*
|
||||
* @brief Functions to implement reducers in the runtime.
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_REDUCER_IMPL_DOT_H
|
||||
#define INCLUDED_REDUCER_IMPL_DOT_H
|
||||
|
||||
#include <cilk/common.h>
|
||||
#include <internal/abi.h>
|
||||
#include "rts-common.h"
|
||||
|
||||
__CILKRTS_BEGIN_EXTERN_C
|
||||
|
||||
/**
|
||||
* Construct an empty reducer map from the memory pool associated with the
|
||||
* given worker. This reducer map must be destroyed before the worker's
|
||||
* associated global context is destroyed.
|
||||
*
|
||||
* @param w __cilkrts_worker the cilkred_map is being created for.
|
||||
*
|
||||
* @return Pointer to the initialized cilkred_map.
|
||||
*/
|
||||
COMMON_SYSDEP
|
||||
cilkred_map *__cilkrts_make_reducer_map(__cilkrts_worker *w);
|
||||
|
||||
/**
|
||||
* Destroy a reducer map. The map must have been allocated from the worker's
|
||||
* global context and should have been allocated from the same worker.
|
||||
*
|
||||
* @param w __cilkrts_worker the cilkred_map was created for.
|
||||
* @param h The cilkred_map to be deallocated.
|
||||
*/
|
||||
COMMON_SYSDEP
|
||||
void __cilkrts_destroy_reducer_map(__cilkrts_worker *w,
|
||||
cilkred_map *h);
|
||||
|
||||
/**
|
||||
* Set the specified reducer map as the leftmost map if is_leftmost is true,
|
||||
* otherwise, set it to not be the leftmost map.
|
||||
*
|
||||
* @param h The cilkred_map to be modified.
|
||||
* @param is_leftmost true if the reducer map is leftmost.
|
||||
*/
|
||||
COMMON_SYSDEP
|
||||
void __cilkrts_set_leftmost_reducer_map(cilkred_map *h,
|
||||
int is_leftmost);
|
||||
|
||||
/**
|
||||
* Merge reducer map RIGHT_MAP into LEFT_MAP and return the result of the
|
||||
* merge. Both maps must be allocated from the global context associated
|
||||
* with the specified worker. The returned reducer map must be destroyed
|
||||
* before the worker's associated global context is destroyed.
|
||||
*
|
||||
* If two cilkred_maps are specified, one will be destroyed and the other
|
||||
* one will be returned as the merged cilkred_map.
|
||||
*
|
||||
* When reducers can contain nested parallelism, execution can return
|
||||
* on a different worker than when it started (but still using the
|
||||
* same stack).
|
||||
*
|
||||
* Upon return, *w_ptr stores the pointer to the worker that execution
|
||||
* returns on.
|
||||
*
|
||||
* @param w_ptr Pointer to the currently executing worker.
|
||||
* @param left_map The left cilkred_map.
|
||||
* @param right_map The right cilkred_map.
|
||||
*
|
||||
* @return pointer to merged cilkred_map.
|
||||
*/
|
||||
extern
|
||||
cilkred_map *merge_reducer_maps(__cilkrts_worker **w_ptr,
|
||||
cilkred_map *left_map,
|
||||
cilkred_map *right_map);
|
||||
|
||||
/**
|
||||
* Similar to merge_reducer_maps(), except that after merging
|
||||
* RIGHT_MAP into LEFT_MAP, it repeatedly merges (*w_ptr)->reducer_map
|
||||
* into LEFT_MAP. This procedure ensures that any new reducers
|
||||
* created by the reductions themselves also get merged into LEFT_MAP.
|
||||
*/
|
||||
extern
|
||||
cilkred_map *repeated_merge_reducer_maps(__cilkrts_worker **w_ptr,
|
||||
cilkred_map *left_map,
|
||||
cilkred_map *right_map);
|
||||
|
||||
__CILKRTS_END_EXTERN_C
|
||||
|
||||
#endif // ! defined(INCLUDED_REDUCER_IMPL_DOT_H)
|
132
libcilkrts/runtime/rts-common.h
Normal file
132
libcilkrts/runtime/rts-common.h
Normal file
|
@ -0,0 +1,132 @@
|
|||
/* rts-common.h -*-C++-*-
|
||||
*
|
||||
*************************************************************************
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2009-2013, Intel Corporation
|
||||
* All rights reserved.
|
||||
*
|
||||
* @copyright
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* @copyright
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
**************************************************************************/
|
||||
|
||||
#ifndef INCLUDED_RTS_COMMON_DOT_H
|
||||
#define INCLUDED_RTS_COMMON_DOT_H
|
||||
|
||||
/* Abbreviations for API functions returning different types. By using these
|
||||
* abbreviations instead of using CILK_API(ret) directly, etags and other
|
||||
* tools can more easily recognize function signatures.
|
||||
*/
|
||||
#define CILK_API_VOID CILK_API(void)
|
||||
#define CILK_API_VOID_PTR CILK_API(void*)
|
||||
#define CILK_API_INT CILK_API(int)
|
||||
#define CILK_API_SIZET CILK_API(size_t)
|
||||
#define CILK_API_TBB_RETCODE CILK_API(__cilk_tbb_retcode)
|
||||
#define CILK_API_PEDIGREE CILK_API(__cilkrts_pedigree)
|
||||
|
||||
/* Abbreviations for ABI functions returning different types. By using these
|
||||
* abbreviations instead of using CILK_ABI(ret) directly, etags and other
|
||||
* tools can more easily recognize function signatures.
|
||||
*/
|
||||
#define CILK_ABI_VOID CILK_ABI(void)
|
||||
#define CILK_ABI_WORKER_PTR CILK_ABI(__cilkrts_worker_ptr)
|
||||
#define CILK_ABI_THROWS_VOID CILK_ABI_THROWS(void)
|
||||
|
||||
/* documentation aid to identify portable vs. nonportable
|
||||
parts of the runtime. See README for definitions. */
|
||||
#define COMMON_PORTABLE
|
||||
#define COMMON_SYSDEP
|
||||
#define NON_COMMON
|
||||
|
||||
#if !(defined __GNUC__ || defined __ICC)
|
||||
# define __builtin_expect(a_, b_) (a_) /* no-op fallback: yields the tested expression, parenthesized so it stays a single operand at the use site; the hint b_ is ignored */
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
# define cilk_nothrow throw()
|
||||
#else
|
||||
# define cilk_nothrow /*empty in C*/
|
||||
#endif
|
||||
|
||||
#ifdef __GNUC__
|
||||
# define NORETURN void __attribute__((noreturn))
|
||||
#else
|
||||
# define NORETURN void __declspec(noreturn)
|
||||
#endif
|
||||
|
||||
#ifdef __GNUC__
|
||||
# define NOINLINE __attribute__((noinline))
|
||||
#else
|
||||
# define NOINLINE __declspec(noinline)
|
||||
#endif
|
||||
|
||||
#ifndef __GNUC__
|
||||
# define __attribute__(X)
|
||||
#endif
|
||||
|
||||
/* Microsoft CL accepts "inline" for C++, but not for C. It accepts
|
||||
* __inline for both. Intel ICL accepts inline for C if /Qstd=c99
|
||||
* is set. The Cilk runtime is assumed to be compiled with /Qstd=c99
|
||||
*/
|
||||
#if defined(_MSC_VER) && ! defined(__INTEL_COMPILER)
|
||||
# error define inline
|
||||
# define inline __inline
|
||||
#endif
|
||||
|
||||
/* Compilers that build the Cilk runtime are assumed to know about zero-cost
|
||||
* intrinsics (__notify_intrinsic()). For those that don't, #undef the
|
||||
* following definition:
|
||||
*/
|
||||
//#define ENABLE_NOTIFY_ZC_INTRINSIC 1
|
||||
|
||||
#if defined(__INTEL_COMPILER)
|
||||
/* The notify intrinsic was introduced in ICC 12.0. */
|
||||
# if __INTEL_COMPILER <= 1200
|
||||
# undef ENABLE_NOTIFY_ZC_INTRINSIC
|
||||
# endif
|
||||
#elif defined(__VXWORKS__)
|
||||
# undef ENABLE_NOTIFY_ZC_INTRINSIC
|
||||
#elif defined(__clang__)
|
||||
# if !defined(__has_extension) || !__has_extension(notify_zc_intrinsic)
|
||||
# undef ENABLE_NOTIFY_ZC_INTRINSIC
|
||||
# endif
|
||||
#elif defined(__arm__)
|
||||
// __notify_zc_intrinsic not yet supported by gcc for ARM
|
||||
# undef ENABLE_NOTIFY_ZC_INTRINSIC
|
||||
#endif
|
||||
|
||||
// If ENABLE_NOTIFY_ZC_INTRINSIC is defined, use __notify_zc_intrinsic
|
||||
#ifdef ENABLE_NOTIFY_ZC_INTRINSIC
|
||||
# define NOTIFY_ZC_INTRINSIC(annotation, data) \
|
||||
__notify_zc_intrinsic(annotation, data)
|
||||
#else
|
||||
# define NOTIFY_ZC_INTRINSIC(annotation, data)
|
||||
#endif
|
||||
|
||||
#endif // ! defined(INCLUDED_RTS_COMMON_DOT_H)
|
3940
libcilkrts/runtime/scheduler.c
Normal file
3940
libcilkrts/runtime/scheduler.c
Normal file
File diff suppressed because it is too large
Load diff
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Reference in a new issue