@@ -4,60 +4,39 @@ INCFLAGS = -I. -I../../src -I$(TOOLSDIR)
# Name of the model library: it is linked through LIBFLAGS below and is rebuilt by the ../../src Makefile
MODELLIB = model_sm

# Optimization level shared by the C++ and CUDA builds
OPTFLAGS = -O3 # this ends up in CUFLAGS too (should it?), cannot add -Ofast or -ffast-math here

# OpenMP flags and C++ compiler: both can be overridden from the environment or the command line
OMPFLAGS ?= -fopenmp
CXX ?= g++

# Baseline C++ flags (per-choice macros such as FPTYPE and RNDGEN are appended further down)
CXXFLAGS = $(OPTFLAGS) -std=c++17 $(INCFLAGS) $(USE_NVTX) -Wall -Wshadow -Wextra $(OMPFLAGS)
CXXFLAGS += -ffast-math # see issue #117
###CXXFLAGS += -Ofast # performance is not different from --fast-math
###CXXFLAGS += -g # FOR DEBUGGING ONLY

# Linker flags: LIBDIR is only set further down, hence the recursive (late-binding) assignment
LIBFLAGS = -L$(LIBDIR) -l$(MODELLIB)
1313
# Set the default AVX (vectorization) choice (example: "make AVX=none" skips this default)
# A value supplied by the user is kept as is; a default is chosen only when AVX is empty
ifeq ($(AVX),)
  # Probe the host only once: grep prints 1 if /proc/cpuinfo mentions avx512vl and 0 if it does not;
  # it prints nothing if /proc/cpuinfo cannot be read (the error message is discarded)
  avx512vl_probe := $(shell grep -m1 -c avx512vl /proc/cpuinfo 2>/dev/null)
  # The probe result is concatenated with any line of the compiler banner that starts with "clang":
  # the comparison with 1 therefore succeeds only on an avx512vl host when no such line exists
  ifeq ($(avx512vl_probe)$(shell $(CXX) --version | grep ^clang),1)
    override AVX = 512y
    ###$(info Using AVX='$(AVX)' as no user input exists)
  else
    override AVX = avx2
    ifneq ($(avx512vl_probe),1)
      $(warning Using AVX='$(AVX)' because host does not support avx512vl)
    else
      $(warning Using AVX='$(AVX)' because this is faster than avx512vl for clang)
    endif
  endif
else
  ###$(info Using AVX='$(AVX)' according to user input)
endif
2930
30- # Set the build flags appropriate to each AVX
31- # [NB MGONGPU_PVW512 is needed because "-mprefer-vector-width=256" is not exposed in a macro]
32- # [See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96476]
33- ifeq ($(AVX ) ,sse4)
34- override AVXFLAGS = -march=nehalem # SSE4.2 with 128 width (xmm registers)
35- else ifeq ($(AVX ) ,avx2)
36- override AVXFLAGS = -march=haswell # AVX2 with 256 width (ymm registers) [DEFAULT for clang]
37- else ifeq ($(AVX ) ,512y)
38- override AVXFLAGS = -march=skylake-avx512 -mprefer-vector-width=256 # AVX512 with 256 width (ymm registers) [DEFAULT for gcc]
39- else ifeq ($(AVX ) ,512z)
40- override AVXFLAGS = -march=skylake-avx512 -DMGONGPU_PVW512 # AVX512 with 512 width (zmm registers)
41- else ifneq ($(AVX ) ,none)
42- $(error Unknown AVX='$(AVX ) ' : only 'none', 'sse4', 'avx2', '512y' and '512z' are supported)
# Default FPTYPE (floating point type) choice: fall back to 'd' when the user gives no FPTYPE
# ('d' is later translated into -DMGONGPU_FPTYPE_DOUBLE, 'f' into -DMGONGPU_FPTYPE_FLOAT)
ifeq "$(FPTYPE)" ""
  override FPTYPE = d
endif
4435
45- # For the moment, use AVXFLAGS everywhere: eventually, use them only in encapsulated implementations
46- CXXFLAGS+ = $(AVXFLAGS )
47-
48- # Build tag (defines target and path to the optional build directory)
49- override TAG = $(AVX )
50-
51- # Build directory: current directory by default, or build.$(TAG) if USEBUILDDIR==1
52- ifeq ($(USEBUILDDIR ) ,1)
53- override BUILDDIR = build.$(TAG )
54- override LIBDIR = ../../lib/$(BUILDDIR )
55- else
56- override BUILDDIR = .
57- override LIBDIR = ../../lib
# Default RNDGEN (random number generator) choice: fall back to 'curdev' when the user gives no RNDGEN
# ('curdev' is later translated into -DMGONGPU_CURAND_ONDEVICE)
ifeq "$(RNDGEN)" ""
  override RNDGEN = curdev
endif
59- # ##$(info BUILDDIR=$(BUILDDIR))
60- $(info Building in BUILDDIR=$(BUILDDIR) for tag=$(TAG))
6140
6241# If CUDA_HOME is not set, try to set it from the location of nvcc
6342ifndef CUDA_HOME
@@ -68,7 +47,6 @@ ifndef CUDA_HOME
6847 CUDA_HOME := $(warning No CUDA_HOME exported. Using "$(CUDA_HOME ) ") $(CUDA_HOME )
6948 endif
7049endif
71-
7250ifneq ($(wildcard $(CUDA_HOME ) /bin/nvcc) ,)
7351 NVCC = $(CUDA_HOME ) /bin/nvcc
7452 CUARCHNUM =70
@@ -79,7 +57,7 @@ ifneq ($(wildcard $(CUDA_HOME)/bin/nvcc),)
7957 CUINC = -I$(CUDA_HOME ) /include/
8058 CULIBFLAGS = -L$(CUDA_HOME ) /lib64/ -lcuda -lcurand
8159 CUOPTFLAGS = -lineinfo
82- CUFLAGS = $(OPTFLAGS ) $(CUOPTFLAGS ) -std=c++14 $(INCFLAGS ) $(CUINC ) $(USE_NVTX ) $(CUARCHFLAGS ) -use_fast_math $( MGONGPU_CONFIG )
60+ CUFLAGS = $(OPTFLAGS ) $(CUOPTFLAGS ) -std=c++14 $(INCFLAGS ) $(CUINC ) $(USE_NVTX ) $(CUARCHFLAGS ) -use_fast_math
8361 # Without -maxrregcount: baseline throughput: 6.5E8 (16384 32 12) up to 7.3E8 (65536 128 12)
8462 # ##CUFLAGS+= --maxrregcount 160 # improves throughput: 6.9E8 (16384 32 12) up to 7.7E8 (65536 128 12)
8563 # ##CUFLAGS+= --maxrregcount 128 # improves throughput: 7.3E8 (16384 32 12) up to 7.6E8 (65536 128 12)
9270 NVCC := $(warning CUDA_HOME is not set or is invalid. Export CUDA_HOME to compile with cuda)
9371 USE_NVTX :=
9472 CULIBFLAGS :=
95- ifndef MGONGPU_CONFIG
96- export MGONGPU_CONFIG = -DMGONGPU_COMMONRAND_ONHOST
97- endif
73+ override RNDGEN = common
74+ endif
75+
# Set the build flags appropriate to each AVX choice (example: "make AVX=none")
# [NB MGONGPU_PVW512 is needed because "-mprefer-vector-width=256" is not exposed in a macro]
# [See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96476]
# (Comments are kept on their own lines so that no trailing blanks end up in AVXFLAGS)
$(info AVX=$(AVX))
ifeq ($(AVX),sse4)
  # SSE4.2 with 128 width (xmm registers)
  override AVXFLAGS = -march=nehalem
else ifeq ($(AVX),avx2)
  # AVX2 with 256 width (ymm registers) [DEFAULT for clang]
  override AVXFLAGS = -march=haswell
else ifeq ($(AVX),512y)
  # AVX512 with 256 width (ymm registers) [DEFAULT for gcc]
  override AVXFLAGS = -march=skylake-avx512 -mprefer-vector-width=256
else ifeq ($(AVX),512z)
  # AVX512 with 512 width (zmm registers)
  override AVXFLAGS = -march=skylake-avx512 -DMGONGPU_PVW512
else ifeq ($(AVX),none)
  # No vectorization flags: reset AVXFLAGS explicitly, as for every other choice, so that a value
  # inherited from the environment or the command line cannot leak into CXXFLAGS
  override AVXFLAGS =
else
  $(error Unknown AVX='$(AVX)': only 'none', 'sse4', 'avx2', '512y' and '512z' are supported)
endif
# For the moment, use AVXFLAGS everywhere: eventually, use them only in encapsulated implementations?
CXXFLAGS += $(AVXFLAGS)
93+
# Translate the FPTYPE choice into a preprocessor macro (example: "make FPTYPE=f")
# The same macro is appended to the CUDA flags and to the C++ flags
$(info FPTYPE=$(FPTYPE))
ifeq "$(FPTYPE)" "d"
  CUFLAGS  += -DMGONGPU_FPTYPE_DOUBLE
  CXXFLAGS += -DMGONGPU_FPTYPE_DOUBLE
else
  ifeq "$(FPTYPE)" "f"
    CUFLAGS  += -DMGONGPU_FPTYPE_FLOAT
    CXXFLAGS += -DMGONGPU_FPTYPE_FLOAT
  else
    $(error Unknown FPTYPE='$(FPTYPE)': only 'f' and 'd' are supported)
  endif
endif
105+
# Translate the RNDGEN choice into a preprocessor macro (example: "make RNDGEN=common")
# The same macro is appended to the CUDA flags and to the C++ flags
$(info RNDGEN=$(RNDGEN))
ifeq "$(RNDGEN)" "curdev"
  CUFLAGS  += -DMGONGPU_CURAND_ONDEVICE
  CXXFLAGS += -DMGONGPU_CURAND_ONDEVICE
else
  ifeq "$(RNDGEN)" "curhst"
    CUFLAGS  += -DMGONGPU_CURAND_ONHOST
    CXXFLAGS += -DMGONGPU_CURAND_ONHOST
  else
    ifeq "$(RNDGEN)" "common"
      CUFLAGS  += -DMGONGPU_COMMONRAND_ONHOST
      CXXFLAGS += -DMGONGPU_COMMONRAND_ONHOST
    else
      $(error Unknown RNDGEN='$(RNDGEN)': only 'curdev', 'curhst' and 'common' are supported)
    endif
  endif
endif
99120
# Make the three build choices visible to sub-makes through the environment:
# the recursive builds in ../../src then need no AVX=, FPTYPE= or RNDGEN= arguments
export AVX FPTYPE RNDGEN
125+
# "Short" tag used for the name of the optional build directory
# (Rationale: keep directory names shorter, e.g. do not include random number generator choice)
override DIRTAG = $(AVX)_$(FPTYPE)

# "Full" tag used for the build lockfile: the full set of build options that cannot be intermixed
# (Rationale: avoid mixing of CUDA and no-CUDA environment builds with different random number generators)
override TAG = $(AVX)_$(FPTYPE)_$(RNDGEN)

# Where to build: in place unless USEBUILDDIR is exactly 1, else in build.$(DIRTAG)
# (LIBDIR follows the same choice under ../../lib)
ifneq ($(USEBUILDDIR),1)
  override BUILDDIR = .
  override LIBDIR   = ../../lib
else
  override BUILDDIR = build.$(DIRTAG)
  override LIBDIR   = ../../lib/$(BUILDDIR)
endif
###$(info BUILDDIR=$(BUILDDIR))
$(info Building in BUILDDIR=$(BUILDDIR) for tag=$(TAG))
144+
# Enable ccache if USECCACHE=1
# A command is prefixed with ccache only if USECCACHE is 1 and the command does not mention ccache yet:
# in that case findstring expands to nothing and the comparison reads "1" against "1"
# (findstring is a make built-in, so no shell is forked while the Makefile is parsed)
ifeq ($(USECCACHE)$(findstring ccache,$(CXX)),1)
  override CXX := ccache $(CXX)
endif
ifeq ($(USECCACHE)$(findstring ccache,$(AR)),1)
  override AR := ccache $(AR)
endif
# NVCC is left alone when it is empty
ifneq ($(NVCC),)
  ifeq ($(USECCACHE)$(findstring ccache,$(NVCC)),1)
    override NVCC := ccache $(NVCC)
  endif
endif
112157
113158GTESTLIBDIR = $(TESTDIR ) /googletest/build/lib/
@@ -126,24 +171,24 @@ ifeq ($(UNAME_P),ppc64le)
126171 CUFLAGS+ = -Xcompiler -mno-float128
127172endif
128173
# Top-level target for the current tag: it needs the build lockfile of ../../src, the build lockfile
# of this directory, and whatever cu_main, cxx_main and testmain expand to (set outside this section)
all.$(TAG): ../../src/$(BUILDDIR)/.build.$(TAG) $(BUILDDIR)/.build.$(TAG) $(cu_main) $(cxx_main) $(testmain)

# Shell command substitution (run by the shell inside the recipe below, not by make)
# listing the lockfiles that builds for other tags left in BUILDDIR
override oldtags=`find $(BUILDDIR) -maxdepth 1 -name '.build.*' ! -name '.build.$(TAG)'`

# Build lockfile: creates BUILDDIR if needed and refuses to build on top of a build for another tag
# - mkdir -p does not fail if the directory appears between the test and the mkdir
# - printf '%b\n' interprets the \n escapes in any POSIX shell (echo -e prints a literal -e in some shells)
$(BUILDDIR)/.build.$(TAG):
	@if [ ! -d $(BUILDDIR) ]; then echo "mkdir $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi
	@if [ "$(oldtags)" != "" ]; then printf '%b\n' "Cannot build for tag=$(TAG) as old builds exist for other tags:\n$(oldtags)\nPlease run 'make clean' first\nIf 'make clean' is not enough: run 'make clean USEBUILDDIR=1 AVX=$(AVX) FPTYPE=$(FPTYPE)' or 'make cleanall'"; exit 1; fi
	@touch $(BUILDDIR)/.build.$(TAG)
136181
# Debug build: same goal as all.$(TAG), built with target-specific settings
# (target-specific variables also apply to the prerequisites of the target)
# - OPTFLAGS and CUOPTFLAGS replace the optimized defaults
# - MAKEDEBUG is the extra goal handed to the sub-make in ../../src
debug: OPTFLAGS   = -g -O0 -DDEBUG2
debug: CUOPTFLAGS = -G
debug: MAKEDEBUG := debug
debug: all.$(TAG)
141186
# Both rules below delegate to the Makefile in ../../src
# (AVX, FPTYPE and RNDGEN reach it through the environment, MAKEDEBUG is an optional extra goal)

# Build lockfile of the ../../src build for the current tag
../../src/$(BUILDDIR)/.build.$(TAG):
	$(MAKE) -C ../../src $(MAKEDEBUG)

# Model library: rebuilt whenever a header or a .cc source in ../../src is newer than the archive
$(LIBDIR)/lib$(MODELLIB).a: ../../src/*.h ../../src/*.cc
	$(MAKE) -C ../../src $(MAKEDEBUG)
147192
148193$(BUILDDIR ) /gcheck.o : gcheck.cu * .h ../../src/* .h ../../src/* .cu
149194 @if [ ! -d $( BUILDDIR) ]; then mkdir $(BUILDDIR ) ; fi
@@ -208,17 +253,6 @@ $(GTESTLIBS):
208253check : $(testmain )
209254 $(testmain )
210255
211- .PHONY : clean
212-
213- clean :
214- make -C ../../src AVX=$(AVX ) clean
215- rm -f $(BUILDDIR ) /.build.tag*
216- ifneq ($(BUILDDIR ) ,.)
217- rm -rf $(BUILDDIR)
218- else
219- rm -f $(BUILDDIR)/*.o $(BUILDDIR)/*.exe
220- endif
221-
222256avxall :
223257 @echo
224258 make USEBUILDDIR=1 AVX=none
@@ -231,19 +265,22 @@ avxall:
231265 @echo
232266 make USEBUILDDIR=1 AVX=512z
233267
.PHONY: clean

# Remove the build products of the current build configuration, here and in ../../src
# - $(MAKE) rather than a literal 'make' keeps -n, -j and the jobserver working in the sub-make
# - a dedicated build directory is removed as a whole; an in-place build only loses its
#   lockfiles, objects and executables
clean:
	$(MAKE) -C ../../src clean
ifneq ($(BUILDDIR),.)
	rm -rf $(BUILDDIR)
else
	rm -f $(BUILDDIR)/.build.* $(BUILDDIR)/*.o $(BUILDDIR)/*.exe
endif
277+
.PHONY: cleanall

# Remove the build products of every build configuration:
# the current one through 'clean', then everything in ../../src, then all build.* directories here
# ($(MAKE) rather than a literal 'make' keeps -n, -j and the jobserver working in the sub-makes)
cleanall:
	@echo
	$(MAKE) clean
	@echo
	$(MAKE) -C ../../src cleanall
	rm -rf build.*
247284
248285distclean : cleanall
249286 make -C $(TOOLSDIR ) clean
0 commit comments