|
马上注册,结交更多好友,享用更多功能,让你轻松玩转社区。
您需要 登录 才可以下载或查看,没有账号?注册
x
前面讲了atlas+hpl的安装,测试了一下,效率不是很理想。传闻使用Goto blas库测得的效率略理想一些,于是今天试了一下,安装成功。将过程贴出来供大家讨论。
GotoBLAS之编译:
1、 到 http://www.tacc.utexas.edu/resources/software/ 网站上下载 Source Code v.1.00或者其他版本,我用的是Source Code v.1.00,网站需要注册,不过是免费的,很快就可以搞定。
2、用 tar -zxvf 解压之,可以看到GotoBLAS的文件夹
3、编辑Makefile.rule,详细情况见附件一;更改getarch.c里面的architecture,使之符合自己的情况
4、make
5、cd exports 执行make so
6、新编辑xerbla.c,内容见附件二,执行gcc -c xerbla.c -o xerbla.o
在GotoBLAS下可以看到libgoto.a,libgoto_opteronp-r1.00.a,libgoto_opteronp-r1.00.so,xerbla.o几个新东西,表明编译成功。
附件一:Makefile.rule
#
# Beginning of user configuration
#
# This library's version. It is appended to the library file name
# (see LIBNAME further below).
REVISION = -r1.00
# Which C compiler do you prefer? GNU selects gcc, INTEL selects icc.
# GCC is recommended because inline assembler is required.
C_COMPILER = GNU
# C_COMPILER = INTEL
# Which Fortran compiler do you prefer? Uncomment at most one line.
# With every line commented out (as here), F_COMPILER falls back to G77
# in the "ifndef F_COMPILER" check further below.
# F_COMPILER = G77
# F_COMPILER = G95
# F_COMPILER = GFORTRAN
# F_COMPILER = INTEL
# F_COMPILER = PGI
# F_COMPILER = PATHSCALE
# F_COMPILER = IBM
# F_COMPILER = COMPAQ
# F_COMPILER = SUN
# F_COMPILER = F2C
# Set SMP to any non-empty value to build the threaded version: it adds
# -DSMP_SERVER and a "p" suffix to the library name further below.
# You can specify the number of threads with the environment variable
# "OMP_NUM_THREADS"; otherwise it's detected automatically.
SMP = 2
# Maximum number of threads, passed to the C sources as -DMAX_CPU_NUMBER.
# Keep it as small as possible. When left unset it is taken from getarch.
MAX_THREADS = 2
# If you need a 64bit binary; some architectures can accept both 32bit and
# 64bit binaries (EM64T, Opteron, SPARC and Power/PowerPC).
BINARY64 = 1
# If you need the 64bit integer interface. Only takes effect together with
# BINARY64, in which case -DUSE64BITINT is added to the C flags.
INTERFACE64 = 1
# If you need special memory management:
# Using the HugeTLB file system (Linux / AIX / Solaris)
CCOMMON_OPT+= -DALLOC_HUGETLB
# Using static allocation instead of dynamic allocation
# CCOMMON_OPT+= -DALLOC_STATIC
# If you want to use CPU affinity
CCOMMON_OPT+= -DUSE_CPU_AFFINITY
# If you want to use memory affinity (for NUMA)
# CCOMMON_OPT+= -DUSE_MEMORY_AFFINITY
# If you need a special compiler for the architecture detection program
# (getarch). Left empty, GETARCH_CC falls back to gcc further below.
GETARCH_CC =
GETARCH_FLAGS =
#
# End of user configuration
#
# Host machine type from "uname -m"; every i?86 spelling is folded to i386.
MACHINE := $(shell uname -m | sed -e s/i.86/i386/ )
# Operating system name from "uname -s".
OSNAME := $(shell uname -s)

# NATIVEARCH = YES means "do not pass -m32/-m64": the compiler's default
# mode is used as-is. BINARY64 is forced to match what the host supports.

# 32-bit x86: never a 64-bit binary.
ifeq ($(MACHINE), i386)
  BINARY64 =
  NATIVEARCH = YES
endif

# Itanium: always 64-bit.
ifeq ($(MACHINE), ia64)
  BINARY64 = YES
  NATIVEARCH = YES
endif

# Alpha: always 64-bit.
ifeq ($(MACHINE), alpha)
  BINARY64 = YES
  NATIVEARCH = YES
endif

# AIX: keep the user's BINARY64 choice, but use the native compiler mode.
ifeq ($(OSNAME), AIX)
  NATIVEARCH = YES
endif

# Darwin: native mode only when no 64-bit binary was requested.
ifeq ($(OSNAME), Darwin)
  ifndef BINARY64
    NATIVEARCH = YES
  endif
endif

# 64-bit binaries get -D__64BIT__ (needed to access chunks over 4GB);
# the 64-bit integer interface is only honoured on top of that.
ifdef BINARY64
  CCOMMON_OPT += -D__64BIT__
  ifdef INTERFACE64
    CCOMMON_OPT += -DUSE64BITINT
  endif
endif

# Enable these for the modified GEMV/GEMM used to search for the best
# parameters:
# CCOMMON_OPT += -DPARAMTEST
# CCOMMON_OPT += -DPREFETCHTEST

# Optimization flags shared by the C and Fortran compilers.
COMMON_OPT += -O2

# Extra optimization flags for the C compiler only (none by default).
CCOMMON_OPT +=

# Extra optimization flags for the Fortran compiler only (none by default).
FCOMMON_OPT +=

# Flags added when building the profiled objects.
COMMON_PROF = -pg
################## End of Main Configuration #####################

# Marker used to suppress recursive includes of this file.
INCLUDED = 1

# Fall back to the GNU toolchain when the user made no (or an empty) choice.
ifndef C_COMPILER
  C_COMPILER = GNU
endif

ifndef F_COMPILER
  F_COMPILER = G77
endif

# ---- C compiler: gcc ------------------------------------------------
ifeq ($(C_COMPILER), GNU)
  COMPILER = gcc
  CCOMMON_OPT += -Wall

  # Position independent code everywhere except this Cygwin release.
  ifneq ($(OSNAME), CYGWIN_NT-5.1)
    CCOMMON_OPT += -fPIC
  endif

  # Unless the host dictates its own mode (NATIVEARCH), select the word
  # size explicitly, both for the library and for the getarch probe.
  ifndef NATIVEARCH
    ifdef BINARY64
      CCOMMON_OPT += -m64
      GETARCH_FLAGS = -m64
    else
      CCOMMON_OPT += -m32
      GETARCH_FLAGS = -m32
    endif
  endif

  # Profiled builds must not inline.
  COMMON_PROF += -fno-inline
endif

# ---- C compiler: Intel icc ------------------------------------------
ifeq ($(C_COMPILER), INTEL)
  COMPILER = icc
  CCOMMON_OPT += -fPIC
endif
# Fortran compiler selection. Each branch sets:
#   COMPILER_F77  the compiler executable
#   BU            the underscore appended to symbol names ("_" or empty)
#   CCOMMON_OPT   interface defines consumed by the C sources
#   FCOMMON_OPT   flags handed to the Fortran compiler
# The order of the += lines is kept because it fixes the flag order.

# ---- GNU g77 --------------------------------------------------------
ifeq ($(F_COMPILER), G77)
  COMPILER_F77 = g77
  BU = _
  CCOMMON_OPT += -DF_INTERFACE_F2C -DNEED_F2CCONV
  FCOMMON_OPT += -Wall
  ifneq ($(OSNAME), CYGWIN_NT-5.1)
    FCOMMON_OPT += -fPIC
  endif
  ifndef NATIVEARCH
    ifdef BINARY64
      FCOMMON_OPT += -m64
    else
      FCOMMON_OPT += -m32
    endif
  endif
endif

# ---- g95 ------------------------------------------------------------
ifeq ($(F_COMPILER), G95)
  COMPILER_F77 = g95
  BU = _
  CCOMMON_OPT += -DF_INTERFACE_F2C
  FCOMMON_OPT += -Wall
  ifneq ($(OSNAME), CYGWIN_NT-5.1)
    FCOMMON_OPT += -fPIC
  endif
  ifndef NATIVEARCH
    ifdef BINARY64
      FCOMMON_OPT += -m64
    else
      FCOMMON_OPT += -m32
    endif
  endif
endif

# ---- GNU gfortran ---------------------------------------------------
ifeq ($(F_COMPILER), GFORTRAN)
  COMPILER_F77 = gfortran
  BU = _
  CCOMMON_OPT += -DF_INTERFACE_GFORT
  FCOMMON_OPT += -Wall
  ifneq ($(OSNAME), CYGWIN_NT-5.1)
    FCOMMON_OPT += -fPIC
  endif
  ifndef NATIVEARCH
    ifdef BINARY64
      FCOMMON_OPT += -m64
    else
      FCOMMON_OPT += -m32
    endif
  endif
endif

# ---- Intel ifort ----------------------------------------------------
ifeq ($(F_COMPILER), INTEL)
  COMPILER_F77 = ifort
  BU = _
  CCOMMON_OPT += -DF_INTERFACE_F2C
  FCOMMON_OPT += -fPIC
  ifdef INTERFACE64
    FCOMMON_OPT += -i8
  endif
endif

# ---- IBM xlf (no trailing underscore) -------------------------------
ifeq ($(F_COMPILER), IBM)
  COMPILER_F77 = xlf
  BU =
  # FCOMMON_OPT+= -qarch=440
  ifdef BINARY64
    FCOMMON_OPT += -q64
  else
    FCOMMON_OPT += -q32
  endif
endif

# ---- Compaq: "fort" on Linux, "f77" elsewhere -----------------------
ifeq ($(F_COMPILER), COMPAQ)
  ifeq ($(OSNAME), Linux)
    COMPILER_F77 = fort
    FCOMMON_OPT += -fPIC
  else
    COMPILER_F77 = f77
  endif
  BU = _
endif

# ---- PGI pgf77 ------------------------------------------------------
ifeq ($(F_COMPILER), PGI)
  COMPILER_F77 = pgf77
  BU = _
  CCOMMON_OPT += -DF_INTERFACE_F2C
  FCOMMON_OPT += -fPIC
  COMMON_PROF += -DPGICOMPILER
  ifdef BINARY64
    FCOMMON_OPT += -tp k8-64
    EXTRALIB += -L/opt/pgi/linux86-64/6.0/lib -lpgc -lpgf90rtl
    ifdef INTERFACE64
      FCOMMON_OPT += -i8
    endif
  else
    # FCOMMON_OPT += -tp k8-32
    FCOMMON_OPT += -tp p7
    EXTRALIB += -L/opt/pgi/linux86/6.0/lib -lpgc
  endif
endif

# Threaded builds link against pthreads, whichever compiler is selected.
# Kept at this position so it follows any PGI entries in EXTRALIB.
ifdef SMP
  EXTRALIB += -lpthread
endif

# ---- PathScale pathf90 ----------------------------------------------
ifeq ($(F_COMPILER), PATHSCALE)
  COMPILER_F77 = pathf90
  BU = _
  CCOMMON_OPT += -DAMD_ABI -DF_PATHSCALE
  FCOMMON_OPT += -fPIC
  ifdef BINARY64
    FCOMMON_OPT += -m64
    ifdef INTERFACE64
      FCOMMON_OPT += -i8
    endif
  else
    CCOMMON_OPT += -DF_INTERFACE_F2C
    FCOMMON_OPT += -m32
  endif
endif

# ---- Sun f90 --------------------------------------------------------
ifeq ($(F_COMPILER), SUN)
  COMPILER_F77 = f90
  BU = _
  CCOMMON_OPT += -DF_SUN
  FCOMMON_OPT += -pic
  ifndef BINARY64
    CCOMMON_OPT += -DF_INTERFACE_F2C
  endif
endif

# ---- f2c through the f2cf77 driver ----------------------------------
ifeq ($(F_COMPILER), F2C)
  COMPILER_F77 = f2cf77
  BU = _
  CCOMMON_OPT += -DF_INTERFACE_F2C -DNEED_F2CCONV
  FCOMMON_OPT += -Wall -fPIC
endif
# The Windows (Cygwin) version doesn't support threads yet, so any SMP
# request is cancelled there.
ifeq ($(OSNAME), CYGWIN_NT-5.1)
  SMP =
endif

# Threaded build: define SMP_SERVER for the C sources and, for gcc on
# Linux, compile with -pthread.
ifdef SMP
  CCOMMON_OPT += -DSMP_SERVER
  ifeq ($(C_COMPILER), GNU)
    ifeq ($(OSNAME), Linux)
      CCOMMON_OPT += -pthread
    endif
  endif
endif

# Compiler handed to getarch; gcc unless the user named (a non-empty) one.
ifndef GETARCH_CC
  GETARCH_CC = gcc
endif
# Ask the getarch helper in $(TOPDIR) for the properties of the target CPU.
# The first argument is a selector (0-7); its meaning is defined by getarch
# itself, the variable names below reflect how each answer is used here.
# "cd ... &&" (rather than ";") makes sure getarch is never run from the
# wrong directory when $(TOPDIR) cannot be entered: the query then simply
# yields an empty value.
# ":=" keeps every probe to a single execution at parse time.
ARCH := $(shell (cd $(TOPDIR) && ./getarch 0 $(GETARCH_CC) $(GETARCH_FLAGS)))
SUBARCH := $(shell (cd $(TOPDIR) && ./getarch 1 $(GETARCH_CC) $(GETARCH_FLAGS)))
ARCHSUBDIR := $(shell (cd $(TOPDIR) && ./getarch 2 $(GETARCH_CC) $(GETARCH_FLAGS)))
CONFIG := $(shell (cd $(TOPDIR) && ./getarch 3 $(GETARCH_CC) $(GETARCH_FLAGS)))
FU := $(shell (cd $(TOPDIR) && ./getarch 4 $(GETARCH_CC) $(GETARCH_FLAGS)))
LIBSUBARCH := $(shell (cd $(TOPDIR) && ./getarch 5 $(GETARCH_CC) $(GETARCH_FLAGS)))
CORE := $(shell (cd $(TOPDIR) && ./getarch 6 $(GETARCH_CC) $(GETARCH_FLAGS)))

# Only probe for the thread limit when the user did not set MAX_THREADS.
ifndef MAX_THREADS
MAX_THREADS := $(shell (cd $(TOPDIR) && ./getarch 7 $(GETARCH_CC) $(GETARCH_FLAGS)))
endif

# Hand the thread limit to the C sources.
CCOMMON_OPT+= -DMAX_CPU_NUMBER=$(MAX_THREADS)
# Base name shared by every library artefact.
LIBPREFIX = libgoto

# Binutils and preprocessor. COMPILER_PREFIX is not set in this file; when
# it is empty the plain tool names are used.
ARFLAGS =
CPP = $(CC) -E
AR = $(COMPILER_PREFIX)ar
AS = $(COMPILER_PREFIX)as
LD = $(COMPILER_PREFIX)ld
RANLIB = $(COMPILER_PREFIX)ranlib

# Architecture specific settings, selected by the ARCHSUBDIR answer
# obtained from getarch.
include $(TOPDIR)/Makefile.$(ARCHSUBDIR)

# Per-object symbol names. $(*F) is the file part of the stem, so these
# defines differ for every object being compiled.
# NOTE(review): this relies on CCOMMON_OPT being recursively expanded so
# that $(*F) is evaluated inside each recipe -- confirm if CCOMMON_OPT is
# ever assigned with ":=" by an including makefile.
CCOMMON_OPT += -DASMNAME=$(FU)$(*F) -DASMFNAME=$(FU)$(*F)$(BU) -DNAME=$(*F)$(BU) -DCNAME=$(*F)

# Memory allocators required by the PPC440 cores.
ifeq ($(CORE), PPC440)
  CCOMMON_OPT += -DALLOC_QALLOC
endif

ifeq ($(CORE), PPC440FP2)
  CCOMMON_OPT += -DALLOC_STATIC
endif

# Tell the C sources whether symbols need a leading (FU) and/or trailing
# (BU) underscore.
ifeq ($(FU), _)
  CCOMMON_OPT += -DFUNDERSCORE=$(FU) -DNEEDFUNDERSCORE
endif

ifeq ($(BU), _)
  CCOMMON_OPT += -DBUNDERSCORE=$(BU) -DNEEDBUNDERSCORE
endif

# Final flag sets: C, profiled C, Fortran, profiled Fortran.
CFLAGS = $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR) $(CONFIG)
PFLAGS = $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR) $(CONFIG) -DPROFILE $(COMMON_PROF)
# FFLAGS += $(COMMON_OPT) $(FCOMMON_OPT) $(CONFIG)
FFLAGS += $(COMMON_OPT) $(FCOMMON_OPT)
FPFLAGS = $(COMMON_OPT) $(FCOMMON_OPT) $(COMMON_PROF)

# Library file names and the pthread link flag. Threaded (SMP) builds carry
# a "p" after the CPU name; "_p" marks the profiled library.
ifdef SMP
  LIBNAME = $(LIBPREFIX)_$(LIBSUBARCH)p$(REVISION).a
  LIBNAME_P = $(LIBPREFIX)_$(LIBSUBARCH)p$(REVISION)_p.a
  LIBPTHREAD = -lpthread
else
  LIBNAME = $(LIBPREFIX)_$(LIBSUBARCH)$(REVISION).a
  LIBNAME_P = $(LIBPREFIX)_$(LIBSUBARCH)$(REVISION)_p.a
  LIBPTHREAD =
endif

# Names of the other artefacts, derived from the static library name by
# swapping the .a suffix.
LIBSONAME = $(patsubst %.a,%.so,$(LIBNAME))
LIBDLLNAME = $(patsubst %.a,%.dll,$(LIBNAME))
LIBDYNNAME = $(patsubst %.a,%.dylib,$(LIBNAME))
LIBWIN2KNAME = $(patsubst %.a,%.lib,$(LIBNAME))
LIBDEFNAME = $(patsubst %.a,%.def,$(LIBNAME))
LIBEXPNAME = $(patsubst %.a,%.exp,$(LIBNAME))
LIBZIPNAME = $(patsubst %.a,%.zip,$(LIBNAME))

# Full paths of the regular and profiled static libraries.
LIBS = $(TOPDIR)/$(LIBNAME)
LIBS_P = $(TOPDIR)/$(LIBNAME_P)

# Compiler drivers, built from the selections made earlier.
CC = $(COMPILER_PREFIX)$(COMPILER)
FC = $(COMPILER_PREFIX)$(COMPILER_F77)
# Suffix rules for Fortran sources. .po is the suffix of profiled objects,
# so it has to be registered next to .o and .f.
# Recipe lines MUST start with a hard TAB; with the TAB lost, make stops
# with "missing separator".
.SUFFIXES: .po .o .f

# foo.f -> foo.o : regular object.
.f.o:
	$(FC) $(FFLAGS) -c $<

# foo.f -> foo.po : profiled object. "-o $@" is required: without it the
# compiler writes foo.o, so the .po target is never created and the regular
# object is overwritten by the profiled one.
.f.po:
	$(FC) $(FPFLAGS) -pg -c $< -o $@
附件二:xerbla.c
#include <stdio.h>
#include <stdlib.h>
/*
 * xerbla_ - error handler called by a BLAS routine that received an
 * illegal argument.
 *
 *   message  name of the routine reporting the error; Fortran callers pass
 *            a blank-padded buffer that is NOT guaranteed to be
 *            NUL-terminated
 *   info     pointer to the position of the offending argument
 *   length   hidden Fortran string length of message
 *
 * Prints a diagnostic to stderr and terminates the process with status 1;
 * it never returns to the caller.
 */
int xerbla_(char *message, int *info, long length){
    /* A negative precision means "no limit" to printf, which keeps the
       old behaviour when no usable length was supplied. */
    int name_len = -1;

    /* 32767 is the smallest INT_MAX the C standard allows, so the cast
       below can never overflow. */
    if (length > 0 && length <= 32767) {
        name_len = (int)length;
    }

    /* Bound the read by the caller-supplied length so an unterminated
       Fortran string is not read past its end; a NUL still stops earlier. */
    fprintf(stderr, " ** On entry to %6.*s, parameter number %2d had an illegal value\n", name_len, message, *info);
    exit(1);
}
现在紧接着安装hpl,解压方式和以前的一样
1、编辑Makefile,使arch = Linux_PII_Goto
2、新建并编辑Make.Linux_PII_Goto,使内容和附件一致,注意位置和自己机器相符
3、make
4、如无错误,在bin下的Linux_PII_Goto就有HPL.dat和xhpl
附件:
#
# -- High Performance Computing Linpack Benchmark (HPL)
# HPL - 1.0a - January 20, 2004
# Antoine P. Petitet
# University of Tennessee, Knoxville
# Innovative Computing Laboratories
# (C) Copyright 2000-2004 All Rights Reserved
#
# -- Copyright notice and Licensing terms:
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions, and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# 3. All advertising materials mentioning features or use of this
# software must display the following acknowledgement:
# This product includes software developed at the University of
# Tennessee, Knoxville, Innovative Computing Laboratories.
#
# 4. The name of the University, the name of the Laboratory, or the
# names of its contributors may not be used to endorse or promote
# products derived from this software without specific written
# permission.
#
# -- Disclaimer:
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# ######################################################################
#
# ----------------------------------------------------------------------
# - shell --------------------------------------------------------------
# ----------------------------------------------------------------------
#
# Shell used to run every recipe.
SHELL  = /bin/sh
#
# Basic file utilities used by the HPL makefiles.
CD     = cd
CP     = cp
LN_S   = ln -s
MKDIR  = mkdir
RM     = /bin/rm -f
TOUCH  = touch
#
# ----------------------------------------------------------------------
# - Platform identifier ------------------------------------------------
# ----------------------------------------------------------------------
# Name of this build configuration; it is also the name of the bin/ and
# lib/ sub-directories defined below.
#
ARCH   = Linux_PII_Goto
#
# ----------------------------------------------------------------------
# - HPL Directory Structure / HPL library ------------------------------
# ----------------------------------------------------------------------
# TOPdir is the root of the unpacked HPL tree; every other directory is
# derived from it.
#
TOPdir = $(HOME)/hpl
INCdir = $(TOPdir)/include
BINdir = $(TOPdir)/bin/$(ARCH)
LIBdir = $(TOPdir)/lib/$(ARCH)
#
# The HPL library archive.
HPLlib = $(LIBdir)/libhpl.a
#
# ----------------------------------------------------------------------
# - Message Passing library (MPI) --------------------------------------
# ----------------------------------------------------------------------
# MPinc tells the C compiler where to find the Message Passing library
# header files, MPlib is defined to be the name of the library to be
# used. The variable MPdir is only used for defining MPinc and MPlib.
#
# All three are left empty in this configuration.
# NOTE(review): presumably the mpicc wrapper used for CC and LINKER further
# below supplies the MPI include and library paths itself -- confirm for
# your MPI installation.
#
MPdir =
MPinc =
MPlib =
#
# ----------------------------------------------------------------------
# - Linear Algebra library (BLAS or VSIPL) -----------------------------
# ----------------------------------------------------------------------
# LAinc tells the C compiler where to find the Linear Algebra library
# header files, LAlib is defined to be the name of the library to be
# used. The variable LAdir is only used for defining LAinc and LAlib.
#
# LAdir points at the GotoBLAS build tree; adjust it to your own location.
# LAlib links the GotoBLAS shared library together with the separately
# compiled xerbla.o error handler. The shared library file name must match
# the one your own GotoBLAS build produced.
#
LAdir = $(HOME)/GotoBLAS
LAinc =
LAlib = $(LAdir)/libgoto_opteronp-r1.00.so $(LAdir)/xerbla.o
#
# ----------------------------------------------------------------------
# - F77 / C interface --------------------------------------------------
# ----------------------------------------------------------------------
# You can skip this section if and only if you are not planning to use
# a BLAS library featuring a Fortran 77 interface. Otherwise, it is
# necessary to fill out the F2CDEFS variable with the appropriate
# options. **One and only one** option should be chosen in **each** of
# the 3 following categories:
#
# 1) name space (How C calls a Fortran 77 routine)
#
# -DAdd_ : all lower case and a suffixed underscore (Suns,
# Intel, ...), [default]
# -DNoChange : all lower case (IBM RS6000),
# -DUpCase : all upper case (Cray),
# -DAdd__ : the FORTRAN compiler in use is f2c.
#
# 2) C and Fortran 77 integer mapping
#
# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default]
# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long,
# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
#
# 3) Fortran 77 string handling
#
# -DStringSunStyle : The string address is passed at the string loca-
# tion on the stack, and the string length is then
# passed as an F77_INTEGER after all explicit
# stack arguments, [default]
# -DStringStructPtr : The address of a structure is passed by a
# Fortran 77 string, and the structure is of the
# form: struct {char *cp; F77_INTEGER len;},
# -DStringStructVal : A structure is passed by value for each Fortran
# 77 string, and the structure is of the form:
# struct {char *cp; F77_INTEGER len;},
# -DStringCrayStyle : Special option for Cray machines, which uses
# Cray fcd (fortran character descriptor) for
# interoperation.
#
# Left empty here, so the options marked [default] above apply:
# -DAdd_, -DF77_INTEGER=int and -DStringSunStyle.
# NOTE(review): the GotoBLAS Makefile.rule shown earlier sets
# INTERFACE64 = 1, which adds -DUSE64BITINT to that build. If that makes
# the BLAS INTEGER arguments 64 bits wide, the default F77_INTEGER=int no
# longer matches and -DF77_INTEGER=long would be needed here -- confirm
# against the GotoBLAS sources before relying on the results.
#
F2CDEFS =
#
# ----------------------------------------------------------------------
# - HPL includes / libraries / specifics -------------------------------
# ----------------------------------------------------------------------
#
# Header search path: the HPL headers, the per-architecture headers, then
# whatever the linear algebra and MPI sections contributed.
HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
# Link line: HPL itself first, then BLAS, then MPI.
HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
#
# - Compile time options -----------------------------------------------
#
# Switches that may be placed in HPL_OPTS:
#   -DHPL_COPY_L            force the copy of the panel L before bcast;
#   -DHPL_CALL_CBLAS        call the cblas interface;
#   -DHPL_CALL_VSIPL        call the vsip library;
#   -DHPL_DETAILED_TIMING   enable detailed timers;
#
# With HPL_OPTS empty, as it is here, HPL will:
#   *) not copy L before broadcast,
#   *) call the BLAS Fortran 77 interface,
#   *) not display detailed timing information.
#
HPL_OPTS     =
#
# ----------------------------------------------------------------------
#
# Everything the compiler needs besides optimization flags.
HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
#
# ----------------------------------------------------------------------
# - Compilers / linkers - Optimization flags ---------------------------
# ----------------------------------------------------------------------
#
# C compiler: the MPI wrapper is used instead of plain gcc.
#CC = /usr/bin/gcc
CC        = mpicc
# Flags for the sources compiled without optimization.
CCNOOPT   = $(HPL_DEFS)
# Flags for everything else.
CCFLAGS   = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops
#
# On some platforms, it is necessary to use the Fortran linker to find
# the Fortran internals used in the BLAS library. Here the MPI C wrapper
# does the linking.
#
#LINKER = /usr/bin/g77
LINKER    = mpicc
LINKFLAGS = $(CCFLAGS) -lm
#
# Archive creation. RANLIB is set to echo, so that step only prints its
# arguments.
ARCHIVER  = ar
ARFLAGS   = r
RANLIB    = echo
#
# ----------------------------------------------------------------------
|
|